No gradients provided for any variable (TensorFlow / Python)

I am trying to build an implicit quantile network. I built a custom loss function but cannot get it working. I get the error 'no gradients available', but I believe I only use functions that should provide gradients, like tf.tile and similar ops. I don't explicitly cast anything in my loss_kv_iq() function.
Below I provide the code for my custom layer (IQNlayer), the network I use (IQN), and my custom loss function, as well as a small piece of code in the main block that should reproduce the error.
TF version: 2.1.0
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np


class IQN(keras.Model):
    def __init__(self, quantile_dims, fc_dims, n_actions, n_quantiles):
        super(IQN, self).__init__()
        self.n_quantiles = n_quantiles
        initializer = keras.initializers.he_uniform()
        self.iq = IQNlayer(quantile_dims, n_quantiles)
        self.dense = keras.layers.Dense(fc_dims, activation='relu', kernel_initializer=initializer)
        self.out = keras.layers.Dense(n_actions, activation=None)

    def call(self, state, tau):
        batch_size, state_size = state.shape
        x = self.iq(state, tau)
        x = self.dense(x)
        x = self.out(x)
        x = tf.transpose(tf.split(x, batch_size, axis=0), perm=[0, 2, 1])
        return x


class IQNlayer(keras.layers.Layer):
    def __init__(self, quantile_dims, n_quantiles):
        super(IQNlayer, self).__init__()
        self.quantile_dims = quantile_dims
        self.n_quantiles = n_quantiles
        self.fc1 = keras.layers.Dense(self.quantile_dims, activation=tf.nn.selu)
        self.fc2 = keras.layers.Dense(self.quantile_dims, activation=tf.nn.relu)

    def call(self, state, tau):
        batch_size, state_size = state.shape
        state_tile = tf.tile(state, [1, self.n_quantiles])
        state_reshape = tf.reshape(state_tile, [-1, state_size])
        state_net = self.fc1(state_reshape)
        tau = tf.reshape(tau, [-1, 1])
        pi_mtx = tf.constant(np.expand_dims(np.pi * np.arange(0, 64), axis=0), dtype=tf.float32)
        cos_tau = tf.cos(tf.matmul(tau, pi_mtx))
        phi = self.fc2(cos_tau)
        net = tf.multiply(state_net, phi)
        return net


def loss_kv_iq(x, tau, action_hot, theta_target):
    expand_dim_action = tf.expand_dims(action_hot, -1)
    main_support = tf.reduce_sum(x * expand_dim_action, axis=1)
    theta_loss_tile = tf.tile(tf.expand_dims(main_support, axis=2), [1, 1, N_QUANTILES])
    logit_valid_tile = tf.tile(tf.expand_dims(theta_target, axis=1), [1, N_QUANTILES, 1])
    Huber_loss = hloss(logit_valid_tile, theta_loss_tile)
    inv_tau = 1 - tau
    tau = tf.tile(tf.expand_dims(tau, axis=1), [1, N_QUANTILES, 1])
    inv_tau = tf.tile(tf.expand_dims(inv_tau, axis=1), [1, N_QUANTILES, 1])
    error_loss = logit_valid_tile - theta_loss_tile
    Loss = tf.where(tf.less(error_loss, 0.0), inv_tau * Huber_loss, tau * Huber_loss)
    loss = tf.reduce_mean(tf.reduce_sum(tf.reduce_mean(Loss, axis=2), axis=1))
    return loss


if __name__ == '__main__':
    hloss = tf.keras.losses.Huber(reduction=tf.keras.losses.Reduction.NONE)
    N_QUANTILES = 10
    BATCH_SIZE = 2
    ACTION_SIZE = 5
    STATE_SIZE = 16

    # FOR EXAMPLE: RANDOM BATCH
    cs = np.random.rand(BATCH_SIZE, STATE_SIZE)
    a = np.random.randint(0, 5, size=(2))
    r = np.random.randint(0, 500, size=(2))
    ns = np.random.rand(BATCH_SIZE, STATE_SIZE)
    tau = np.random.uniform(size=(BATCH_SIZE, N_QUANTILES))
    tau = tau.astype('float32')

    iq = IQN(128, 128, ACTION_SIZE, N_QUANTILES)
    action_hot = np.zeros((BATCH_SIZE, ACTION_SIZE), dtype=np.float32)
    action_hot[np.arange(BATCH_SIZE), a] = 1
    Q = iq(ns, tau)

    theta_target = np.random.rand(BATCH_SIZE, N_QUANTILES)
    theta_target = theta_target.astype('float32')

    optimizer = tf.keras.optimizers.Adam(lr=1e-3)

    with tf.GradientTape() as tape:
        loss = loss_kv_iq(Q, tau, action_hot, theta_target)
    grads = tape.gradient(loss, iq.trainable_weights)
    optimizer.apply_gradients(zip(grads, iq.trainable_weights))
Error:
Traceback (most recent call last):
File "C:\Users\rensj\.spyder-py3\Thesis\test.py", line 106, in <module>
optimizer.apply_gradients(zip(grads,iq.trainable_weights))
File "C:\Users\rensj\Anaconda3\envs\tfnew\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 426, in apply_gradients
grads_and_vars = _filter_grads(grads_and_vars)
File "C:\Users\rensj\Anaconda3\envs\tfnew\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 1039, in _filter_grads
([v.name for _, v in grads_and_vars],))
ValueError: No gradients provided for any variable: ['iqn_4/iq_nlayer_4/dense_16/kernel:0', 'iqn_4/iq_nlayer_4/dense_16/bias:0', 'iqn_4/iq_nlayer_4/dense_17/kernel:0', 'iqn_4/iq_nlayer_4/dense_17/bias:0', 'iqn_4/dense_18/kernel:0', 'iqn_4/dense_18/bias:0', 'iqn_4/dense_19/kernel:0', 'iqn_4/dense_19/bias:0'].
EDIT:
As Mr. Agrawal pointed out, I was using numpy operations in pi_mtx. I changed these to their TensorFlow counterparts, and together with some other small changes to the same line, it becomes:
pi_mtx = tf.constant(tf.expand_dims(tf.constant(np.pi) * tf.range(0, 64, dtype=tf.float32), axis=0), dtype=tf.float32)
However, I still get the same ValueError: No gradients provided.

In the line
pi_mtx = tf.constant(np.expand_dims(np.pi * np.arange(0, 64), axis=0), dtype=tf.float32)
you're using numpy functions. Change them to their TensorFlow counterparts:
np.expand_dims -> tf.expand_dims
np.arange -> tf.keras.backend.arange OR tf.range
You can use np.pi, since that is a constant, not an operation.
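For reference, a minimal sketch of that line rewritten with TensorFlow ops (np.pi kept as a plain constant, as noted above) might look like:
pi_mtx = tf.expand_dims(np.pi * tf.range(0, 64, dtype=tf.float32), axis=0)  # shape (1, 64), dtype float32
The outer tf.constant wrapper from the question's edit should then be unnecessary, since the expression already yields a float32 tensor.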

Related

TensorFlow: Trainable variable not getting learnt

I am trying to implement a custom modified ReLU in TensorFlow 1 that uses two learnable parameters. But the parameters are not being learnt even after running 1000 training steps, as shown by printing their values before and after training. I have observed that inside the function, when I do not split x, i.e. execute the commented lines, the coefficients are learnt. Could anyone suggest why splitting the input results in the trainable coefficients not being learnt, and how this can be resolved?
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()
def weight_variable(shape,vari_name):
initial = tf.truncated_normal(shape, stddev=0.1,dtype=tf.float32)
return tf.Variable(initial,name = vari_name)
def init_Prelu_coefficient(var1, var2):
coeff = tf.truncated_normal(([1]), stddev=0.1,dtype=tf.float32)
coeff1 = tf.truncated_normal(([1]), stddev=0.1,dtype=tf.float32)
return tf.Variable(coeff, trainable=True, name=var1), tf.Variable(coeff1, trainable=True, name=var2)
def Prelu(x, coeff, coeff1):
s = int(x.shape[-1])
sop = x[:,:,:,:s//2]*coeff+x[:,:,:,s//2:]*coeff1
sop1 = x[:,:,:,:s//2]*coeff-x[:,:,:,s//2:]*coeff1
copied_variable = tf.concat([sop, sop1], axis=-1)
copied_variable = tf.math.maximum(copied_variable,0)/copied_variable
# copied_variable = tf.identity(x)
# copied_variable = tf.math.maximum(copied_variable*coeff+copied_variable*coeff1,0)/copied_variable
# copied_variable = tf.multiply(copied_variable,x)
return copied_variable
def conv2d_dilate(x, W, dilate_rate):
return tf.nn.convolution(x, W,padding='VALID',dilation_rate = [1,dilate_rate])
matr = np.random.rand(1, 60, 40, 8)
target = np.random.rand(1, 58, 36, 8)
def learning(sess):
# define placeholder for inputs to network
Input = tf.placeholder(tf.float32, [1, 60, 40, 8])
input_Target = tf.placeholder(tf.float32, [1, 58, 36, 8])
kernel = weight_variable([3, 3, 8, 8],'G1')
coeff, coeff1 = init_Prelu_coefficient('alpha', 'alpha1')
conv = Prelu(conv2d_dilate(Input, kernel , 2), coeff, coeff1)
error_norm = 1*tf.norm(input_Target - conv)
print("MOMENTUM LEARNING")
train_step = tf.train.MomentumOptimizer(learning_rate=0.001,momentum=0.9,use_nesterov=False).minimize(error_norm)
if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
init = tf.initialize_all_variables()
else:
init = tf.global_variables_initializer()
sess.run(init)
print("INIT coefficient ", sess.run(coeff), sess.run(coeff1))
init_var = tf.trainable_variables()
error_prev = 1 # initial error, we set 1 and it began to decrease.
for i in range(1000):
sess.run(train_step, feed_dict={Input: matr, input_Target: target})
if i % 100 == 0:
error_now=sess.run(error_norm,feed_dict={Input : matr, input_Target: target})
print('The',i,'th iteration gives an error',error_now)
error = sess.run(error_norm,feed_dict={Input: matr, input_Target: target})
print(sess.run(kernel))
print("LEARNT coefficient ", sess.run(coeff), sess.run(coeff1))
sess = tf.Session()
learning(sess)

Tensorflow Attribute Error: 'method' object has no attribute '_from_serialized'

I'm trying to train a model in TensorFlow and get the error:
AttributeError: 'method' object has no attribute '_from_serialized'
This is code that I have copied and have previously seen work.
It seems to have something to do with the compatibility of my TensorFlow and Python versions. I'm able to run other models, but this error seems to occur when I try to track custom metrics.
What are the most recent compatible versions of TensorFlow-GPU and Python that can run models while tracking custom metrics?
I've checked the table that TensorFlow provides, and these versions should be compatible.
My current TensorFlow version is 2.10.0.
My Python version is 3.9.6.
Is there something else that might cause this error? I've created multiple environments with different versions and still receive it.
import os
import pickle
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Conv2D, ReLU, BatchNormalization, \
Flatten, Dense, Reshape, Conv2DTranspose, Activation, Lambda
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
import numpy as np
import tensorflow as tf
class VAE:
"""
VAE represents a Deep Convolutional variational autoencoder architecture
with mirrored encoder and decoder components.
"""
def __init__(self,
input_shape,
conv_filters,
conv_kernels,
conv_strides,
latent_space_dim):
self.input_shape = input_shape # [28, 28, 1]
self.conv_filters = conv_filters # [2, 4, 8]
self.conv_kernels = conv_kernels # [3, 5, 3]
self.conv_strides = conv_strides # [1, 2, 2]
self.latent_space_dim = latent_space_dim # 2
self.reconstruction_loss_weight = 1000
self.encoder = None
self.decoder = None
self.model = None
self._num_conv_layers = len(conv_filters)
self._shape_before_bottleneck = None
self._model_input = None
self._build()
def summary(self):
self.encoder.summary()
self.decoder.summary()
self.model.summary()
def compile(self, learning_rate=0.0001):
optimizer = Adam(learning_rate=learning_rate)
self.model.compile(optimizer=optimizer,
loss=self._calculate_combined_loss,
metrics=[self._calculate_reconstruction_loss,
self._calculate_kl_loss])
def train(self, x_train, batch_size, num_epochs):
self.model.fit(x_train,
x_train,
batch_size=batch_size,
epochs=num_epochs,
shuffle=True)
def save(self, save_folder="."):
self._create_folder_if_it_doesnt_exist(save_folder)
self._save_parameters(save_folder)
self._save_weights(save_folder)
def load_weights(self, weights_path):
self.model.load_weights(weights_path)
def reconstruct(self, images):
latent_representations = self.encoder.predict(images)
reconstructed_images = self.decoder.predict(latent_representations)
return reconstructed_images, latent_representations
@classmethod
def load(cls, save_folder="."):
parameters_path = os.path.join(save_folder, "parameters.pkl")
with open(parameters_path, "rb") as f:
parameters = pickle.load(f)
autoencoder = VAE(*parameters)
weights_path = os.path.join(save_folder, "weights.h5")
autoencoder.load_weights(weights_path)
return autoencoder
def _calculate_combined_loss(self, y_target, y_predicted):
reconstruction_loss = self._calculate_reconstruction_loss(y_target, y_predicted)
kl_loss = self._calculate_kl_loss(y_target, y_predicted)
combined_loss = self.reconstruction_loss_weight * reconstruction_loss\
+ kl_loss
return combined_loss
def _calculate_reconstruction_loss(self, y_target, y_predicted):
error = y_target - y_predicted
reconstruction_loss = K.mean(K.square(error), axis=[1, 2, 3])
return reconstruction_loss
def _calculate_kl_loss(self, y_target, y_predicted):
kl_loss = -0.5 * K.sum(1 + self.log_variance - K.square(self.mu) -
K.exp(self.log_variance), axis=1)
return kl_loss
def _create_folder_if_it_doesnt_exist(self, folder):
if not os.path.exists(folder):
os.makedirs(folder)
def _save_parameters(self, save_folder):
parameters = [
self.input_shape,
self.conv_filters,
self.conv_kernels,
self.conv_strides,
self.latent_space_dim
]
save_path = os.path.join(save_folder, "parameters.pkl")
with open(save_path, "wb") as f:
pickle.dump(parameters, f)
def _save_weights(self, save_folder):
save_path = os.path.join(save_folder, "weights.h5")
self.model.save_weights(save_path)
def _build(self):
self._build_encoder()
self._build_decoder()
self._build_autoencoder()
def _build_autoencoder(self):
model_input = self._model_input
model_output = self.decoder(self.encoder(model_input))
self.model = Model(model_input, model_output, name="autoencoder")
def _build_decoder(self):
decoder_input = self._add_decoder_input()
dense_layer = self._add_dense_layer(decoder_input)
reshape_layer = self._add_reshape_layer(dense_layer)
conv_transpose_layers = self._add_conv_transpose_layers(reshape_layer)
decoder_output = self._add_decoder_output(conv_transpose_layers)
self.decoder = Model(decoder_input, decoder_output, name="decoder")
def _add_decoder_input(self):
return Input(shape=self.latent_space_dim, name="decoder_input")
def _add_dense_layer(self, decoder_input):
num_neurons = np.prod(self._shape_before_bottleneck) # [1, 2, 4] -> 8
dense_layer = Dense(num_neurons, name="decoder_dense")(decoder_input)
return dense_layer
def _add_reshape_layer(self, dense_layer):
return Reshape(self._shape_before_bottleneck)(dense_layer)
def _add_conv_transpose_layers(self, x):
"""Add conv transpose blocks."""
# loop through all the conv layers in reverse order and stop at the
# first layer
for layer_index in reversed(range(1, self._num_conv_layers)):
x = self._add_conv_transpose_layer(layer_index, x)
return x
def _add_conv_transpose_layer(self, layer_index, x):
layer_num = self._num_conv_layers - layer_index
conv_transpose_layer = Conv2DTranspose(
filters=self.conv_filters[layer_index],
kernel_size=self.conv_kernels[layer_index],
strides=self.conv_strides[layer_index],
padding="same",
name=f"decoder_conv_transpose_layer_{layer_num}"
)
x = conv_transpose_layer(x)
x = ReLU(name=f"decoder_relu_{layer_num}")(x)
x = BatchNormalization(name=f"decoder_bn_{layer_num}")(x)
return x
def _add_decoder_output(self, x):
conv_transpose_layer = Conv2DTranspose(
filters=1,
kernel_size=self.conv_kernels[0],
strides=self.conv_strides[0],
padding="same",
name=f"decoder_conv_transpose_layer_{self._num_conv_layers}"
)
x = conv_transpose_layer(x)
output_layer = Activation("sigmoid", name="sigmoid_layer")(x)
return output_layer
def _build_encoder(self):
encoder_input = self._add_encoder_input()
conv_layers = self._add_conv_layers(encoder_input)
bottleneck = self._add_bottleneck(conv_layers)
self._model_input = encoder_input
self.encoder = Model(encoder_input, bottleneck, name="encoder")
def _add_encoder_input(self):
return Input(shape=self.input_shape, name="encoder_input")
def _add_conv_layers(self, encoder_input):
"""Create all convolutional blocks in encoder."""
x = encoder_input
for layer_index in range(self._num_conv_layers):
x = self._add_conv_layer(layer_index, x)
return x
def _add_conv_layer(self, layer_index, x):
"""Add a convolutional block to a graph of layers, consisting of
conv 2d + ReLU + batch normalization.
"""
layer_number = layer_index + 1
conv_layer = Conv2D(
filters=self.conv_filters[layer_index],
kernel_size=self.conv_kernels[layer_index],
strides=self.conv_strides[layer_index],
padding="same",
name=f"encoder_conv_layer_{layer_number}"
)
x = conv_layer(x)
x = ReLU(name=f"encoder_relu_{layer_number}")(x)
x = BatchNormalization(name=f"encoder_bn_{layer_number}")(x)
return x
def _add_bottleneck(self, x):
"""Flatten data and add bottleneck with Guassian sampling (Dense
layer).
"""
self._shape_before_bottleneck = K.int_shape(x)[1:]
x = Flatten()(x)
self.mu = Dense(self.latent_space_dim, name="mu")(x)
self.log_variance = Dense(self.latent_space_dim,
name="log_variance")(x)
def sample_point_from_normal_distribution(args):
mu, log_variance = args
epsilon = K.random_normal(shape=K.shape(self.mu), mean=0.,
stddev=1.)
sampled_point = mu + K.exp(log_variance / 2) * epsilon
return sampled_point
x = Lambda(sample_point_from_normal_distribution,
name="encoder_output")([self.mu, self.log_variance])
return x
if __name__ == "__main__":
autoencoder = VAE(
input_shape=(28, 28, 1),
conv_filters=(32, 64, 64, 64),
conv_kernels=(3, 3, 3, 3),
conv_strides=(1, 2, 2, 1),
latent_space_dim=2
)
autoencoder.summary()
LEARNING_RATE = 0.0005
BATCH_SIZE = 32
EPOCHS = 100
def load_mnist():
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train.astype("float32") / 255
x_train = x_train.reshape(x_train.shape + (1,))
x_test = x_test.astype("float32") / 255
x_test = x_test.reshape(x_test.shape + (1,))
return x_train, y_train, x_test, y_test
def train(x_train, learning_rate, batch_size, epochs):
autoencoder = VAE(
input_shape=(28, 28, 1),
conv_filters=(32, 64, 64, 64),
conv_kernels=(3, 3, 3, 3),
conv_strides=(1, 2, 2, 1),
latent_space_dim=2
)
autoencoder.summary()
autoencoder.compile(learning_rate)
autoencoder.train(x_train, batch_size, epochs)
return autoencoder
if __name__ == "__main__":
x_train, _, _, _ = load_mnist()
autoencoder = train(x_train[:10000], LEARNING_RATE, BATCH_SIZE, EPOCHS)
autoencoder.save("model")
The attribute error here is raised because you can't set an attribute on a method object, for example:
class Foo:
    def bar(self):
        print("bar")

if __name__ == "__main__":
    Foo().bar.baz = 1
Output:
Traceback (most recent call last): line 7, in <module>
Foo().bar.baz = 1
AttributeError: 'method' object has no attribute 'baz'
When collecting the metric information in training_utils_v1, the metrics specified when the model is compiled (model.compile(..., metrics=[..])) are iterated over, and for each metric, the attribute _from_serialized is set:
for i, metrics in enumerate(nested_metrics):
    metrics_dict = collections.OrderedDict()
    for metric in metrics:
        metric_name = get_metric_name(metric, is_weighted)
        metric_fn = get_metric_function(
            metric, output_shape=output_shapes[i], loss_fn=loss_fns[i]
        )
        metric_fn._from_serialized = from_serialized
In the example provided, two metrics are supplied to model.compile, and each is a method of the VAE class:
def compile(self, learning_rate=0.0001):
    optimizer = Adam(learning_rate=learning_rate)
    self.model.compile(optimizer=optimizer,
                       loss=self._calculate_combined_loss,
                       metrics=[self._calculate_reconstruction_loss,
                                self._calculate_kl_loss])
To test this, observe that if metrics is omitted entirely, training starts successfully.
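For instance, a minimal sketch of that check (the same compile call with the metrics argument simply dropped):
self.model.compile(optimizer=optimizer, loss=self._calculate_combined_loss)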
One of the metrics supplied, _calculate_reconstruction_loss, is a method that does not need to be a method, as it does not refer to self in its body:
def _calculate_reconstruction_loss(self, y_target, y_predicted):
    error = y_target - y_predicted
    reconstruction_loss = K.mean(K.square(error), axis=[1, 2, 3])
    return reconstruction_loss
So it can be moved outside the class and turned into a plain function (some IDEs will suggest this with a message to the effect of "Make function from method"):
def _calculate_reconstruction_loss(y_target, y_predicted):
    error = y_target - y_predicted
    reconstruction_loss = K.mean(K.square(error), axis=[1, 2, 3])
    return reconstruction_loss
The compile statement can then be revised:
self.model.compile(optimizer=optimizer,
                   loss=self._calculate_combined_loss,
                   metrics=[_calculate_reconstruction_loss,
                            self._calculate_kl_loss])
The same exception will appear, since we're still referring to a method in the call (self._calculate_kl_loss), but if self._calculate_kl_loss is omitted, the training will start successfully:
self.model.compile(optimizer=optimizer,
                   loss=self._calculate_combined_loss,
                   metrics=[_calculate_reconstruction_loss])
For completeness: what self._calculate_kl_loss does is a bit more tricky, but we can still use it as a metric by converting it into a function that takes a single argument, model, and returns another function that accepts an arbitrary number of arguments (*args), so it can be used both as a metric (which always expects a function of two arguments) and in the loss function:
def calculate_kl_loss(model):
    # wrap `_calculate_kl_loss` such that it takes the model as an argument,
    # returns a function which can take arbitrary number of arguments
    # (for compatibility with `metrics` and utility in the loss function)
    # and returns the kl loss
    def _calculate_kl_loss(*args):
        kl_loss = -0.5 * K.sum(1 + model.log_variance - K.square(model.mu) -
                               K.exp(model.log_variance), axis=1)
        return kl_loss
    return _calculate_kl_loss
With these revisions, when training is started, the output is:
Epoch 1/100
10000/10000 [==============================] - 43s 4ms/sample - loss: 89.4243 - _calculate_reconstruction_loss: 0.0833 - _calculate_kl_loss: 6.1707
Epoch 2/100
10000/10000 [==============================] - 46s 5ms/sample - loss: 69.0131 - _calculate_reconstruction_loss: 0.0619 - _calculate_kl_loss: 7.1129
The entire snippet with revisions detailed above is:
import os
import pickle
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Conv2D, ReLU, BatchNormalization, \
Flatten, Dense, Reshape, Conv2DTranspose, Activation, Lambda
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
import numpy as np
import tensorflow as tf
from tensorflow.python.framework.ops import disable_eager_execution
disable_eager_execution()
def _calculate_reconstruction_loss(y_target, y_predicted):
error = y_target - y_predicted
reconstruction_loss = K.mean(K.square(error), axis=[1, 2, 3])
return reconstruction_loss
def calculate_kl_loss(model):
# wrap `_calculate_kl_loss` such that it takes the model as an argument,
# returns a function which can take arbitrary number of arguments
# (for compatibility with `metrics` and utility in the loss function)
# and returns the kl loss
def _calculate_kl_loss(*args):
kl_loss = -0.5 * K.sum(1 + model.log_variance - K.square(model.mu) -
K.exp(model.log_variance), axis=1)
return kl_loss
return _calculate_kl_loss
class VAE:
"""
VAE represents a Deep Convolutional variational autoencoder architecture
with mirrored encoder and decoder components.
"""
def __init__(self,
input_shape,
conv_filters,
conv_kernels,
conv_strides,
latent_space_dim):
self.input_shape = input_shape # [28, 28, 1]
self.conv_filters = conv_filters # [2, 4, 8]
self.conv_kernels = conv_kernels # [3, 5, 3]
self.conv_strides = conv_strides # [1, 2, 2]
self.latent_space_dim = latent_space_dim # 2
self.reconstruction_loss_weight = 1000
self.encoder = None
self.decoder = None
self.model = None
self._num_conv_layers = len(conv_filters)
self._shape_before_bottleneck = None
self._model_input = None
self._build()
def summary(self):
self.encoder.summary()
self.decoder.summary()
self.model.summary()
def compile(self, learning_rate=0.0001):
optimizer = Adam(learning_rate=learning_rate)
self.model.compile(optimizer=optimizer,
loss=self._calculate_combined_loss,
metrics=[_calculate_reconstruction_loss,
calculate_kl_loss(self)])
def train(self, x_train, batch_size, num_epochs):
self.model.fit(x_train,
x_train,
batch_size=batch_size,
epochs=num_epochs,
shuffle=True)
def save(self, save_folder="."):
self._create_folder_if_it_doesnt_exist(save_folder)
self._save_parameters(save_folder)
self._save_weights(save_folder)
def load_weights(self, weights_path):
self.model.load_weights(weights_path)
def reconstruct(self, images):
latent_representations = self.encoder.predict(images)
reconstructed_images = self.decoder.predict(latent_representations)
return reconstructed_images, latent_representations
@classmethod
def load(cls, save_folder="."):
parameters_path = os.path.join(save_folder, "parameters.pkl")
with open(parameters_path, "rb") as f:
parameters = pickle.load(f)
autoencoder = VAE(*parameters)
weights_path = os.path.join(save_folder, "weights.h5")
autoencoder.load_weights(weights_path)
return autoencoder
def _calculate_combined_loss(self, y_target, y_predicted):
reconstruction_loss = _calculate_reconstruction_loss(y_target, y_predicted)
kl_loss = calculate_kl_loss(self)()
combined_loss = self.reconstruction_loss_weight * reconstruction_loss\
+ kl_loss
return combined_loss
def _create_folder_if_it_doesnt_exist(self, folder):
if not os.path.exists(folder):
os.makedirs(folder)
def _save_parameters(self, save_folder):
parameters = [
self.input_shape,
self.conv_filters,
self.conv_kernels,
self.conv_strides,
self.latent_space_dim
]
save_path = os.path.join(save_folder, "parameters.pkl")
with open(save_path, "wb") as f:
pickle.dump(parameters, f)
def _save_weights(self, save_folder):
save_path = os.path.join(save_folder, "weights.h5")
self.model.save_weights(save_path)
def _build(self):
self._build_encoder()
self._build_decoder()
self._build_autoencoder()
def _build_autoencoder(self):
model_input = self._model_input
model_output = self.decoder(self.encoder(model_input))
self.model = Model(model_input, model_output, name="autoencoder")
def _build_decoder(self):
decoder_input = self._add_decoder_input()
dense_layer = self._add_dense_layer(decoder_input)
reshape_layer = self._add_reshape_layer(dense_layer)
conv_transpose_layers = self._add_conv_transpose_layers(reshape_layer)
decoder_output = self._add_decoder_output(conv_transpose_layers)
self.decoder = Model(decoder_input, decoder_output, name="decoder")
def _add_decoder_input(self):
return Input(shape=self.latent_space_dim, name="decoder_input")
def _add_dense_layer(self, decoder_input):
num_neurons = np.prod(self._shape_before_bottleneck) # [1, 2, 4] -> 8
dense_layer = Dense(num_neurons, name="decoder_dense")(decoder_input)
return dense_layer
def _add_reshape_layer(self, dense_layer):
return Reshape(self._shape_before_bottleneck)(dense_layer)
def _add_conv_transpose_layers(self, x):
"""Add conv transpose blocks."""
# loop through all the conv layers in reverse order and stop at the
# first layer
for layer_index in reversed(range(1, self._num_conv_layers)):
x = self._add_conv_transpose_layer(layer_index, x)
return x
def _add_conv_transpose_layer(self, layer_index, x):
layer_num = self._num_conv_layers - layer_index
conv_transpose_layer = Conv2DTranspose(
filters=self.conv_filters[layer_index],
kernel_size=self.conv_kernels[layer_index],
strides=self.conv_strides[layer_index],
padding="same",
name=f"decoder_conv_transpose_layer_{layer_num}"
)
x = conv_transpose_layer(x)
x = ReLU(name=f"decoder_relu_{layer_num}")(x)
x = BatchNormalization(name=f"decoder_bn_{layer_num}")(x)
return x
def _add_decoder_output(self, x):
conv_transpose_layer = Conv2DTranspose(
filters=1,
kernel_size=self.conv_kernels[0],
strides=self.conv_strides[0],
padding="same",
name=f"decoder_conv_transpose_layer_{self._num_conv_layers}"
)
x = conv_transpose_layer(x)
output_layer = Activation("sigmoid", name="sigmoid_layer")(x)
return output_layer
def _build_encoder(self):
encoder_input = self._add_encoder_input()
conv_layers = self._add_conv_layers(encoder_input)
bottleneck = self._add_bottleneck(conv_layers)
self._model_input = encoder_input
self.encoder = Model(encoder_input, bottleneck, name="encoder")
def _add_encoder_input(self):
return Input(shape=self.input_shape, name="encoder_input")
def _add_conv_layers(self, encoder_input):
"""Create all convolutional blocks in encoder."""
x = encoder_input
for layer_index in range(self._num_conv_layers):
x = self._add_conv_layer(layer_index, x)
return x
def _add_conv_layer(self, layer_index, x):
"""Add a convolutional block to a graph of layers, consisting of
conv 2d + ReLU + batch normalization.
"""
layer_number = layer_index + 1
conv_layer = Conv2D(
filters=self.conv_filters[layer_index],
kernel_size=self.conv_kernels[layer_index],
strides=self.conv_strides[layer_index],
padding="same",
name=f"encoder_conv_layer_{layer_number}"
)
x = conv_layer(x)
x = ReLU(name=f"encoder_relu_{layer_number}")(x)
x = BatchNormalization(name=f"encoder_bn_{layer_number}")(x)
return x
def _add_bottleneck(self, x):
"""Flatten data and add bottleneck with Guassian sampling (Dense
layer).
"""
self._shape_before_bottleneck = K.int_shape(x)[1:]
x = Flatten()(x)
self.mu = Dense(self.latent_space_dim, name="mu")(x)
self.log_variance = Dense(self.latent_space_dim,
name="log_variance")(x)
def sample_point_from_normal_distribution(args):
mu, log_variance = args
epsilon = K.random_normal(shape=K.shape(self.mu), mean=0.,
stddev=1.)
sampled_point = mu + K.exp(log_variance / 2) * epsilon
return sampled_point
x = Lambda(sample_point_from_normal_distribution,
name="encoder_output")([self.mu, self.log_variance])
return x
LEARNING_RATE = 0.0005
BATCH_SIZE = 32
EPOCHS = 100
def load_mnist():
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train.astype("float32") / 255
x_train = x_train.reshape(x_train.shape + (1,))
x_test = x_test.astype("float32") / 255
x_test = x_test.reshape(x_test.shape + (1,))
return x_train, y_train, x_test, y_test
def train(x_train, learning_rate, batch_size, epochs):
autoencoder = VAE(
input_shape=(28, 28, 1),
conv_filters=(32, 64, 64, 64),
conv_kernels=(3, 3, 3, 3),
conv_strides=(1, 2, 2, 1),
latent_space_dim=2
)
autoencoder.summary()
autoencoder.compile(learning_rate)
autoencoder.train(x_train, batch_size, epochs)
return autoencoder
if __name__ == "__main__":
x_train, _, _, _ = load_mnist()
autoencoder = train(x_train[:10000], LEARNING_RATE, BATCH_SIZE, EPOCHS)
autoencoder.save("model")
Maybe you forgot to call the method that should return the object on which _from_serialized() is then called.
You did not show your code, but what the error suggests is this:
# you probably passed this
my_method._from_serialized()
# instead of
my_method()._from_serialized()
Otherwise, if it's an error related to loading a model, I found an issue on GitHub that may help you.
To quote it:
I have the same problem in Keras version: 2.3.0 and in my case, this behaviour can be fixed by using tf.keras.models.load_model instead of direct load_model. I also change every import statement from 'keras' to 'tensorflow.keras' to avoid crash between old keras and new tensorflow.keras. hope it helps, cheers.
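For what it's worth, a minimal sketch of that suggestion (the file path here is hypothetical, not taken from the question):
import tensorflow as tf
model = tf.keras.models.load_model("path/to/model.h5")  # tf.keras loader instead of the standalone keras load_model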

tensorflow.keras.layers.RNN: TypeError: cannot unpack non-iterable RNN object

This error was not happening with TensorFlow 1.0.
cell_1 = tgcnCell(gru_units, adj, num_nodes=num_nodes)
cell = tf.compat.v1.nn.rnn_cell.MultiRNNCell([cell_1], state_is_tuple=True)
outputs, states = keras.layers.RNN(cell, return_sequences=True,
return_state=True)
Whole Code:
# -*- coding: utf-8 -*-
import pickle as pkl
import tensorflow as tf
import pandas as pd
import numpy as np
import math
import os
import numpy.linalg as la
from tensorflow import keras
from tensorflow.keras import layers
#from tensorflow.contrib.rnn import RNNCell
from tensorflow.compat.v1.nn.rnn_cell import RNNCell
import scipy.sparse as sp
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Layer
from sklearn.metrics import mean_squared_error, mean_absolute_error
import time
from keras import backend
def normalized_adj(adj):
adj = sp.coo_matrix(adj)
rowsum = np.array(adj.sum(1))
d_inv_sqrt = np.power(rowsum, -0.5).flatten()
d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
normalized_adj = adj.dot(d_mat_inv_sqrt).transpose().dot(
d_mat_inv_sqrt).tocoo()
normalized_adj = normalized_adj.astype(np.float32)
return normalized_adj
def sparse_to_tuple(mx):
mx = mx.tocoo()
coords = np.vstack((mx.row, mx.col)).transpose()
L = tf.SparseTensor(coords, mx.data, mx.shape)
return tf.sparse.reorder(L)
def calculate_laplacian(adj, lambda_max=1):
adj = normalized_adj(adj + sp.eye(adj.shape[0]))
adj = sp.csr_matrix(adj)
adj = adj.astype(np.float32)
return sparse_to_tuple(adj)
def weight_variable_glorot(input_dim, output_dim, name=""):
init_range = np.sqrt(6.0 / (input_dim + output_dim))
initial = tf.random.uniform([input_dim, output_dim], minval=-init_range,
maxval=init_range, dtype=tf.float32)
return tf.Variable(initial, name=name)
def load_los_data(dataset):
# los_adj = pd.read_csv(r'/content/drive/MyDrive/tgcn/data/adj_matrix_clustered.csv',header=None)
los_adj = pd.read_csv(r'adj_matrix_clustered.csv', header=None)
adj = np.mat(los_adj)
# los_tf = pd.read_csv(r'/content/drive/MyDrive/tgcn/data/Cluster_series_date.csv', parse_dates=['5 Minutes'], index_col='5 Minutes')
los_tf = pd.read_csv(r'Cluster_series_date.csv', parse_dates=[
'5 Minutes'], index_col='5 Minutes')
return los_tf, adj
def preprocess_data(data, time_len, rate, seq_len, pre_len):
train_size = int(time_len * rate)
train_data = data[0:train_size]
test_data = data[train_size:time_len]
trainX, trainY, testX, testY = [], [], [], []
for i in range(len(train_data) - seq_len - pre_len):
a = train_data[i: i + seq_len + pre_len]
trainX.append(a[0: seq_len])
trainY.append(a[seq_len: seq_len + pre_len])
for i in range(len(test_data) - seq_len - pre_len):
b = test_data[i: i + seq_len + pre_len]
testX.append(b[0: seq_len])
testY.append(b[seq_len: seq_len + pre_len])
trainX1 = np.array(trainX)
trainY1 = np.array(trainY)
testX1 = np.array(testX)
testY1 = np.array(testY)
return trainX1, trainY1, testX1, testY1
def _concat(prefix, suffix, static=False):
"""Concat that enables int, Tensor, or TensorShape values.
This function takes a size specification, which can be an integer, a
TensorShape, or a Tensor, and converts it into a concatenated Tensor
(if static = False) or a list of integers (if static = True).
Args:
prefix: The prefix; usually the batch size (and/or time step size).
(TensorShape, int, or Tensor.)
suffix: TensorShape, int, or Tensor.
static: If `True`, return a python list with possibly unknown dimensions.
Otherwise return a `Tensor`.
Returns:
shape: the concatenation of prefix and suffix.
Raises:
ValueError: if `suffix` is not a scalar or vector (or TensorShape).
ValueError: if prefix or suffix was `None` and asked for dynamic
Tensors out.
"""
if isinstance(prefix, tf.Tensor):
p = prefix
p_static = tf.get_static_value(prefix)
if p.shape.ndims == 0:
p = tf.compat.v1.expand_dims(p, 0)
elif p.shape.ndims != 1:
raise ValueError(
"Prefix tensor must be either a scalar or vector, "
f"but received tensor: {p}")
else:
p = tf.TensorShape(prefix)
p_static = p.as_list() if p.ndims is not None else None
p = (
tf.constant(p.as_list(), dtype=tf.int32)
if p.is_fully_defined() else None)
if isinstance(suffix, tf.Tensor):
s = suffix
s_static = tf.get_static_value(suffix)
if s.shape.ndims == 0:
s = tf.compat.v1.expand_dims(s, 0)
elif s.shape.ndims != 1:
raise ValueError("suffix tensor must be either a scalar or vector, "
f"but received tensor: {s}")
else:
s = tf.TensorShape(suffix)
s_static = s.as_list() if s.ndims is not None else None
s = (
tf.constant(s.as_list(), dtype=tf.int32)
if s.is_fully_defined() else None)
if static:
shape = tf.TensorShape(p_static).concatenate(s_static)
shape = shape.as_list() if shape.ndims is not None else None
else:
if p is None or s is None:
raise ValueError(
"Prefix or suffix can't be None. "
f"Received prefix = {prefix} and suffix = {suffix}")
shape = tf.concat((p, s), 0)
return shape
def _zero_state_tensors(state_size, batch_size, dtype):
"""Create tensors of zeros based on state_size, batch_size, and dtype."""
def get_state_shape(s):
"""Combine s with batch_size to get a proper tensor shape."""
c = _concat(batch_size, s)
size = tf.zeros(c, dtype=dtype)
if not tf.executing_eagerly():
c_static = _concat(batch_size, s, static=True)
size.set_shape(c_static)
return size
return tf.nest.map_structure(get_state_shape, state_size)
class tgcnCell(RNNCell):
"""Temporal Graph Convolutional Network """
def call(self, inputs, **kwargs):
pass
def __init__(self, num_units, adj, num_nodes, input_size=None,
act=tf.nn.tanh, reuse=None):
# self.state_size = num_units
super(tgcnCell, self).__init__(_reuse=reuse)
self._act = act
self._nodes = num_nodes
self._units = num_units
self._adj = []
self._adj.append(calculate_laplacian(adj))
@property
def state_size(self):
return self._nodes * self._units
@property
def output_size(self):
return self._units
def __call__(self, inputs, state, scope=None):
with tf.compat.v1.variable_scope(scope or "tgcn"):
with tf.compat.v1.variable_scope("gates"):
value = tf.nn.sigmoid(
self._gc(inputs, state, 2 * self._units, bias=1.0, scope=scope))
r, u = tf.split(value=value, num_or_size_splits=2, axis=1)
with tf.compat.v1.variable_scope("candidate"):
r_state = r * state
c = self._act(self._gc(inputs, r_state,
self._units, scope=scope))
new_h = u * state + (1 - u) * c
return new_h, new_h
def _gc(self, inputs, state, output_size, bias=0.0, scope=None):
# inputs:(-1,num_nodes)
inputs = tf.expand_dims(inputs, 2)
# state:(batch,num_node,gru_units)
state = tf.reshape(state, (-1, self._nodes, self._units))
# concat
x_s = tf.concat([inputs, state], axis=2)
# input_size = x_s.get_shape()[2].value
input_size = x_s.get_shape()[2]
# (num_node,input_size,-1)
x0 = tf.transpose(x_s, perm=[1, 2, 0])
x0 = tf.reshape(x0, shape=[self._nodes, -1])
scope = tf.compat.v1.get_variable_scope()
with tf.compat.v1.variable_scope(scope):
for m in self._adj:
x1 = tf.sparse.sparse_dense_matmul(m, x0)
# print(x1)
x = tf.reshape(x1, shape=[self._nodes, input_size, -1])
x = tf.transpose(x, perm=[2, 0, 1])
x = tf.reshape(x, shape=[-1, input_size])
weights = tf.compat.v1.get_variable(
'weights', [input_size, output_size], initializer=tf.initializers.GlorotNormal()
)
# (batch_size * self._nodes, output_size)
x = tf.matmul(x, weights)
biases = tf.compat.v1.get_variable(
"biases", [output_size], initializer=tf.constant_initializer(bias, dtype=tf.float32))
x = tf.nn.bias_add(x, biases)
x = tf.reshape(x, shape=[-1, self._nodes, output_size])
x = tf.reshape(x, shape=[-1, self._nodes * output_size])
return x
time_start = time.time()
###### Settings ######
#flags = tf.app.flags
#FLAGS = tf.app.flags.FLAGS
#flags.DEFINE_float('learning_rate', 0.001, 'Initial learning rate.')
#flags.DEFINE_integer('training_epoch', 1, 'Number of epochs to train.')
#flags.DEFINE_integer('gru_units', 64, 'hidden units of gru.')
#flags.DEFINE_integer('seq_len',12 , ' time length of inputs.')
#flags.DEFINE_integer('pre_len', 3, 'time length of prediction.')
#flags.DEFINE_float('train_rate', 0.8, 'rate of training set.')
#flags.DEFINE_integer('batch_size', 32, 'batch size.')
#flags.DEFINE_string('dataset', 'los', 'sz or los.')
#flags.DEFINE_string('model_name', 'tgcn', 'tgcn')
model_name = 'tgcn'
data_name = 'los'
train_rate = 0.8
seq_len = 12
output_dim = pre_len = 3
batch_size = 32
lr = 0.001
training_epoch = 1
gru_units = 64
###### load data ######
if data_name == 'los':
data, adj = load_los_data('los')
time_len = data.shape[0]
num_nodes = data.shape[1]
data1 = np.mat(data, dtype=np.float32)
# normalization
max_value = np.max(data1)
data1 = data1/max_value
trainX, trainY, testX, testY = preprocess_data(
data1, time_len, train_rate, seq_len, pre_len)
totalbatch = int(trainX.shape[0]/batch_size)
training_data_count = len(trainX)
def TGCN(_X, _weights, _biases):
###
cell_1 = tgcnCell(gru_units, adj, num_nodes=num_nodes)
# cell = tf.nn.rnn_cell.MultiRNNCell([cell_1], state_is_tuple=True)
cell = tf.compat.v1.nn.rnn_cell.MultiRNNCell([cell_1], state_is_tuple=True)
_X = tf.unstack(_X, axis=1)
# outputs, states = tf.compat.v1.nn.static_rnn(cell, _X, dtype=tf.float32)
output, states = keras.layers.RNN(cell, return_sequences=True,
return_state=True)
m = []
for i in outputs:
o = tf.reshape(i, shape=[-1, num_nodes, gru_units])
o = tf.reshape(o, shape=[-1, gru_units])
m.append(o)
last_output = m[-1]
output = tf.matmul(last_output, _weights['out']) + _biases['out']
output = tf.reshape(output, shape=[-1, num_nodes, pre_len])
output = tf.transpose(output, perm=[0, 2, 1])
output = tf.reshape(output, shape=[-1, num_nodes])
return output, m, states
tf.compat.v1.disable_eager_execution()
###### placeholders ######
inputs = tf.compat.v1.placeholder(tf.float32, shape=[None, seq_len, num_nodes])
labels = tf.compat.v1.placeholder(tf.float32, shape=[None, pre_len, num_nodes])
#inputs = tf.keras.Input(type_spec=tf.TensorSpec(shape=[None, seq_len, num_nodes], dtype=tf.float32, name=None))
#labels = tf.keras.Input(type_spec=tf.TensorSpec(shape=[None, pre_len, num_nodes], dtype=tf.float32, name=None))
# inputs = tf.keras.Input(shape=(None, seq_len, num_nodes)) #this is alternative to using placeholder but gives error about shape of state with it
#labels = tf.keras.Input(shape=(None, pre_len, num_nodes))
# Graph weights
weights = {
'out': tf.Variable(tf.random.normal([gru_units, pre_len], mean=1.0), name='weight_o')}
biases = {
'out': tf.Variable(tf.random.normal([pre_len]), name='bias_o')}
if model_name == 'tgcn':
pred, ttts, ttto = TGCN(inputs, weights, biases)
y_pred = pred
###### optimizer ######
lambda_loss = 0.0015
Lreg = lambda_loss * sum(tf.nn.l2_loss(tf_var)
for tf_var in tf.trainable_variables())
label = tf.reshape(labels, [-1, num_nodes])
# loss
loss = tf.reduce_mean(tf.nn.l2_loss(y_pred-label) + Lreg)
# rmse
error = tf.sqrt(tf.reduce_mean(tf.square(y_pred-label)))
optimizer = tf.train.AdamOptimizer(lr).minimize(loss)
###### Initialize session ######
variables = tf.global_variables()
saver = tf.train.Saver(tf.global_variables())
#sess = tf.Session()
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
sess.run(tf.global_variables_initializer())
out = 'out/%s' % (model_name)
#out = 'out/%s_%s'%(model_name,'perturbation')
path1 = '%s_%s_lr%r_batch%r_unit%r_seq%r_pre%r_epoch%r' % (
model_name, data_name, lr, batch_size, gru_units, seq_len, pre_len, training_epoch)
path = os.path.join(out, path1)
if not os.path.exists(path):
os.makedirs(path)
###### evaluation ######
def evaluation(a, b):
rmse = math.sqrt(mean_squared_error(a, b))
mae = mean_absolute_error(a, b)
F_norm = la.norm(a-b, 'fro')/la.norm(a, 'fro')
r2 = 1-((a-b)**2).sum()/((a-a.mean())**2).sum()
var = 1-(np.var(a-b))/np.var(a)
return rmse, mae, 1-F_norm, r2, var
x_axe, batch_loss, batch_rmse, batch_pred = [], [], [], []
test_loss, test_rmse, test_mae, test_acc, test_r2, test_var, test_pred = [
], [], [], [], [], [], []
for epoch in range(training_epoch):
for m in range(totalbatch):
mini_batch = trainX[m * batch_size: (m+1) * batch_size]
mini_label = trainY[m * batch_size: (m+1) * batch_size]
_, loss1, rmse1, train_output = sess.run([optimizer, loss, error, y_pred],
feed_dict={inputs: mini_batch, labels: mini_label})
batch_loss.append(loss1)
batch_rmse.append(rmse1 * max_value)
# Test completely at every epoch
loss2, rmse2, test_output = sess.run([loss, error, y_pred],
feed_dict={inputs: testX, labels: testY})
test_label = np.reshape(testY, [-1, num_nodes])
rmse, mae, acc, r2_score, var_score = evaluation(test_label, test_output)
test_label1 = test_label * max_value
test_output1 = test_output * max_value
test_loss.append(loss2)
test_rmse.append(rmse * max_value)
test_mae.append(mae * max_value)
test_acc.append(acc)
test_r2.append(r2_score)
test_var.append(var_score)
test_pred.append(test_output1)
print('Iter:{}'.format(epoch),
'train_rmse:{:.4}'.format(batch_rmse[-1]),
'test_loss:{:.4}'.format(loss2),
'test_rmse:{:.4}'.format(rmse),
'test_acc:{:.4}'.format(acc))
if (epoch % 500 == 0):
saver.save(sess, path+'/model_100/TGCN_pre_%r' %
epoch, global_step=epoch)
time_end = time.time()
print(time_end-time_start, 's')
############## visualization ###############
b = int(len(batch_rmse)/totalbatch)
batch_rmse1 = [i for i in batch_rmse]
train_rmse = [(sum(batch_rmse1[i*totalbatch:(i+1)*totalbatch])/totalbatch)
for i in range(b)]
batch_loss1 = [i for i in batch_loss]
train_loss = [(sum(batch_loss1[i*totalbatch:(i+1)*totalbatch])/totalbatch)
for i in range(b)]
index = test_rmse.index(np.min(test_rmse))
test_result = test_pred[index]
var = pd.DataFrame(test_result)
var.to_csv(path+'/test_result.csv', index=False, header=False)
# plot_result(test_result,test_label1,path)
# plot_error(train_rmse,train_loss,test_rmse,test_acc,test_mae,path)
print('min_rmse:%r' % (np.min(test_rmse)),
'min_mae:%r' % (test_mae[index]),
'max_acc:%r' % (test_acc[index]),
'r2:%r' % (test_r2[index]),
'var:%r' % test_var[index])
def plot_result(test_result, test_label1, path):
# all test result visualization
fig1 = plt.figure(figsize=(7, 1.5))
a_pred = test_result[:, 0]
a_true = test_label1[:, 0]
plt.plot(a_pred, 'r-', label='prediction')
plt.plot(a_true, 'b-', label='true')
plt.legend(loc='best', fontsize=10)
plt.savefig(path+'/test_all.jpg')
plt.show()
# oneday test result visualization
fig1 = plt.figure(figsize=(7, 1.5))
a_pred = test_result[0:96, 0]
a_true = test_label1[0:96, 0]
plt.plot(a_pred, 'r-', label="prediction")
plt.plot(a_true, 'b-', label="true")
plt.legend(loc='best', fontsize=10)
plt.savefig(path+'/test_oneday.jpg')
plt.show()
def plot_error(train_rmse, train_loss, test_rmse, test_acc, test_mae, path):
###train_rmse & test_rmse
fig1 = plt.figure(figsize=(5, 3))
plt.plot(train_rmse, 'r-', label="train_rmse")
plt.plot(test_rmse, 'b-', label="test_rmse")
plt.legend(loc='best', fontsize=10)
plt.savefig(path+'/rmse.jpg')
plt.show()
#### train_loss & train_rmse
fig1 = plt.figure(figsize=(5, 3))
plt.plot(train_loss, 'b-', label='train_loss')
plt.legend(loc='best', fontsize=10)
plt.savefig(path+'/train_loss.jpg')
plt.show()
fig1 = plt.figure(figsize=(5, 3))
plt.plot(train_rmse, 'b-', label='train_rmse')
plt.legend(loc='best', fontsize=10)
plt.savefig(path+'/train_rmse.jpg')
plt.show()
# accuracy
fig1 = plt.figure(figsize=(5, 3))
plt.plot(test_acc, 'b-', label="test_acc")
plt.legend(loc='best', fontsize=10)
plt.savefig(path+'/test_acc.jpg')
plt.show()
# rmse
fig1 = plt.figure(figsize=(5, 3))
plt.plot(test_rmse, 'b-', label="test_rmse")
plt.legend(loc='best', fontsize=10)
plt.savefig(path+'/test_rmse.jpg')
plt.show()
# mae
fig1 = plt.figure(figsize=(5, 3))
plt.plot(test_mae, 'b-', label="test_mae")
plt.legend(loc='best', fontsize=10)
plt.savefig(path+'/test_mae.jpg')
plt.show()
Error it gives:
Traceback (most recent call last):
File "C:\Users\shiva\Desktop\f\ml_js\script.py", line 322, in <module>
pred, ttts, ttto = TGCN(inputs, weights, biases)
File "C:\Users\shiva\Desktop\f\ml_js\script.py", line 288, in TGCN
output, states = keras.layers.RNN(cell, return_sequences=True,
TypeError: cannot unpack non-iterable RNN object
I know that this is happening because the result of the RNN call is an object that is not iterable. For some reason, this error started happening in TensorFlow 2.0.
I need the outputs and states from the RNN call.
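In TF 2.x, keras.layers.RNN(...) only constructs a layer object; the outputs and states are produced when that layer is called on a tensor. A rough, untested sketch of unpacking that call (inputs_3d is a hypothetical [batch, time, features] tensor, e.g. the placeholder before tf.unstack):
rnn_layer = keras.layers.RNN(cell, return_sequences=True, return_state=True)
result = rnn_layer(inputs_3d)            # returns [outputs, state_1, ..., state_n] when return_state=True
outputs, states = result[0], result[1:]
Whether this drops straight into the TGCN graph above is untested; it only illustrates that the RNN object itself is not the (outputs, states) tuple.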

Decoder targets required for RNN inference

I have been trying to run some experiments using the deepfix tool (https://bitbucket.org/iiscseal/deepfix), which is a seq2seq model for correcting common programming errors.
I made changes to the code so that it is compatible with TF-1.12, as the original code contains tensorflow.contrib.seq2seq functions which are not supported in TF-1.12 (only in TF-1.0.x).
The main changes were in the seq2seq_model defined in neural_net/train.py.
Below is the changed code. I'm new to TensorFlow RNNs and coded the decoder part with help from online examples.
class seq2seq_model():
PAD = 0
EOS = 1
def __init__(self, vocab_size, embedding_size, max_output_seq_len,
cell_type='LSTM', memory_dim=300, num_layers=4, dropout=0.2,
attention=True,
scope=None,
verbose=False):
assert 0 <= dropout and dropout <= 1, '0 <= dropout <= 1, you passed dropout={}'.format(
dropout)
tf.set_random_seed(1189)
self.attention = attention
self.max_output_seq_len = max_output_seq_len
self.memory_dim = memory_dim
self.num_layers = num_layers
self.dropout = dropout
self.scope = scope
if dropout != 0:
self.keep_prob = tf.placeholder(tf.float32)
else:
self.keep_prob = None
self.vocab_size = vocab_size
self.embedding_size = embedding_size
self.encoder_cell = _new_RNN_cell(
memory_dim, num_layers, cell_type, dropout, self.keep_prob)
self.decoder_cell = _new_RNN_cell(
memory_dim, num_layers, cell_type, dropout, self.keep_prob)
self._make_graph()
if self.scope is not None:
saver_vars = [var for var in tf.global_variables(
) if var.name.startswith(self.scope)]
else:
saver_vars = tf.global_variables()
if verbose:
print 'root-scope:', self.scope
print "\n\nDiscovered %d saver variables." % len(saver_vars)
for each in saver_vars:
print each.name
self.saver = tf.train.Saver(saver_vars, max_to_keep=5)
@property
def decoder_hidden_units(self):
return self.memory_dim
def _make_graph(self):
self._init_placeholders()
self._init_decoder_train_connectors()
self._init_embeddings()
self._init_simple_encoder()
self._init_decoder()
self._init_optimizer()
def _init_placeholders(self):
""" Everything is time-major """
self.encoder_inputs = tf.placeholder(
shape=(None, None),
dtype=tf.int32,
name='encoder_inputs',
)
self.encoder_inputs_length = tf.placeholder(
shape=(None,),
dtype=tf.int32,
name='encoder_inputs_length',
)
self.decoder_targets = tf.placeholder(
shape=(None, None),
dtype=tf.int32,
name='decoder_targets'
)
self.decoder_targets_length = tf.placeholder(
shape=(None,),
dtype=tf.int32,
name='decoder_targets_length',
)
def _init_decoder_train_connectors(self):
with tf.name_scope('decoderTrainFeeds'):
sequence_size, batch_size = tf.unstack(
tf.shape(self.decoder_targets), name='decoder_targets_shape')
EOS_SLICE = tf.ones([1, batch_size], dtype=tf.int32) * self.EOS
PAD_SLICE = tf.ones([1, batch_size], dtype=tf.int32) * self.PAD
self.decoder_train_inputs = tf.concat(
[EOS_SLICE, self.decoder_targets], axis=0, name="decoder_train_inputs")
self.decoder_train_length = self.decoder_targets_length + 1
decoder_train_targets = tf.concat(
[self.decoder_targets, PAD_SLICE], axis=0)
decoder_train_targets_seq_len, _ = tf.unstack(
tf.shape(decoder_train_targets))
decoder_train_targets_eos_mask = tf.one_hot(self.decoder_train_length - 1,
decoder_train_targets_seq_len,
on_value=self.EOS, off_value=self.PAD,
dtype=tf.int32)
decoder_train_targets_eos_mask = tf.transpose(
decoder_train_targets_eos_mask, [1, 0])
decoder_train_targets = tf.add(decoder_train_targets,
decoder_train_targets_eos_mask, name="decoder_train_targets")
self.decoder_train_targets = decoder_train_targets
self.loss_weights = tf.ones([
batch_size,
tf.reduce_max(self.decoder_train_length)
], dtype=tf.float32, name="loss_weights")
def _init_embeddings(self):
with tf.variable_scope("embedding") as scope:
sqrt3 = math.sqrt(3)
initializer = tf.random_uniform_initializer(-sqrt3, sqrt3)
self.embedding_matrix = tf.get_variable(
name="embedding_matrix",
shape=[self.vocab_size, self.embedding_size],
initializer=initializer,
dtype=tf.float32)
self.encoder_inputs_embedded = tf.nn.embedding_lookup(
self.embedding_matrix, self.encoder_inputs,
name="encoder_inputs_embedded")
self.decoder_train_inputs_embedded = tf.nn.embedding_lookup(
self.embedding_matrix, self.decoder_train_inputs,
name="decoder_train_inputs_embedded")
def _init_simple_encoder(self):
with tf.variable_scope("Encoder") as scope:
(self.encoder_outputs, self.encoder_state) = (
tf.nn.dynamic_rnn(cell=self.encoder_cell,
inputs=self.encoder_inputs_embedded,
sequence_length=self.encoder_inputs_length,
time_major=True,
dtype=tf.float32)
)
def _init_decoder(self):
with tf.variable_scope("decoder") as scope:
# def output_fn(outputs):
# return tf.contrib.layers.fully_connected(outputs, self.vocab_size, scope=scope,
# name = "output_fn")
sequence_size, batch_size = tf.unstack(
tf.shape(self.decoder_targets), name='decoder_targets_shape')
train_helper = seq2seq.TrainingHelper(
inputs=self.decoder_train_inputs_embedded,
sequence_length=self.decoder_train_length,
time_major=True,
name="train_helper")
pred_helper = seq2seq.SampleEmbeddingHelper(
embedding=self.embedding_matrix,
start_tokens=tf.ones([batch_size], dtype=tf.int32) * self.EOS,
end_token=self.EOS)
# name="pred_helper")
def _decode(helper, scope, reuse=None):
with tf.variable_scope(scope, reuse=reuse):
attention_states = tf.transpose(
self.encoder_outputs, [1, 0, 2])
attention_mechanism = seq2seq.BahdanauAttention(
num_units=self.decoder_hidden_units, memory=attention_states,
name="attention_mechanism")
attention_cell = seq2seq.AttentionWrapper(
self.decoder_cell, attention_mechanism,
name="atttention_wrapper")
out_cell = tf.contrib.rnn.OutputProjectionWrapper(
attention_cell, self.vocab_size, reuse=reuse)
# name="output_cell")
decoder = seq2seq.BasicDecoder(
cell=out_cell, helper=helper,
initial_state=out_cell.zero_state(
dtype=tf.float32, batch_size=batch_size))
# name="decoder")
outputs = seq2seq.dynamic_decode(
decoder=decoder, output_time_major=True,
impute_finished=True)
# name="outputs")
return outputs
(self.decoder_logits_train, self.decoder_state_train, _) = _decode(train_helper, "decoder")
(self.decoder_logits_inference, self.decoder_state_inference, _) = _decode(pred_helper, "decoder", reuse=True)
self.decoder_logits_train = self.decoder_logits_train.rnn_output
self.decoder_logits_inference = self.decoder_logits_inference.rnn_output
# self.decoder_logits_train = output_fn(self.decoder_outputs_train)
self.decoder_prediction_train = tf.argmax(
self.decoder_logits_train, axis=-1, name='decoder_prediction_train')
scope.reuse_variables()
self.decoder_prediction_inference = tf.argmax(self.decoder_logits_inference, axis=-1,
name='decoder_prediction_inference')
def _init_optimizer(self):
logits = tf.transpose(self.decoder_logits_train, [1, 0, 2])
targets = tf.transpose(self.decoder_train_targets, [1, 0])
self.loss = seq2seq.sequence_loss(logits=logits, targets=targets,
weights=self.loss_weights)
self.optimizer = tf.train.AdamOptimizer()
gvs = self.optimizer.compute_gradients(self.loss)
def ClipIfNotNone(grad):
if grad is None:
return grad
return tf.clip_by_value(grad, -1., 1)
# capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs]
capped_gvs = [(ClipIfNotNone(grad), var) for grad, var in gvs]
self.train_op = self.optimizer.apply_gradients(capped_gvs)
def make_feed_dict(self, x, x_len, y, y_len):
feed_dict = {
self.encoder_inputs: x,
self.encoder_inputs_length: x_len,
self.decoder_targets: y,
self.decoder_targets_length: y_len,
}
if self.dropout != 0:
feed_dict.update({self.keep_prob: 1.0 - self.dropout})
return feed_dict
def load_parameters(self, sess, filename):
self.saver.restore(sess, filename)
def save_parameters(self, sess, filename, global_step=None):
self.saver.save(sess, filename, global_step=global_step)
def train_step(self, session, x, x_len, y, y_len):
feed_dict = self.make_feed_dict(x, x_len, y, y_len)
_, loss = session.run([self.train_op, self.loss], feed_dict)
return loss
def validate_step(self, session, x, x_len, y, y_len):
feed_dict = self.make_feed_dict(x, x_len, y, y_len)
loss, decoder_prediction, decoder_train_targets = session.run([self.loss,
self.decoder_prediction_inference,
self.decoder_train_targets], feed_dict)
return loss, np.array(decoder_prediction).T, np.array(decoder_train_targets).T
def sample(self, session, X, X_len):
feed_dict = {self.encoder_inputs: X,
self.encoder_inputs_length: X_len}
if self.dropout != 0:
feed_dict.update({self.keep_prob: 1.0})
decoder_prediction = session.run(
self.decoder_prediction_inference, feed_dict)
return np.array(decoder_prediction).T
I am having some problems with this code:
Main problem - The seq2seq.train_step() and seq2seq.validate_step() functions work, but when I use seq2seq.sample() to actually make inferences, I get an error asking me to feed a value for decoder_targets. This is unexpected behaviour, as the SampleEmbeddingHelper used for inference should not require decoder_targets. The error:
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'ids/decoder_targets' with dtype int32 and shape [?,?]
[[node ids/decoder_targets (defined at .../code/neural_net/train.py:241) = Placeholder[dtype=DT_INT32, shape=[?,?], _device="/job:localhost/replica:0/task:0/device:CPU:0"]]]
When I try to use GreedyEmbeddingHelper instead of SampleEmbeddingHelper and then run the decoder_logits_inference op, the machine hangs and runs out of memory after some time, although SampleEmbeddingHelper works fine.
Well, SampleEmbeddingHelper does need decoder targets, since it mixes part of GreedyEmbeddingHelper (infer mode) and tf.contrib.seq2seq.TrainingHelper (teacher forcing). I think you just need to use GreedyEmbeddingHelper.
In the beginning the parameters are totally random (if not pre-trained), and you may have seen that the outputs of the first few loops of a seq2seq model are totally messed up.
So if you use GreedyEmbeddingHelper, which produces each output based on the previous one, nothing teaches it "where to stop", and it usually keeps going until your memory runs out. To solve this, you need to set an upper limit on the sentence length in tf.contrib.seq2seq.dynamic_decode.
The argument is maximum_iterations, as shown in the documentation for tf.contrib.seq2seq.dynamic_decode.
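For illustration, a hedged sketch of that change applied to the _decode helper from the question (only the dynamic_decode call is shown; the cap reuses self.max_output_seq_len from the constructor, but any reasonable upper bound on the output length would do):
outputs = seq2seq.dynamic_decode(
    decoder=decoder, output_time_major=True,
    impute_finished=True,
    maximum_iterations=self.max_output_seq_len)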

AttributeError: 'Tensor' object has no attribute '_keras_history' during implementing co-attention layer

Hi everybody. I'm trying to build a custom co-attention layer for a matching task, and there is an error that confuses me a lot.
model = Model(inputs=[ans_input, ques_input], outputs=output)
My program shuts down while running the code above and then throws an error:
AttributeError: 'Tensor' object has no attribute '_keras_history'
I guess it means that my model cannot form a complete graph. I have tried lots of methods that I found on Stack Overflow and other blogs, but none of them work. :(
I will paste my model below. Thank you for helping me :)
import time
from keras.layers import Embedding, LSTM, TimeDistributed, Lambda
from keras.layers.core import *
from keras.layers.merge import concatenate
from keras.layers.pooling import GlobalMaxPooling1D
from keras.models import *
from keras.optimizers import *
from dialog.keras_lstm.k_call import *
from dialog.model.keras_himodel import ZeroMaskedEntries, logger
class Co_AttLayer(Layer):
def __init__(self, **kwargs):
# self.input_spec = [InputSpec(ndim=3)]
super(Co_AttLayer, self).__init__(**kwargs)
def build(self, input_shape):
assert len(input_shape) == 2
assert len(input_shape[0]) == len(input_shape[1])
super(Co_AttLayer, self).build(input_shape)
def cosine_sim(self, x):
ans_ss = K.sum(K.square(x[0]), axis=2, keepdims=True)
ans_norm = K.sqrt(K.maximum(ans_ss, K.epsilon()))
ques_ss = K.sum(K.square(x[1]), axis=2, keepdims=True)
ques_norm = K.sqrt(K.maximum(ques_ss, K.epsilon()))
tr_ques_norm = K.permute_dimensions(ques_norm, (0, 2, 1))
tr_ques = K.permute_dimensions(x[1], (0, 2, 1))
ss = K.batch_dot(x[0], tr_ques, axes=[2, 1])
den = K.batch_dot(ans_norm, tr_ques_norm, axes=[2, 1])
return ss / den
def call(self, x, mask=None):
cosine = Lambda(self.cosine_sim)(x)
coqWij = K.softmax(cosine)
print(x[1].shape, coqWij.shape)
ai = K.dot(coqWij, x[1]) # (N A Q) (N Q L)
coaWij = K.softmax(K.permute_dimensions(cosine, (0, 2, 1)))
qj = K.dot(coaWij, x[0])
print(qj.shape, ai.shape)
return concatenate([ai, qj], axis=2)
def compute_output_shape(self, input_shape):
return input_shape
def build_QAmatch_model(opts, vocab_size=0, maxlen=300, embedd_dim=50, init_mean_value=None):
ans_input = Input(shape=(maxlen,), dtype='int32', name='ans_input')
ques_input = Input(shape=(maxlen,), dtype='int32', name='ques_input')
embedding = Embedding(output_dim=embedd_dim, input_dim=vocab_size, input_length=maxlen,
mask_zero=True, name='embedding')
dropout = Dropout(opts.dropout, name='dropout')
lstm = LSTM(opts.lstm_units, return_sequences=True, name='lstm')
hidden_layer = Dense(units=opts.hidden_units, name='hidden_layer')
output_layer = Dense(units=1, name='output_layer')
zme = ZeroMaskedEntries(name='maskedout')
ans_maskedout = zme(embedding(ans_input))
ques_maskedout = zme(embedding(ques_input))
ans_lstm = lstm(dropout(ans_maskedout)) # (A V)
ques_lstm = lstm(dropout(ques_maskedout)) # (Q V)
co_att = Co_AttLayer()([ans_lstm, ques_lstm])
def slice(x, index):
return x[:, :, index, :]
ans_att = Lambda(slice, output_shape=(maxlen, embedd_dim), arguments={'index': 0})(co_att)
ques_att = Lambda(slice, output_shape=(maxlen, embedd_dim), arguments={'index': 1})(co_att)
merged_ques = concatenate([ques_lstm, ques_att, ques_maskedout], axis=2)
merged_ans = concatenate([ans_lstm, ans_att, ans_maskedout], axis=2)
ans_vec = GlobalMaxPooling1D(name='ans_pooling')(merged_ans)
ques_vec = GlobalMaxPooling1D(name='ques_pooling')(merged_ques)
ans_hid = hidden_layer(ans_vec)
ques_hid = hidden_layer(ques_vec)
merged_hid = concatenate([ans_hid, ques_hid], axis=-1)
merged_all = concatenate([merged_hid, ans_hid + ques_hid, ans_hid - ques_hid, K.abs(ans_hid - ques_hid)], axis=-1)
output = output_layer(merged_all)
model = Model(inputs=[ans_input, ques_input], outputs=output)
if init_mean_value:
logger.info("Initialise output layer bias with log(y_mean/1-y_mean)")
bias_value = (np.log(init_mean_value) - np.log(1 - init_mean_value)).astype(K.floatx())
model.layers[-1].b.set_value(bias_value)
if verbose:
model.summary()
start_time = time.time()
model.compile(loss='mse', optimizer='rmsprop')
total_time = time.time() - start_time
logger.info("Model compiled in %.4f s" % total_time)
return model
I can't reproduce your code, but I presume the error happens here:
merged_all = concatenate([merged_hid, ans_hid + ques_hid, ans_hid - ques_hid,
K.abs(ans_hid - ques_hid)], axis=-1)
The backend operations +, - and K.abs are not wrapped within a Lambda layer, so the resulting tensors are not Keras tensors and therefore they lack some attributes such as _keras_history. You could wrap them as follows:
l1 = Lambda(lambda x: x[0] + x[1])([ans_hid, ques_hid])
l2 = Lambda(lambda x: x[0] - x[1])([ans_hid, ques_hid])
l3 = Lambda(lambda x: K.abs(x[0] - x[1]))([ans_hid, ques_hid])
merged_all = concatenate([merged_hid, l1, l2, l3], axis=-1)
NOTE: Not tested.
