TensorFlow: Trainable variable not getting learnt

I am trying to implement a custom modified ReLU in TensorFlow 1 that uses two learnable parameters. But the parameters are not being learnt even after running 1000 training steps, as shown by printing their values before and after training. I have observed that when I do not split x inside the function, i.e. when I execute the commented-out lines instead, the coefficients are learnt. Could anyone suggest why splitting the input stops the trainable coefficients from being learnt, and how this can be resolved?
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

def weight_variable(shape, vari_name):
    initial = tf.truncated_normal(shape, stddev=0.1, dtype=tf.float32)
    return tf.Variable(initial, name=vari_name)

def init_Prelu_coefficient(var1, var2):
    coeff = tf.truncated_normal([1], stddev=0.1, dtype=tf.float32)
    coeff1 = tf.truncated_normal([1], stddev=0.1, dtype=tf.float32)
    return tf.Variable(coeff, trainable=True, name=var1), tf.Variable(coeff1, trainable=True, name=var2)

def Prelu(x, coeff, coeff1):
    s = int(x.shape[-1])
    sop = x[:, :, :, :s//2]*coeff + x[:, :, :, s//2:]*coeff1
    sop1 = x[:, :, :, :s//2]*coeff - x[:, :, :, s//2:]*coeff1
    copied_variable = tf.concat([sop, sop1], axis=-1)
    copied_variable = tf.math.maximum(copied_variable, 0)/copied_variable
    # copied_variable = tf.identity(x)
    # copied_variable = tf.math.maximum(copied_variable*coeff+copied_variable*coeff1, 0)/copied_variable
    # copied_variable = tf.multiply(copied_variable, x)
    return copied_variable

def conv2d_dilate(x, W, dilate_rate):
    return tf.nn.convolution(x, W, padding='VALID', dilation_rate=[1, dilate_rate])

matr = np.random.rand(1, 60, 40, 8)
target = np.random.rand(1, 58, 36, 8)

def learning(sess):
    # define placeholder for inputs to network
    Input = tf.placeholder(tf.float32, [1, 60, 40, 8])
    input_Target = tf.placeholder(tf.float32, [1, 58, 36, 8])
    kernel = weight_variable([3, 3, 8, 8], 'G1')
    coeff, coeff1 = init_Prelu_coefficient('alpha', 'alpha1')
    conv = Prelu(conv2d_dilate(Input, kernel, 2), coeff, coeff1)
    error_norm = 1*tf.norm(input_Target - conv)
    print("MOMENTUM LEARNING")
    train_step = tf.train.MomentumOptimizer(learning_rate=0.001, momentum=0.9, use_nesterov=False).minimize(error_norm)
    if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
        init = tf.initialize_all_variables()
    else:
        init = tf.global_variables_initializer()
    sess.run(init)
    print("INIT coefficient ", sess.run(coeff), sess.run(coeff1))
    init_var = tf.trainable_variables()
    error_prev = 1  # initial error, we set 1 and it began to decrease
    for i in range(1000):
        sess.run(train_step, feed_dict={Input: matr, input_Target: target})
        if i % 100 == 0:
            error_now = sess.run(error_norm, feed_dict={Input: matr, input_Target: target})
            print('The', i, 'th iteration gives an error', error_now)
    error = sess.run(error_norm, feed_dict={Input: matr, input_Target: target})
    print(sess.run(kernel))
    print("LEARNT coefficient ", sess.run(coeff), sess.run(coeff1))

sess = tf.Session()
learning(sess)
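A plausible reading of why the split version stalls: tf.math.maximum(z, 0)/z evaluates to a 0-or-1 mask (and NaN where z == 0), and unlike the commented-out branch, the split version never multiplies that mask back onto the pre-activation, so the returned value is piecewise constant in coeff and coeff1 and their gradients are zero almost everywhere. A minimal sketch of a split-channel variant that keeps the coefficients differentiable, assuming the intent is a ReLU applied to the two mixed halves (Prelu_split is a hypothetical name, not the author's code):

def Prelu_split(x, coeff, coeff1):
    s = int(x.shape[-1])
    sop = x[:, :, :, :s//2]*coeff + x[:, :, :, s//2:]*coeff1
    sop1 = x[:, :, :, :s//2]*coeff - x[:, :, :, s//2:]*coeff1
    pre = tf.concat([sop, sop1], axis=-1)
    # Multiply the mask back onto the pre-activation (equivalent to
    # tf.nn.relu(pre)); the cast avoids the 0/0 NaN of maximum(pre, 0)/pre.
    mask = tf.cast(pre > 0, tf.float32)
    return pre * mask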

Related

How to solve this Error? ValueError: Cannot feed value of shape (1, 360, 480, 3, 1) for Tensor Placeholder_1:0, which has shape (1, 360, 480, 1)

I need help with my SegNet. I cloned this repository:
https://github.com/tkuanlun350/Tensorflow-SegNet
and changed the dataset to my custom data, which has 3 classes including the background. But this error occurred when I executed the test command:
ValueError: Cannot feed value of shape (1, 360, 480, 3, 1) for Tensor Placeholder_1:0, which has shape (1, 360, 480, 1)
How can I solve this problem? I'm not experienced with TensorFlow, so please help me.
Here is my code (Python 3.7, TensorFlow 2.0):
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
import os, sys
import numpy as np
import math
from datetime import datetime
import time
from PIL import Image
from math import ceil
from tensorflow.python.ops import gen_nn_ops
# modules
from Utils import _variable_with_weight_decay, _variable_on_cpu, _add_loss_summaries, _activation_summary, print_hist_summery, get_hist, per_class_acc, writeImage
from Inputs import *
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1, 2, 3"
# gpus = tf.config.experimental.list_physical_devices('GPU')
gpus = tf.config.list_physical_devices('GPU')
for i in range(len(gpus)):
    tf.config.experimental.set_memory_growth(gpus[i], True)

# Constants describing the training process.
MOVING_AVERAGE_DECAY = 0.9999     # The decay to use for the moving average.
NUM_EPOCHS_PER_DECAY = 350.0      # Epochs after which learning rate decays.
LEARNING_RATE_DECAY_FACTOR = 0.1  # Learning rate decay factor.
INITIAL_LEARNING_RATE = 0.001     # Initial learning rate.
EVAL_BATCH_SIZE = 8
BATCH_SIZE = 8

# for CamVid
IMAGE_HEIGHT = 360
IMAGE_WIDTH = 480
IMAGE_DEPTH = 3
NUM_CLASSES = 3
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 367
NUM_EXAMPLES_PER_EPOCH_FOR_TEST = 101
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 1
TEST_ITER = NUM_EXAMPLES_PER_EPOCH_FOR_TEST / BATCH_SIZE
def msra_initializer(kl, dl):
    """
    kl for kernel size, dl for filter number
    """
    stddev = math.sqrt(2. / (kl**2 * dl))
    return tf.truncated_normal_initializer(stddev=stddev)

def orthogonal_initializer(scale=1.1):
    ''' From Lasagne and Keras. Reference: Saxe et al., http://arxiv.org/abs/1312.6120
    '''
    def _initializer(shape, dtype=tf.float32, partition_info=None):
        flat_shape = (shape[0], np.prod(shape[1:]))
        a = np.random.normal(0.0, 1.0, flat_shape)
        u, _, v = np.linalg.svd(a, full_matrices=False)
        # pick the one with the correct shape
        q = u if u.shape == flat_shape else v
        q = q.reshape(shape)  # this needs to be corrected to float32
        return tf.constant(scale * q[:shape[0], :shape[1]], dtype=tf.float32)
    return _initializer
def loss(logits, labels):
    """
    loss func without re-weighting
    """
    # Calculate the average cross entropy loss across the batch.
    logits = tf.reshape(logits, (-1, NUM_CLASSES))
    labels = tf.reshape(labels, [-1])
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels, name='cross_entropy_per_example')
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)
    return tf.add_n(tf.get_collection('losses'), name='total_loss')

def weighted_loss(logits, labels, num_classes, head=None):
    """ median-frequency re-weighting """
    with tf.name_scope('loss'):
        logits = tf.reshape(logits, (-1, num_classes))
        epsilon = tf.constant(value=1e-10)
        logits = logits + epsilon
        # construct one-hot label array
        label_flat = tf.reshape(labels, (-1, 1))
        # should be [batch, num_classes]
        labels = tf.reshape(tf.one_hot(label_flat, depth=num_classes), (-1, num_classes))
        softmax = tf.nn.softmax(logits)
        cross_entropy = -tf.reduce_sum(tf.multiply(labels * tf.log(softmax + epsilon), head), axis=[1])
        cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)
        loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
    return loss

def cal_loss(logits, labels):
    loss_weight = np.array([
        0.2595,
        0.3826,
        1.0974])  # class 0~2
    labels = tf.cast(labels, tf.int32)
    # return loss(logits, labels)
    return weighted_loss(logits, labels, num_classes=NUM_CLASSES, head=loss_weight)
def conv_layer_with_bn(inputT, shape, train_phase, activation=True, name=None):
    in_channel = shape[2]
    out_channel = shape[3]
    k_size = shape[0]
    with tf.variable_scope(name) as scope:
        kernel = _variable_with_weight_decay('ort_weights', shape=shape, initializer=orthogonal_initializer(), wd=None)
        conv = tf.nn.conv2d(inputT, kernel, [1, 1, 1, 1], padding='SAME')
        biases = _variable_on_cpu('biases', [out_channel], tf.constant_initializer(0.0))
        bias = tf.nn.bias_add(conv, biases)
        if activation is True:
            conv_out = tf.nn.relu(batch_norm_layer(bias, train_phase, scope.name))
        else:
            conv_out = batch_norm_layer(bias, train_phase, scope.name)
    return conv_out

def get_deconv_filter(f_shape):
    """
    reference: https://github.com/MarvinTeichmann/tensorflow-fcn
    """
    width = f_shape[0]
    heigh = f_shape[0]
    f = ceil(width/2.0)
    c = (2 * f - 1 - f % 2) / (2.0 * f)
    bilinear = np.zeros([f_shape[0], f_shape[1]])
    for x in range(width):
        for y in range(heigh):
            value = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
            bilinear[x, y] = value
    weights = np.zeros(f_shape)
    for i in range(f_shape[2]):
        weights[:, :, i, i] = bilinear
    init = tf.constant_initializer(value=weights,
                                   dtype=tf.float32)
    return tf.get_variable(name="up_filter", initializer=init,
                           shape=weights.shape)

def deconv_layer(inputT, f_shape, output_shape, stride=2, name=None):
    # output_shape = [b, w, h, c]
    # sess_temp = tf.InteractiveSession()
    sess_temp = tf.global_variables_initializer()
    strides = [1, stride, stride, 1]
    with tf.variable_scope(name):
        weights = get_deconv_filter(f_shape)
        deconv = tf.nn.conv2d_transpose(inputT, weights, output_shape,
                                        strides=strides, padding='SAME')
    return deconv

def batch_norm_layer(inputT, is_training, scope):
    return tf.cond(is_training,
                   lambda: tf.keras.layers.BatchNormalization(center=False)(inputT),
                   lambda: tf.keras.layers.BatchNormalization(center=False)(inputT))
def inference(images, labels, batch_size, phase_train):
    # norm1
    norm1 = tf.nn.lrn(images, depth_radius=5, bias=1.0, alpha=0.0001, beta=0.75,
                      name='norm1')
    # conv1
    conv1 = conv_layer_with_bn(norm1, [7, 7, images.get_shape().as_list()[3], 64], phase_train, name="conv1")
    print(conv1.shape)
    # pool1
    pool1, pool1_indices = tf.nn.max_pool_with_argmax(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                                                      padding='SAME', name='pool1')
    # conv2
    conv2 = conv_layer_with_bn(pool1, [7, 7, 64, 64], phase_train, name="conv2")
    # pool2
    pool2, pool2_indices = tf.nn.max_pool_with_argmax(conv2, ksize=[1, 2, 2, 1],
                                                      strides=[1, 2, 2, 1], padding='SAME', name='pool2')
    # conv3
    conv3 = conv_layer_with_bn(pool2, [7, 7, 64, 64], phase_train, name="conv3")
    # pool3
    pool3, pool3_indices = tf.nn.max_pool_with_argmax(conv3, ksize=[1, 2, 2, 1],
                                                      strides=[1, 2, 2, 1], padding='SAME', name='pool3')
    # conv4
    conv4 = conv_layer_with_bn(pool3, [7, 7, 64, 64], phase_train, name="conv4")
    # pool4
    pool4, pool4_indices = tf.nn.max_pool_with_argmax(conv4, ksize=[1, 2, 2, 1],
                                                      strides=[1, 2, 2, 1], padding='SAME', name='pool4')
    """ End of encoder """
    """ start upsample """
    # upsample4
    # Need to change when using different dataset out_w, out_h
    # upsample4 = upsample_with_pool_indices(pool4, pool4_indices, pool4.get_shape(), out_w=45, out_h=60, scale=2, name='upsample4')
    upsample4 = deconv_layer(pool4, [2, 2, 64, 64], [batch_size, 45, 60, 64], 2, "up4")
    # decode 4
    conv_decode4 = conv_layer_with_bn(upsample4, [7, 7, 64, 64], phase_train, False, name="conv_decode4")
    # upsample 3
    # upsample3 = upsample_with_pool_indices(conv_decode4, pool3_indices, conv_decode4.get_shape(), scale=2, name='upsample3')
    upsample3 = deconv_layer(conv_decode4, [2, 2, 64, 64], [batch_size, 90, 120, 64], 2, "up3")
    # decode 3
    conv_decode3 = conv_layer_with_bn(upsample3, [7, 7, 64, 64], phase_train, False, name="conv_decode3")
    # upsample2
    # upsample2 = upsample_with_pool_indices(conv_decode3, pool2_indices, conv_decode3.get_shape(), scale=2, name='upsample2')
    upsample2 = deconv_layer(conv_decode3, [2, 2, 64, 64], [batch_size, 180, 240, 64], 2, "up2")
    # decode 2
    conv_decode2 = conv_layer_with_bn(upsample2, [7, 7, 64, 64], phase_train, False, name="conv_decode2")
    # upsample1
    # upsample1 = upsample_with_pool_indices(conv_decode2, pool1_indices, conv_decode2.get_shape(), scale=2, name='upsample1')
    upsample1 = deconv_layer(conv_decode2, [2, 2, 64, 64], [batch_size, 360, 480, 64], 2, "up1")
    # decode 1
    conv_decode1 = conv_layer_with_bn(upsample1, [7, 7, 64, 64], phase_train, False, name="conv_decode1")
    """ end of Decode """
    """ Start Classify """
    # output predicted class number (NUM_CLASSES)
    with tf.variable_scope('conv_classifier') as scope:
        kernel = _variable_with_weight_decay('weights',
                                             shape=[1, 1, 64, NUM_CLASSES],
                                             initializer=msra_initializer(1, 64),
                                             wd=0.0005)
        conv = tf.nn.conv2d(conv_decode1, kernel, [1, 1, 1, 1], padding='SAME')
        biases = _variable_on_cpu('biases', [NUM_CLASSES], tf.constant_initializer(0.0))
        conv_classifier = tf.nn.bias_add(conv, biases, name=scope.name)
    logit = conv_classifier
    loss = cal_loss(conv_classifier, labels)
    return loss, logit
def train(total_loss, global_step):
    total_sample = 274
    num_batches_per_epoch = 274/1
    """ fix lr """
    lr = INITIAL_LEARNING_RATE
    loss_averages_op = _add_loss_summaries(total_loss)
    # Compute gradients.
    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.AdamOptimizer(lr)
        grads = opt.compute_gradients(total_loss)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)
    # Add histograms for gradients.
    for grad, var in grads:
        if grad is not None:
            tf.summary.histogram(var.op.name + '/gradients', grad)
    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')
    return train_op
def test(FLAGS):
    max_steps = FLAGS.max_steps
    batch_size = FLAGS.batch_size
    train_dir = FLAGS.log_dir  # /tmp3/first350/TensorFlow/Logs
    # test_dir = FLAGS.test_dir  # /tmp3/first350/SegNet-Tutorial/CamVid/train.txt
    test_dir = '/home/ml/song/Segnet_tensorflow_3/dataset/test.txt'
    # test_ckpt = FLAGS.testing
    test_ckpt = "/home/ml/song/Segnet_tensorflow_3/path_to_your_log/model.ckpt-19999.meta"
    image_w = FLAGS.image_w
    image_h = FLAGS.image_h
    image_c = FLAGS.image_c
    # testing should set BATCH_SIZE = 1
    batch_size = 1
    image_filenames, label_filenames = get_filename_list(test_dir)
    test_data_node = tf.placeholder(
        tf.float32,
        shape=[batch_size, image_h, image_w, image_c])
    test_labels_node = tf.placeholder(tf.int64, shape=[batch_size, 360, 480, 1])
    phase_train = tf.placeholder(tf.bool, name='phase_train')
    loss, logits = inference(test_data_node, test_labels_node, batch_size, phase_train)
    pred = tf.argmax(logits, axis=3)
    # get moving avg
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)
    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.0001)
    with tf.Session() as sess:
        # Load checkpoint
        # saver.restore(sess, "/home/ml/song/Segnet_tensorflow/path_to_your_log/model.ckpt-19999")
        # saver = tf.train.import_meta_graph('/home/ml/song/Segnet_tensorflow/Logs/model.ckpt-19999.meta')
        saver.restore(sess, '/home/ml/song/Segnet_tensorflow_3/path_to_your_log/model.ckpt-19999')
        images, labels = get_all_test_data(image_filenames, label_filenames)
        threads = tf.train.start_queue_runners(sess=sess)
        hist = np.zeros((NUM_CLASSES, NUM_CLASSES))
        for image_batch, label_batch in zip(images, labels):
            feed_dict = {
                test_data_node: image_batch,
                test_labels_node: label_batch,
                phase_train: False
            }
            print('*'*100)
            print(type(feed_dict))
            print(test_data_node.shape)
            print(test_labels_node.shape)
            print('*'*100)
            print(' -- logits')
            print(' ', logits.shape, type(logits))
            print(logits[0])
            print(' -- pred')
            print(' ', pred.shape, type(pred))
            print('')
            dense_prediction, im = sess.run([logits, pred], feed_dict=feed_dict)
            print(dense_prediction.shape)
            print(im.shape)
            print('*'*100)
            # output_image to verify
            if (FLAGS.save_image):
                writeImage(im[0], 'testing_image.png')
                # writeImage(im[0], 'out_image/'+str(image_filenames[count]).split('/')[-1])
            hist += get_hist(dense_prediction, label_batch)
            # count+=1
        acc_total = np.diag(hist).sum() / hist.sum()
        iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
        print("acc: ", acc_total)
        print("mean IU: ", np.nanmean(iu))
# ------------------------------------------------------------------------------------------------------
from tensorflow.python.client import device_lib

def get_available_gpus():
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos if x.device_type == 'GPU']
# ------------------------------------------------------------------------------------------------------
def training(FLAGS, is_finetune=False):
    max_steps = FLAGS.max_steps
    batch_size = FLAGS.batch_size
    train_dir = FLAGS.log_dir  # /tmp3/first350/TensorFlow/Logs
    # image_dir = FLAGS.image_dir  # /tmp3/first350/SegNet-Tutorial/CamVid/train.txt
    image_dir = "/home/ml/song/Segnet_tensorflow_3/dataset/train.txt"
    # val_dir = FLAGS.val_dir  # /tmp3/first350/SegNet-Tutorial/CamVid/val.txt
    val_dir = "/home/ml/song/Segnet_tensorflow_3/dataset/val.txt"
    finetune_ckpt = FLAGS.finetune
    image_w = FLAGS.image_w
    image_h = FLAGS.image_h
    image_c = FLAGS.image_c
    startstep = 0 if not is_finetune else int(FLAGS.finetune.split('-')[-1])
    image_filenames, label_filenames = get_filename_list(image_dir)
    val_image_filenames, val_label_filenames = get_filename_list(val_dir)
    with tf.Graph().as_default():
        train_data_node = tf.compat.v1.placeholder(tf.float32, shape=[batch_size, image_h, image_w, image_c])
        train_labels_node = tf.compat.v1.placeholder(tf.int64, shape=[batch_size, image_h, image_w, 1])
        phase_train = tf.compat.v1.placeholder(tf.bool, name='phase_train')
        global_step = tf.Variable(0, trainable=False)
        # For CamVid
        images, labels = CamVidInputs(image_filenames, label_filenames, batch_size)
        val_images, val_labels = CamVidInputs(val_image_filenames, val_label_filenames, batch_size)
        # Build a Graph that computes the logits predictions from the inference model.
        loss, eval_prediction = inference(train_data_node, train_labels_node, batch_size, phase_train)
        # Build a Graph that trains the model with one batch of examples and updates the model parameters.
        train_op = train(loss, global_step)
        saver = tf.train.Saver(tf.global_variables())
        summary_op = tf.summary.merge_all()
        # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.0001)
        with tf.Session() as sess:
            # Build an initialization operation to run below.
            if (is_finetune == True):
                saver.restore(sess, finetune_ckpt)
            else:
                init = tf.global_variables_initializer()
                sess.run(init)
            # Start the queue runners.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            # Summary placeholders
            summary_writer = tf.summary.FileWriter(train_dir, sess.graph)
            average_pl = tf.compat.v1.placeholder(tf.float32)
            acc_pl = tf.compat.v1.placeholder(tf.float32)
            iu_pl = tf.compat.v1.placeholder(tf.float32)
            average_summary = tf.summary.scalar("test_average_loss", average_pl)
            acc_summary = tf.summary.scalar("test_accuracy", acc_pl)
            iu_summary = tf.summary.scalar("Mean_IU", iu_pl)
            for step in range(startstep, startstep + max_steps):
                image_batch, label_batch = sess.run([images, labels])
                # since we still use mini-batches in validation, still set bn-layer phase_train = True
                feed_dict = {
                    train_data_node: image_batch,
                    train_labels_node: label_batch,
                    phase_train: True
                }
                start_time = time.time()
                _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
                duration = time.time() - start_time
                assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
                if step % 10 == 0:
                    num_examples_per_step = batch_size
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = float(duration)
                    format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                                  'sec/batch)')
                    print(format_str % (datetime.now(), step, loss_value,
                                        examples_per_sec, sec_per_batch))
                    # eval current training batch per-class accuracy
                    pred = sess.run(eval_prediction, feed_dict=feed_dict)
                    per_class_acc(pred, label_batch)
                if step % 100 == 0:
                    print("start validating.....")
                    total_val_loss = 0.0
                    hist = np.zeros((NUM_CLASSES, NUM_CLASSES))
                    for test_step in range(int(TEST_ITER)):
                        val_images_batch, val_labels_batch = sess.run([val_images, val_labels])
                        _val_loss, _val_pred = sess.run([loss, eval_prediction], feed_dict={
                            train_data_node: val_images_batch,
                            train_labels_node: val_labels_batch,
                            phase_train: True
                        })
                        total_val_loss += _val_loss
                        hist += get_hist(_val_pred, val_labels_batch)
                    print("val loss: ", total_val_loss / TEST_ITER)
                    acc_total = np.diag(hist).sum() / hist.sum()
                    iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
                    test_summary_str = sess.run(average_summary, feed_dict={average_pl: total_val_loss / TEST_ITER})
                    acc_summary_str = sess.run(acc_summary, feed_dict={acc_pl: acc_total})
                    iu_summary_str = sess.run(iu_summary, feed_dict={iu_pl: np.nanmean(iu)})
                    print_hist_summery(hist)
                    print(" end validating.... ")
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, step)
                    summary_writer.add_summary(test_summary_str, step)
                    summary_writer.add_summary(acc_summary_str, step)
                    summary_writer.add_summary(iu_summary_str, step)
                # Save the model checkpoint periodically.
                if step % 1000 == 0 or (step + 1) == max_steps:
                    checkpoint_path = os.path.join(train_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
            coord.request_stop()
            coord.join(threads)
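The shapes in the error message point at the label feed: test_labels_node is declared as [batch_size, 360, 480, 1], but the batch passed in feed_dict has shape (1, 360, 480, 3, 1). A minimal sketch of reshaping the label batch before feeding, assuming the custom loader yields one-hot labels with a trailing singleton axis (adjust to however the loader actually packs the 3 classes):

label_batch = np.squeeze(label_batch, axis=-1)  # (1, 360, 480, 3, 1) -> (1, 360, 480, 3)
label_batch = np.argmax(label_batch, axis=-1)   # one-hot -> integer class indices
label_batch = label_batch[..., np.newaxis]      # -> (1, 360, 480, 1), matching the placeholder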

tensorflow 2, gradients are an empty list

I'm reorganizing my code to make it easier to read, but now it fails with:
ValueError: No gradients provided for any variable: ['enc_conv_4/kernel:0'....
I know my loss function is differentiable, because the code worked before I touched it, but now the gradients for my model are missing.
#tf.function
def train_disc(self, real_imgs, gen_imgs):
    with tf.GradientTape() as disc_tape:
        d_loss = self.wasserstein_loss(real_imgs, gen_imgs)
    gradients_d = disc_tape.gradient(d_loss, self.discriminator.trainable_variables)
    self.d_optimizer.apply_gradients(zip(gradients_d, self.discriminator.trainable_variables))
    return d_loss

#tf.function
def train_gen(self, real_img, gen_imgs, mask, img_feat, rot_feat_mean):
    with tf.GradientTape() as gen_tape:
        g_loss_param = self.generator_loss(mask, img_feat, rot_feat_mean)
        g_loss = g_loss_param(real_img, gen_imgs)
    gradients_g = gen_tape.gradient(g_loss, self.generator.trainable_variables)
    print(gradients_g)
    self.g_optimizer.apply_gradients(zip(gradients_g, self.generator.trainable_variables))
As you can see, I do the same thing for the discriminator and the generator, yet only the generator gives me an empty list of gradients.
gen_imgs = self.generator([real_img, mask], training=True)
d_loss = self.train_disc(real_img, gen_imgs[:, :, :, :-1])
if step % self.n_critic == 0:
    masked_images = real_img * mask
    idx = 3  # index of desired layer
    layer_input = Input(shape=(self.img_shape))
    x = layer_input
    for layer in self.generator.layers[idx:idx+12]:
        x = layer(x)
    model_feat = Model(inputs=layer_input, outputs=x)
    model_feat.trainable = False
    img_feat = model_feat(masked_images, training=False)
    rot_feat_mean = []
    for i in range(self.batch_size):
        rot = []
        for an in [180, 155, 130, 105, 80, 55, 20, 10]:
            r = tf.keras.preprocessing.image.random_rotation(masked_images[i], an, row_axis=0, col_axis=1,
                                                             channel_axis=2)
            rot.append(r)
        rot = np.array(rot)
        rot_feat_mean.append(np.mean(model_feat(rot, training=False), axis=0))
    rot_feat_mean = np.array(rot_feat_mean)
    g_loss = self.train_gen(real_img, gen_imgs[:, :, :, :-1], mask, img_feat, rot_feat_mean)
The last line of this block raises the error. I don't know whether it's due to some semantic mistake.
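One common cause of this error, offered as a hedged guess rather than a confirmed diagnosis: gen_imgs is produced by self.generator before train_gen is called, so the GradientTape inside train_gen never records the generator's forward pass and has no path from g_loss back to the generator's variables. A sketch of moving the forward pass inside the tape:

#tf.function
def train_gen(self, real_img, mask, img_feat, rot_feat_mean):
    with tf.GradientTape() as gen_tape:
        # run the generator inside the tape so its ops are recorded
        gen_imgs = self.generator([real_img, mask], training=True)
        g_loss_param = self.generator_loss(mask, img_feat, rot_feat_mean)
        g_loss = g_loss_param(real_img, gen_imgs[:, :, :, :-1])
    gradients_g = gen_tape.gradient(g_loss, self.generator.trainable_variables)
    self.g_optimizer.apply_gradients(zip(gradients_g, self.generator.trainable_variables))
    return g_loss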

Feeding multiple inputs and outputs in the tensor model?

I have created a multiple-layer model, and now I would like to train it with hundreds of values so it can predict outputs from different inputs. But how should I feed in those inputs? For now I made an array inside an array and fed the inputs and outputs one by one with a training function, but it seems that the second loop re-trains the model, and afterwards it predicts only the second answer correctly. Maybe I don't understand the concept?
import tensorflow as tf
import numpy as np

print("TensorFlow version: {}".format(tf.__version__))
print("Eager execution: {}".format(tf.executing_eagerly()))

x = np.array([[[10, 10, 30, 20], [20, 10, 20, 10],]])
y = np.array([[[10, 10, 100, 10], [100, 10, 10, 10],]])

class Model(object):
    def __init__(self, x, y):
        # get random values.
        self.W = tf.Variable(tf.random.normal((len(x), len(x[0][0]))))
        self.b = tf.Variable(tf.random.normal((len(y),)))
        self.W1 = tf.Variable(tf.random.normal((len(x), len(x[0][0]))))
        self.b1 = tf.Variable(tf.random.normal((len(y),)))
        self.W2 = tf.Variable(tf.random.normal((len(x), len(x[0][0]))))
        self.b2 = tf.Variable(tf.random.normal((len(y),)))

    def __call__(self, x):
        out1 = tf.multiply(x, self.W) + self.b
        out2 = tf.multiply(out1, self.W1) + self.b1
        last_layer = tf.multiply(out2, self.W2) + self.b2
        # Input_Leyer = self.W * x + self.b
        return last_layer

def loss(predicted_y, desired_y):
    return tf.reduce_sum(tf.square(predicted_y - desired_y))

optimizer = tf.optimizers.Adam(0.1)

# noinspection PyPep8Naming
def train(model, inputs, outputs):
    with tf.GradientTape() as t:
        current_loss = loss(model(inputs), outputs)
    grads = t.gradient(current_loss, [model.W, model.b, model.W1, model.b1, model.W2, model.b2])
    optimizer.apply_gradients(zip(grads, [model.W, model.b, model.W1, model.b1, model.W2, model.b2]))
    print(current_loss)

model = Model(x, y)
for i in range(5000):
    train(model, x[0][0], y[0][0])
for i in range(10000):
    train(model, x[0][1], y[0][1])

for i in range(3):
    InputX = np.array([
        [input(), input(), input(), input()],
    ])
    # returning = tf.math.multiply(InputX, model.W, name=None)
    first = tf.multiply(InputX, model.W)
    second = tf.multiply(first, model.W1)
    returning = tf.multiply(second, model.W2)
    print("I predict:", returning)
You have to feed mixed data:
for i in range(5000):
    train(model, x[0][0], y[0][0])
    train(model, x[0][1], y[0][1])
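Since the model broadcasts over the first axis, the two samples can also be stacked into a single batch so every step sees all of the data at once (a sketch using the arrays from the question):

inputs = np.array([[10, 10, 30, 20],
                   [20, 10, 20, 10]], dtype=np.float32)
targets = np.array([[10, 10, 100, 10],
                    [100, 10, 10, 10]], dtype=np.float32)
for i in range(5000):
    train(model, inputs, targets)  # one update over both samples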

No gradients provided for any variable. TF

I am trying to build an implicit quantile network. I built a custom loss function but cannot get it working. I get the error 'no gradients available', yet I believe I only use operations that provide gradients, such as tf.tile, and I don't explicitly cast anything in my loss_kv_iq() function.
Below is the code for my custom layer (IQNlayer), the network I use (IQN), and my custom loss function, plus a small piece of code in the main that should reproduce the error.
TF version: 2.1.0
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np

class IQN(keras.Model):
    def __init__(self, quantile_dims, fc_dims, n_actions, n_quantiles):
        super(IQN, self).__init__()
        self.n_quantiles = n_quantiles
        initializer = keras.initializers.he_uniform()
        self.iq = IQNlayer(quantile_dims, n_quantiles)
        self.dense = keras.layers.Dense(fc_dims, activation='relu', kernel_initializer=initializer)
        self.out = keras.layers.Dense(n_actions, activation=None)

    def call(self, state, tau):
        batch_size, state_size = state.shape
        x = self.iq(state, tau)
        x = self.dense(x)
        x = self.out(x)
        x = tf.transpose(tf.split(x, batch_size, axis=0), perm=[0, 2, 1])
        return x

class IQNlayer(keras.layers.Layer):
    def __init__(self, quantile_dims, n_quantiles):
        super(IQNlayer, self).__init__()
        self.quantile_dims = quantile_dims
        self.n_quantiles = n_quantiles
        self.fc1 = keras.layers.Dense(self.quantile_dims, activation=tf.nn.selu)
        self.fc2 = keras.layers.Dense(self.quantile_dims, activation=tf.nn.relu)

    def call(self, state, tau):
        batch_size, state_size = state.shape
        state_tile = tf.tile(state, [1, self.n_quantiles])
        state_reshape = tf.reshape(state_tile, [-1, state_size])
        state_net = self.fc1(state_reshape)
        tau = tf.reshape(tau, [-1, 1])
        pi_mtx = tf.constant(np.expand_dims(np.pi * np.arange(0, 64), axis=0), dtype=tf.float32)
        cos_tau = tf.cos(tf.matmul(tau, pi_mtx))
        phi = self.fc2(cos_tau)
        net = tf.multiply(state_net, phi)
        return net

def loss_kv_iq(x, tau, action_hot, theta_target):
    expand_dim_action = tf.expand_dims(action_hot, -1)
    main_support = tf.reduce_sum(x * expand_dim_action, axis=1)
    theta_loss_tile = tf.tile(tf.expand_dims(main_support, axis=2), [1, 1, N_QUANTILES])
    logit_valid_tile = tf.tile(tf.expand_dims(theta_target, axis=1), [1, N_QUANTILES, 1])
    Huber_loss = hloss(logit_valid_tile, theta_loss_tile)
    inv_tau = 1 - tau
    tau = tf.tile(tf.expand_dims(tau, axis=1), [1, N_QUANTILES, 1])
    inv_tau = tf.tile(tf.expand_dims(inv_tau, axis=1), [1, N_QUANTILES, 1])
    error_loss = logit_valid_tile - theta_loss_tile
    Loss = tf.where(tf.less(error_loss, 0.0), inv_tau * Huber_loss, tau * Huber_loss)
    loss = tf.reduce_mean(tf.reduce_sum(tf.reduce_mean(Loss, axis=2), axis=1))
    return loss

if __name__ == '__main__':
    hloss = tf.keras.losses.Huber(reduction=tf.keras.losses.Reduction.NONE)
    N_QUANTILES = 10
    BATCH_SIZE = 2
    ACTION_SIZE = 5
    STATE_SIZE = 16
    # FOR EXAMPLE: RANDOM BATCH
    cs = np.random.rand(BATCH_SIZE, STATE_SIZE)
    a = np.random.randint(0, 5, size=(2))
    r = np.random.randint(0, 500, size=(2))
    ns = np.random.rand(BATCH_SIZE, STATE_SIZE)
    tau = np.random.uniform(size=(BATCH_SIZE, N_QUANTILES))
    tau = tau.astype('float32')
    iq = IQN(128, 128, ACTION_SIZE, N_QUANTILES)
    action_hot = np.zeros((BATCH_SIZE, ACTION_SIZE), dtype=np.float32)
    action_hot[np.arange(BATCH_SIZE), a] = 1
    Q = iq(ns, tau)
    theta_target = np.random.rand(BATCH_SIZE, N_QUANTILES)
    theta_target = theta_target.astype('float32')
    optimizer = tf.keras.optimizers.Adam(lr=1e-3)
    with tf.GradientTape() as tape:
        loss = loss_kv_iq(Q, tau, action_hot, theta_target)
    grads = tape.gradient(loss, iq.trainable_weights)
    optimizer.apply_gradients(zip(grads, iq.trainable_weights))
Error:
Traceback (most recent call last):
  File "C:\Users\rensj\.spyder-py3\Thesis\test.py", line 106, in <module>
    optimizer.apply_gradients(zip(grads,iq.trainable_weights))
  File "C:\Users\rensj\Anaconda3\envs\tfnew\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 426, in apply_gradients
    grads_and_vars = _filter_grads(grads_and_vars)
  File "C:\Users\rensj\Anaconda3\envs\tfnew\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 1039, in _filter_grads
    ([v.name for _, v in grads_and_vars],))
ValueError: No gradients provided for any variable: ['iqn_4/iq_nlayer_4/dense_16/kernel:0', 'iqn_4/iq_nlayer_4/dense_16/bias:0', 'iqn_4/iq_nlayer_4/dense_17/kernel:0', 'iqn_4/iq_nlayer_4/dense_17/bias:0', 'iqn_4/dense_18/kernel:0', 'iqn_4/dense_18/bias:0', 'iqn_4/dense_19/kernel:0', 'iqn_4/dense_19/bias:0'].
EDIT:
As Mr. Agrawal pointed out, I used numpy operations in pi_mtx. I changed these to their TensorFlow counterparts, which together with some other small changes to the same line gives:
pi_mtx = tf.constant(tf.expand_dims(tf.constant(np.pi) * tf.range(0, 64, dtype=tf.float32), axis=0), dtype=tf.float32)
However, I keep getting the same ValueError: No gradients provided.
In the line
pi_mtx = tf.constant(np.expand_dims(np.pi * np.arange(0, 64), axis=0), dtype=tf.float32)
you're using numpy functions. Change them to their TensorFlow counterparts:
np.expand_dims -> tf.expand_dims
np.arange -> tf.keras.backend.arange or tf.range
You can keep np.pi, since that is a constant, not an operation.
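Another thing worth checking in the example, offered as a hedged observation rather than a confirmed fix: Q = iq(ns, tau) runs before the tape is opened, so the network's forward pass is never recorded and tape.gradient has no path from loss back to iq.trainable_weights. Moving the forward pass inside the tape:

with tf.GradientTape() as tape:
    Q = iq(ns, tau)  # forward pass recorded by the tape
    loss = loss_kv_iq(Q, tau, action_hot, theta_target)
grads = tape.gradient(loss, iq.trainable_weights)
optimizer.apply_gradients(zip(grads, iq.trainable_weights))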

Using multiple Tensorflow Restored Graphs and Sessions

I am attempting to reload a session and graph inside a function to process more data, but I keep getting errors about using uninitialized variables.
I have tried reusing my GRU cell, to no avail.
I am currently resorting to loading each model in a daemon thread that watches a list for incoming data and appends its predictions back to a list, instead of returning the predictions from a function.
model = {
    'chunk_size': 9,
    'num_chunk': 31,
    'rnn_size': 18,
    'rnn_classes': 2
}

graphx = tf.Graph()
sess = tf.Session(graph=graphx)
save_path = 'models/rnn_1d/rnn_1d.ckpt'

def loadModel(model, graphx, sess, save_path):
    with graphx.as_default():
        chunk_size = model['chunk_size']
        num_chunks = model['num_chunk']  # need to update to num_chunks in model creator
        rnn_size = model['rnn_size']
        rnn_classes = model['rnn_classes']
        X = tf.placeholder(dtype=tf.float32, shape=[None, num_chunks, chunk_size])
        Y = tf.placeholder(dtype=tf.float32)

        def rnn_model(x):
            weight_initializer = tf.variance_scaling_initializer(mode="fan_avg", distribution="uniform", scale=1)
            bias_initializer = tf.zeros_initializer()
            layer = {'weights': tf.Variable(weight_initializer([rnn_size, rnn_classes])),
                     'biases': tf.Variable(bias_initializer([rnn_classes]))}
            x = tf.transpose(x, [1, 0, 2])
            x = tf.reshape(x, [-1, chunk_size])
            x = tf.split(x, num_chunks, 0)
            lstm_cell = rnn_cell.GRUCell(rnn_size)
            outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
            output = tf.add(tf.matmul(outputs[-1], layer['weights']), layer['biases'])
            return output

        prediction = rnn_model(X)
        saver = tf.train.Saver()
        saver.restore(sess, save_path)
        print(' loaded')
        return sess, graphx

def feedModel(model, sess, graphx, Set):
    with graphx.as_default():
        chunk_size = model['chunk_size']
        num_chunks = model['num_chunk']  # need to update to num_chunks in model creator
        rnn_size = model['rnn_size']
        rnn_classes = model['rnn_classes']
        X = tf.placeholder(dtype=tf.float32, shape=[None, num_chunks, chunk_size])
        Y = tf.placeholder(dtype=tf.float32)

        def rnn_model(x):
            weight_initializer = tf.variance_scaling_initializer(mode="fan_avg", distribution="uniform", scale=1)
            bias_initializer = tf.zeros_initializer()
            layer = {'weights': tf.Variable(weight_initializer([rnn_size, rnn_classes])),
                     'biases': tf.Variable(bias_initializer([rnn_classes]))}
            x = tf.transpose(x, [1, 0, 2])
            x = tf.reshape(x, [-1, chunk_size])
            x = tf.split(x, num_chunks, 0)
            lstm_cell = rnn_cell.GRUCell(rnn_size, reuse=tf.AUTO_REUSE)
            outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
            output = tf.add(tf.matmul(outputs[-1], layer['weights']), layer['biases'])
            return output

        prediction = rnn_model(X)
        prediction = sess.run(prediction, feed_dict={X: Set})
        return prediction

sess, graphx = loadModel(model, graphx, sess, save_path)
print(feedModel(model, sess, graphx, np.ones((1, 31, 9))))
It looks like you're completely (and I think unnecessarily) recreating your model in feedModel. All of those ops are already defined in graphx, which you pass in. You presumably initialize them in some other code (the one that saves the session) using something like tf.global_variables_initializer().run(). The new variables you define in feedModel won't be initialized, and I suspect that's why you're getting the uninitialized variable error.
It looks to me like feedModel should simply be:
def feedModel(model, sess, graphx, Set):
    with graphx.as_default():
        prediction = sess.run(prediction, feed_dict={X: Set})
    return prediction
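For that to work, prediction and X must be the tensors built in loadModel rather than names local to feedModel. A minimal sketch under that assumption, with loadModel changed to return them (rnn_model here stands for the same inner function defined in the question):

def loadModel(model, graphx, sess, save_path):
    with graphx.as_default():
        X = tf.placeholder(dtype=tf.float32, shape=[None, model['num_chunk'], model['chunk_size']])
        prediction = rnn_model(X)  # build the graph exactly once
        tf.train.Saver().restore(sess, save_path)
    return sess, graphx, X, prediction

def feedModel(sess, graphx, X, prediction, Set):
    with graphx.as_default():
        return sess.run(prediction, feed_dict={X: Set})

sess, graphx, X, prediction = loadModel(model, graphx, sess, save_path)
print(feedModel(sess, graphx, X, prediction, np.ones((1, 31, 9))))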
