Related
I need your help in my segnet.
I cloned this repository in git.
https://github.com/tkuanlun350/Tensorflow-SegNet
And I changed dataset to my custom data consisting of 3 classes including the background.
But This error occurred when I executed the test command.
ValueError: Cannot feed value of shape (1, 360, 480, 3, 1) for Tensor Placeholder_1:0, which has shape (1, 360, 480, 1)
How to solve this problem?
I'm not good at tensorflow. Please Help me...
Here is my code.
python 3.7
tensorflow 2.0
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
import os, sys
import numpy as np
import math
from datetime import datetime
import time
from PIL import Image
from math import ceil
from tensorflow.python.ops import gen_nn_ops
# modules
from Utils import _variable_with_weight_decay, _variable_on_cpu, _add_loss_summaries, _activation_summary, print_hist_summery, get_hist, per_class_acc, writeImage
from Inputs import *
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
os.environ["CUDA_VISIBLE_DEVICES"]="0, 1, 2, 3"
# gpus = tf.config.experimental.list_physical_devices('GPU')
gpus = tf.config.list_physical_devices('GPU')
for i in range(len(gpus)):
tf.config.experimental.set_memory_growth(gpus[i], True)
# Constants describing the training process.
MOVING_AVERAGE_DECAY = 0.9999 # The decay to use for the moving average.
NUM_EPOCHS_PER_DECAY = 350.0 # Epochs after which learning rate decays.
LEARNING_RATE_DECAY_FACTOR = 0.1 # Learning rate decay factor.
INITIAL_LEARNING_RATE = 0.001 # Initial learning rate.
EVAL_BATCH_SIZE = 8
BATCH_SIZE = 8
# for CamVid
IMAGE_HEIGHT = 360
IMAGE_WIDTH = 480
IMAGE_DEPTH = 3
NUM_CLASSES = 3
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 367
NUM_EXAMPLES_PER_EPOCH_FOR_TEST = 101
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 1
TEST_ITER = NUM_EXAMPLES_PER_EPOCH_FOR_TEST / BATCH_SIZE
def msra_initializer(kl, dl):
"""
kl for kernel size, dl for filter number
"""
stddev = math.sqrt(2. / (kl**2 * dl))
return tf.truncated_normal_initializer(stddev=stddev)
def orthogonal_initializer(scale = 1.1):
''' From Lasagne and Keras. Reference: Saxe et al., http://arxiv.org/abs/1312.6120
'''
def _initializer(shape, dtype=tf.float32, partition_info=None):
flat_shape = (shape[0], np.prod(shape[1:]))
a = np.random.normal(0.0, 1.0, flat_shape)
u, _, v = np.linalg.svd(a, full_matrices=False)
# pick the one with the correct shape
q = u if u.shape == flat_shape else v
q = q.reshape(shape) #this needs to be corrected to float32
return tf.constant(scale * q[:shape[0], :shape[1]], dtype=tf.float32)
return _initializer
def loss(logits, labels):
"""
loss func without re-weighting
"""
# Calculate the average cross entropy loss across the batch.
logits = tf.reshape(logits, (-1,NUM_CLASSES))
labels = tf.reshape(labels, [-1])
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits=logits, labels=labels, name='cross_entropy_per_example')
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
tf.add_to_collection('losses', cross_entropy_mean)
return tf.add_n(tf.get_collection('losses'), name='total_loss')
def weighted_loss(logits, labels, num_classes, head=None):
""" median-frequency re-weighting """
with tf.name_scope('loss'):
logits = tf.reshape(logits, (-1, num_classes))
epsilon = tf.constant(value=1e-10)
logits = logits + epsilon
# consturct one-hot label array
label_flat = tf.reshape(labels, (-1, 1))
# should be [batch ,num_classes]
labels = tf.reshape(tf.one_hot(label_flat, depth=num_classes), (-1, num_classes))
softmax = tf.nn.softmax(logits)
cross_entropy = -tf.reduce_sum(tf.multiply(labels * tf.log(softmax + epsilon), head), axis=[1])
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
tf.add_to_collection('losses', cross_entropy_mean)
loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
return loss
def cal_loss(logits, labels):
loss_weight = np.array([
0.2595,
0.3826,
1.0974]) # class 0~11
labels = tf.cast(labels, tf.int32)
# return loss(logits, labels)
return weighted_loss(logits, labels, num_classes=NUM_CLASSES, head=loss_weight)
def conv_layer_with_bn(inputT, shape, train_phase, activation=True, name=None):
in_channel = shape[2]
out_channel = shape[3]
k_size = shape[0]
with tf.variable_scope(name) as scope:
kernel = _variable_with_weight_decay('ort_weights', shape=shape, initializer=orthogonal_initializer(), wd=None)
conv = tf.nn.conv2d(inputT, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [out_channel], tf.constant_initializer(0.0))
bias = tf.nn.bias_add(conv, biases)
if activation is True:
conv_out = tf.nn.relu(batch_norm_layer(bias, train_phase, scope.name))
else:
conv_out = batch_norm_layer(bias, train_phase, scope.name)
return conv_out
def get_deconv_filter(f_shape):
"""
reference: https://github.com/MarvinTeichmann/tensorflow-fcn
"""
width = f_shape[0]
heigh = f_shape[0]
f = ceil(width/2.0)
c = (2 * f - 1 - f % 2) / (2.0 * f)
bilinear = np.zeros([f_shape[0], f_shape[1]])
for x in range(width):
for y in range(heigh):
value = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
bilinear[x, y] = value
weights = np.zeros(f_shape)
for i in range(f_shape[2]):
weights[:, :, i, i] = bilinear
init = tf.constant_initializer(value=weights,
dtype=tf.float32)
return tf.get_variable(name="up_filter", initializer=init,
shape=weights.shape)
def deconv_layer(inputT, f_shape, output_shape, stride=2, name=None):
# output_shape = [b, w, h, c]
# sess_temp = tf.InteractiveSession()
sess_temp = tf.global_variables_initializer()
strides = [1, stride, stride, 1]
with tf.variable_scope(name):
weights = get_deconv_filter(f_shape)
deconv = tf.nn.conv2d_transpose(inputT, weights, output_shape,
strides=strides, padding='SAME')
return deconv
def batch_norm_layer(inputT, is_training, scope):
return tf.cond(is_training,
lambda: tf.keras.layers.BatchNormalization(center=False)(inputT),
lambda: tf.keras.layers.BatchNormalization(center=False)(inputT))
def inference(images, labels, batch_size, phase_train):
# norm1
norm1 = tf.nn.lrn(images, depth_radius=5, bias=1.0, alpha=0.0001, beta=0.75,
name='norm1')
# conv1
conv1 = conv_layer_with_bn(norm1, [7, 7, images.get_shape().as_list()[3], 64], phase_train, name="conv1")
print(conv1.shape)
# pool1
pool1, pool1_indices = tf.nn.max_pool_with_argmax(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
padding='SAME', name='pool1')
# conv2
conv2 = conv_layer_with_bn(pool1, [7, 7, 64, 64], phase_train, name="conv2")
# pool2
pool2, pool2_indices = tf.nn.max_pool_with_argmax(conv2, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME', name='pool2')
# conv3
conv3 = conv_layer_with_bn(pool2, [7, 7, 64, 64], phase_train, name="conv3")
# pool3
pool3, pool3_indices = tf.nn.max_pool_with_argmax(conv3, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME', name='pool3')
# conv4
conv4 = conv_layer_with_bn(pool3, [7, 7, 64, 64], phase_train, name="conv4")
# pool4
pool4, pool4_indices = tf.nn.max_pool_with_argmax(conv4, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME', name='pool4')
""" End of encoder """
""" start upsample """
# upsample4
# Need to change when using different dataset out_w, out_h
# upsample4 = upsample_with_pool_indices(pool4, pool4_indices, pool4.get_shape(), out_w=45, out_h=60, scale=2, name='upsample4')
upsample4 = deconv_layer(pool4, [2, 2, 64, 64], [batch_size, 45, 60, 64], 2, "up4")
# decode 4
conv_decode4 = conv_layer_with_bn(upsample4, [7, 7, 64, 64], phase_train, False, name="conv_decode4")
# upsample 3
# upsample3 = upsample_with_pool_indices(conv_decode4, pool3_indices, conv_decode4.get_shape(), scale=2, name='upsample3')
upsample3= deconv_layer(conv_decode4, [2, 2, 64, 64], [batch_size, 90, 120, 64], 2, "up3")
# decode 3
conv_decode3 = conv_layer_with_bn(upsample3, [7, 7, 64, 64], phase_train, False, name="conv_decode3")
# upsample2
# upsample2 = upsample_with_pool_indices(conv_decode3, pool2_indices, conv_decode3.get_shape(), scale=2, name='upsample2')
upsample2= deconv_layer(conv_decode3, [2, 2, 64, 64], [batch_size, 180, 240, 64], 2, "up2")
# decode 2
conv_decode2 = conv_layer_with_bn(upsample2, [7, 7, 64, 64], phase_train, False, name="conv_decode2")
# upsample1
# upsample1 = upsample_with_pool_indices(conv_decode2, pool1_indices, conv_decode2.get_shape(), scale=2, name='upsample1')
upsample1= deconv_layer(conv_decode2, [2, 2, 64, 64], [batch_size, 360, 480, 64], 2, "up1")
# decode4
conv_decode1 = conv_layer_with_bn(upsample1, [7, 7, 64, 64], phase_train, False, name="conv_decode1")
""" end of Decode """
""" Start Classify """
# output predicted class number (6)
with tf.variable_scope('conv_classifier') as scope:
kernel = _variable_with_weight_decay('weights',
shape=[1, 1, 64, NUM_CLASSES],
initializer=msra_initializer(1, 64),
wd=0.0005)
conv = tf.nn.conv2d(conv_decode1, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [NUM_CLASSES], tf.constant_initializer(0.0))
conv_classifier = tf.nn.bias_add(conv, biases, name=scope.name)
logit = conv_classifier
loss = cal_loss(conv_classifier, labels)
return loss, logit
def train(total_loss, global_step):
total_sample = 274
num_batches_per_epoch = 274/1
""" fix lr """
lr = INITIAL_LEARNING_RATE
loss_averages_op = _add_loss_summaries(total_loss)
# Compute gradients.
with tf.control_dependencies([loss_averages_op]):
opt = tf.train.AdamOptimizer(lr)
grads = opt.compute_gradients(total_loss)
apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
# Add histograms for trainable variables.
for var in tf.trainable_variables():
tf.summary.histogram(var.op.name, var)
# Add histograms for gradients.
for grad, var in grads:
if grad is not None:
tf.summary.histogram(var.op.name + '/gradients', grad)
# Track the moving averages of all trainable variables.
variable_averages = tf.train.ExponentialMovingAverage(
MOVING_AVERAGE_DECAY, global_step)
variables_averages_op = variable_averages.apply(tf.trainable_variables())
with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
train_op = tf.no_op(name='train')
return train_op
def test(FLAGS):
max_steps = FLAGS.max_steps
batch_size = FLAGS.batch_size
train_dir = FLAGS.log_dir # /tmp3/first350/TensorFlow/Logs
# test_dir = FLAGS.test_dir # /tmp3/first350/SegNet-Tutorial/CamVid/train.txt
test_dir = '/home/ml/song/Segnet_tensorflow_3/dataset/test.txt'
# test_ckpt = FLAGS.testing
test_ckpt = "/home/ml/song/Segnet_tensorflow_3/path_to_your_log/model.ckpt-19999.meta"
image_w = FLAGS.image_w
image_h = FLAGS.image_h
image_c = FLAGS.image_c
# testing should set BATCH_SIZE = 1
batch_size = 1
image_filenames, label_filenames = get_filename_list(test_dir)
test_data_node = tf.placeholder(
tf.float32,
shape=[batch_size, image_h, image_w, image_c])
test_labels_node = tf.placeholder(tf.int64, shape=[batch_size, 360, 480, 1])
phase_train = tf.placeholder(tf.bool, name='phase_train')
loss, logits = inference(test_data_node, test_labels_node, batch_size, phase_train)
pred = tf.argmax(logits, axis=3)
# get moving avg
variable_averages = tf.train.ExponentialMovingAverage(
MOVING_AVERAGE_DECAY)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
# gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.0001)
with tf.Session() as sess:
# Load checkpoint
# saver.restore(sess, "/home/ml/song/Segnet_tensorflow/path_to_your_log/model.ckpt-19999")
# saver = tf.train.import_meta_graph('/home/ml/song/Segnet_tensorflow/Logs/model.ckpt-19999.meta')
saver.restore(sess, '/home/ml/song/Segnet_tensorflow_3/path_to_your_log/model.ckpt-19999')
images, labels = get_all_test_data(image_filenames, label_filenames)
threads = tf.train.start_queue_runners(sess=sess)
hist = np.zeros((NUM_CLASSES, NUM_CLASSES))
for image_batch, label_batch in zip(images, labels):
feed_dict = {
test_data_node: image_batch,
test_labels_node: label_batch,
phase_train: False
}
print('*'*100)
print(type(feed_dict))
print(test_data_node.shape)
print(test_labels_node.shape)
print('*'*100)
print(' -- logits')
print(' ', logits.shape, type(logits))
print(logits[0])
print(' -- pred')
print(' ', pred.shape, type(pred))
print('')
dense_prediction, im = sess.run([logits, pred], feed_dict=feed_dict)
print(dense_prediction.shape)
print(im.shape)
print('*'*100)
# output_image to verify
if (FLAGS.save_image):
writeImage(im[0], 'testing_image.png')
# writeImage(im[0], 'out_image/'+str(image_filenames[count]).split('/')[-1])
hist += get_hist(dense_prediction, label_batch)
# count+=1
acc_total = np.diag(hist).sum() / hist.sum()
iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
print("acc: ", acc_total)
print("mean IU: ", np.nanmean(iu))
# ------------------------------------------------------------------------------------------------------
from tensorflow.python.client import device_lib
def get_available_gpus():
local_device_protos = device_lib.list_local_devices()
return [x.name for x in local_device_protos if x.device_type == 'GPU']
# ------------------------------------------------------------------------------------------------------
def training(FLAGS, is_finetune=False):
max_steps = FLAGS.max_steps
batch_size = FLAGS.batch_size
train_dir = FLAGS.log_dir # /tmp3/first350/TensorFlow/Logs
#image_dir = FLAGS.image_dir # /tmp3/first350/SegNet-Tutorial/CamVid/train.txt
image_dir = "/home/ml/song/Segnet_tensorflow_3/dataset/train.txt"
#val_dir = FLAGS.val_dir # /tmp3/first350/SegNet-Tutorial/CamVid/val.txt
val_dir = "/home/ml/song/Segnet_tensorflow_3/dataset/val.txt"
finetune_ckpt = FLAGS.finetune
image_w = FLAGS.image_w
image_h = FLAGS.image_h
image_c = FLAGS.image_c
startstep = 0 if not is_finetune else int(FLAGS.finetune.split('-')[-1])
image_filenames, label_filenames = get_filename_list(image_dir)
val_image_filenames, val_label_filenames = get_filename_list(val_dir)
with tf.Graph().as_default():
train_data_node = tf.compat.v1.placeholder( tf.float32, shape=[batch_size, image_h, image_w, image_c])
train_labels_node = tf.compat.v1.placeholder(tf.int64, shape=[batch_size, image_h, image_w, 1])
phase_train = tf.compat.v1.placeholder(tf.bool, name='phase_train')
global_step = tf.Variable(0, trainable=False)
# For CamVid
images, labels = CamVidInputs(image_filenames, label_filenames, batch_size)
val_images, val_labels = CamVidInputs(val_image_filenames, val_label_filenames, batch_size)
# Build a Graph that computes the logits predictions from the inference model.
loss, eval_prediction = inference(train_data_node, train_labels_node, batch_size, phase_train)
# Build a Graph that trains the model with one batch of examples and updates the model parameters.
train_op = train(loss, global_step)
saver = tf.train.Saver(tf.global_variables())
summary_op = tf.summary.merge_all()
# gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.0001)
with tf.Session() as sess:
# Build an initialization operation to run below.
if (is_finetune == True):
saver.restore(sess, finetune_ckpt )
else:
init = tf.global_variables_initializer()
sess.run(init)
# Start the queue runners.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
# Summery placeholders
summary_writer = tf.summary.FileWriter(train_dir, sess.graph)
average_pl = tf.compat.v1.placeholder(tf.float32)
acc_pl = tf.compat.v1.placeholder(tf.float32)
iu_pl = tf.compat.v1.placeholder(tf.float32)
average_summary = tf.summary.scalar("test_average_loss", average_pl)
acc_summary = tf.summary.scalar("test_accuracy", acc_pl)
iu_summary = tf.summary.scalar("Mean_IU", iu_pl)
for step in range(startstep, startstep + max_steps):
image_batch ,label_batch = sess.run([images, labels])
# since we still use mini-batches in validation, still set bn-layer phase_train = True
feed_dict = {
train_data_node: image_batch,
train_labels_node: label_batch,
phase_train: True
}
start_time = time.time()
_, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
duration = time.time() - start_time
assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
if step % 10 == 0:
num_examples_per_step = batch_size
examples_per_sec = num_examples_per_step / duration
sec_per_batch = float(duration)
format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
'sec/batch)')
print (format_str % (datetime.now(), step, loss_value,
examples_per_sec, sec_per_batch))
# eval current training batch pre-class accuracy
pred = sess.run(eval_prediction, feed_dict=feed_dict)
per_class_acc(pred, label_batch)
if step % 100 == 0:
print("start validating.....")
total_val_loss = 0.0
hist = np.zeros((NUM_CLASSES, NUM_CLASSES))
for test_step in range(int(TEST_ITER)):
val_images_batch, val_labels_batch = sess.run([val_images, val_labels])
_val_loss, _val_pred = sess.run([loss, eval_prediction], feed_dict={
train_data_node: val_images_batch,
train_labels_node: val_labels_batch,
phase_train: True
})
total_val_loss += _val_loss
hist += get_hist(_val_pred, val_labels_batch)
print("val loss: ", total_val_loss / TEST_ITER)
acc_total = np.diag(hist).sum() / hist.sum()
iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
test_summary_str = sess.run(average_summary, feed_dict={average_pl: total_val_loss / TEST_ITER})
acc_summary_str = sess.run(acc_summary, feed_dict={acc_pl: acc_total})
iu_summary_str = sess.run(iu_summary, feed_dict={iu_pl: np.nanmean(iu)})
print_hist_summery(hist)
print(" end validating.... ")
summary_str = sess.run(summary_op, feed_dict=feed_dict)
summary_writer.add_summary(summary_str, step)
summary_writer.add_summary(test_summary_str, step)
summary_writer.add_summary(acc_summary_str, step)
summary_writer.add_summary(iu_summary_str, step)
# Save the model checkpoint periodically.
if step % 1000 == 0 or (step + 1) == max_steps:
checkpoint_path = os.path.join(train_dir, 'model.ckpt')
saver.save(sess, checkpoint_path, global_step=step)
coord.request_stop()
coord.join(threads)
I had a error when I tried predict my data with saved_weight model. My structure program was like this repository, but I had little enhancement for my model.
Here my edited code in models/faster_rcnn.py
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, Lambda, Input, Conv2D, TimeDistributed, Dense, Flatten, BatchNormalization, Dropout
from ..utils import bbox_utils, train_utils
class Decoder(Layer):
"""Generating bounding boxes and labels from faster rcnn predictions.
First calculating the boxes from predicted deltas and label probs.
Then applied non max suppression and selecting top_n boxes by scores.
inputs:
roi_bboxes = (batch_size, roi_bbox_size, [y1, x1, y2, x2])
pred_deltas = (batch_size, roi_bbox_size, total_labels * [delta_y, delta_x, delta_h, delta_w])
pred_label_probs = (batch_size, roi_bbox_size, total_labels)
outputs:
pred_bboxes = (batch_size, top_n, [y1, x1, y2, x2])
pred_labels = (batch_size, top_n)
1 to total label number
pred_scores = (batch_size, top_n)
"""
def __init__(self, variances, total_labels, max_total_size=200, score_threshold=0.67, **kwargs):
super(Decoder, self).__init__(**kwargs)
self.variances = variances
self.total_labels = total_labels
self.max_total_size = max_total_size
self.score_threshold = score_threshold
def get_config(self):
config = super(Decoder, self).get_config()
config.update({
"variances": self.variances,
"total_labels": self.total_labels,
"max_total_size": self.max_total_size,
"score_threshold": self.score_threshold
})
return config
def call(self, inputs):
roi_bboxes = inputs[0]
pred_deltas = inputs[1]
pred_label_probs = inputs[2]
batch_size = tf.shape(pred_deltas)[0]
#
pred_deltas = tf.reshape(pred_deltas, (batch_size, -1, self.total_labels, 4))
pred_deltas *= self.variances
#
expanded_roi_bboxes = tf.tile(tf.expand_dims(roi_bboxes, -2), (1, 1, self.total_labels, 1))
pred_bboxes = bbox_utils.get_bboxes_from_deltas(expanded_roi_bboxes, pred_deltas)
#
pred_labels_map = tf.expand_dims(tf.argmax(pred_label_probs, -1), -1)
pred_labels = tf.where(tf.not_equal(pred_labels_map, 0), pred_label_probs, tf.zeros_like(pred_label_probs))
#
final_bboxes, final_scores, final_labels, _ = bbox_utils.non_max_suppression(
pred_bboxes, pred_labels,
max_output_size_per_class=self.max_total_size,
max_total_size=self.max_total_size,
score_threshold=self.score_threshold)
#
return final_bboxes, final_labels, final_scores
class RoIBBox(Layer):
"""Generating bounding boxes from rpn predictions.
First calculating the boxes from predicted deltas and label probs.
Then applied non max suppression and selecting "train or test nms_topn" boxes.
inputs:
rpn_bbox_deltas = (batch_size, img_output_height, img_output_width, anchor_count * [delta_y, delta_x, delta_h, delta_w])
img_output_height and img_output_width are calculated to the base model feature map
rpn_labels = (batch_size, img_output_height, img_output_width, anchor_count)
outputs:
roi_bboxes = (batch_size, train/test_nms_topn, [y1, x1, y2, x2])
"""
def __init__(self, anchors, mode, hyper_params, **kwargs):
super(RoIBBox, self).__init__(**kwargs)
self.hyper_params = hyper_params
self.mode = mode
self.anchors = tf.constant(anchors, dtype=tf.float32)
def get_config(self):
config = super(RoIBBox, self).get_config()
config.update({"hyper_params": self.hyper_params, "anchors": self.anchors.numpy(), "mode": self.mode})
return config
def call(self, inputs):
rpn_bbox_deltas = inputs[0]
rpn_labels = inputs[1]
anchors = self.anchors
#
pre_nms_topn = self.hyper_params["pre_nms_topn"]
post_nms_topn = self.hyper_params["train_nms_topn"] if self.mode == "training" else self.hyper_params["test_nms_topn"]
nms_iou_threshold = self.hyper_params["nms_iou_threshold"]
variances = self.hyper_params["variances"]
total_anchors = anchors.shape[0]
batch_size = tf.shape(rpn_bbox_deltas)[0]
rpn_bbox_deltas = tf.reshape(rpn_bbox_deltas, (batch_size, total_anchors, 4))
rpn_labels = tf.reshape(rpn_labels, (batch_size, total_anchors))
#
rpn_bbox_deltas *= variances
rpn_bboxes = bbox_utils.get_bboxes_from_deltas(anchors, rpn_bbox_deltas)
#
_, pre_indices = tf.nn.top_k(rpn_labels, pre_nms_topn)
#
pre_roi_bboxes = tf.gather(rpn_bboxes, pre_indices, batch_dims=1)
pre_roi_labels = tf.gather(rpn_labels, pre_indices, batch_dims=1)
#
pre_roi_bboxes = tf.reshape(pre_roi_bboxes, (batch_size, pre_nms_topn, 1, 4))
pre_roi_labels = tf.reshape(pre_roi_labels, (batch_size, pre_nms_topn, 1))
#
roi_bboxes, _, _, _ = bbox_utils.non_max_suppression(pre_roi_bboxes, pre_roi_labels,
max_output_size_per_class=post_nms_topn,
max_total_size=post_nms_topn,
iou_threshold=nms_iou_threshold)
#
return tf.stop_gradient(roi_bboxes)
class RoIDelta(Layer):
"""Calculating faster rcnn actual bounding box deltas and labels.
This layer only running on the training phase.
inputs:
roi_bboxes = (batch_size, nms_topn, [y1, x1, y2, x2])
gt_boxes = (batch_size, padded_gt_boxes_size, [y1, x1, y2, x2])
gt_labels = (batch_size, padded_gt_boxes_size)
outputs:
roi_bbox_deltas = (batch_size, train_nms_topn * total_labels, [delta_y, delta_x, delta_h, delta_w])
roi_bbox_labels = (batch_size, train_nms_topn, total_labels)
"""
def __init__(self, hyper_params, **kwargs):
super(RoIDelta, self).__init__(**kwargs)
self.hyper_params = hyper_params
def get_config(self):
config = super(RoIDelta, self).get_config()
config.update({"hyper_params": self.hyper_params})
return config
def call(self, inputs):
roi_bboxes = inputs[0]
gt_boxes = inputs[1]
gt_labels = inputs[2]
total_labels = self.hyper_params["total_labels"]
total_pos_bboxes = self.hyper_params["total_pos_bboxes"]
total_neg_bboxes = self.hyper_params["total_neg_bboxes"]
variances = self.hyper_params["variances"]
batch_size, total_bboxes = tf.shape(roi_bboxes)[0], tf.shape(roi_bboxes)[1]
# Calculate iou values between each bboxes and ground truth boxes
iou_map = bbox_utils.generate_iou_map(roi_bboxes, gt_boxes)
# Get max index value for each row
max_indices_each_gt_box = tf.argmax(iou_map, axis=2, output_type=tf.int32)
# IoU map has iou values for every gt boxes and we merge these values column wise
merged_iou_map = tf.reduce_max(iou_map, axis=2)
#
pos_mask = tf.greater(merged_iou_map, 0.67)
pos_mask = train_utils.randomly_select_xyz_mask(pos_mask, tf.constant([total_pos_bboxes], dtype=tf.int32))
#
neg_mask = tf.logical_and(tf.less(merged_iou_map, 0.47), tf.greater(merged_iou_map, 0.1))
neg_mask = train_utils.randomly_select_xyz_mask(neg_mask, tf.constant([total_neg_bboxes], dtype=tf.int32))
#
gt_boxes_map = tf.gather(gt_boxes, max_indices_each_gt_box, batch_dims=1)
expanded_gt_boxes = tf.where(tf.expand_dims(pos_mask, axis=-1), gt_boxes_map, tf.zeros_like(gt_boxes_map))
#
gt_labels_map = tf.gather(gt_labels, max_indices_each_gt_box, batch_dims=1)
pos_gt_labels = tf.where(pos_mask, gt_labels_map, tf.constant(-1, dtype=tf.int32))
neg_gt_labels = tf.cast(neg_mask, dtype=tf.int32)
expanded_gt_labels = pos_gt_labels + neg_gt_labels
#
roi_bbox_deltas = bbox_utils.get_deltas_from_bboxes(roi_bboxes, expanded_gt_boxes) / variances
#
roi_bbox_labels = tf.one_hot(expanded_gt_labels, total_labels)
scatter_indices = tf.tile(tf.expand_dims(roi_bbox_labels, -1), (1, 1, 1, 4))
roi_bbox_deltas = scatter_indices * tf.expand_dims(roi_bbox_deltas, -2)
roi_bbox_deltas = tf.reshape(roi_bbox_deltas, (batch_size, total_bboxes * total_labels, 4))
#
return tf.stop_gradient(roi_bbox_deltas), tf.stop_gradient(roi_bbox_labels)
class RoIPooling(Layer):
"""Reducing all feature maps to same size.
Firstly cropping bounding boxes from the feature maps and then resizing it to the pooling size.
inputs:
feature_map = (batch_size, img_output_height, img_output_width, channels)
roi_bboxes = (batch_size, train/test_nms_topn, [y1, x1, y2, x2])
outputs:
final_pooling_feature_map = (batch_size, train/test_nms_topn, pooling_size[0], pooling_size[1], channels)
pooling_size usually (7, 7)
"""
def __init__(self, hyper_params, **kwargs):
super(RoIPooling, self).__init__(**kwargs)
self.hyper_params = hyper_params
def get_config(self):
config = super(RoIPooling, self).get_config()
config.update({"hyper_params": self.hyper_params})
return config
def call(self, inputs):
feature_map = inputs[0]
roi_bboxes = inputs[1]
pooling_size = self.hyper_params["pooling_size"]
batch_size, total_bboxes = tf.shape(roi_bboxes)[0], tf.shape(roi_bboxes)[1]
#
row_size = batch_size * total_bboxes
# We need to arange bbox indices for each batch
pooling_bbox_indices = tf.tile(tf.expand_dims(tf.range(batch_size), axis=1), (1, total_bboxes))
pooling_bbox_indices = tf.reshape(pooling_bbox_indices, (-1, ))
pooling_bboxes = tf.reshape(roi_bboxes, (row_size, 4))
# Crop to bounding box size then resize to pooling size
pooling_feature_map = tf.image.crop_and_resize(
feature_map,
pooling_bboxes,
pooling_bbox_indices,
pooling_size
)
final_pooling_feature_map = tf.reshape(pooling_feature_map, (batch_size, total_bboxes, pooling_feature_map.shape[1], pooling_feature_map.shape[2], pooling_feature_map.shape[3]))
return final_pooling_feature_map
def get_model_frcnn(feature_extractor, rpn_model, anchors, hyper_params, mode="training"):
"""Generating rpn model for given backbone base model and hyper params.
inputs:
feature_extractor = feature extractor layer from the base model
rpn_model = tf.keras.model generated rpn model
anchors = (total_anchors, [y1, x1, y2, x2])
these values in normalized format between [0, 1]
hyper_params = dictionary
mode = "training" or "inference"
outputs:
frcnn_model = tf.keras.model
"""
input_img = rpn_model.input
rpn_reg_predictions, rpn_cls_predictions = rpn_model.output
#
roi_bboxes = RoIBBox(anchors, mode, hyper_params, name="roi_bboxes")([rpn_reg_predictions, rpn_cls_predictions])
#
roi_pooled = RoIPooling(hyper_params, name="roi_pooling")([feature_extractor.output, roi_bboxes])
#
output = TimeDistributed(Flatten(), name="frcnn_flatten")(roi_pooled)
output = TimeDistributed(Dense(4096, activation="relu"), name="frcnn_fc1")(output)
# output = TimeDistributed(Dropout(0.5), name="frcnn_dropout1")(output)
output = TimeDistributed(Dense(4096, activation="relu"), name="frcnn_fc2")(output)
output = TimeDistributed(Dropout(0.5), name="frcnn_dropout2")(output)
frcnn_cls_predictions = TimeDistributed(Dense(hyper_params["total_labels"], activation="softmax"), name="frcnn_cls")(output)
frcnn_reg_predictions = TimeDistributed(Dense(hyper_params["total_labels"] * 4, activation="linear"), name="frcnn_reg")(output)
#
if mode == "training":
input_gt_boxes = Input(shape=(None, 4), name="input_gt_boxes", dtype=tf.float32)
input_gt_labels = Input(shape=(None, ), name="input_gt_labels", dtype=tf.int32)
rpn_cls_actuals = Input(shape=(None, None, hyper_params["anchor_count"]), name="input_rpn_cls_actuals", dtype=tf.float32)
rpn_reg_actuals = Input(shape=(None, 4), name="input_rpn_reg_actuals", dtype=tf.float32)
frcnn_reg_actuals, frcnn_cls_actuals = RoIDelta(hyper_params, name="roi_deltas")(
[roi_bboxes, input_gt_boxes, input_gt_labels])
#
loss_names = ["rpn_reg_loss", "rpn_cls_loss", "frcnn_reg_loss", "frcnn_cls_loss"]
rpn_reg_loss_layer = Lambda(train_utils.reg_loss, name=loss_names[0])([rpn_reg_actuals, rpn_reg_predictions])
rpn_cls_loss_layer = Lambda(train_utils.rpn_cls_loss, name=loss_names[1])([rpn_cls_actuals, rpn_cls_predictions])
frcnn_reg_loss_layer = Lambda(train_utils.reg_loss, name=loss_names[2])([frcnn_reg_actuals, frcnn_reg_predictions])
frcnn_cls_loss_layer = Lambda(train_utils.frcnn_cls_loss, name=loss_names[3])([frcnn_cls_actuals, frcnn_cls_predictions])
#
frcnn_model = Model(inputs=[input_img, input_gt_boxes, input_gt_labels,
rpn_reg_actuals, rpn_cls_actuals],
outputs=[roi_bboxes, rpn_reg_predictions, rpn_cls_predictions,
frcnn_reg_predictions, frcnn_cls_predictions,
rpn_reg_loss_layer, rpn_cls_loss_layer,
frcnn_reg_loss_layer, frcnn_cls_loss_layer])
#
for layer_name in loss_names:
layer = frcnn_model.get_layer(layer_name)
frcnn_model.add_loss(layer.output)
frcnn_model.add_metric(layer.output, name=layer_name, aggregation="mean")
#
else:
bboxes, labels, scores = Decoder(hyper_params["variances"], hyper_params["total_labels"], name="faster_rcnn_decoder")(
[roi_bboxes, frcnn_reg_predictions, frcnn_cls_predictions])
frcnn_model = Model(inputs=input_img, outputs=[bboxes, labels, scores])
#
return frcnn_model
def init_model_frcnn(model, hyper_params):
"""Generating dummy data for initialize model.
In this way, the training process can continue from where it left off.
inputs:
model = tf.keras.model
hyper_params = dictionary
"""
final_height, final_width = hyper_params["img_size"], hyper_params["img_size"]
img = tf.random.uniform((1, final_height, final_width, 3))
feature_map_shape = hyper_params["feature_map_shape"]
total_anchors = feature_map_shape * feature_map_shape * hyper_params["anchor_count"]
gt_boxes = tf.random.uniform((1, 1, 4))
gt_labels = tf.random.uniform((1, 1), maxval=hyper_params["total_labels"], dtype=tf.int32)
bbox_deltas = tf.random.uniform((1, total_anchors, 4))
bbox_labels = tf.random.uniform((1, feature_map_shape, feature_map_shape, hyper_params["anchor_count"]), maxval=1, dtype=tf.float32)
model([img, gt_boxes, gt_labels, bbox_deltas, bbox_labels])
and this is code to test a model.
batch_size = 4
epochs = 10
load_weights = False
backbone = "vgg16"
hyper_params = train_utils.get_hyper_params(backbone)
labels = list(label_map_dict.keys()) # my custom label (pothole and crack)
labels = ["bg"] + labels
test_total_item = len(list(test_data))
test_data = test_data.map(lambda data : data_utils.preprocessing_before_frcnn(
data, IMAGE_SIZE, IMAGE_SIZE))
test_data = test_data.padded_batch(
batch_size, padded_shapes=data_shapes, padding_values=padding_values)
load_path = io_utils.get_model_path("faster_rcnn", backbone)
rpn_model, feature_extractor = rpn_vgg16.get_model_vgg16(hyper_params)
frcnn_test_model = faster_rcnn.get_model_frcnn(feature_extractor, rpn_model, anchors, hyper_params, mode="test")
frcnn_test_model.load_weights(load_path)
step_size = train_utils.get_step_size(test_total_item, batch_size)
pred_bboxes, pred_labels, pred_scores = frcnn_test_model.predict(test_data, steps=step_size, verbose=1)
After I run my test code, the error happens like this:
TypeError Traceback (most recent call last)
<ipython-input-26-de9c8627623e> in <module>()
1 step_size = train_utils.get_step_size(test_total_item, batch_size)
----> 2 pred_bboxes, pred_labels, pred_scores = frcnn_test_model.predict(test_data, steps=step_size, verbose=1)
16 frames
/usr/local/lib/python3.7/dist-packages/tensorflow_core/python/autograph/impl/api.py in wrapper(*args, **kwargs)
235 except Exception as e: # pylint:disable=broad-except
236 if hasattr(e, 'ag_error_metadata'):
--> 237 raise e.ag_error_metadata.to_exception(e)
238 else:
239 raise
TypeError: in converted code:
/usr/local/lib/python3.7/dist-packages/tensorflow_core/python/keras/engine/training_v2.py:677 map_fn
batch_size=None)
/usr/local/lib/python3.7/dist-packages/tensorflow_core/python/keras/engine/training.py:2474 _standardize_tensors
sample_weight, feed_output_names)
/usr/local/lib/python3.7/dist-packages/tensorflow_core/python/keras/engine/training_utils.py:639 standardize_sample_weights
'sample_weight')
/usr/local/lib/python3.7/dist-packages/tensorflow_core/python/keras/engine/training_utils.py:629 standardize_sample_or_class_weights
str(x_weight))
TypeError: The model has multiple outputs, so `sample_weight` should be either a list or a dict. Provided `sample_weight` type not understood: Tensor("args_2:0", shape=(None, None), dtype=int32)
The current tensorflow version I used is Tensorflow 2.1.0
This is the cat vs dog problem from kaggle competition. My code is look like correct but still a value error annoying me. I think I have given the correct size of the input but still the error comes.
Please help me to find out the error.
Here's my full code:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
from random import shuffle
import tensorflow
from tqdm import tqdm
TRAIN_DIR = 'C:\\Users\Kashif\PycharmProjects\DeepLearning-Tensorflow (Sentdex)\Learnings\Cat_VS_Dog\TrainingData'
TEST_DIR = 'C:\\Users\Kashif\PycharmProjects\DeepLearning-Tensorflow (Sentdex)\Learnings\Cat_VS_Dog\TestingData'
IMG_SIZE = 50
MODEL_NAME = 'dogvscat-{}-{}.model'.format(LR, '2conv-basic')
def label_img(img):
word_label = img.split('.')[-3]
if word_label == 'cat': return [1,0]
elif word_label == 'dog': return [0,1]
def create_train_data():
training_data = []
for img in tqdm(os.listdir(TRAIN_DIR)):
label = label_img(img)
path = os.path.join(TRAIN_DIR,img)
img = cv2.imread(path,cv2.IMREAD_GRAYSCALE)
img = cv2.resize(img, (IMG_SIZE,IMG_SIZE))
training_data.append([np.array(img),np.array(label)])
shuffle(training_data)
np.save('train_data.npy', training_data)
return training_data
def process_test_data():
testing_data=[]
for img in tqdm(os.listdir(TEST_DIR)):
path = os.path.join(TEST_DIR, img)
img_num = img.split('.')[0]
img = cv2.resize(cv2.imread(path, cv2.IMREAD_GRAYSCALE), (IMG_SIZE, IMG_SIZE))
testing_data.append([np.array(img), img_num])
np.save('test_data.npy', testing_data)
return testing_data
train_data = create_train_data()
learning_rate = 0.01
epochs = 10
batch_size = 128
n_classes = 2
drop_out = 0.8
filter_h_w = 5
depth_in = 1
depth_out_1 = 32
depth_out_2 = 64
x = tf.placeholder('float', [None, IMG_SIZE * IMG_SIZE])
y = tf.placeholder('float', [None, n_classes])
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def maxpool2d(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
def conv_nural_network(x):
weights = {
'W_conv1': tf.Variable(tf.random_normal([filter_h_w, filter_h_w, depth_in, depth_out_1])),
'W_conv2': tf.Variable(tf.random_normal([filter_h_w, filter_h_w, depth_out_1, depth_out_2])),
'W_fc': tf.Variable(tf.random_normal([ int(IMG_SIZE/4) * int(IMG_SIZE/4) * depth_out_2, 1024])),
'out': tf.Variable(tf.random_normal([1024, n_classes]))
}
biases = {
'b_conv1': tf.Variable(tf.random_normal([depth_out_1])),
'b_conv2': tf.Variable(tf.random_normal([depth_out_2])),
'b_fc': tf.Variable(tf.random_normal([1024])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
x = tf.reshape(x, shape=[-1, IMG_SIZE, IMG_SIZE, 1])
conv1 = tf.nn.relu(conv2d(x, weights['W_conv1']) + biases['b_conv1'])
conv1 = maxpool2d(conv1)
conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['b_conv2'])
conv2 = maxpool2d(conv2)
fc = tf.reshape(conv2, [-1, int(IMG_SIZE/4) * int(IMG_SIZE/4) * depth_out_2])
fc = tf.nn.relu(tf.matmul(fc, weights['W_fc']) + biases['b_fc'])
fc = tf.nn.dropout(fc, drop_out)
output = tf.matmul(fc, weights['out']) + biases['out']
return output
train = train_data[:-500]
test = train_data[-500:]
train_X = np.array([i[0] for i in train]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
train_y = [i[1] for i in train]
test_X = np.array([i[0] for i in test]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
test_y = [i[1] for i in test]
def train_neural_network(x):
prediction = conv_nural_network(x)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)
init = tf.global_variables_initializer()
loss_trace = []
accuracy_trace = []
with tf.Session() as sess:
sess.run(init)
for i in range(epochs):
sess.run(optimizer, feed_dict={x: train_X, y: train_y})
loss = sess.run(cost_function, feed_dict={x: train_X, y: train_y})
accuracy = np.mean(np.argmax(sess.run(prediction,feed_dict={x:train_X,y:train_y}),axis=1) == np.argmax(train_y,axis=1))
loss_trace.append(loss)
accuracy_trace.append(accuracy)
print('Epoch:', (i + 1), 'loss:', loss, 'accuracy:', accuracy)
print('Final training result:', 'loss:', loss, 'accuracy:', accuracy)
loss_test = sess.run(cost_function, feed_dict={x: test_X, y: test_y})
test_pred = np.argmax(sess.run(prediction, feed_dict={x: test_X, y: test_y}), axis=1)
accuracy_test = np.mean(test_pred == np.argmax(test_y, axis=1))
print('Results on test dataset:', 'loss:', loss_test, 'accuracy:', accuracy_test)
train_neural_network(x)
This error comes after that. A value error has come but I don't know where I have given the wrong input type.
ValueError Traceback (most recent call last)
<ipython-input-91-7682c5a4d0ec> in <module>
25
26
---> 27 train_neural_network(x)
<ipython-input-91-7682c5a4d0ec> in train_neural_network(x)
11 sess.run(init)
12 for i in range(epochs):
---> 13 sess.run(optimizer, feed_dict={x: train_X, y: train_y})
14 loss = sess.run(cost_function, feed_dict={x: train_X, y: train_y})
15 accuracy = np.mean(np.argmax(sess.run(prediction,feed_dict={x:train_X,y:train_y}),axis=1) == np.argmax(train_y,axis=1))
ValueError: Cannot feed value of shape (24500, 50, 50, 1) for Tensor 'Placeholder_34:0', which has shape '(?, 2500)'
I have been trying to run some experiments using the deepfix tool (https://bitbucket.org/iiscseal/deepfix) which is a seq2seq model for correcting common programming errors.
I made changes to the code so that it is compatible to TF-1.12, as the original code contains tensorflow.contrib.seq2seq functions which are not supported in version TF-1.12 (only in TF-1.0.x).
The main changes were in the seq2seq_model defined in neural_net/train.py.
Below is the changed code. I'm new to the tensorflow RNN, and coded the decoder part using help from online codes.
class seq2seq_model():
PAD = 0
EOS = 1
def __init__(self, vocab_size, embedding_size, max_output_seq_len,
cell_type='LSTM', memory_dim=300, num_layers=4, dropout=0.2,
attention=True,
scope=None,
verbose=False):
assert 0 <= dropout and dropout <= 1, '0 <= dropout <= 1, you passed dropout={}'.format(
dropout)
tf.set_random_seed(1189)
self.attention = attention
self.max_output_seq_len = max_output_seq_len
self.memory_dim = memory_dim
self.num_layers = num_layers
self.dropout = dropout
self.scope = scope
if dropout != 0:
self.keep_prob = tf.placeholder(tf.float32)
else:
self.keep_prob = None
self.vocab_size = vocab_size
self.embedding_size = embedding_size
self.encoder_cell = _new_RNN_cell(
memory_dim, num_layers, cell_type, dropout, self.keep_prob)
self.decoder_cell = _new_RNN_cell(
memory_dim, num_layers, cell_type, dropout, self.keep_prob)
self._make_graph()
if self.scope is not None:
saver_vars = [var for var in tf.global_variables(
) if var.name.startswith(self.scope)]
else:
saver_vars = tf.global_variables()
if verbose:
print 'root-scope:', self.scope
print "\n\nDiscovered %d saver variables." % len(saver_vars)
for each in saver_vars:
print each.name
self.saver = tf.train.Saver(saver_vars, max_to_keep=5)
#property
def decoder_hidden_units(self):
return self.memory_dim
def _make_graph(self):
self._init_placeholders()
self._init_decoder_train_connectors()
self._init_embeddings()
self._init_simple_encoder()
self._init_decoder()
self._init_optimizer()
def _init_placeholders(self):
""" Everything is time-major """
self.encoder_inputs = tf.placeholder(
shape=(None, None),
dtype=tf.int32,
name='encoder_inputs',
)
self.encoder_inputs_length = tf.placeholder(
shape=(None,),
dtype=tf.int32,
name='encoder_inputs_length',
)
self.decoder_targets = tf.placeholder(
shape=(None, None),
dtype=tf.int32,
name='decoder_targets'
)
self.decoder_targets_length = tf.placeholder(
shape=(None,),
dtype=tf.int32,
name='decoder_targets_length',
)
def _init_decoder_train_connectors(self):
with tf.name_scope('decoderTrainFeeds'):
sequence_size, batch_size = tf.unstack(
tf.shape(self.decoder_targets), name='decoder_targets_shape')
EOS_SLICE = tf.ones([1, batch_size], dtype=tf.int32) * self.EOS
PAD_SLICE = tf.ones([1, batch_size], dtype=tf.int32) * self.PAD
self.decoder_train_inputs = tf.concat(
[EOS_SLICE, self.decoder_targets], axis=0, name="decoder_train_inputs")
self.decoder_train_length = self.decoder_targets_length + 1
decoder_train_targets = tf.concat(
[self.decoder_targets, PAD_SLICE], axis=0)
decoder_train_targets_seq_len, _ = tf.unstack(
tf.shape(decoder_train_targets))
decoder_train_targets_eos_mask = tf.one_hot(self.decoder_train_length - 1,
decoder_train_targets_seq_len,
on_value=self.EOS, off_value=self.PAD,
dtype=tf.int32)
decoder_train_targets_eos_mask = tf.transpose(
decoder_train_targets_eos_mask, [1, 0])
decoder_train_targets = tf.add(decoder_train_targets,
decoder_train_targets_eos_mask, name="decoder_train_targets")
self.decoder_train_targets = decoder_train_targets
self.loss_weights = tf.ones([
batch_size,
tf.reduce_max(self.decoder_train_length)
], dtype=tf.float32, name="loss_weights")
def _init_embeddings(self):
with tf.variable_scope("embedding") as scope:
sqrt3 = math.sqrt(3)
initializer = tf.random_uniform_initializer(-sqrt3, sqrt3)
self.embedding_matrix = tf.get_variable(
name="embedding_matrix",
shape=[self.vocab_size, self.embedding_size],
initializer=initializer,
dtype=tf.float32)
self.encoder_inputs_embedded = tf.nn.embedding_lookup(
self.embedding_matrix, self.encoder_inputs,
name="encoder_inputs_embedded")
self.decoder_train_inputs_embedded = tf.nn.embedding_lookup(
self.embedding_matrix, self.decoder_train_inputs,
name="decoder_train_inputs_embedded")
def _init_simple_encoder(self):
with tf.variable_scope("Encoder") as scope:
(self.encoder_outputs, self.encoder_state) = (
tf.nn.dynamic_rnn(cell=self.encoder_cell,
inputs=self.encoder_inputs_embedded,
sequence_length=self.encoder_inputs_length,
time_major=True,
dtype=tf.float32)
)
def _init_decoder(self):
with tf.variable_scope("decoder") as scope:
# def output_fn(outputs):
# return tf.contrib.layers.fully_connected(outputs, self.vocab_size, scope=scope,
# name = "output_fn")
sequence_size, batch_size = tf.unstack(
tf.shape(self.decoder_targets), name='decoder_targets_shape')
train_helper = seq2seq.TrainingHelper(
inputs=self.decoder_train_inputs_embedded,
sequence_length=self.decoder_train_length,
time_major=True,
name="train_helper")
pred_helper = seq2seq.SampleEmbeddingHelper(
embedding=self.embedding_matrix,
start_tokens=tf.ones([batch_size], dtype=tf.int32) * self.EOS,
end_token=self.EOS)
# name="pred_helper")
def _decode(helper, scope, reuse=None):
with tf.variable_scope(scope, reuse=reuse):
attention_states = tf.transpose(
self.encoder_outputs, [1, 0, 2])
attention_mechanism = seq2seq.BahdanauAttention(
num_units=self.decoder_hidden_units, memory=attention_states,
name="attention_mechanism")
attention_cell = seq2seq.AttentionWrapper(
self.decoder_cell, attention_mechanism,
name="atttention_wrapper")
out_cell = tf.contrib.rnn.OutputProjectionWrapper(
attention_cell, self.vocab_size, reuse=reuse)
# name="output_cell")
decoder = seq2seq.BasicDecoder(
cell=out_cell, helper=helper,
initial_state=out_cell.zero_state(
dtype=tf.float32, batch_size=batch_size))
# name="decoder")
outputs = seq2seq.dynamic_decode(
decoder=decoder, output_time_major=True,
impute_finished=True)
# name="outputs")
return outputs
(self.decoder_logits_train, self.decoder_state_train, _) = _decode(train_helper, "decoder")
(self.decoder_logits_inference, self.decoder_state_inference, _) = _decode(pred_helper, "decoder", reuse=True)
self.decoder_logits_train = self.decoder_logits_train.rnn_output
self.decoder_logits_inference = self.decoder_logits_inference.rnn_output
# self.decoder_logits_train = output_fn(self.decoder_outputs_train)
self.decoder_prediction_train = tf.argmax(
self.decoder_logits_train, axis=-1, name='decoder_prediction_train')
scope.reuse_variables()
self.decoder_prediction_inference = tf.argmax(self.decoder_logits_inference, axis=-1,
name='decoder_prediction_inference')
def _init_optimizer(self):
logits = tf.transpose(self.decoder_logits_train, [1, 0, 2])
targets = tf.transpose(self.decoder_train_targets, [1, 0])
self.loss = seq2seq.sequence_loss(logits=logits, targets=targets,
weights=self.loss_weights)
self.optimizer = tf.train.AdamOptimizer()
gvs = self.optimizer.compute_gradients(self.loss)
def ClipIfNotNone(grad):
if grad is None:
return grad
return tf.clip_by_value(grad, -1., 1)
# capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs]
capped_gvs = [(ClipIfNotNone(grad), var) for grad, var in gvs]
self.train_op = self.optimizer.apply_gradients(capped_gvs)
def make_feed_dict(self, x, x_len, y, y_len):
feed_dict = {
self.encoder_inputs: x,
self.encoder_inputs_length: x_len,
self.decoder_targets: y,
self.decoder_targets_length: y_len,
}
if self.dropout != 0:
feed_dict.update({self.keep_prob: 1.0 - self.dropout})
return feed_dict
def load_parameters(self, sess, filename):
self.saver.restore(sess, filename)
def save_parameters(self, sess, filename, global_step=None):
self.saver.save(sess, filename, global_step=global_step)
def train_step(self, session, x, x_len, y, y_len):
feed_dict = self.make_feed_dict(x, x_len, y, y_len)
_, loss = session.run([self.train_op, self.loss], feed_dict)
return loss
def validate_step(self, session, x, x_len, y, y_len):
feed_dict = self.make_feed_dict(x, x_len, y, y_len)
loss, decoder_prediction, decoder_train_targets = session.run([self.loss,
self.decoder_prediction_inference,
self.decoder_train_targets], feed_dict)
return loss, np.array(decoder_prediction).T, np.array(decoder_train_targets).T
def sample(self, session, X, X_len):
feed_dict = {self.encoder_inputs: X,
self.encoder_inputs_length: X_len}
if self.dropout != 0:
feed_dict.update({self.keep_prob: 1.0})
decoder_prediction = session.run(
self.decoder_prediction_inference, feed_dict)
return np.array(decoder_prediction).T
I am having some problems with this code:
Main problem - The seq2seq.train_step() and seq2seq.validate_step() functions are working, but when I use seq2seq.sample() for actually making inferences, I get an error that asks me to feed a value for decoder_targets. This is an unexpected behaviour as the SampleEmbeddingHelper function is used for inference which does not require decoder_targets. The error:
InvalidArgumentError (see above for traceback): You must feed a value
for placeholder tensor 'ids/decoder_targets' with dtype int32 and
shape [?,?] [[node ids/decoder_targets (defined at
.../code/neural_net/train.py:241) = Placeholderdtype=DT_INT32,
shape=[?,?],
_device="/job:localhost/replica:0/task:0/device:CPU:0"]]
When I try to use the GreedyEmbeddingHelper instead of SampleEmbeddingHelper, and then run decoder_logits_inference op, the machine hangs and runs out of memory after some time. Although SampleEmbeddingHelper works fine.
Well, SampleEmbeddingHelper does need decoder targets, since it mixes part of GreedyEmbeddingHelper(infer mode) and tf.contrib.seq2seq.TrainingHelper(teacher forcing). I think you just need to use GreedyEmbeddingHelper.
Since in the beginning, the parameters are totally random (if not pre-trained).
Maybe you have seen that the results of the first few loops of seq2seq model are totally messed up.
So if you use GreedyEmbeddingHelper, which outputs a result based on the previous one, and of course no one teaches it "where to stop", so it usually goes infinitely until your memory runs out. To solve this, you need to set an upper limit for the length of sentence in tf.contrib.seq2seq.dynamic_decode.
The argument is maximum_iterations. as shown in
tf.contrib.seq2seq.dynamic_decode
I am writing an implementation of style transfer by loading a vgg model from keras and supplying it to a tensorflow model.
I am using an adam optimizer. The loss function is reducing but it is very slow and plateaus off at about 108. Also the style loss is huge (order of 108) whereas content loss is much smaller(order of 105). This is weird as the paper for style transfer says to scale content loss down by a factor of 100 or 1000 when calculating total loss.
I tried increasing the learning rate but that only makes the gradient overshoot.
I suspect there must be a bug in my implementation but despite searching endlessly I have been unable to find what's wrong.
Here's the code:
# coding: utf-8
# In[1]:
from keras.applications.vgg16 import VGG16
from keras.models import Model
import tensorflow as tf
import tensorflow.contrib.eager as tfe
import numpy as np
import matplotlib.pyplot as plt
# In[2]:
content_image_path = './skyline.jpg'
style_image_path = './starry_night.jpg'
output_image_path = './output.jpg'
# In[4]:
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
# In[5]:
content_image = image.load_img(content_image_path, target_size=(224, 224))
#plt.imshow(content_image)
content_arr = image.img_to_array(content_image)
content_arr = tf.convert_to_tensor(preprocess_input(np.expand_dims(content_arr, axis=0)), tf.float64)
sess.run(tf.shape(content_arr))
# In[6]:
style_image = image.load_img(style_image_path, target_size=(224, 224))
#plt.imshow(style_image)
style_arr = image.img_to_array(style_image)
style_arr = tf.convert_to_tensor(preprocess_input(np.expand_dims(style_arr, axis=0)), tf.float64)
sess.run(tf.shape(style_arr))
# In[7]:
#generate random image with pixel values b/w 0 -> 255
o_input = np.random.randint(low=0, high=256, size=(224, 224, 3)).astype('float64')
plt.imshow(o_input)
o_input_old = np.copy(o_input)
o_input = preprocess_input(np.expand_dims(o_input, axis=0))
print(o_input_old)
o_input_var = tf.Variable(o_input, name="gen_img_vector", trainable=True)
# In[8]:
content_model = VGG16(include_top=False, weights='imagenet', input_tensor=content_arr, input_shape=(224, 224, 3))
style_model = VGG16(include_top=False, weights='imagenet', input_tensor=style_arr, input_shape=(224, 224, 3))
train_model = VGG16(include_top=False, weights='imagenet', input_tensor=o_input_var, input_shape=(224, 224, 3))
# In[10]:
content_model.summary()
# In[11]:
def get_feature_rep(layer_type, layer_names, model):
outputs = []
for name in layer_names:
out = model.get_layer(name=name).output
N = tf.shape(out)[3]#number of channels
M = tf.multiply(tf.shape(out)[1], tf.shape(out)[2])#product of dimensions
out = tf.transpose(tf.reshape(out, (M, N)))#Flattens each channel into 1-D tensor & reshapes layer
if layer_type == 'style':
out = get_gram_matrix(out)
print(out)
outputs.append(out)
return outputs
# In[12]:
def get_gram_matrix(F):
G = tf.matmul(F, tf.transpose(F))
return G
# In[13]:
def style_loss(Gs, As):
total = tf.Variable(tf.constant(0.0, tf.float64), name="style_loss", trainable=False)
style_reps = list(zip(Gs, As))
for layer in style_reps:
loss = tf.reduce_sum(tf.cast(tf.squared_difference(layer[0], layer[1]), tf.float64), [0, 1])
N_layer = tf.shape(layer[0])[0]
M_layer = tf.shape(layer[0])[1]
den = tf.square(tf.cast(tf.multiply(N_layer, M_layer), tf.float64))
loss = loss/den
loss = loss*0.2/4.0 #weighting loss
total = total + loss
return total
# In[14]:
def content_loss(P, F):
# loss = tf.Variable(tf.constant(0.0, tf.float64), name="content_loss", trainable=False)
loss = tf.reduce_sum(tf.cast(tf.squared_difference(P, F), tf.float64), [0, 1])
loss = loss/2.0
return loss
# In[15]:
content_layer_names = ['block4_conv2']
style_layer_names = ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1']
# In[32]:
P = tf.squeeze(get_feature_rep('content', content_layer_names, content_model))
# In[34]:
F = tf.squeeze(get_feature_rep('content', content_layer_names, train_model))
# In[18]:
#Each member of As consists of a feature map corresponding to a particular layer (dim. channels x pixels per channel)
As = get_feature_rep('style', style_layer_names, style_model)
# In[19]:
Gs = get_feature_rep('style', style_layer_names, train_model)
# In[20]:
styleloss = style_loss(Gs, As)
# In[21]:
contentloss = content_loss(P, F)
# In[22]:
total_loss = tf.add(styleloss, tf.multiply(tf.constant(0.01, tf.float64), contentloss))
# In[23]:
optimizer = tf.train.AdamOptimizer(5).minimize(total_loss, var_list=[o_input_var])
# In[26]:
def reprocess(x):
VGG_MEAN = [123.68, 116.78, 103.94]
means = tf.reshape(tf.constant(VGG_MEAN, tf.float64), [1, 1, 3])
#Undo mean imagenet scale preprocessing
x = tf.add(x, means)
tf.clip_by_value(x, 0, 255)
#bgr to rgb
x = x[..., ::-1]
return x
# In[27]:
saver = tf.train.Saver(tf.global_variables())
# In[28]:
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
# saver.restore(sess, './model/nst_model.ckpt')
for epoch in range(100):
_, styleloss_curr, contentloss_curr, loss_curr, new_arr = sess.run([optimizer, styleloss, contentloss, total_loss, o_input_var])
print('Epoch: %i Content Loss: %.2f Style Loss: %.2f Total Loss: %.2f' % (epoch, contentloss_curr, styleloss_curr, loss_curr))
if epoch % 15 == 0:
saver.save(sess, './model/nst_model.ckpt')
# In[30]:
with tf.Session() as sess:
new_arr = reprocess(new_arr)
new_im = sess.run(tf.cast(tf.round(tf.squeeze(new_arr)), tf.uint8))
# new_im = new_im[...,::-1]
# print(sess.run(new_arr[0]/255))
print(sess.run(tf.shape(new_im)))
plt.imshow(new_im)