TensorFlow CNN on multiple GPUs

TensorFlow CNN on multiple GPUs - python

I am trying to parallelize my code to have my tensorflow model run on multiple GPUs. For some reason, the code I wrote to parallelize the training works for a standard deep neural net, but throws errors when using a convolutional neural net.
Here is my code to compute the average gradients:
def average_gradients(tower_grads):
average_grads = []
for grad_and_vars in zip(*tower_grads):
# Note that each grad_and_vars looks like the following:
# ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
grads = []
for g, _ in grad_and_vars:
# Add 0 dimension to the gradients to represent the tower.
expanded_g = tf.expand_dims(g, 0)
# Append on a 'tower' dimension which we will average over below.
grads.append(expanded_g)
# Average over the 'tower' dimension.
grad = tf.concat(axis=0, values=grads)
grad = tf.reduce_mean(grad, 0)
# Keep in mind that the Variables are redundant because they are shared
# across towers. So .. we will just return the first tower's pointer to
# the Variable.
v = grad_and_vars[0][1]
grad_and_var = (grad, v)
average_grads.append(grad_and_var)
return average_grads
This is my deep neural net architecture: (this works)
def neuralNet(data):
hl_1 = {'weights':tf.get_variable('Weights1',[TF_SHAPE,n_nodes_hl1],initializer=tf.random_normal_initializer()),
'biases':tf.get_variable('Biases1',[n_nodes_hl1],initializer=tf.random_normal_initializer())}
hl_2 = {'weights':tf.get_variable('Weights2',[n_nodes_hl1, n_nodes_hl2],initializer=tf.random_normal_initializer()),
'biases':tf.get_variable('Biases2',[n_nodes_hl2],initializer=tf.random_normal_initializer())}
hl_3 = {'weights':tf.get_variable('Weights3',[n_nodes_hl2, n_nodes_hl3],initializer=tf.random_normal_initializer()),
'biases':tf.get_variable('Biases3',[n_nodes_hl3],initializer=tf.random_normal_initializer())}
hl_4 = {'weights':tf.get_variable('Weights4',[n_nodes_hl3, n_nodes_hl4],initializer=tf.random_normal_initializer()),
'biases':tf.get_variable('Biases4',[n_nodes_hl4],initializer=tf.random_normal_initializer())}
hl_5 = {'weights':tf.get_variable('Weights5',[n_nodes_hl4, n_nodes_hl5],initializer=tf.random_normal_initializer()),
'biases':tf.get_variable('Biases5',[n_nodes_hl5],initializer=tf.random_normal_initializer())}
output_layer = {'weights':tf.get_variable('Weights-outputlayer',[n_nodes_hl5, n_classes],initializer=tf.random_normal_initializer()),
'biases':tf.get_variable('Biases-outputlayer',[n_classes],initializer=tf.random_normal_initializer())}
l1 = tf.add(tf.matmul(data, hl_1['weights']), hl_1['biases'])
l1 = tf.nn.sigmoid(l1, name='op1')
l2 = tf.add(tf.matmul(l1, hl_2['weights']), hl_2['biases'])
l2 = tf.nn.sigmoid(l2, name='op2')
l3 = tf.add(tf.matmul(l2, hl_3['weights']), hl_3['biases'])
l3 = tf.nn.sigmoid(l3, name='op3')
l4 = tf.add(tf.matmul(l3, hl_4['weights']), hl_4['biases'])
l4 = tf.nn.sigmoid(l4, name='op4')
l5 = tf.add(tf.matmul(l4, hl_5['weights']), hl_5['biases'])
l5 = tf.nn.sigmoid(l5, name='op5')
dropout = tf.nn.dropout(l5,keep_prob, name='op6')
ol = tf.add(tf.matmul(dropout, output_layer['weights']), output_layer['biases'], name='op7')
return ol
This is my convnet: (this does not work)
def conv2d(x,W):
return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')
def maxpool2d(x):
return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding="SAME")
def convNeuralNet(x):
weights = {'w_conv1':tf.get_variable('w_conv1',[7,7,1,2],initializer=tf.random_normal_initializer()),
'w_conv2':tf.get_variable('w_conv2',[7,7,2,4],initializer=tf.random_normal_initializer()),
'w_conv3':tf.get_variable('w_conv3',[7,7,4,8],initializer=tf.random_normal_initializer()),
'w_conv4':tf.get_variable('w_conv4',[7,7,8,16],initializer=tf.random_normal_initializer()),
'w_conv5':tf.get_variable('w_conv5',[7,7,16,32],initializer=tf.random_normal_initializer()),
'w_conv6':tf.get_variable('w_conv6',[7,7,32,64],initializer=tf.random_normal_initializer()),
'w_conv7':tf.get_variable('w_conv7',[7,7,64,128],initializer=tf.random_normal_initializer()),
'w_conv8':tf.get_variable('w_conv8',[7,7,128,256],initializer=tf.random_normal_initializer()),
'w_conv9':tf.get_variable('w_conv9',[7,7,256,512],initializer=tf.random_normal_initializer()),
'w_fc1':tf.get_variable('w_fc1',[512,1024],initializer=tf.random_normal_initializer()),
'w_fc2':tf.get_variable('w_fc2',[1024,2048],initializer=tf.random_normal_initializer()),
'w_fc3':tf.get_variable('w_fc3',[2048,4096],initializer=tf.random_normal_initializer()),
'out':tf.get_variable('w_out',[4096,n_classes],initializer=tf.random_normal_initializer())}
biases = {'b_conv1':tf.get_variable('b_conv1',[2],initializer=tf.random_normal_initializer()),
'b_conv2':tf.get_variable('b_conv2',[4],initializer=tf.random_normal_initializer()),
'b_conv3':tf.get_variable('b_conv3',[8],initializer=tf.random_normal_initializer()),
'b_conv4':tf.get_variable('b_conv4',[16],initializer=tf.random_normal_initializer()),
'b_conv5':tf.get_variable('b_conv5',[32],initializer=tf.random_normal_initializer()),
'b_conv6':tf.get_variable('b_conv6',[64],initializer=tf.random_normal_initializer()),
'b_conv7':tf.get_variable('b_conv7',[128],initializer=tf.random_normal_initializer()),
'b_conv8':tf.get_variable('b_conv8',[256],initializer=tf.random_normal_initializer()),
'b_conv9':tf.get_variable('b_conv9',[512],initializer=tf.random_normal_initializer()),
'b_fc1':tf.get_variable('b_fc1',[1024],initializer=tf.random_normal_initializer()),
'b_fc2':tf.get_variable('b_fc2',[2048],initializer=tf.random_normal_initializer()),
'b_fc3':tf.get_variable('b_fc3',[4096],initializer=tf.random_normal_initializer()),
'out':tf.get_variable('b_out',[n_classes],initializer=tf.random_normal_initializer())}
x = tf.reshape(x,shape=[-1,7,len_puzzle,1])
conv1 = conv2d(x, weights['w_conv1'])
conv1 = maxpool2d(conv1)
conv2 = conv2d(conv1, weights['w_conv2'])
conv2 = maxpool2d(conv2)
conv3 = conv2d(conv2, weights['w_conv3'])
conv3 = maxpool2d(conv3)
conv4 = conv2d(conv3, weights['w_conv4'])
conv4 = maxpool2d(conv4)
conv5 = conv2d(conv4, weights['w_conv5'])
conv5 = maxpool2d(conv5)
conv6 = conv2d(conv5, weights['w_conv6'])
conv6 = maxpool2d(conv6)
conv7 = conv2d(conv6, weights['w_conv7'])
conv7 = maxpool2d(conv7)
conv8 = conv2d(conv7, weights['w_conv8'])
conv8 = maxpool2d(conv8)
conv9 = conv2d(conv8, weights['w_conv9'])
conv9 = maxpool2d(conv9)
fc1 = tf.reshape(conv9, [-1,512])
fc1 = tf.nn.sigmoid(tf.add(tf.matmul(fc1,weights['w_fc1']),biases['b_fc1']))
fc2 = tf.nn.sigmoid(tf.add(tf.matmul(fc1,weights['w_fc2']),biases['b_fc2']))
fc3 = tf.nn.sigmoid(tf.add(tf.matmul(fc2,weights['w_fc3']),biases['b_fc3']))
last = tf.nn.dropout(fc3,keep_prob)
output = tf.add(tf.matmul(last, weights['out']), biases['out'], name='op7')
return output
This is the code which runs the session:
def train(x):
tower_grads = []
opt = tf.train.AdamOptimizer(learning_rate)
for i in xrange(2):
with tf.device('/gpu:%d' % i):
with tf.variable_scope('NN',reuse=i>0):
prediction = convNeuralNet(x)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction,labels=y))
tf.summary.scalar('cross_entropy',cost)
grads = opt.compute_gradients(cost)
tower_grads.append(grads)
print grads
print len(grads)
#scope.reuse_variables()
grads = average_gradients(tower_grads)
apply_gradient_op = opt.apply_gradients(grads)
train_op = tf.group(apply_gradient_op)
correct = tf.equal(tf.argmax(prediction,1),tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct,'float'))
tf.summary.scalar('accuracy',accuracy)
num_epochs = ne
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)) as sess:
saver = tf.train.Saver()
# UNCOMMENT THIS WHEN RESTARTING FROM Checkpoint
#saver.restore(sess, tf.train.latest_checkpoint(os.getcwd()+'/models/base/.'))
sess.run(tf.global_variables_initializer())
merged_summary = tf.summary.merge_all()
for epoch in range(num_epochs):
epoch_loss = 0
for i in range(int(real_X_9.shape[0])/batch_size):#mnist.train.num_examples/batch_size)): # X.shape[0]
randidx = np.random.choice(real_X_9.shape[0], batch_size, replace=False)
epoch_x,epoch_y = real_X_9[randidx,:],real_y_9[randidx,:] #mnist.train.next_batch(batch_size) # X,y
j,c = sess.run([train_op,cost],feed_dict={x:epoch_x,y:epoch_y,keep_prob:TRAIN_KEEP_PROB})
if i == 0:
[ta] = sess.run([accuracy],feed_dict={x:epoch_x,y:epoch_y,keep_prob:TRAIN_KEEP_PROB})
print 'Train Accuracy', ta
epoch_loss += c
print '\n','Epoch', epoch + 1, 'completed out of', num_epochs, '\nLoss:',epoch_loss
#saver.save(sess, os.getcwd()+'/models/base/baseDNN7')
#saver.export_meta_graph(os.getcwd()+'/models/base/baseDNN7.meta')
print '\n','Train Accuracy', accuracy.eval(feed_dict={x:real_X_9, y:real_y_9, keep_prob:TRAIN_KEEP_PROB})
print '\n','Test Accuracy', accuracy.eval(feed_dict={x:test_real_X, y:test_real_y, keep_prob:1.0}) #X, y #mnist.test.images, mnist.test.labels
train(x)
This is the error:
Traceback (most recent call last):
File "CNN_gpu.py", line 393, in <module>
train(x)
File "CNN_gpu.py", line 311, in train
grads = average_gradients(tower_grads)
expanded_g = tf.expand_dims(g, 0)
File "/share/sw/free/tensorflow.1/1.1.0/lib/python2.7/site-packages/tensorflow/python/ops/array_ops.py", line 170, in expand_dims
return gen_array_ops._expand_dims(input, axis, name)
File "/share/sw/free/tensorflow.1/1.1.0/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 900, in _expand_dims
result = _op_def_lib.apply_op("ExpandDims", input=input, dim=dim, name=name)
File "/share/sw/free/tensorflow.1/1.1.0/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 509, in apply_op
(input_name, err))
ValueError: Tried to convert 'input' to a tensor and failed. Error: None values not supported.
I'm really confused. Parallelization across multiple GPUs should work regardless of the type of neural net being used.
Any help here would be appreciated.

Related

How to solve this Error? ValueError: Cannot feed value of shape (1, 360, 480, 3, 1) for Tensor Placeholder_1:0, which has shape (1, 360, 480, 1)

I need your help in my segnet.
I cloned this repository in git.
https://github.com/tkuanlun350/Tensorflow-SegNet
And I changed dataset to my custom data consisting of 3 classes including the background.
But This error occurred when I executed the test command.
ValueError: Cannot feed value of shape (1, 360, 480, 3, 1) for Tensor Placeholder_1:0, which has shape (1, 360, 480, 1)
How to solve this problem?
I'm not good at tensorflow. Please Help me...
Here is my code.
python 3.7
tensorflow 2.0
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
import os, sys
import numpy as np
import math
from datetime import datetime
import time
from PIL import Image
from math import ceil
from tensorflow.python.ops import gen_nn_ops
# modules
from Utils import _variable_with_weight_decay, _variable_on_cpu, _add_loss_summaries, _activation_summary, print_hist_summery, get_hist, per_class_acc, writeImage
from Inputs import *
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
os.environ["CUDA_VISIBLE_DEVICES"]="0, 1, 2, 3"
# gpus = tf.config.experimental.list_physical_devices('GPU')
gpus = tf.config.list_physical_devices('GPU')
for i in range(len(gpus)):
tf.config.experimental.set_memory_growth(gpus[i], True)
# Constants describing the training process.
MOVING_AVERAGE_DECAY = 0.9999 # The decay to use for the moving average.
NUM_EPOCHS_PER_DECAY = 350.0 # Epochs after which learning rate decays.
LEARNING_RATE_DECAY_FACTOR = 0.1 # Learning rate decay factor.
INITIAL_LEARNING_RATE = 0.001 # Initial learning rate.
EVAL_BATCH_SIZE = 8
BATCH_SIZE = 8
# for CamVid
IMAGE_HEIGHT = 360
IMAGE_WIDTH = 480
IMAGE_DEPTH = 3
NUM_CLASSES = 3
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 367
NUM_EXAMPLES_PER_EPOCH_FOR_TEST = 101
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 1
TEST_ITER = NUM_EXAMPLES_PER_EPOCH_FOR_TEST / BATCH_SIZE
def msra_initializer(kl, dl):
"""
kl for kernel size, dl for filter number
"""
stddev = math.sqrt(2. / (kl**2 * dl))
return tf.truncated_normal_initializer(stddev=stddev)
def orthogonal_initializer(scale = 1.1):
''' From Lasagne and Keras. Reference: Saxe et al., http://arxiv.org/abs/1312.6120
'''
def _initializer(shape, dtype=tf.float32, partition_info=None):
flat_shape = (shape[0], np.prod(shape[1:]))
a = np.random.normal(0.0, 1.0, flat_shape)
u, _, v = np.linalg.svd(a, full_matrices=False)
# pick the one with the correct shape
q = u if u.shape == flat_shape else v
q = q.reshape(shape) #this needs to be corrected to float32
return tf.constant(scale * q[:shape[0], :shape[1]], dtype=tf.float32)
return _initializer
def loss(logits, labels):
"""
loss func without re-weighting
"""
# Calculate the average cross entropy loss across the batch.
logits = tf.reshape(logits, (-1,NUM_CLASSES))
labels = tf.reshape(labels, [-1])
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits=logits, labels=labels, name='cross_entropy_per_example')
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
tf.add_to_collection('losses', cross_entropy_mean)
return tf.add_n(tf.get_collection('losses'), name='total_loss')
def weighted_loss(logits, labels, num_classes, head=None):
""" median-frequency re-weighting """
with tf.name_scope('loss'):
logits = tf.reshape(logits, (-1, num_classes))
epsilon = tf.constant(value=1e-10)
logits = logits + epsilon
# consturct one-hot label array
label_flat = tf.reshape(labels, (-1, 1))
# should be [batch ,num_classes]
labels = tf.reshape(tf.one_hot(label_flat, depth=num_classes), (-1, num_classes))
softmax = tf.nn.softmax(logits)
cross_entropy = -tf.reduce_sum(tf.multiply(labels * tf.log(softmax + epsilon), head), axis=[1])
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
tf.add_to_collection('losses', cross_entropy_mean)
loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
return loss
def cal_loss(logits, labels):
loss_weight = np.array([
0.2595,
0.3826,
1.0974]) # class 0~11
labels = tf.cast(labels, tf.int32)
# return loss(logits, labels)
return weighted_loss(logits, labels, num_classes=NUM_CLASSES, head=loss_weight)
def conv_layer_with_bn(inputT, shape, train_phase, activation=True, name=None):
in_channel = shape[2]
out_channel = shape[3]
k_size = shape[0]
with tf.variable_scope(name) as scope:
kernel = _variable_with_weight_decay('ort_weights', shape=shape, initializer=orthogonal_initializer(), wd=None)
conv = tf.nn.conv2d(inputT, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [out_channel], tf.constant_initializer(0.0))
bias = tf.nn.bias_add(conv, biases)
if activation is True:
conv_out = tf.nn.relu(batch_norm_layer(bias, train_phase, scope.name))
else:
conv_out = batch_norm_layer(bias, train_phase, scope.name)
return conv_out
def get_deconv_filter(f_shape):
"""
reference: https://github.com/MarvinTeichmann/tensorflow-fcn
"""
width = f_shape[0]
heigh = f_shape[0]
f = ceil(width/2.0)
c = (2 * f - 1 - f % 2) / (2.0 * f)
bilinear = np.zeros([f_shape[0], f_shape[1]])
for x in range(width):
for y in range(heigh):
value = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
bilinear[x, y] = value
weights = np.zeros(f_shape)
for i in range(f_shape[2]):
weights[:, :, i, i] = bilinear
init = tf.constant_initializer(value=weights,
dtype=tf.float32)
return tf.get_variable(name="up_filter", initializer=init,
shape=weights.shape)
def deconv_layer(inputT, f_shape, output_shape, stride=2, name=None):
# output_shape = [b, w, h, c]
# sess_temp = tf.InteractiveSession()
sess_temp = tf.global_variables_initializer()
strides = [1, stride, stride, 1]
with tf.variable_scope(name):
weights = get_deconv_filter(f_shape)
deconv = tf.nn.conv2d_transpose(inputT, weights, output_shape,
strides=strides, padding='SAME')
return deconv
def batch_norm_layer(inputT, is_training, scope):
return tf.cond(is_training,
lambda: tf.keras.layers.BatchNormalization(center=False)(inputT),
lambda: tf.keras.layers.BatchNormalization(center=False)(inputT))
def inference(images, labels, batch_size, phase_train):
# norm1
norm1 = tf.nn.lrn(images, depth_radius=5, bias=1.0, alpha=0.0001, beta=0.75,
name='norm1')
# conv1
conv1 = conv_layer_with_bn(norm1, [7, 7, images.get_shape().as_list()[3], 64], phase_train, name="conv1")
print(conv1.shape)
# pool1
pool1, pool1_indices = tf.nn.max_pool_with_argmax(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
padding='SAME', name='pool1')
# conv2
conv2 = conv_layer_with_bn(pool1, [7, 7, 64, 64], phase_train, name="conv2")
# pool2
pool2, pool2_indices = tf.nn.max_pool_with_argmax(conv2, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME', name='pool2')
# conv3
conv3 = conv_layer_with_bn(pool2, [7, 7, 64, 64], phase_train, name="conv3")
# pool3
pool3, pool3_indices = tf.nn.max_pool_with_argmax(conv3, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME', name='pool3')
# conv4
conv4 = conv_layer_with_bn(pool3, [7, 7, 64, 64], phase_train, name="conv4")
# pool4
pool4, pool4_indices = tf.nn.max_pool_with_argmax(conv4, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME', name='pool4')
""" End of encoder """
""" start upsample """
# upsample4
# Need to change when using different dataset out_w, out_h
# upsample4 = upsample_with_pool_indices(pool4, pool4_indices, pool4.get_shape(), out_w=45, out_h=60, scale=2, name='upsample4')
upsample4 = deconv_layer(pool4, [2, 2, 64, 64], [batch_size, 45, 60, 64], 2, "up4")
# decode 4
conv_decode4 = conv_layer_with_bn(upsample4, [7, 7, 64, 64], phase_train, False, name="conv_decode4")
# upsample 3
# upsample3 = upsample_with_pool_indices(conv_decode4, pool3_indices, conv_decode4.get_shape(), scale=2, name='upsample3')
upsample3= deconv_layer(conv_decode4, [2, 2, 64, 64], [batch_size, 90, 120, 64], 2, "up3")
# decode 3
conv_decode3 = conv_layer_with_bn(upsample3, [7, 7, 64, 64], phase_train, False, name="conv_decode3")
# upsample2
# upsample2 = upsample_with_pool_indices(conv_decode3, pool2_indices, conv_decode3.get_shape(), scale=2, name='upsample2')
upsample2= deconv_layer(conv_decode3, [2, 2, 64, 64], [batch_size, 180, 240, 64], 2, "up2")
# decode 2
conv_decode2 = conv_layer_with_bn(upsample2, [7, 7, 64, 64], phase_train, False, name="conv_decode2")
# upsample1
# upsample1 = upsample_with_pool_indices(conv_decode2, pool1_indices, conv_decode2.get_shape(), scale=2, name='upsample1')
upsample1= deconv_layer(conv_decode2, [2, 2, 64, 64], [batch_size, 360, 480, 64], 2, "up1")
# decode4
conv_decode1 = conv_layer_with_bn(upsample1, [7, 7, 64, 64], phase_train, False, name="conv_decode1")
""" end of Decode """
""" Start Classify """
# output predicted class number (6)
with tf.variable_scope('conv_classifier') as scope:
kernel = _variable_with_weight_decay('weights',
shape=[1, 1, 64, NUM_CLASSES],
initializer=msra_initializer(1, 64),
wd=0.0005)
conv = tf.nn.conv2d(conv_decode1, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [NUM_CLASSES], tf.constant_initializer(0.0))
conv_classifier = tf.nn.bias_add(conv, biases, name=scope.name)
logit = conv_classifier
loss = cal_loss(conv_classifier, labels)
return loss, logit
def train(total_loss, global_step):
total_sample = 274
num_batches_per_epoch = 274/1
""" fix lr """
lr = INITIAL_LEARNING_RATE
loss_averages_op = _add_loss_summaries(total_loss)
# Compute gradients.
with tf.control_dependencies([loss_averages_op]):
opt = tf.train.AdamOptimizer(lr)
grads = opt.compute_gradients(total_loss)
apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
# Add histograms for trainable variables.
for var in tf.trainable_variables():
tf.summary.histogram(var.op.name, var)
# Add histograms for gradients.
for grad, var in grads:
if grad is not None:
tf.summary.histogram(var.op.name + '/gradients', grad)
# Track the moving averages of all trainable variables.
variable_averages = tf.train.ExponentialMovingAverage(
MOVING_AVERAGE_DECAY, global_step)
variables_averages_op = variable_averages.apply(tf.trainable_variables())
with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
train_op = tf.no_op(name='train')
return train_op
def test(FLAGS):
max_steps = FLAGS.max_steps
batch_size = FLAGS.batch_size
train_dir = FLAGS.log_dir # /tmp3/first350/TensorFlow/Logs
# test_dir = FLAGS.test_dir # /tmp3/first350/SegNet-Tutorial/CamVid/train.txt
test_dir = '/home/ml/song/Segnet_tensorflow_3/dataset/test.txt'
# test_ckpt = FLAGS.testing
test_ckpt = "/home/ml/song/Segnet_tensorflow_3/path_to_your_log/model.ckpt-19999.meta"
image_w = FLAGS.image_w
image_h = FLAGS.image_h
image_c = FLAGS.image_c
# testing should set BATCH_SIZE = 1
batch_size = 1
image_filenames, label_filenames = get_filename_list(test_dir)
test_data_node = tf.placeholder(
tf.float32,
shape=[batch_size, image_h, image_w, image_c])
test_labels_node = tf.placeholder(tf.int64, shape=[batch_size, 360, 480, 1])
phase_train = tf.placeholder(tf.bool, name='phase_train')
loss, logits = inference(test_data_node, test_labels_node, batch_size, phase_train)
pred = tf.argmax(logits, axis=3)
# get moving avg
variable_averages = tf.train.ExponentialMovingAverage(
MOVING_AVERAGE_DECAY)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
# gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.0001)
with tf.Session() as sess:
# Load checkpoint
# saver.restore(sess, "/home/ml/song/Segnet_tensorflow/path_to_your_log/model.ckpt-19999")
# saver = tf.train.import_meta_graph('/home/ml/song/Segnet_tensorflow/Logs/model.ckpt-19999.meta')
saver.restore(sess, '/home/ml/song/Segnet_tensorflow_3/path_to_your_log/model.ckpt-19999')
images, labels = get_all_test_data(image_filenames, label_filenames)
threads = tf.train.start_queue_runners(sess=sess)
hist = np.zeros((NUM_CLASSES, NUM_CLASSES))
for image_batch, label_batch in zip(images, labels):
feed_dict = {
test_data_node: image_batch,
test_labels_node: label_batch,
phase_train: False
}
print('*'*100)
print(type(feed_dict))
print(test_data_node.shape)
print(test_labels_node.shape)
print('*'*100)
print(' -- logits')
print(' ', logits.shape, type(logits))
print(logits[0])
print(' -- pred')
print(' ', pred.shape, type(pred))
print('')
dense_prediction, im = sess.run([logits, pred], feed_dict=feed_dict)
print(dense_prediction.shape)
print(im.shape)
print('*'*100)
# output_image to verify
if (FLAGS.save_image):
writeImage(im[0], 'testing_image.png')
# writeImage(im[0], 'out_image/'+str(image_filenames[count]).split('/')[-1])
hist += get_hist(dense_prediction, label_batch)
# count+=1
acc_total = np.diag(hist).sum() / hist.sum()
iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
print("acc: ", acc_total)
print("mean IU: ", np.nanmean(iu))
# ------------------------------------------------------------------------------------------------------
from tensorflow.python.client import device_lib
def get_available_gpus():
local_device_protos = device_lib.list_local_devices()
return [x.name for x in local_device_protos if x.device_type == 'GPU']
# ------------------------------------------------------------------------------------------------------
def training(FLAGS, is_finetune=False):
max_steps = FLAGS.max_steps
batch_size = FLAGS.batch_size
train_dir = FLAGS.log_dir # /tmp3/first350/TensorFlow/Logs
#image_dir = FLAGS.image_dir # /tmp3/first350/SegNet-Tutorial/CamVid/train.txt
image_dir = "/home/ml/song/Segnet_tensorflow_3/dataset/train.txt"
#val_dir = FLAGS.val_dir # /tmp3/first350/SegNet-Tutorial/CamVid/val.txt
val_dir = "/home/ml/song/Segnet_tensorflow_3/dataset/val.txt"
finetune_ckpt = FLAGS.finetune
image_w = FLAGS.image_w
image_h = FLAGS.image_h
image_c = FLAGS.image_c
startstep = 0 if not is_finetune else int(FLAGS.finetune.split('-')[-1])
image_filenames, label_filenames = get_filename_list(image_dir)
val_image_filenames, val_label_filenames = get_filename_list(val_dir)
with tf.Graph().as_default():
train_data_node = tf.compat.v1.placeholder( tf.float32, shape=[batch_size, image_h, image_w, image_c])
train_labels_node = tf.compat.v1.placeholder(tf.int64, shape=[batch_size, image_h, image_w, 1])
phase_train = tf.compat.v1.placeholder(tf.bool, name='phase_train')
global_step = tf.Variable(0, trainable=False)
# For CamVid
images, labels = CamVidInputs(image_filenames, label_filenames, batch_size)
val_images, val_labels = CamVidInputs(val_image_filenames, val_label_filenames, batch_size)
# Build a Graph that computes the logits predictions from the inference model.
loss, eval_prediction = inference(train_data_node, train_labels_node, batch_size, phase_train)
# Build a Graph that trains the model with one batch of examples and updates the model parameters.
train_op = train(loss, global_step)
saver = tf.train.Saver(tf.global_variables())
summary_op = tf.summary.merge_all()
# gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.0001)
with tf.Session() as sess:
# Build an initialization operation to run below.
if (is_finetune == True):
saver.restore(sess, finetune_ckpt )
else:
init = tf.global_variables_initializer()
sess.run(init)
# Start the queue runners.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
# Summery placeholders
summary_writer = tf.summary.FileWriter(train_dir, sess.graph)
average_pl = tf.compat.v1.placeholder(tf.float32)
acc_pl = tf.compat.v1.placeholder(tf.float32)
iu_pl = tf.compat.v1.placeholder(tf.float32)
average_summary = tf.summary.scalar("test_average_loss", average_pl)
acc_summary = tf.summary.scalar("test_accuracy", acc_pl)
iu_summary = tf.summary.scalar("Mean_IU", iu_pl)
for step in range(startstep, startstep + max_steps):
image_batch ,label_batch = sess.run([images, labels])
# since we still use mini-batches in validation, still set bn-layer phase_train = True
feed_dict = {
train_data_node: image_batch,
train_labels_node: label_batch,
phase_train: True
}
start_time = time.time()
_, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
duration = time.time() - start_time
assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
if step % 10 == 0:
num_examples_per_step = batch_size
examples_per_sec = num_examples_per_step / duration
sec_per_batch = float(duration)
format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
'sec/batch)')
print (format_str % (datetime.now(), step, loss_value,
examples_per_sec, sec_per_batch))
# eval current training batch pre-class accuracy
pred = sess.run(eval_prediction, feed_dict=feed_dict)
per_class_acc(pred, label_batch)
if step % 100 == 0:
print("start validating.....")
total_val_loss = 0.0
hist = np.zeros((NUM_CLASSES, NUM_CLASSES))
for test_step in range(int(TEST_ITER)):
val_images_batch, val_labels_batch = sess.run([val_images, val_labels])
_val_loss, _val_pred = sess.run([loss, eval_prediction], feed_dict={
train_data_node: val_images_batch,
train_labels_node: val_labels_batch,
phase_train: True
})
total_val_loss += _val_loss
hist += get_hist(_val_pred, val_labels_batch)
print("val loss: ", total_val_loss / TEST_ITER)
acc_total = np.diag(hist).sum() / hist.sum()
iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
test_summary_str = sess.run(average_summary, feed_dict={average_pl: total_val_loss / TEST_ITER})
acc_summary_str = sess.run(acc_summary, feed_dict={acc_pl: acc_total})
iu_summary_str = sess.run(iu_summary, feed_dict={iu_pl: np.nanmean(iu)})
print_hist_summery(hist)
print(" end validating.... ")
summary_str = sess.run(summary_op, feed_dict=feed_dict)
summary_writer.add_summary(summary_str, step)
summary_writer.add_summary(test_summary_str, step)
summary_writer.add_summary(acc_summary_str, step)
summary_writer.add_summary(iu_summary_str, step)
# Save the model checkpoint periodically.
if step % 1000 == 0 or (step + 1) == max_steps:
checkpoint_path = os.path.join(train_dir, 'model.ckpt')
saver.save(sess, checkpoint_path, global_step=step)
coord.request_stop()
coord.join(threads)

Spyder freezes on running session in tesorflow

I am trying to build a model that classifies images from belgium traffic datasets, but spyder freezes when i try to run the optimizer and train operation in sess.run[].
i have attached the code below.
graph = tf.Graph()
with graph.as_default():
images_X = tf.compat.v1.placeholder(tf.float32,shape = [None,32,32,3])
labels_X = tf.compat.v1.placeholder(tf.int32,shape = [None])
#biasInit = tf.constant_initializer(0.1, dtype=tf.float32)
# Initializer
biasInit = tf.initializers.GlorotUniform()
#conv layer 1 - num_filtewrs = 128, kernel size = [6,6]
conv_1 = Conv2D(filters = 128,kernel_size = [6,6],bias_initializer = biasInit)(images_X)
#batch normalization
bn_1 = BatchNormalization(center = True,scale = True)(conv_1)
#maxpoloing
pool_1 = MaxPooling2D(pool_size = (2,2))(bn_1)
#conv layer 2
conv_2 = Conv2D(filters = 256,kernel_size = [6,6] , strides = (2,2),
bias_initializer=biasInit)(pool_1)
#Batch normalization 2
bn_2 = BatchNormalization(center = True,scale = True)(conv_2)
pool_2 = MaxPooling2D(pool_size = (2,2))(bn_2)
#faltten
images_flat = Flatten()(pool_2)
#dense layer - units = 512
fc_1 = Dense(units = 512,activation = 'relu')(images_flat)
bn_3 = BatchNormalization(center = True,scale = True)(fc_1)
dropout = Dropout(0.25)(bn_3)
#logits will be of the size [None,62]
logits = Dense(units = 62,activation = 'relu')(dropout)
#converting the lofits - [None,62] to labels - [None]
predicted_labels = tf.argmax(logits, axis=1)
loss_op = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits = logits,
labels = labels_X))
update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
# Create an optimizer, which acts as the training op.
train = tf.compat.v1.train.AdamOptimizer(learning_rate=0.10).minimize(loss_op)
init_op = tf.compat.v1.global_variables_initializer()
print("images_flat: ", images_flat)
print("logits: ", logits)
print("loss: ", loss_op)
print("predicted_labels: ", predicted_labels)
# Create a session to run the graph we created.
session = tf.compat.v1.Session(graph=graph, config=tf.compat.v1.ConfigProto(log_device_placement=True))
session.run(init_op)
for i in range(10):
_, loss_value = session.run([train, loss_op], feed_dict={images_X: images_array, labels_X: labels_array})
print("Loss: ", loss_value)
when i run the program line by line, it runs well until the last for loop, when it reaches the last for loop the memory goes to 99% and the whole pc freezes.

Loss value becomes constant after some training steps while training a CNN in Tensorflow

I'm trying to develop a convolutional neural network for image classification.
Currently I am working on classifying a set from about 1000 cats and dogs images.
However, I´m stuck in the training process.
Firstly, I tried to develop my own network, preprocessing and labeling the images myself, testing with different architectures and hyperparameters using Tensorflow.
As I didn't obtain good results, I tried to create a similar network with keras, obtainig better results.
In the following code I create the trainig and validation sets for the tensorflow network:
def oneHot(img):
label = img.split('.')[-3]
if label == 'cat': return [1, 0]
elif label == 'dog': return [0, 1]
def loadData(img_dir):
global img_h
global img_w
data_set = []
for img in tqdm(os.listdir(img_dir)):
label = oneHot(img)
path = os.path.join(img_dir, img)
img = cv2.imread(path)
img = cv2.resize(img, (img_h, img_w))
data_set.append([np.array(img/255, dtype='float32'), np.array(label)])
shuffle(data_set)
return data_set
def divideSet(data_set, train_size):
len_train = int(len(data_set)*train_size)
train_set = data_set[:len_train]
valid_set = data_set[len_train:]
return train_set, valid_set
def separateArgLabel(data_set):
arg = np.array([i[0] for i in data_set])
label = np.array([i[1] for i in data_set])
return arg, label
train_set = loadData(train_dir)
train_data, valid_data = divideSet(train_set, 0.8)
x_train, y_train = separateArgLabel(train_data)
x_valid, y_valid = separateArgLabel(valid_data)
And the code that I used to build and train my model in tensorflow:
def flattenLayer(x):
layer_shape = x.get_shape()
n_input = layer_shape[1:4].num_elements()
flat_layer = tf.reshape(x,[-1,n_input])
return flat_layer
def getRandomBatch(x, y, size):
rnd_idx = np.random.choice(len(x), size)
x_batch = x[rnd_idx]
y_batch = y[rnd_idx]
return x_batch, y_batch
with tf.Session() as sess:
x = tf.placeholder(tf.float32, shape=[None,img_w,img_h,img_c])
y = tf.placeholder(tf.float32, shape=[None,2])
conv1 = tf.layers.conv2d(x, 32, [5,5], strides=1, padding='same',
activation=tf.nn.relu)
pool1 = tf.layers.max_pooling2d(conv1, pool_size=[2,2], strides=2)
conv2 = tf.layers.conv2d(pool1, 64, [5,5], strides=1, padding='same',
activation=tf.nn.relu)
pool2 = tf.layers.max_pooling2d(conv2, pool_size=[2,2], strides=2)
conv3 = tf.layers.conv2d(pool2, 128, [5,5], strides=1, padding='same',
activation=tf.nn.relu)
pool3 = tf.layers.max_pooling2d(conv3, pool_size=[2,2], strides=2)
conv4 = tf.layers.conv2d(pool3, 64, [5,5], strides=1, padding='same',
activation=tf.nn.relu)
pool4 = tf.layers.max_pooling2d(conv4, pool_size=[2,2], strides=2)
conv5 = tf.layers.conv2d(pool4, 32, [5,5], strides=1, padding='same',
activation=tf.nn.relu)
pool5 = tf.layers.max_pooling2d(conv5, pool_size=[2,2], strides=2)
flatten = flattenLayer(pool5)
fc1 = tf.layers.dense(flatten, 1024, activation=tf.nn.relu)
logits = tf.layers.dense(fc1, 2, activation=tf.nn.relu)
y_pred = tf.nn.softmax(logits)
cross_entropy = losses.categorical_crossentropy(y, y_pred)
loss = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(0.0005)
grads = optimizer.compute_gradients(loss)
train = optimizer.apply_gradients(grads)
y_cls = tf.arg_max(y, 1)
y_pred_cls = tf.arg_max(y_pred, 1)
correct = tf.equal(y_pred_cls, y_cls)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()
sess.run(init)
for epoch in range(10):
sum_loss_train = 0
sum_acc_train = 0
for i in range(100):
batch_x, batch_y = getRandomBatch(x_train, y_train, 32)
feed_dict_train = {x:batch_x, y:batch_y}
_,loss_train,acc_train = sess.run([train,loss,accuracy],
feed_dict=feed_dict_train)
sum_loss_train += loss_train
sum_acc_train += acc_train
sys.stdout.write('\r'+str(i+1)+'/'+str(100)+'\t'+'loss: '+
str(sum_loss_train/(i+1))+' accuracy: '+str(acc_train))
sys.stdout.flush()
mean_loss_train = sum_loss_train/(i+1)
mean_acc_train = sum_acc_train/(i+1)
print("\nÉpoca: " + str(epoch+1) + " ===========> Epoch loss: " + "
{:.4f}".format(mean_loss_train))
print("\tEpoch accuracy: " + "{:.2f} %".format(mean_acc_train*100))
sum_loss_val = 0
sum_acc_val = 0
for j in range(50):
batch_x_val, batch_y_val = getRandomBatch(x_valid, y_valid, 32)
feed_dict_valid = {x:batch_x_val, y:batch_y_val}
loss_val,acc_val = sess.run([loss,accuracy],
feed_dict=feed_dict_valid)
sum_acc_val += acc_val
sum_loss_val += loss_val
mean_acc_val = sum_acc_val/(j+1)
mean_loss_val = sum_loss_val/(j+1)
print("\nValidation loss: " + "{:.4f}".format(mean_loss_val))
print("\tValidation accuracy: " + "{:.2f} %".format(mean_acc_val*100))
When I run the model, after some iterations, the gradients always became zero an the loss got stuck in a constant value.
At first I thought the network stopped learning because of the lack of images, but when I tried to train the same dataset with a network built in Keras, the results were pretty good.
I used the same number of layers, the same hiperparameters and I processed the images the same way in both cases. Although the weigth's initialization may differ, the results make me think that there is some error in the code I added.
Could someone help me with this issue?

tensorflow NaN loss during training CNN model image classification

I was following the CNN Mnist tutorial on https://www.tensorflow.org/tutorials/layers for my personal image classification task. My input image size is 224 * 224 * 3 instead of 28 * 28 from tutorial, and I have only 5 classes rather than 10. I read previous posts on this problem, and many people pointed out that either a too big learning rate or use of cross_entropy_loss could potentially be problem, but I am not sure if that is the case here.
When I started training, I immediately get this NaN loss training error:
ERROR:tensorflow:Model diverged with loss = NaN.
Traceback (most recent call last):
File "cnn_model.py", line 75, in <module>
main(sys.argv[1], sys.argv[2])
File "cnn_model.py", line 68, in main
classifier.train(input_fn = train_input_fn, steps = 2000, hooks = [logging_hook])
File "C:\Users\sz\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 241, in train
loss = self._train_model(input_fn=input_fn, hooks=hooks)
File "C:\Users\sz\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 612, in _train_model
_, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss])
File "C:\Users\sz\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 505, in run
run_metadata=run_metadata)
File "C:\Users\sz\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 842, in run
run_metadata=run_metadata)
File "C:\Users\sz\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 798, in run
return self._sess.run(*args, **kwargs)
File "C:\Users\sz\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 960, in run
run_metadata=run_metadata))
File "C:\Users\sz\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\training\basic_session_run_hooks.py", line 477, in after_run
raise NanLossDuringTrainingError
tensorflow.python.training.basic_session_run_hooks.NanLossDuringTrainingError: NaN loss during training.
And below is the model code:
import tensorflow as tf
from helper import load_data_and_label
import cv2
import sys
import math
def cnn_model_fn(features, labels, mode):
#input layer
input_layer = tf.reshape(features['x'], [-1, 224, 224, 3])
#conv layer 1
conv1 = tf.layers.conv2d(inputs = input_layer, filters = 32, kernel_size
= [5,5], padding = 'same', activation = tf.nn.relu)
#pooling layer 1
pool1 = tf.layers.max_pooling2d(inputs = conv1, pool_size = [2,2], strides = 2)
#conv2 and pool2 layers
conv2 = tf.layers.conv2d(inputs = pool1, filters = 64, kernel_size = [5,5], padding = 'same', activation = tf.nn.relu)
pool2 = tf.layers.max_pooling2d(inputs = conv2, pool_size = [2,2], strides = 2)
#conv3 and pool3 layers
conv3 = tf.layers.conv2d(inputs = pool2, filters = 64, kernel_size = [5,5], padding = 'same', activation = tf.nn.relu)
pool3 = tf.layers.max_pooling2d(inputs = conv3, pool_size = [2,2], strides = 2)
#conv4 and pool4 layers
conv4 = tf.layers.conv2d(inputs = pool3, filters = 64, kernel_size = [5,5], padding = 'same', activation = tf.nn.relu)
pool4 = tf.layers.max_pooling2d(inputs = conv4, pool_size = [2,2], strides = 2)
#conv5 and pool5 layers
conv5 = tf.layers.conv2d(inputs = pool4, filters = 64, kernel_size = [5,5], padding = 'same', activation = tf.nn.relu)
pool5 = tf.layers.max_pooling2d(inputs = conv5, pool_size = [2,2], strides = 2)
#dense layer
pool5_flat = tf.reshape(pool5, [-1, 7 * 7 * 64])
dense = tf.layers.dense(inputs = pool5_flat, units = 1024, activation = tf.nn.relu)
dropout = tf.layers.dropout(inputs = dense, rate = 0.5,
training = mode == tf.estimator.ModeKeys.TRAIN)
#logits layer
logits = tf.layers.dense(inputs = dropout, units = 5)
predictions = {"classes":tf.argmax(input = logits, axis = 1),
"prob": tf.nn.softmax(logits, name = 'softmax_tensor')}
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode = mode, predictions = predictions)
#calculate loss
onehot_labels = tf.one_hot(indices = tf.cast(labels, tf.int32), depth = 5)
loss = tf.losses.softmax_cross_entropy(onehot_labels = onehot_labels, logits = logits)
#configure training operation
if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.001)
train_op = optimizer.minimize(loss = loss, global_step = tf.train.get_global_step())
return tf.estimator.EstimatorSpec(mode = mode, loss = loss, train_op = train_op)
#evaluation metrics
eval_metrics_ops = {"accuracy": tf.metrics.accuracy(labels = labels, predictions = predictions["classes"])}
return tf.estimator.EstimatorSpec(mode = mode, loss = loss, eval_metrics_ops = eval_metrics_ops)
def main(imagepath, labelpath):
train_data, train_labels, eval_data, eval_labels = load_data_and_label(imagepath, labelpath)
classifier = tf.estimator.Estimator(model_fn = cnn_model_fn, model_dir = "/tmp/retina_convnet_model")
tensors_to_log = {"prob": "softmax_tensor"}
logging_hook = tf.train.LoggingTensorHook(tensors = tensors_to_log, every_n_iter = 50)
#train the model
train_input_fn = tf.estimator.inputs.numpy_input_fn(x = {"x":train_data}, y = train_labels,
batch_size = 32, num_epochs = None, shuffle = True)
classifier.train(input_fn = train_input_fn, steps = 2000, hooks = [logging_hook])
eval_input_fn = tf.estimator.inputs.numpy_input_fn(x = {"x":eval_data}, y = eval_labels, num_epochs = 1, shuffle = False)
eval_results = classifier.evaluate(input_fn = eval_input_fn)
print(eval_results)
if __name__ == "__main__":
main(sys.argv[1], sys.argv[2])
Thank you so much! Any help would be really appreciated!

Did you do any preprocessing on the images? If not, then maybe try to standardize the images in your helper function and see if that helps.

How to use a tensorflow session inside a nerual network model

I was following along with a sentdex tutorial on writing a constitutional neural network and I got to wondering if I could figure out my own pooling layer. The problem is that as part of this pooling layer I have to perform a tensorflow function using a session.
def customPool(x):
patches = tf.extract_image_patches(x, [1, 2, 2, 1], [1,2,2,1], [1,1,1,1], 'SAME')
tempSess = tf.Session()
bool1 = tempSess.run( tf.greater( tf.reduce_max(patches) , tf.contrib.distributions.percentile(patches, q=75.) ) )
tempSess.close()
if bool1:
return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
else:
return tf.nn.avg_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
but the problem is, at least I think, that since I start everything out with a place holder
x = tf.placeholder('float', [None, 784])
Basically my question is: How do I compute something using a tensorflow session inside of the neural network model if it is being passed placeholder variables? Help is much appreciated!
full code:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot = True)
n_classes = 10
batch_size = 128
x = tf.placeholder('float', [None, 784])
y = tf.placeholder('float')
keep_rate = 0.8
keep_prob = tf.placeholder(tf.float32)
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')
def maxpool2d(x):
# size of window movement of window
return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
def customPool(x):
patches = tf.extract_image_patches(x, [1, 2, 2, 1], [1,2,2,1], [1,1,1,1], 'SAME')
tempSess = tf.Session()
bool1 = tempSess.run( tf.greater( tf.reduce_max(patches) , tf.contrib.distributions.percentile(patches, q=75.) ) )
tempSess.close()
if bool1:
return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
else:
return tf.nn.avg_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
def convolutional_neural_network(x):
weights = {'W_conv1':tf.Variable(tf.random_normal([5,5,1,32])),
'W_conv2':tf.Variable(tf.random_normal([5,5,32,64])),
'W_fc':tf.Variable(tf.random_normal([7*7*64,1024])),
'out':tf.Variable(tf.random_normal([1024, n_classes]))}
biases = {'b_conv1':tf.Variable(tf.random_normal([32])),
'b_conv2':tf.Variable(tf.random_normal([64])),
'b_fc':tf.Variable(tf.random_normal([1024])),
'out':tf.Variable(tf.random_normal([n_classes]))}
x = tf.reshape(x, shape=[-1, 28, 28, 1])
conv1 = tf.nn.relu(conv2d(x, weights['W_conv1']) + biases['b_conv1'])
#conv1 = maxpool2d(conv1)
conv1 = customPool(conv1)
conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['b_conv2'])
#conv2 = maxpool2d(conv2)
conv1 = customPool(conv1)
fc = tf.reshape(conv2,[-1, 7*7*64])
fc = tf.nn.relu(tf.matmul(fc, weights['W_fc'])+biases['b_fc'])
fc = tf.nn.dropout(fc, keep_rate)
output = tf.matmul(fc, weights['out'])+biases['out']
return output
def train_neural_network(x):
prediction = convolutional_neural_network(x)
cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y) )
optimizer = tf.train.AdamOptimizer().minimize(cost)
hm_epochs = 1
with tf.Session() as sess:
sess.run(tf.initialize_all_variables())
saver = tf.train.Saver()
for epoch in range(hm_epochs):
epoch_loss = 0
for _ in range(int(mnist.train.num_examples/batch_size)):
epoch_x, epoch_y = mnist.train.next_batch(batch_size)
_, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
epoch_loss += c
print('Epoch', epoch, 'completed out of',hm_epochs,'loss:',epoch_loss)
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
print('Accuracy:',accuracy.eval({x:mnist.test.images, y:mnist.test.labels}))
save_path = saver.save(sess, "/tmp/convnet_maxpool")
print("Model saved in file: %s" % save_path)
#sess = tf.Session()
train_neural_network(x)
#sess.close()
EDIT: after following Maxim's advice I ran it and it threw the error
_, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
InvalidArgumentError (see above for traceback): logits and labels must be same size: logits_size=[512,10] labels_size=[128,10]
[[Node: SoftmaxCrossEntropyWithLogits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Reshape_2, Reshape_3)]]
it was tracing back to:
File "conv net custom test 1.py", line 89, in <module>
train_neural_network(x)
File "conv net custom test 1.py", line 59, in train_neural_network
cost=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y) )

Use tf.cond, not session:
def customPool(x):
patches = tf.extract_image_patches(x, [1, 2, 2, 1], [1,2,2,1], [1,1,1,1], 'SAME')
pred = tf.greater(tf.reduce_max(patches),
tf.contrib.distributions.percentile(patches, q=75.))
return tf.cond(pred,
lambda: tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME'),
lambda: tf.nn.avg_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME'))
Update:
You've also a copy-paste bug: it's conv1 = customPool(conv1) twice in a row, conv2 isn't downsampled, hence the dimensions error.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

TensorFlow CNN on multiple GPUs - python

Related

How to solve this Error? ValueError: Cannot feed value of shape (1, 360, 480, 3, 1) for Tensor Placeholder_1:0, which has shape (1, 360, 480, 1)

Spyder freezes on running session in tesorflow

Loss value becomes constant after some training steps while training a CNN in Tensorflow

tensorflow NaN loss during training CNN model image classification

How to use a tensorflow session inside a nerual network model

Categories

Resources