Refer to the Capsule Network Code, I am using just the classification module from the mentioned code, So following is the complete classification code that I extracted from the link.
from __future__ import division, print_function, unicode_literals
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
tf.reset_default_graph()
np.random.seed(42)
tf.set_random_seed(42)
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")
X = tf.placeholder(shape=[None, 28, 28, 1], dtype=tf.float32, name="X")
caps1_n_maps = 32
caps1_n_caps = caps1_n_maps * 6 * 6 # 1152 primary capsules
caps1_n_dims = 8
conv1_params = {
"filters": 256,
"kernel_size": 9,
"strides": 1,
"padding": "valid",
"activation": tf.nn.relu,
}
conv2_params = {
"filters": caps1_n_maps * caps1_n_dims, # 256 convolutional filters
"kernel_size": 9,
"strides": 2,
"padding": "valid",
"activation": tf.nn.relu
}
conv1 = tf.layers.conv2d(X, name="conv1", **conv1_params)
conv2 = tf.layers.conv2d(conv1, name="conv2", **conv2_params)
caps1_raw = tf.reshape(conv2, [-1, caps1_n_caps, caps1_n_dims],name="caps1_raw")
def squash(s, axis=-1, epsilon=1e-7, name=None):
with tf.name_scope(name, default_name="squash"):
squared_norm = tf.reduce_sum(tf.square(s), axis=axis,keep_dims=True)
safe_norm = tf.sqrt(squared_norm + epsilon)
squash_factor = squared_norm / (1. + squared_norm)
unit_vector = s / safe_norm
return squash_factor * unit_vector
caps1_output = squash(caps1_raw, name="caps1_output")
caps2_n_caps = 10
caps2_n_dims = 16
init_sigma = 0.1
W_init = tf.random_normal(
shape=(1, caps1_n_caps, caps2_n_caps, caps2_n_dims, caps1_n_dims),
stddev=init_sigma, dtype=tf.float32, name="W_init")
W = tf.Variable(W_init, name="W")
batch_size = tf.shape(X)[0]
W_tiled = tf.tile(W, [batch_size, 1, 1, 1, 1], name="W_tiled")
caps1_output_expanded = tf.expand_dims(caps1_output, -1,
name="caps1_output_expanded")
caps1_output_tile = tf.expand_dims(caps1_output_expanded, 2,
name="caps1_output_tile")
caps1_output_tiled = tf.tile(caps1_output_tile, [1, 1, caps2_n_caps, 1, 1],
name="caps1_output_tiled")
caps2_predicted = tf.matmul(W_tiled, caps1_output_tiled,
name="caps2_predicted")
raw_weights = tf.zeros([batch_size, caps1_n_caps, caps2_n_caps, 1, 1],
dtype=np.float32, name="raw_weights")
#ROUND 1
routing_weights = tf.nn.softmax(raw_weights, dim=2, name="routing_weights")
weighted_predictions = tf.multiply(routing_weights, caps2_predicted,
name="weighted_predictions")
weighted_sum = tf.reduce_sum(weighted_predictions, axis=1, keep_dims=True,
name="weighted_sum")
caps2_output_round_1 = squash(weighted_sum, axis=-2,
name="caps2_output_round_1")
#ROUND 2
caps2_output_round_1_tiled = tf.tile(
caps2_output_round_1, [1, caps1_n_caps, 1, 1, 1],
name="caps2_output_round_1_tiled")
agreement = tf.matmul(caps2_predicted, caps2_output_round_1_tiled,
transpose_a=True, name="agreement")
raw_weights_round_2 = tf.add(raw_weights, agreement,
name="raw_weights_round_2")
routing_weights_round_2 = tf.nn.softmax(raw_weights_round_2,
dim=2,
name="routing_weights_round_2")
weighted_predictions_round_2 = tf.multiply(routing_weights_round_2,
caps2_predicted,
name="weighted_predictions_round_2")
weighted_sum_round_2 = tf.reduce_sum(weighted_predictions_round_2,
axis=1, keep_dims=True,
name="weighted_sum_round_2")
caps2_output_round_2 = squash(weighted_sum_round_2,
axis=-2,
name="caps2_output_round_2")
caps2_output = caps2_output_round_2
#ESTIMATE CLASS PROBABILITIES
def safe_norm(s, axis=-1, epsilon=1e-7, keep_dims=False, name=None):
with tf.name_scope(name, default_name="safe_norm"):
squared_norm = tf.reduce_sum(tf.square(s), axis=axis,
keep_dims=keep_dims)
return tf.sqrt(squared_norm + epsilon)
y_proba = safe_norm(caps2_output, axis=-2, name="y_proba")
y_proba_argmax = tf.argmax(y_proba, axis=2, name="y_proba")
y_pred = tf.squeeze(y_proba_argmax, axis=[1,2], name="y_pred")
y = tf.placeholder(shape=[None], dtype=tf.int64, name="y")
m_plus = 0.9
m_minus = 0.1
lambda_ = 0.5
T = tf.one_hot(y, depth=caps2_n_caps, name="T")
caps2_output_norm = safe_norm(caps2_output, axis=-2, keep_dims=True,
name="caps2_output_norm")
present_error_raw = tf.square(tf.maximum(0., m_plus - caps2_output_norm),
name="present_error_raw")
present_error = tf.reshape(present_error_raw, shape=(-1, 10),
name="present_error")
absent_error_raw = tf.square(tf.maximum(0., caps2_output_norm - m_minus),
name="absent_error_raw")
absent_error = tf.reshape(absent_error_raw, shape=(-1, 10),
name="absent_error")
L = tf.add(T * present_error, lambda_ * (1.0 - T) * absent_error,
name="L")
margin_loss = tf.reduce_mean(tf.reduce_sum(L, axis=1), name="margin_loss")
loss = tf.add(margin_loss, 0, name="loss")
correct = tf.equal(y, y_pred, name="correct")
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")
optimizer = tf.train.AdamOptimizer()
training_op = optimizer.minimize(loss, name="training_op")
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_epochs = 10
batch_size = 50
restore_checkpoint = True
n_iterations_per_epoch = mnist.train.num_examples // batch_size
n_iterations_validation = mnist.validation.num_examples // batch_size
best_loss_val = np.infty
checkpoint_path = "./my_capsule_network"
with tf.Session() as sess:
if restore_checkpoint and tf.train.checkpoint_exists(checkpoint_path):
saver.restore(sess, checkpoint_path)
else:
init.run()
for epoch in range(n_epochs):
for iteration in range(1, n_iterations_per_epoch + 1):
X_batch, y_batch = mnist.train.next_batch(batch_size)
# Run the training operation and measure the loss:
_, loss_train = sess.run(
[training_op, loss],
feed_dict={X: X_batch.reshape([-1, 28, 28, 1]),
y: y_batch})
print("\rIteration: {}/{} ({:.1f}%) Loss: {:.5f}".format(
iteration, n_iterations_per_epoch,
iteration * 100 / n_iterations_per_epoch,
loss_train),
end="")
# At the end of each epoch,
# measure the validation loss and accuracy:
loss_vals = []
acc_vals = []
for iteration in range(1, n_iterations_validation + 1):
X_batch, y_batch = mnist.validation.next_batch(batch_size)
loss_val, acc_val = sess.run(
[loss, accuracy],
feed_dict={X: X_batch.reshape([-1, 28, 28, 1]),
y: y_batch})
loss_vals.append(loss_val)
acc_vals.append(acc_val)
print("\rEvaluating the model: {}/{} ({:.1f}%)".format(
iteration, n_iterations_validation,
iteration * 100 / n_iterations_validation),
end=" " * 10)
loss_val = np.mean(loss_vals)
acc_val = np.mean(acc_vals)
print("\rEpoch: {} Val accuracy: {:.4f}% Loss: {:.6f}{}".format(
epoch + 1, acc_val * 100, loss_val,
" (improved)" if loss_val < best_loss_val else ""))
# And save the model if it improved:
if loss_val < best_loss_val:
save_path = saver.save(sess, checkpoint_path)
best_loss_val = loss_val
n_iterations_test = mnist.test.num_examples // batch_size
with tf.Session() as sess:
saver.restore(sess, checkpoint_path)
loss_tests = []
acc_tests = []
for iteration in range(1, n_iterations_test + 1):
X_batch, y_batch = mnist.test.next_batch(batch_size)
loss_test, acc_test = sess.run(
[loss, accuracy],
feed_dict={X: X_batch.reshape([-1, 28, 28, 1]),
y: y_batch})
loss_tests.append(loss_test)
acc_tests.append(acc_test)
print("\rEvaluating the model: {}/{} ({:.1f}%)".format(
iteration, n_iterations_test,
iteration * 100 / n_iterations_test),
end=" " * 10)
loss_test = np.mean(loss_tests)
acc_test = np.mean(acc_tests)
print("\rFinal test accuracy: {:.4f}% Loss: {:.6f}".format(
acc_test * 100, loss_test))
Since reconstruction module is not required so in the final loss function I have set loss = 0 for the reconstruction part so that optimizer only work on the classification loss.
loss = tf.add(margin_loss, 0, name="loss")
Now the classification module is working but the accuracy is pathetically low even for MNIST dataset, I got around 10% validation accuracy, Can somebody explain to me what is the problem here because on MNIST the Capsule Networks perform well in classification as discussed in Paper?
Regards
Related
I need your help in my segnet.
I cloned this repository in git.
https://github.com/tkuanlun350/Tensorflow-SegNet
And I changed dataset to my custom data consisting of 3 classes including the background.
But This error occurred when I executed the test command.
ValueError: Cannot feed value of shape (1, 360, 480, 3, 1) for Tensor Placeholder_1:0, which has shape (1, 360, 480, 1)
How to solve this problem?
I'm not good at tensorflow. Please Help me...
Here is my code.
python 3.7
tensorflow 2.0
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
import os, sys
import numpy as np
import math
from datetime import datetime
import time
from PIL import Image
from math import ceil
from tensorflow.python.ops import gen_nn_ops
# modules
from Utils import _variable_with_weight_decay, _variable_on_cpu, _add_loss_summaries, _activation_summary, print_hist_summery, get_hist, per_class_acc, writeImage
from Inputs import *
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
os.environ["CUDA_VISIBLE_DEVICES"]="0, 1, 2, 3"
# gpus = tf.config.experimental.list_physical_devices('GPU')
gpus = tf.config.list_physical_devices('GPU')
for i in range(len(gpus)):
tf.config.experimental.set_memory_growth(gpus[i], True)
# Constants describing the training process.
MOVING_AVERAGE_DECAY = 0.9999 # The decay to use for the moving average.
NUM_EPOCHS_PER_DECAY = 350.0 # Epochs after which learning rate decays.
LEARNING_RATE_DECAY_FACTOR = 0.1 # Learning rate decay factor.
INITIAL_LEARNING_RATE = 0.001 # Initial learning rate.
EVAL_BATCH_SIZE = 8
BATCH_SIZE = 8
# for CamVid
IMAGE_HEIGHT = 360
IMAGE_WIDTH = 480
IMAGE_DEPTH = 3
NUM_CLASSES = 3
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 367
NUM_EXAMPLES_PER_EPOCH_FOR_TEST = 101
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 1
TEST_ITER = NUM_EXAMPLES_PER_EPOCH_FOR_TEST / BATCH_SIZE
def msra_initializer(kl, dl):
"""
kl for kernel size, dl for filter number
"""
stddev = math.sqrt(2. / (kl**2 * dl))
return tf.truncated_normal_initializer(stddev=stddev)
def orthogonal_initializer(scale = 1.1):
''' From Lasagne and Keras. Reference: Saxe et al., http://arxiv.org/abs/1312.6120
'''
def _initializer(shape, dtype=tf.float32, partition_info=None):
flat_shape = (shape[0], np.prod(shape[1:]))
a = np.random.normal(0.0, 1.0, flat_shape)
u, _, v = np.linalg.svd(a, full_matrices=False)
# pick the one with the correct shape
q = u if u.shape == flat_shape else v
q = q.reshape(shape) #this needs to be corrected to float32
return tf.constant(scale * q[:shape[0], :shape[1]], dtype=tf.float32)
return _initializer
def loss(logits, labels):
"""
loss func without re-weighting
"""
# Calculate the average cross entropy loss across the batch.
logits = tf.reshape(logits, (-1,NUM_CLASSES))
labels = tf.reshape(labels, [-1])
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits=logits, labels=labels, name='cross_entropy_per_example')
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
tf.add_to_collection('losses', cross_entropy_mean)
return tf.add_n(tf.get_collection('losses'), name='total_loss')
def weighted_loss(logits, labels, num_classes, head=None):
""" median-frequency re-weighting """
with tf.name_scope('loss'):
logits = tf.reshape(logits, (-1, num_classes))
epsilon = tf.constant(value=1e-10)
logits = logits + epsilon
# consturct one-hot label array
label_flat = tf.reshape(labels, (-1, 1))
# should be [batch ,num_classes]
labels = tf.reshape(tf.one_hot(label_flat, depth=num_classes), (-1, num_classes))
softmax = tf.nn.softmax(logits)
cross_entropy = -tf.reduce_sum(tf.multiply(labels * tf.log(softmax + epsilon), head), axis=[1])
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
tf.add_to_collection('losses', cross_entropy_mean)
loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
return loss
def cal_loss(logits, labels):
loss_weight = np.array([
0.2595,
0.3826,
1.0974]) # class 0~11
labels = tf.cast(labels, tf.int32)
# return loss(logits, labels)
return weighted_loss(logits, labels, num_classes=NUM_CLASSES, head=loss_weight)
def conv_layer_with_bn(inputT, shape, train_phase, activation=True, name=None):
in_channel = shape[2]
out_channel = shape[3]
k_size = shape[0]
with tf.variable_scope(name) as scope:
kernel = _variable_with_weight_decay('ort_weights', shape=shape, initializer=orthogonal_initializer(), wd=None)
conv = tf.nn.conv2d(inputT, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [out_channel], tf.constant_initializer(0.0))
bias = tf.nn.bias_add(conv, biases)
if activation is True:
conv_out = tf.nn.relu(batch_norm_layer(bias, train_phase, scope.name))
else:
conv_out = batch_norm_layer(bias, train_phase, scope.name)
return conv_out
def get_deconv_filter(f_shape):
"""
reference: https://github.com/MarvinTeichmann/tensorflow-fcn
"""
width = f_shape[0]
heigh = f_shape[0]
f = ceil(width/2.0)
c = (2 * f - 1 - f % 2) / (2.0 * f)
bilinear = np.zeros([f_shape[0], f_shape[1]])
for x in range(width):
for y in range(heigh):
value = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
bilinear[x, y] = value
weights = np.zeros(f_shape)
for i in range(f_shape[2]):
weights[:, :, i, i] = bilinear
init = tf.constant_initializer(value=weights,
dtype=tf.float32)
return tf.get_variable(name="up_filter", initializer=init,
shape=weights.shape)
def deconv_layer(inputT, f_shape, output_shape, stride=2, name=None):
# output_shape = [b, w, h, c]
# sess_temp = tf.InteractiveSession()
sess_temp = tf.global_variables_initializer()
strides = [1, stride, stride, 1]
with tf.variable_scope(name):
weights = get_deconv_filter(f_shape)
deconv = tf.nn.conv2d_transpose(inputT, weights, output_shape,
strides=strides, padding='SAME')
return deconv
def batch_norm_layer(inputT, is_training, scope):
return tf.cond(is_training,
lambda: tf.keras.layers.BatchNormalization(center=False)(inputT),
lambda: tf.keras.layers.BatchNormalization(center=False)(inputT))
def inference(images, labels, batch_size, phase_train):
# norm1
norm1 = tf.nn.lrn(images, depth_radius=5, bias=1.0, alpha=0.0001, beta=0.75,
name='norm1')
# conv1
conv1 = conv_layer_with_bn(norm1, [7, 7, images.get_shape().as_list()[3], 64], phase_train, name="conv1")
print(conv1.shape)
# pool1
pool1, pool1_indices = tf.nn.max_pool_with_argmax(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
padding='SAME', name='pool1')
# conv2
conv2 = conv_layer_with_bn(pool1, [7, 7, 64, 64], phase_train, name="conv2")
# pool2
pool2, pool2_indices = tf.nn.max_pool_with_argmax(conv2, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME', name='pool2')
# conv3
conv3 = conv_layer_with_bn(pool2, [7, 7, 64, 64], phase_train, name="conv3")
# pool3
pool3, pool3_indices = tf.nn.max_pool_with_argmax(conv3, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME', name='pool3')
# conv4
conv4 = conv_layer_with_bn(pool3, [7, 7, 64, 64], phase_train, name="conv4")
# pool4
pool4, pool4_indices = tf.nn.max_pool_with_argmax(conv4, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME', name='pool4')
""" End of encoder """
""" start upsample """
# upsample4
# Need to change when using different dataset out_w, out_h
# upsample4 = upsample_with_pool_indices(pool4, pool4_indices, pool4.get_shape(), out_w=45, out_h=60, scale=2, name='upsample4')
upsample4 = deconv_layer(pool4, [2, 2, 64, 64], [batch_size, 45, 60, 64], 2, "up4")
# decode 4
conv_decode4 = conv_layer_with_bn(upsample4, [7, 7, 64, 64], phase_train, False, name="conv_decode4")
# upsample 3
# upsample3 = upsample_with_pool_indices(conv_decode4, pool3_indices, conv_decode4.get_shape(), scale=2, name='upsample3')
upsample3= deconv_layer(conv_decode4, [2, 2, 64, 64], [batch_size, 90, 120, 64], 2, "up3")
# decode 3
conv_decode3 = conv_layer_with_bn(upsample3, [7, 7, 64, 64], phase_train, False, name="conv_decode3")
# upsample2
# upsample2 = upsample_with_pool_indices(conv_decode3, pool2_indices, conv_decode3.get_shape(), scale=2, name='upsample2')
upsample2= deconv_layer(conv_decode3, [2, 2, 64, 64], [batch_size, 180, 240, 64], 2, "up2")
# decode 2
conv_decode2 = conv_layer_with_bn(upsample2, [7, 7, 64, 64], phase_train, False, name="conv_decode2")
# upsample1
# upsample1 = upsample_with_pool_indices(conv_decode2, pool1_indices, conv_decode2.get_shape(), scale=2, name='upsample1')
upsample1= deconv_layer(conv_decode2, [2, 2, 64, 64], [batch_size, 360, 480, 64], 2, "up1")
# decode4
conv_decode1 = conv_layer_with_bn(upsample1, [7, 7, 64, 64], phase_train, False, name="conv_decode1")
""" end of Decode """
""" Start Classify """
# output predicted class number (6)
with tf.variable_scope('conv_classifier') as scope:
kernel = _variable_with_weight_decay('weights',
shape=[1, 1, 64, NUM_CLASSES],
initializer=msra_initializer(1, 64),
wd=0.0005)
conv = tf.nn.conv2d(conv_decode1, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [NUM_CLASSES], tf.constant_initializer(0.0))
conv_classifier = tf.nn.bias_add(conv, biases, name=scope.name)
logit = conv_classifier
loss = cal_loss(conv_classifier, labels)
return loss, logit
def train(total_loss, global_step):
total_sample = 274
num_batches_per_epoch = 274/1
""" fix lr """
lr = INITIAL_LEARNING_RATE
loss_averages_op = _add_loss_summaries(total_loss)
# Compute gradients.
with tf.control_dependencies([loss_averages_op]):
opt = tf.train.AdamOptimizer(lr)
grads = opt.compute_gradients(total_loss)
apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
# Add histograms for trainable variables.
for var in tf.trainable_variables():
tf.summary.histogram(var.op.name, var)
# Add histograms for gradients.
for grad, var in grads:
if grad is not None:
tf.summary.histogram(var.op.name + '/gradients', grad)
# Track the moving averages of all trainable variables.
variable_averages = tf.train.ExponentialMovingAverage(
MOVING_AVERAGE_DECAY, global_step)
variables_averages_op = variable_averages.apply(tf.trainable_variables())
with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
train_op = tf.no_op(name='train')
return train_op
def test(FLAGS):
max_steps = FLAGS.max_steps
batch_size = FLAGS.batch_size
train_dir = FLAGS.log_dir # /tmp3/first350/TensorFlow/Logs
# test_dir = FLAGS.test_dir # /tmp3/first350/SegNet-Tutorial/CamVid/train.txt
test_dir = '/home/ml/song/Segnet_tensorflow_3/dataset/test.txt'
# test_ckpt = FLAGS.testing
test_ckpt = "/home/ml/song/Segnet_tensorflow_3/path_to_your_log/model.ckpt-19999.meta"
image_w = FLAGS.image_w
image_h = FLAGS.image_h
image_c = FLAGS.image_c
# testing should set BATCH_SIZE = 1
batch_size = 1
image_filenames, label_filenames = get_filename_list(test_dir)
test_data_node = tf.placeholder(
tf.float32,
shape=[batch_size, image_h, image_w, image_c])
test_labels_node = tf.placeholder(tf.int64, shape=[batch_size, 360, 480, 1])
phase_train = tf.placeholder(tf.bool, name='phase_train')
loss, logits = inference(test_data_node, test_labels_node, batch_size, phase_train)
pred = tf.argmax(logits, axis=3)
# get moving avg
variable_averages = tf.train.ExponentialMovingAverage(
MOVING_AVERAGE_DECAY)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
# gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.0001)
with tf.Session() as sess:
# Load checkpoint
# saver.restore(sess, "/home/ml/song/Segnet_tensorflow/path_to_your_log/model.ckpt-19999")
# saver = tf.train.import_meta_graph('/home/ml/song/Segnet_tensorflow/Logs/model.ckpt-19999.meta')
saver.restore(sess, '/home/ml/song/Segnet_tensorflow_3/path_to_your_log/model.ckpt-19999')
images, labels = get_all_test_data(image_filenames, label_filenames)
threads = tf.train.start_queue_runners(sess=sess)
hist = np.zeros((NUM_CLASSES, NUM_CLASSES))
for image_batch, label_batch in zip(images, labels):
feed_dict = {
test_data_node: image_batch,
test_labels_node: label_batch,
phase_train: False
}
print('*'*100)
print(type(feed_dict))
print(test_data_node.shape)
print(test_labels_node.shape)
print('*'*100)
print(' -- logits')
print(' ', logits.shape, type(logits))
print(logits[0])
print(' -- pred')
print(' ', pred.shape, type(pred))
print('')
dense_prediction, im = sess.run([logits, pred], feed_dict=feed_dict)
print(dense_prediction.shape)
print(im.shape)
print('*'*100)
# output_image to verify
if (FLAGS.save_image):
writeImage(im[0], 'testing_image.png')
# writeImage(im[0], 'out_image/'+str(image_filenames[count]).split('/')[-1])
hist += get_hist(dense_prediction, label_batch)
# count+=1
acc_total = np.diag(hist).sum() / hist.sum()
iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
print("acc: ", acc_total)
print("mean IU: ", np.nanmean(iu))
# ------------------------------------------------------------------------------------------------------
from tensorflow.python.client import device_lib
def get_available_gpus():
local_device_protos = device_lib.list_local_devices()
return [x.name for x in local_device_protos if x.device_type == 'GPU']
# ------------------------------------------------------------------------------------------------------
def training(FLAGS, is_finetune=False):
max_steps = FLAGS.max_steps
batch_size = FLAGS.batch_size
train_dir = FLAGS.log_dir # /tmp3/first350/TensorFlow/Logs
#image_dir = FLAGS.image_dir # /tmp3/first350/SegNet-Tutorial/CamVid/train.txt
image_dir = "/home/ml/song/Segnet_tensorflow_3/dataset/train.txt"
#val_dir = FLAGS.val_dir # /tmp3/first350/SegNet-Tutorial/CamVid/val.txt
val_dir = "/home/ml/song/Segnet_tensorflow_3/dataset/val.txt"
finetune_ckpt = FLAGS.finetune
image_w = FLAGS.image_w
image_h = FLAGS.image_h
image_c = FLAGS.image_c
startstep = 0 if not is_finetune else int(FLAGS.finetune.split('-')[-1])
image_filenames, label_filenames = get_filename_list(image_dir)
val_image_filenames, val_label_filenames = get_filename_list(val_dir)
with tf.Graph().as_default():
train_data_node = tf.compat.v1.placeholder( tf.float32, shape=[batch_size, image_h, image_w, image_c])
train_labels_node = tf.compat.v1.placeholder(tf.int64, shape=[batch_size, image_h, image_w, 1])
phase_train = tf.compat.v1.placeholder(tf.bool, name='phase_train')
global_step = tf.Variable(0, trainable=False)
# For CamVid
images, labels = CamVidInputs(image_filenames, label_filenames, batch_size)
val_images, val_labels = CamVidInputs(val_image_filenames, val_label_filenames, batch_size)
# Build a Graph that computes the logits predictions from the inference model.
loss, eval_prediction = inference(train_data_node, train_labels_node, batch_size, phase_train)
# Build a Graph that trains the model with one batch of examples and updates the model parameters.
train_op = train(loss, global_step)
saver = tf.train.Saver(tf.global_variables())
summary_op = tf.summary.merge_all()
# gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.0001)
with tf.Session() as sess:
# Build an initialization operation to run below.
if (is_finetune == True):
saver.restore(sess, finetune_ckpt )
else:
init = tf.global_variables_initializer()
sess.run(init)
# Start the queue runners.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
# Summery placeholders
summary_writer = tf.summary.FileWriter(train_dir, sess.graph)
average_pl = tf.compat.v1.placeholder(tf.float32)
acc_pl = tf.compat.v1.placeholder(tf.float32)
iu_pl = tf.compat.v1.placeholder(tf.float32)
average_summary = tf.summary.scalar("test_average_loss", average_pl)
acc_summary = tf.summary.scalar("test_accuracy", acc_pl)
iu_summary = tf.summary.scalar("Mean_IU", iu_pl)
for step in range(startstep, startstep + max_steps):
image_batch ,label_batch = sess.run([images, labels])
# since we still use mini-batches in validation, still set bn-layer phase_train = True
feed_dict = {
train_data_node: image_batch,
train_labels_node: label_batch,
phase_train: True
}
start_time = time.time()
_, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
duration = time.time() - start_time
assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
if step % 10 == 0:
num_examples_per_step = batch_size
examples_per_sec = num_examples_per_step / duration
sec_per_batch = float(duration)
format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
'sec/batch)')
print (format_str % (datetime.now(), step, loss_value,
examples_per_sec, sec_per_batch))
# eval current training batch pre-class accuracy
pred = sess.run(eval_prediction, feed_dict=feed_dict)
per_class_acc(pred, label_batch)
if step % 100 == 0:
print("start validating.....")
total_val_loss = 0.0
hist = np.zeros((NUM_CLASSES, NUM_CLASSES))
for test_step in range(int(TEST_ITER)):
val_images_batch, val_labels_batch = sess.run([val_images, val_labels])
_val_loss, _val_pred = sess.run([loss, eval_prediction], feed_dict={
train_data_node: val_images_batch,
train_labels_node: val_labels_batch,
phase_train: True
})
total_val_loss += _val_loss
hist += get_hist(_val_pred, val_labels_batch)
print("val loss: ", total_val_loss / TEST_ITER)
acc_total = np.diag(hist).sum() / hist.sum()
iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
test_summary_str = sess.run(average_summary, feed_dict={average_pl: total_val_loss / TEST_ITER})
acc_summary_str = sess.run(acc_summary, feed_dict={acc_pl: acc_total})
iu_summary_str = sess.run(iu_summary, feed_dict={iu_pl: np.nanmean(iu)})
print_hist_summery(hist)
print(" end validating.... ")
summary_str = sess.run(summary_op, feed_dict=feed_dict)
summary_writer.add_summary(summary_str, step)
summary_writer.add_summary(test_summary_str, step)
summary_writer.add_summary(acc_summary_str, step)
summary_writer.add_summary(iu_summary_str, step)
# Save the model checkpoint periodically.
if step % 1000 == 0 or (step + 1) == max_steps:
checkpoint_path = os.path.join(train_dir, 'model.ckpt')
saver.save(sess, checkpoint_path, global_step=step)
coord.request_stop()
coord.join(threads)
This is the cat vs dog problem from kaggle competition. My code is look like correct but still a value error annoying me. I think I have given the correct size of the input but still the error comes.
Please help me to find out the error.
Here's my full code:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
from random import shuffle
import tensorflow
from tqdm import tqdm
TRAIN_DIR = 'C:\\Users\Kashif\PycharmProjects\DeepLearning-Tensorflow (Sentdex)\Learnings\Cat_VS_Dog\TrainingData'
TEST_DIR = 'C:\\Users\Kashif\PycharmProjects\DeepLearning-Tensorflow (Sentdex)\Learnings\Cat_VS_Dog\TestingData'
IMG_SIZE = 50
MODEL_NAME = 'dogvscat-{}-{}.model'.format(LR, '2conv-basic')
def label_img(img):
word_label = img.split('.')[-3]
if word_label == 'cat': return [1,0]
elif word_label == 'dog': return [0,1]
def create_train_data():
training_data = []
for img in tqdm(os.listdir(TRAIN_DIR)):
label = label_img(img)
path = os.path.join(TRAIN_DIR,img)
img = cv2.imread(path,cv2.IMREAD_GRAYSCALE)
img = cv2.resize(img, (IMG_SIZE,IMG_SIZE))
training_data.append([np.array(img),np.array(label)])
shuffle(training_data)
np.save('train_data.npy', training_data)
return training_data
def process_test_data():
testing_data=[]
for img in tqdm(os.listdir(TEST_DIR)):
path = os.path.join(TEST_DIR, img)
img_num = img.split('.')[0]
img = cv2.resize(cv2.imread(path, cv2.IMREAD_GRAYSCALE), (IMG_SIZE, IMG_SIZE))
testing_data.append([np.array(img), img_num])
np.save('test_data.npy', testing_data)
return testing_data
train_data = create_train_data()
learning_rate = 0.01
epochs = 10
batch_size = 128
n_classes = 2
drop_out = 0.8
filter_h_w = 5
depth_in = 1
depth_out_1 = 32
depth_out_2 = 64
x = tf.placeholder('float', [None, IMG_SIZE * IMG_SIZE])
y = tf.placeholder('float', [None, n_classes])
def conv2d(x, W):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def maxpool2d(x):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
def conv_nural_network(x):
weights = {
'W_conv1': tf.Variable(tf.random_normal([filter_h_w, filter_h_w, depth_in, depth_out_1])),
'W_conv2': tf.Variable(tf.random_normal([filter_h_w, filter_h_w, depth_out_1, depth_out_2])),
'W_fc': tf.Variable(tf.random_normal([ int(IMG_SIZE/4) * int(IMG_SIZE/4) * depth_out_2, 1024])),
'out': tf.Variable(tf.random_normal([1024, n_classes]))
}
biases = {
'b_conv1': tf.Variable(tf.random_normal([depth_out_1])),
'b_conv2': tf.Variable(tf.random_normal([depth_out_2])),
'b_fc': tf.Variable(tf.random_normal([1024])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
x = tf.reshape(x, shape=[-1, IMG_SIZE, IMG_SIZE, 1])
conv1 = tf.nn.relu(conv2d(x, weights['W_conv1']) + biases['b_conv1'])
conv1 = maxpool2d(conv1)
conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['b_conv2'])
conv2 = maxpool2d(conv2)
fc = tf.reshape(conv2, [-1, int(IMG_SIZE/4) * int(IMG_SIZE/4) * depth_out_2])
fc = tf.nn.relu(tf.matmul(fc, weights['W_fc']) + biases['b_fc'])
fc = tf.nn.dropout(fc, drop_out)
output = tf.matmul(fc, weights['out']) + biases['out']
return output
train = train_data[:-500]
test = train_data[-500:]
train_X = np.array([i[0] for i in train]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
train_y = [i[1] for i in train]
test_X = np.array([i[0] for i in test]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
test_y = [i[1] for i in test]
def train_neural_network(x):
prediction = conv_nural_network(x)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)
init = tf.global_variables_initializer()
loss_trace = []
accuracy_trace = []
with tf.Session() as sess:
sess.run(init)
for i in range(epochs):
sess.run(optimizer, feed_dict={x: train_X, y: train_y})
loss = sess.run(cost_function, feed_dict={x: train_X, y: train_y})
accuracy = np.mean(np.argmax(sess.run(prediction,feed_dict={x:train_X,y:train_y}),axis=1) == np.argmax(train_y,axis=1))
loss_trace.append(loss)
accuracy_trace.append(accuracy)
print('Epoch:', (i + 1), 'loss:', loss, 'accuracy:', accuracy)
print('Final training result:', 'loss:', loss, 'accuracy:', accuracy)
loss_test = sess.run(cost_function, feed_dict={x: test_X, y: test_y})
test_pred = np.argmax(sess.run(prediction, feed_dict={x: test_X, y: test_y}), axis=1)
accuracy_test = np.mean(test_pred == np.argmax(test_y, axis=1))
print('Results on test dataset:', 'loss:', loss_test, 'accuracy:', accuracy_test)
train_neural_network(x)
This error comes after that. A value error has come but I don't know where I have given the wrong input type.
ValueError Traceback (most recent call last)
<ipython-input-91-7682c5a4d0ec> in <module>
25
26
---> 27 train_neural_network(x)
<ipython-input-91-7682c5a4d0ec> in train_neural_network(x)
11 sess.run(init)
12 for i in range(epochs):
---> 13 sess.run(optimizer, feed_dict={x: train_X, y: train_y})
14 loss = sess.run(cost_function, feed_dict={x: train_X, y: train_y})
15 accuracy = np.mean(np.argmax(sess.run(prediction,feed_dict={x:train_X,y:train_y}),axis=1) == np.argmax(train_y,axis=1))
ValueError: Cannot feed value of shape (24500, 50, 50, 1) for Tensor 'Placeholder_34:0', which has shape '(?, 2500)'
I am a new with machine learning. I have a final project about prediction using two algorithms, Artificial Neural Network and Bayesian Neural Network. I want to compare the prediction result between ANN and BNN. I have finished the ANN program, but I have a problem with the BNN. I try a tutorial from this link: bayesian neural network tutorial. This is my ANN sample code to train and evaluate the model.
keep_prob = tf.placeholder("float", name="keep_prob")
x = tf.placeholder(tf.float32, [None, n_input], name="x")
y = tf.placeholder(tf.float32, name="y")
training_epochs = 5000
display_step = 1000
batch_size = 5
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=predictions, labels=y), name="cost_function")
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001, name="Adam").minimize(cost)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for epoch in tqdm(range(training_epochs)):
avg_cost = 0.0
total_batch = int(len(x_train) / batch_size)
x_batches = np.array_split(x_train, total_batch)
y_batches = np.array_split(y_train, total_batch)
for i in range(total_batch):
batch_x, batch_y = x_batches[i], y_batches[i]
_, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y, keep_prob: 0.8})
avg_cost += c / total_batch
if epoch % display_step == 0:
print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))
print("Optimization Finished!")
correct_prediction = tf.equal(tf.argmax(predictions, 1), tf.argmax(y, 1), name="corr_pred")
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"), name="accuracy")
# print('Accuracy: ', sess.run(accuracy, feed_dict={x: x_test, y: y_test}))
print("Accuracy:", accuracy.eval({x: x_test, y: y_test, keep_prob: 1.0}))
and this is my BNN code:
# Importing required libraries
from math import floor
import edward as ed
import numpy as np
import pandas as pd
import tensorflow as tf
from edward.models import Normal, NormalWithSoftplusScale
from fancyimpute import KNN
from sklearn import preprocessing
# Read data
features_dummies_nan = pd.read_csv('csv/features_dummies_with_label.csv', sep=',')
# Function: impute missing value by KNN
def impute_missing_values_by_KNN():
home_data = features_dummies_nan[[col for col in features_dummies_nan.columns if 'hp' in col]]
away_data = features_dummies_nan[[col for col in features_dummies_nan.columns if 'ap' in col]]
label_data = features_dummies_nan[[col for col in features_dummies_nan.columns if 'label' in col]]
home_filled = pd.DataFrame(KNN(3).complete(home_data))
home_filled.columns = home_data.columns
home_filled.index = home_data.index
away_filled = pd.DataFrame(KNN(3).complete(away_data))
away_filled.columns = away_data.columns
away_filled.index = away_data.index
data_frame_out = pd.concat([home_filled, away_filled, label_data], axis=1)
return data_frame_out
features_dummies = impute_missing_values_by_KNN()
target = features_dummies.loc[:, 'label'].values
data = features_dummies.drop('label', axis=1)
data = data.values
perm = np.random.permutation(len(features_dummies))
data = data[perm]
target = target[perm]
train_size = 0.9
train_cnt = floor(features_dummies.shape[0] * train_size)
x_train = data[0:train_cnt] # data_train
y_train = target[0:train_cnt] # target_train
x_test = data[train_cnt:] # data_test
y_test = target[train_cnt:] # target_test
keep_prob = tf.placeholder("float", name="keep_prob")
n_input = data.shape[1] # D
n_classes = 3
n_hidden_1 = 100 # H0
n_hidden_2 = 100 # H1
n_hidden_3 = 100 # H2
def neural_network(X, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out):
hidden1 = tf.nn.relu(tf.matmul(X, W_0) + b_0)
hidden2 = tf.nn.relu(tf.matmul(hidden1, W_1) + b_1)
hidden3 = tf.nn.relu(tf.matmul(hidden2, W_2) + b_2)
output = tf.matmul(hidden3, W_out) + b_out
return tf.reshape(output, [-1])
scaler = preprocessing.StandardScaler().fit(x_train)
data_train_scaled = scaler.transform(x_train)
data_test_scaled = scaler.transform(x_test)
W_0 = Normal(loc=tf.zeros([n_input, n_hidden_1]), scale=5.0 * tf.ones([n_input, n_hidden_1]))
W_1 = Normal(loc=tf.zeros([n_hidden_1, n_hidden_2]), scale=5.0 * tf.ones([n_hidden_1, n_hidden_2]))
W_2 = Normal(loc=tf.zeros([n_hidden_2, n_hidden_3]), scale=5.0 * tf.ones([n_hidden_2, n_hidden_3]))
W_out = Normal(loc=tf.zeros([n_hidden_3, 1]), scale=5.0 * tf.ones([n_hidden_3, 1]))
b_0 = Normal(loc=tf.zeros(n_hidden_1), scale=5.0 * tf.ones(n_hidden_1))
b_1 = Normal(loc=tf.zeros(n_hidden_2), scale=5.0 * tf.ones(n_hidden_2))
b_2 = Normal(loc=tf.zeros(n_hidden_3), scale=5.0 * tf.ones(n_hidden_3))
b_out = Normal(loc=tf.zeros(1), scale=5.0 * tf.ones(1))
qW_0 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_input, n_hidden_1])),
scale=tf.Variable(tf.random_normal([n_input, n_hidden_1])))
qW_1 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
scale=tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])))
qW_2 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])),
scale=tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])))
qW_out = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_3, 1])),
scale=tf.Variable(tf.random_normal([n_hidden_3, 1])))
qb_0 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_1])),
scale=tf.Variable(tf.random_normal([n_hidden_1])))
qb_1 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_2])),
scale=tf.Variable(tf.random_normal([n_hidden_2])))
qb_2 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_3])),
scale=tf.Variable(tf.random_normal([n_hidden_3])))
qb_out = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([1])),
scale=tf.Variable(tf.random_normal([1])))
sigma_y = 1.0
x = tf.placeholder(tf.float32, [None, n_input])
y = Normal(loc=neural_network(x, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out), scale=sigma_y)
inference = ed.KLqp({W_0: qW_0, b_0: qb_0,
W_1: qW_1, b_1: qb_1,
W_2: qW_2, b_2: qb_2,
W_out: qW_out, b_out: qb_out}, data={x: x_train, y: y_train})
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.05
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
1000, 0.3, staircase=True)
optimizer = tf.train.AdamOptimizer(learning_rate)
inference.run(n_iter=5000, optimizer=optimizer, global_step=global_step)
But, I want to compare two algorithms result. So, I want to make some variables will be same between ANN and BNN, for example sum of epoch. Then I want to adapt my ANN code above for this BNN code section.
sigma_y = 1.0
x = tf.placeholder(tf.float32, [None, n_input])
y = Normal(loc=neural_network(x, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out), scale=sigma_y)
inference = ed.KLqp({W_0: qW_0, b_0: qb_0,
W_1: qW_1, b_1: qb_1,
W_2: qW_2, b_2: qb_2,
W_out: qW_out, b_out: qb_out}, data={x: x_train, y: y_train})
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.05
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
1000, 0.3, staircase=True)
optimizer = tf.train.AdamOptimizer(learning_rate)
inference.run(n_iter=5000, optimizer=optimizer, global_step=global_step)
I have several things that I don't understand. There is y = tf.placeholder(tf.float32, name="y") in ANN but in BNN is y = Normal(loc=neural_network(x, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out), scale=sigma_y). Then, there is scale in BNN but not in ANN. So, can I adapt my ANN train and test sample code to BNN sample code above? I want to make inference on BNN run like in sess.run() on ANN so I can count the BNN prediction accuracy result. Can I do that?
I have the following code:
import numpy as np
import matplotlib.pyplot as plt
import cifar_tools
import tensorflow as tf
data, labels = cifar_tools.read_data('C:\\Users\\abc\\Desktop\\temp')
x = tf.placeholder(tf.float32, [None, 24 * 24])
y = tf.placeholder(tf.float32, [None, 2])
w1 = tf.Variable(tf.random_normal([5, 5, 1, 64]))
b1 = tf.Variable(tf.random_normal([64]))
w2 = tf.Variable(tf.random_normal([5, 5, 64, 64]))
b2 = tf.Variable(tf.random_normal([64]))
w3 = tf.Variable(tf.random_normal([6*6*64, 1024]))
b3 = tf.Variable(tf.random_normal([1024]))
w_out = tf.Variable(tf.random_normal([1024, 2]))
b_out = tf.Variable(tf.random_normal([2]))
def conv_layer(x,w,b):
conv = tf.nn.conv2d(x,w,strides=[1,1,1,1], padding = 'SAME')
conv_with_b = tf.nn.bias_add(conv,b)
conv_out = tf.nn.relu(conv_with_b)
return conv_out
def maxpool_layer(conv,k=2):
return tf.nn.max_pool(conv, ksize=[1,k,k,1], strides=[1,k,k,1], padding='SAME')
def model():
x_reshaped = tf.reshape(x, shape=[-1,24,24,1])
conv_out1 = conv_layer(x_reshaped, w1, b1)
maxpool_out1 = maxpool_layer(conv_out1)
norm1 = tf.nn.lrn(maxpool_out1, 4, bias=1.0, alpha=0.001/9.0, beta=0.75)
conv_out2 = conv_layer(norm1, w2, b2)
maxpool_out2 = maxpool_layer(conv_out2)
norm2 = tf.nn.lrn(maxpool_out2, 4, bias=1.0, alpha=0.001/9.0, beta=0.75)
maxpool_reshaped = tf.reshape(maxpool_out2, [-1,w3.get_shape().as_list()[0]])
local = tf.add(tf.matmul(maxpool_reshaped, w3), b3)
local_out = tf.nn.relu(local)
out = tf.add(tf.matmul(local_out, w_out), b_out)
return out
model_op = model()
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(model_op, y))
train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
correct_pred = tf.equal(tf.argmax(model_op, 1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred,tf.float32))
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
onehot_labels = tf.one_hot(labels, 2, on_value=1.,off_value=0.,axis=-1)
onehot_vals = sess.run(onehot_labels)
batch_size = len(data) / 200
print('batch size', batch_size)
for j in range(0, 1000):
print('EPOCH', j)
for i in range(0, len(data), batch_size):
batch_data = data[i:i+batch_size, :]
batch_onehot_vals = onehot_vals[i:i+batch_size, :]
_, accuracy_val = sess.run([train_op, accuracy], feed_dict={x: batch_data, y: batch_onehot_vals})
if i % 1000 == 0:
print(i, accuracy_val)
print('DONE WITH EPOCH')
When I run the code, I get the following error:
batch size 225.0
EPOCH 0
Traceback (most recent call last):
File "cnn.py", line 66, in <module>
for i in range(0, len(data), batch_size):
TypeError: 'float' object cannot be interpreted as an integer
How can I fix this issue?
Thanks.
You can use floor division instead:
batch_size = len(data) // 200
As bernie said, you can use floor division. That is correct but based on what you're trying to do within your for-loop, I was both answering your question and showing how it can be used in your code. Casting batch_size to an integer using int(batch_size) is the correct way to use it in your for-loop.
for i in range(0, len(data), int(batch_size)):
# process data
Not sure why this was down voted.
This question already has an answer here:
TensorFlow: slow performance when getting gradients at inputs
(1 answer)
Closed 6 years ago.
Im an electrical engineering student and im trying to model an industrial plant based on the power in a resistor inside a boiller, the temperature of the water in the boiller and the water flow passing through the boiller using python 3.5 and tensorflow.
The matter is that im a beginner at python and tensorflow and i wrote this code that works, but the trainning starts fast and rapidly start to slow down, and by the middle of the trainning its starts to taking ages between steps.
I just need some help on the optimization, and of course, any tips are welcome!
Thank you very much!
Here is the code:
import numpy as np
import tensorflow as tf
input_vec_size = 3
step_size = 0.05
batch_size = 3
test_size = 16
train_end = 1905
eval_end = 290
predict_end = 1396
n_cores = 4
def read_my_file_format(filename_queue):
line_reader = tf.TextLineReader(skip_header_lines=1)
_, csv_row = line_reader.read(filename_queue)
record_defaults = [[0.0], [0.0], [0.0], [0.0]]
time, power_in, temperature, flow = \
tf.decode_csv(csv_row, record_defaults=record_defaults)
features = tf.pack([
power_in,
temperature
])
return features, flow
def input_pipeline(directory, batch_size, n_cores, buffer_size, num_epochs=None):
filename_queue = tf.train.string_input_producer(
tf.train.match_filenames_once(directory),
shuffle=True)
features, flow = read_my_file_format(filename_queue)
x, y = tf.train.batch(
[features, flow], batch_size=batch_size, allow_smaller_final_batch=True)
def init_weights(shape):
return tf.Variable(tf.random_normal(shape, stddev=0.001))
def init_bias(shape): #inicializa bias
initial = tf.constant(0.001, shape=shape) #variancia 0.1
return tf.Variable(initial)
def model(X, w_h, w_h2, w_o, B, B2, B3, p_keep_input, p_keep_hidden):
X = tf.nn.dropout(X, p_keep_input)
h = tf.nn.relu(tf.matmul(X, w_h)+B)
h = tf.nn.dropout(h, p_keep_hidden)
h2 = tf.nn.relu(tf.matmul(h, w_h2)+B2)
h2 = tf.nn.dropout(h2, p_keep_hidden)
return tf.matmul(h2, w_o)+B3
X = tf.placeholder("float", [None, input_vec_size])
Y = tf.placeholder("float", [None, 1])
p_keep_hidden = tf.placeholder("float")
p_keep_input = tf.placeholder("float")
w_h = init_weights([input_vec_size, fclayer_size])
w_h2= init_weights([fclayer_size, fclayer_size])
w_o= init_weights([fclayer_size, 1])
B = init_bias([fclayer_size])
B2 = init_bias([fclayer_size])
B3 = init_bias([1])
py_x = model(X, w_h, w_h2, w_o, B, B2, B3, p_keep_input, p_keep_hidden)
predict_op = py_x[0]
cost = tf.reduce_mean(tf.square(predict_op - Y))
train_op = tf.train.MomentumOptimizer(step_size, 0.5).minimize(cost)
saver = tf.train.Saver()
directory = "./train/*.csv"
x, y = input_pipeline(directory, batch_size, n_cores, buffer_size, num_epochs=None)
directory_eval = "./eval/*.csv"
xe, ye = input_pipeline(directory_eval, test_size, n_cores, buffer_size, num_epochs=None)
directory_predict = "./predict/*.csv"
xp, yp = input_pipeline(directory_predict, test_size, n_cores, buffer_size, num_epochs=None)
with tf.Session() as sess:
tf.initialize_all_variables().run()
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
print("==================================TREINAMENTO=================================")
for iteraction in range(int(train_end/batch_size)):
trX, trY = sess.run([x,y])
for i in range(0, batch_size):
features, features_past, features_past2 = sess.run(tf.unpack(trX[i])), sess.run(tf.unpack(trX[i-1])), sess.run(tf.unpack(trX[i-2]))
power_in_i = features[0] - 4
temperature_i = features[1]
temperature_i1 = features_past[1]
temperature_i2 = features_past2[1]
trX_now = tf.pack([power_in_i, (temperature_i-temperature_i1), (temperature_i-temperature_i2)])
trX_now = sess.run(trX_now)
X_Batch, Y_Batch = trX_now.reshape([-1, input_vec_size]), trY[i].reshape([-1, 1])
sess.run(train_op, feed_dict={X: X_Batch,
Y: Y_Batch, p_keep_input: 0.95, p_keep_hidden: 0.7})
if(i%batch_size == 0):
predict_train = sess.run(tf.reshape(predict_op, [-1, 1]), feed_dict={X: X_Batch, p_keep_input: 1.0, p_keep_hidden: 1.0})
train_cost = sess.run(cost, feed_dict={py_x: predict_train, Y: Y_Batch})
print("Train Batch:", iteraction,"Sample:", batch_size*iteraction, "X:", X_Batch, "Y:", Y_Batch, "y_:",
predict_train, "Cost:", train_cost)
saver.save(sess, "./model.ckpt")
print('Variaveis salvas com sucesso')
coord.request_stop()
coord.join(threads)
sess.close()
print('=============================Fim do Treinamento=============================')
with tf.Session() as sess:
tf.initialize_all_variables().run()
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
print("==============================VALIDAÇAO==================================")
saver.restore(sess, "./model.ckpt")
print("Model restored.")
for iteraction in range(int(eval_end/test_size)):
teX, teY = sess.run([xe, ye])
for i in range(0, test_size):
features, features_past, features_past2 = sess.run(tf.unpack(teX[i])), sess.run(tf.unpack(teX[i - 1])), sess.run(tf.unpack(teX[i-2]))
power_in_i = features[0] - 4
temperature_i = features[1]
temperature_i1 = features_past[1]
teX_now = tf.pack([power_in_i, (temperature_i - temperature_i1), (temperature_i-temperature_i2)])
teX_now = sess.run(teX_now)
X_Batch, Y_Batch = teX_now.reshape([-1, input_vec_size]), teY[i].reshape([-1, 1])
predict_eval = sess.run(tf.reshape(predict_op, [-1, 1]), feed_dict={X: X_Batch, p_keep_input: 1.0, p_keep_hidden: 1.0})
eval_cost = sess.run(cost, feed_dict={py_x: predict_eval, Y: Y_Batch})
print("Eval Batch:", iteraction,"Sample:", batch_size*iteraction, "X:", X.eval(feed_dict={X: X_Batch}), "Y:", Y_Batch, "y_:",
predict_eval, "Cost:", eval_cost)
coord.request_stop()
coord.join(threads)
sess.close()
print('=============================FIM DA VALIDAÇAO=============================')
with tf.Session() as sess:
tf.initialize_all_variables().run()
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
print("==============================PREDIÇÃO==================================")
saver.restore(sess, "./model.ckpt")
print("Model restored.")
predict_batch_mean = 0
predict_mean = 0
for iteraction in range(int(predict_end / test_size)):
tpX, tpY = sess.run([xp, yp])
for i in range(0, test_size):
features, features_past, features_past2 = sess.run(tf.unpack(tpX[i])), sess.run(tf.unpack(tpX[i - 1])), sess.run(tf.unpack(tpX[i-2]))
power_in_i = features[0]- 4
temperature_i = features[1]
temperature_i1 = features_past[1]
tpX_now = tf.pack([power_in_i, (temperature_i - temperature_i1), (temperature_i-temperature_i2)])
tpX_now = sess.run(tpX_now)
X_Batch, Y_Batch = tpX_now.reshape([-1, input_vec_size]), tpY[i].reshape([-1, 1])
prediction = sess.run(tf.reshape(predict_op, [-1, 1]), feed_dict={X: X_Batch, p_keep_input: 1.0, p_keep_hidden: 1.0})
print("Predict Batch:", iteraction,"Sample:", batch_size*iteraction, "X:", X.eval(feed_dict={X: X_Batch}), "y_:",
prediction)
predict_batch_mean = (predict_batch_mean + prediction)/i
predict_mean = (predict_mean + predict_batch_mean)/iteraction
print("Predicted Flow:", predict_mean)
coord.request_stop()
coord.join(threads)
sess.close()
My quick guess is that you are creating a lot of new nodes in each iteration through your training: those tf.packs and tf.reshapes are just making your graph bigger and bigger.
Construct the graph once outside the training loop, and I'll bet everything gets happy.
Pandas saved me this time, and im already in love with it!
After some learning, there is the working code. Now its fast, aside of the "not that good" prediction accuracy yet.
Heres the code:
import numpy as np
import pandas as pd
import tensorflow as tf
#VARIAVEIS
input_vec_size = 6
layer1_size = 512
fclayer_size = 1024
step_size = 0.02
test_size = 16
train_end = 1905
eval_end = 290
predict_end = 1396
#READS TRAIN FILE
def read_data(directory):
data=pd.read_csv(directory, sep=',',header=None)
return data
#Batch Maker
def get_batch(data, i, data_size):
j = i + (input_vec_size - 1 - data_size)*(i//(data_size - input_vec_size + 1)) + input_vec_size - 1
# print(j, i//(data_size - 5))
features = [(data[1][j] - 4) / 16,
(data[2][j] - data[2][j - 1])*10,
(data[2][j] - data[2][j - 2])*10,
(data[2][j] - data[2][j - 3])*10,
(data[2][j] - data[2][j - 4])*10,
(data[2][j] - data[2][j - 5])*10]
features = np.reshape(features, [-1, input_vec_size])
flow = data[3][j]/1500
flow = np.reshape(flow, [-1,1])
return features, flow
#Inicializaçao de variaveis
def init_weights(shape):
return tf.Variable(tf.random_normal(shape, stddev=0.001))
def init_bias(shape): #inicializa bias
initial = tf.constant(0.001, shape=shape) #variancia 0.1
return tf.Variable(initial)
#Definindo Modelo DNN
def model(X, w_h, w_h2, w_o, B, B2, p_keep_input, p_keep_hidden):
X = tf.nn.dropout(X, p_keep_input)
h = tf.nn.relu(tf.matmul(X, w_h)+B)
h = tf.nn.dropout(h, p_keep_hidden)
h2 = tf.nn.relu(tf.matmul(h, w_h2))
h2 = tf.nn.dropout(h2, p_keep_hidden)
return tf.matmul(h2, w_o)
#PLaceholders
X = tf.placeholder("float", [None, input_vec_size])
Y = tf.placeholder("float", [None, 1])
p_keep_hidden = tf.placeholder("float")
p_keep_input = tf.placeholder("float")
#Estados iniciais das variaveis
w_h = init_weights([input_vec_size, layer1_size])
w_h2= init_weights([layer1_size, fclayer_size])
w_o= init_weights([fclayer_size, 1])
B = init_bias([layer1_size])
B2 = init_bias([fclayer_size])
#Modelo
py_x = model(X, w_h, w_h2, w_o, B, B2, p_keep_input, p_keep_hidden)
#Operaçao de previsão
predict_op = tf.reshape(py_x[0], [-1,1])
#Funçao custo
cost = tf.reduce_mean(tf.square(predict_op - Y))
#Operação de treinamento
train_op = tf.train.AdadeltaOptimizer(step_size).minimize(cost)
#Utilizado para salvar as variaveis apos o treinamento
saver = tf.train.Saver()
with tf.Session() as sess:
tf.initialize_all_variables().run()
directory = '~/PycharmProjects/modelagemELT430/train/G2.csv'
data = read_data(directory)
for i in range(0, 10*(train_end - input_vec_size + 1)):
features, flow = get_batch(data, i, train_end)
# features = sess.run(features)
sess.run(train_op, feed_dict={X: features,
Y: flow, p_keep_input: 0.9, p_keep_hidden: 0.6})
predict_train = sess.run(predict_op,
feed_dict={X: features, p_keep_input: 1.0, p_keep_hidden: 1.0})
train_cost = sess.run(cost, feed_dict={py_x: predict_train, Y: flow})
print("Train Sample:", i, "X:", features, "Y:", flow*1500, "y_:",
predict_train*1500, "Cost:", train_cost)
saver.save(sess, "./model.ckpt")
print('Variaveis salvas com sucesso')
sess.close()
print('=============================Fim do Treinamento=============================')
with tf.Session() as sess:
tf.initialize_all_variables().run()
directory = '~/PycharmProjects/modelagemELT430/eval/G2E.csv'
data = read_data(directory)
print("==============================VALIDAÇAO==================================")
saver.restore(sess, "./model.ckpt")
print("Model restored.")
for i in range(0, eval_end - input_vec_size + 1):
features, flow = get_batch(data, i, eval_end)
predict_eval = sess.run(predict_op,
feed_dict={X: features, p_keep_input: 1.0, p_keep_hidden: 1.0})
eval_cost = sess.run(cost, feed_dict={py_x: predict_eval, Y: flow})
print("Eval Sample:", i, "X:", features, "Y:",flow*1500, "y_:",predict_eval*1500, "Cost:", eval_cost)
sess.close()
print('============================Fim da Validação=================================')
with tf.Session() as sess:
tf.initialize_all_variables().run()
directory = '~/PycharmProjects/modelagemELT430/predict/G2P.csv'
data = read_data(directory)
print("==============================Predição==================================")
saver.restore(sess, "./model.ckpt")
print("Model restored.")
for i in range(0, predict_end - input_vec_size + 1):
features, flow = get_batch(data, i, predict_end)
predict = sess.run(predict_op,
feed_dict={X: features, p_keep_input: 1.0, p_keep_hidden: 1.0})
eval_cost = sess.run(cost, feed_dict={py_x: predict, Y: flow})
print("Predict Sample:", i, "X:", features, "y_:",predict*1500)
sess.close()
print('============================Fim da Prediçao=================================')