tensorflow multi-gpu mnist example, loss does not decrease

tensorflow multi-gpu mnist example, loss does not decrease - python

I'm trying to write my own mnist example which could use all the two gpu of one machine.
It is a simple multi-layer perceptron.
Here is my code. You can run it directly.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
import tensorflow as tf
learning_rate = 0.001
training_steps = 100000
batch_size = 100
display_step = 100
n_hidden_1 = 256
n_hidden_2 = 256
n_input = 784
n_classes = 10
def _variable_on_cpu(name, shape, initializer):
with tf.device('/cpu:0'):
dtype = tf.float32
var = tf.get_variable(name, shape, initializer=initializer, dtype=dtype)
return var
def build_model():
def multilayer_perceptron(x, weights, biases):
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
layer_1 = tf.nn.relu(layer_1)
layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
layer_2 = tf.nn.relu(layer_2)
out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
return out_layer
with tf.variable_scope('aaa'):
weights = {
'h1': _variable_on_cpu('h1',[n_input, n_hidden_1],tf.constant_initializer(0.0)),
'h2': _variable_on_cpu('h2',[n_hidden_1, n_hidden_2],tf.constant_initializer(0.0)),
'out': _variable_on_cpu('out_w',[n_hidden_2, n_classes],tf.constant_initializer(0.0))
}
biases = {
'b1': _variable_on_cpu('b1',[n_hidden_1],tf.constant_initializer(0.0)),
'b2': _variable_on_cpu('b2',[n_hidden_2],tf.constant_initializer(0.0)),
'out': _variable_on_cpu('out_b',[n_classes],tf.constant_initializer(0.0))
}
pred = multilayer_perceptron(x, weights, biases)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
return cost
def average_gradients(tower_grads):
average_grads = []
for grad_and_vars in zip(*tower_grads):
grads = []
for g,_ in grad_and_vars:
expanded_g = tf.expand_dims(g, 0)
grads.append(expanded_g)
grad = tf.concat(axis=0, values=grads)
grad = tf.reduce_mean(grad, 0)
v = grad_and_vars[0][1]
grad_and_var = (grad, v)
average_grads.append(grad_and_var)
return average_grads
with tf.Graph().as_default(), tf.device('/cpu:0'):
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_classes])
tower_grads = []
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
with tf.variable_scope(tf.get_variable_scope()):
for i in xrange(2):
with tf.device('/gpu:%d' % i):
cost = build_model()
tf.get_variable_scope().reuse_variables()
grads = optimizer.compute_gradients(cost)
tower_grads.append(grads)
grads = average_gradients(tower_grads)
apply_gradient_op = optimizer.apply_gradients(grads)
train_op = apply_gradient_op
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
for step in range(training_steps):
image_batch, label_batch = mnist.train.next_batch(batch_size)
_, cost_print = sess.run([train_op, cost],
{x:image_batch,
y:label_batch})
if step % display_step == 0:
print("step=%04d" % (step+1)+ " cost=" + str(cost_print))
print("Optimization Finished!")
sess.close()
The print info looks like:
step=0001 cost=2.30258
step=0101 cost=2.30246
step=0201 cost=2.30128
step=0301 cost=2.30376
step=0401 cost=2.29817
step=0501 cost=2.2992
step=0601 cost=2.3104
step=0701 cost=2.29995
step=0801 cost=2.29802
step=0901 cost=2.30524
step=1001 cost=2.29673
step=1101 cost=2.30016
step=1201 cost=2.31057
step=1301 cost=2.29815
step=1401 cost=2.29669
step=1501 cost=2.30345
step=1601 cost=2.29811
step=1701 cost=2.30867
step=1801 cost=2.30757
step=1901 cost=2.29716
step=2001 cost=2.30394
The loss doesn't decrease. I don't know how to fix it.
By the way, GPU-Util is about 26% and 26%. How to increase the GPU-Util?

The problem is that,
I should use tf.constant_initializer(0.1) for the weights instead of tf.constant_initializer(0)

Related

Tensorflow: Same input data, different output

After training the model, I save it and load to make some tests. But every time I reload the model I get a different accuracy and results with the exactly same input data. After training the model I print the accuracy and it always gets a nice value (0.8 ~ 0.9), but when I reload it goes down to something like (0.1 ~ 0.5) - I dont know if it is something related to the problem btw thats weird.
import tensorflow as tf
import numpy as np
import json
n_nodes_hl1 = 1600
n_nodes_hl2 = 800
n_nodes_hl3 = 400
n_nodes_hl4 = 200
n_classes = 4
batch_size = 50
input_lenght = 65
x = tf.placeholder('float', [None, input_lenght])
y = tf.placeholder('float')
def train_network(x):
prediction = neural_network_model(x)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.00001).minimize(cost)
hm_epochs = 20000
saver = tf.train.Saver()
init_op = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_op)
epoch = 0
for epoch in range(hm_epochs):
epoch_cost = 0
i = 0
while i < len(train_x):
start = i
end = i + batch_size
batch_x = np.array(train_x[start:end])
batch_y = np.array(train_y[start:end])
_, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
epoch_cost += c
i += batch_size
save_path = saver.save(sess, "drive/My Drive/datasets/tensorflow/model")
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
print("accuracy:", accuracy.eval({x: test_x, y: test_y}, session=sess))
sess.close()
def group_test_train(features_data, labels_data, test_size):
featureset = []
for i in range(test_size):
featureset += [[features_data[i], labels_data[i]]]
featureset = np.array(featureset)
np.random.shuffle(featureset)
train_x = list(featureset[:, 0][:test_size // 2])
train_y = list(featureset[:, 1][:test_size // 2])
test_x = list(featureset[:, 0][test_size // 2:])
test_y = list(featureset[:, 1][test_size // 2:])
return train_x, train_y, test_x, test_y
def neural_network_model(data):
hidden1 = {'weights': tf.Variable(tf.random_uniform([input_lenght, n_nodes_hl1], -1, 1)),
'biases': tf.Variable(tf.random_normal([n_nodes_hl1]))
}
hidden2 = {'weights': tf.Variable(tf.random_uniform([n_nodes_hl1, n_nodes_hl2], -1, 1)),
'biases': tf.Variable(tf.random_normal([n_nodes_hl2]))
}
hidden3 = {'weights': tf.Variable(tf.random_uniform([n_nodes_hl2, n_nodes_hl3], -1, 1)),
'biases': tf.Variable(tf.random_normal([n_nodes_hl3]))
}
hidden4 = {'weights': tf.Variable(tf.random_uniform([n_nodes_hl3, n_nodes_hl4], -1, 1)),
'biases': tf.Variable(tf.random_normal([n_nodes_hl4]))
}
l_output = {'weights': tf.Variable(tf.random_uniform([n_nodes_hl4, n_classes], -1, 1)),
'biases': tf.Variable(tf.random_normal([n_classes]))
}
l1 = tf.add(tf.matmul(data, hidden1['weights']), hidden1['biases'])
l1 = tf.nn.relu(l1)
l2 = tf.add(tf.matmul(l1, hidden2['weights']), hidden2['biases'])
l2 = tf.nn.relu(l2)
l3 = tf.add(tf.matmul(l2, hidden3['weights']), hidden3['biases'])
l3 = tf.nn.relu(l3)
l4 = tf.add(tf.matmul(l3, hidden4['weights']), hidden4['biases'])
l4 = tf.nn.relu(l4)
output = tf.add(tf.matmul(l4, l_output['weights']), l_output['biases'])
return output
version = 'end'
with open('drive/My Drive/datasets/json/' + 'data-'+ version +'.json') as json_file:
x_, y_ = json.load(json_file)
train_x, train_y, test_x, test_y = group_test_train(x_, y_, len(x_) )
train_network(x)
Every time I run this part down bellow the accuracy changes and the output as well.
prediction = neural_network_model(x)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.00001).minimize(cost)
init_op = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_op)
new_saver = tf.train.import_meta_graph('drive/My Drive/datasets/tensorflow/model.meta')
new_saver.restore(sess, tf.train.latest_checkpoint('drive/My Drive/datasets/tensorflow/'))
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
print("accuracy:", accuracy.eval({x: train_x, y: train_y}, session=sess))

Generate Histogram of TensorFlow Predictions

I wish to log the predictions every N epochs\iterations and generate a histogram for each class. My question is how do I log the predictions into an array, including the label in order to generate the histograms?
How do I make sure it happens of every N epochs\iterations?
I have edited the post to add the code so you will be able to see what I am talking about. The last 2 code chunks should somehow be used for what I requested.
Thanks in advance!
import tensorflow as tf
import numpy as np
import math
from random import random
from array import array
from ROOT import TFile, TTree, TH1D, TH2D, TBranch, vector
NUM_EXAMPLES = 1.6e4
TRAIN_SPLIT = .8
MINI_BATCH_SIZE = 1000
#NUM_EPOCHS = 3500
F_PATH = "/home/cauchy/Documents/Machine_Learning"
F_TEST = []
F_TEST += ["d3pd-ckt12rmd2030pp-G_ww_qqqq_%d%d00.root" % (1,2)]
F_TEST += ["d3pd-ckt12rmd2030pp-pyj%d.root" % (4)]
F_TEST += ["d3pd-ckt12rmd2030pp-pyj%d.root" % (5)]
F_TEST += ["d3pd-ckt12rmd2030pp-pyj%d.root" % (6)]
F_TEST += ["d3pd-ckt12rmd2030pp-pyj%d.root" % (7)]
#CALIBRATION_TARGET = "pt" # you can use pt,m,eta
INPUTS = ['m', 'grootau21', 'ysfilt', 'ungrngtrk'] # Removed pt
PT_MIN = 450 #for file 1200
PT_MAX = 730 #for file 1200
F_OUTPUT = "G1200_signaltobackground_from_pt_mass_ysfilt_grootau21_ungrngtrk.root"
N_INPUTS = len(INPUTS)
#============== inputs / target ====================================
jet_features = []
target = []
#=================== branches for training and validation ===========
pt = []
m = []
grootau21 =[]
ysfilt = []
ungrngtrk = []
#weight = []
#================ Prepare the dataset ========================
# I need to change the data to include the multiplication by the weight (constant)
for fi in F_TEST: #Should it include background AND signal files? Yes.
current_e = 0
f = TFile(F_PATH + '/' + fi, 'read')
t = TTree()
f.GetObject("dibjet", t) # Changed from "Tree" to "dibjet"
for entry in t:
current_e += 1
if current_e > NUM_EXAMPLES: # NUM_EXAMPLES should change for the different files
break
if (t.jet1_pt > PT_MAX or t.jet1_pt < PT_MIN):
continue
tmp = []
if 'pt' in INPUTS: tmp += [t.jet1_pt / MAX_PT] #for file 1200
if 'm' in INPUTS: tmp += [t.jet1_m / 500] #for file 1200
if 'grootau21' in INPUTS: tmp += [t.jet1_grootau21]
if 'ysfilt' in INPUTS: tmp += [t.jet1_ysfilt]
if 'ungrngtrk' in INPUTS: tmp += [t.jet1_ungrngtrk / 110] #for file 1200
# We need only look at the class {background, signal} of the entry in terms of target
jet_features += [tmp]
# One-hot encoder
if fi == 'd3pd-ckt12rmd2030pp-G_ww_qqqq_1200.root': target += [[1, 0]]
else: target += [[0, 1]]
pt += [t.jet1_pt]
m += [t.jet1_m]
grootau21 += [t.jet1_grootau21]
ysfilt += [t.jet1_ysfilt]
ungrngtrk += [t.jet1_ungrngtrk]
#weight += [t.weight]
######################################
###### prepare inputs for NN #########
trainset = list(zip(jet_features, target)) # remove ref_target?
np.random.shuffle(trainset)
jet_features, target = zip(*trainset) # What does this line do? Rearranges jetmoments\target...
total_sample = len(target)
train_size = int(total_sample*TRAIN_SPLIT)
all_x = np.float32((jet_features)) # Converts the list type? Why double paranthesis?
all_y = np.float32(target)
train_x = all_x[:train_size] # Create training\testing partitions?
test_x = all_x[train_size:]
train_y = all_y[:train_size]
test_y = all_y[train_size:]
# Define important parameters and variable to work with the tensors
learning_rate = 0.3
training_epochs = 500
cost_history = np.empty(shape=[1], dtype=float)
n_dim = N_INPUTS
#print("n_dim", n_dim)
n_class = 2
model_path = "/home/cauchy/Documents/TensorFlow/Cuts_W" # Forgot what this path is used for
# Define the number of hidden layers and number of neurons for each layer
n_hidden_1 = 10
n_hidden_2 = 10
n_hidden_3 = 10
n_hidden_4 = 10
x = tf.placeholder(tf.float32, [None, n_dim])
W = tf.Variable(tf.zeros([n_dim, n_class]))
b = tf.Variable(tf.zeros([n_class]))
y_ = tf.placeholder(tf.float32, [None, n_class]) # Should we use a vector instead with 1 for signal and 0 for background?
# Define the model
def multilayer_perceptron(x, weights, biases):
# Hidden layer with sigmoid activation
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
layer_1 = tf.nn.sigmoid(layer_1)
# Hidden layer with sigmoid activation
layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
layer_2 = tf.nn.sigmoid(layer_2)
# Hidden layer with sigmoid activation
layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3'])
layer_3 = tf.nn.sigmoid(layer_3)
# Hidden layer with ReLU activation
layer_4 = tf.add(tf.matmul(layer_3, weights['h4']), biases['b4'])
layer_4 = tf.nn.relu(layer_4)
# Output layer with linear activation
out_layer = tf.matmul(layer_4, weights['out']) + biases['out']
return out_layer
# Define the weights and the biases for each layer
weights = {
'h1': tf.Variable(tf.truncated_normal([n_dim, n_hidden_1])),
'h2': tf.Variable(tf.truncated_normal([n_hidden_1, n_hidden_2])),
'h3': tf.Variable(tf.truncated_normal([n_hidden_2, n_hidden_3])),
'h4': tf.Variable(tf.truncated_normal([n_hidden_3, n_hidden_4])),
'out': tf.Variable(tf.truncated_normal([n_hidden_4, n_class]))
}
biases = {
'b1': tf.Variable(tf.truncated_normal([n_hidden_1])),
'b2': tf.Variable(tf.truncated_normal([n_hidden_2])),
'b3': tf.Variable(tf.truncated_normal([n_hidden_3])),
'b4': tf.Variable(tf.truncated_normal([n_hidden_4])),
'out': tf.Variable(tf.truncated_normal([n_class]))
}
# Initialize all the variables
init = tf.global_variables_initializer()
saver = tf.train.Saver()
# Call your model defined
y = multilayer_perceptron(x, weights, biases)
# Define the cost function and optimizer
cost_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
training_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)
sess = tf.Session
sess.run(init)
# Calculate the cost and the accuracy for each epoch
mse_history = [] # mean squared error
accuracy_history = []
for epoch in range(training_epochs):
sess.run(training_step, feed_dict={x: train_x, y_: train_y})
cost = sess.run(cost_function, feed_dict={x: train_x, y_: train_y})
cost_history = np.append(cost_history, cost)
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# print("Accuracy: ", (sess.run(accuracy, feed_dict={x:test_x, y_:test_y})))
pred_y = sess.run(y, feed_dict={x: test_x})
mse = tf.reduce_mean(tf.square(pred_y - test_y))
mse_ = sess.run(mse)
mse_history.append(mse_)
accuracy = (sess.run(accuracy, feed_dict={x: train_x, y_: train_y}))
accuracy_history.append(accuracy)
print('epoch: ', epoch, ' - ','cost: ', cost, " - MSE: ", mse_, "- Train Accuracy: ", accuracy)
save_path = saver.save(sess, model_path)
print("Model saved in file: %s" % save_path)
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print("Test Accuracy: ", (sess.run(accuracy, feed_dict={x: test_x, y_: test_y})))
# Print the final mean square error
pred_y = sess.run(y, feed_dict={x: test_x})
mse = tf.reduce_mean(tf.square(pred_y - test_y))
print("MSE: $.4f" % sess.run(mse))
predictions = {
# Generate predictions (for PREDICT and EVAL mode)
"classes": tf.argmax(input=logits, axis=1),
# Add `softmax_tensor` to the graph. It is used for PREDICT and by the
# `logging_hook`.
"probabilities": tf.nn.softmax(logits, name="softmax_tensor")
}
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
# Set up logging for predictions
# Log the values in the "Softmax" tensor with label "probabilities"
tensors_to_log = {"probabilities": "softmax_tensor"}
logging_hook = tf.train.LoggingTensorHook(
tensors=tensors_to_log, every_n_iter=50)

How to solve: "ValueError: Validation size should be between 0 and 0. Received: 5000."?

I am trying to make a character recognition classifier for bangla alphabets. The images are size of 50x50. There are in total of 50 classes. Using the below CNN model to train but I am encountering this error: "ValueError: Validation size should be between 0 and 0. Received: 5000."
How do I resolve this?
MODEL
# Python 3.6.0
# tensorflow 1.1.0
import os
import os.path as path
import tensorflow as tf
from tensorflow.python.tools import freeze_graph
from tensorflow.python.tools import optimize_for_inference_lib
from tensorflow.examples.tutorials.mnist import input_data
MODEL_NAME = 'mnist_convnet'
NUM_STEPS = 3000
BATCH_SIZE = 16
def model_input(input_node_name, keep_prob_node_name):
x = tf.placeholder(tf.float32, shape=[None, 50*50], name=input_node_name)
keep_prob = tf.placeholder(tf.float32, name=keep_prob_node_name)
y_ = tf.placeholder(tf.float32, shape=[None, 50])
return x, keep_prob, y_
def build_model(x, keep_prob, y_, output_node_name):
x_image = tf.reshape(x, [-1, 50, 50, 1])
# 50*50*1
conv1 = tf.layers.conv2d(x_image, 64, 3, 1, 'same', activation=tf.nn.relu)
# 50*50*64
pool1 = tf.layers.max_pooling2d(conv1, 2, 2, 'same')
# 14*14*64
conv2 = tf.layers.conv2d(pool1, 128, 3, 1, 'same', activation=tf.nn.relu)
# 14*14*128
pool2 = tf.layers.max_pooling2d(conv2, 2, 2, 'same')
# 7*7*128
conv3 = tf.layers.conv2d(pool2, 256, 3, 1, 'same', activation=tf.nn.relu)
# 7*7*256
pool3 = tf.layers.max_pooling2d(conv3, 2, 2, 'same')
# 4*4*256
flatten = tf.reshape(pool3, [-1, 4*4*256])
fc = tf.layers.dense(flatten, 1024, activation=tf.nn.relu)
dropout = tf.nn.dropout(fc, keep_prob)
logits = tf.layers.dense(dropout, 50)
outputs = tf.nn.softmax(logits, name=output_node_name)
# loss
loss = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
# train step
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)
# accuracy
correct_prediction = tf.equal(tf.argmax(outputs, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.summary.scalar("loss", loss)
tf.summary.scalar("accuracy", accuracy)
merged_summary_op = tf.summary.merge_all()
return train_step, loss, accuracy, merged_summary_op
def train(x, keep_prob, y_, train_step, loss, accuracy,
merged_summary_op, saver):
print("training start...")
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init_op)
tf.train.write_graph(sess.graph_def, 'out',
MODEL_NAME + '.pbtxt', True)
# op to write logs to Tensorboard
summary_writer = tf.summary.FileWriter('logs/',
graph=tf.get_default_graph())
for step in range(NUM_STEPS):
batch = mnist.train.next_batch(BATCH_SIZE)
if step % 100 == 0:
train_accuracy = accuracy.eval(feed_dict={
x: batch[0], y_: batch[1], keep_prob: 1.0})
print('step %d, training accuracy %f' % (step, train_accuracy))
_, summary = sess.run([train_step, merged_summary_op],
feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
summary_writer.add_summary(summary, step)
saver.save(sess, 'out/' + MODEL_NAME + '.chkp')
test_accuracy = accuracy.eval(feed_dict={x: mnist.test.images,
y_: mnist.test.labels,
keep_prob: 1.0})
print('test accuracy %g' % test_accuracy)
print("training finished!")
def export_model(input_node_names, output_node_name):
freeze_graph.freeze_graph('out/' + MODEL_NAME + '.pbtxt', None, False,
'out/' + MODEL_NAME + '.chkp', output_node_name, "save/restore_all",
"save/Const:0", 'out/frozen_' + MODEL_NAME + '.pb', True, "")
input_graph_def = tf.GraphDef()
with tf.gfile.Open('out/frozen_' + MODEL_NAME + '.pb', "rb") as f:
input_graph_def.ParseFromString(f.read())
output_graph_def = optimize_for_inference_lib.optimize_for_inference(
input_graph_def, input_node_names, [output_node_name],
tf.float32.as_datatype_enum)
with tf.gfile.FastGFile('out/opt_' + MODEL_NAME + '.pb', "wb") as f:
f.write(output_graph_def.SerializeToString())
print("graph saved!")
def main():
if not path.exists('out'):
os.mkdir('out')
input_node_name = 'input'
keep_prob_node_name = 'keep_prob'
output_node_name = 'output'
x, keep_prob, y_ = model_input(input_node_name, keep_prob_node_name)
train_step, loss, accuracy, merged_summary_op = build_model(x, keep_prob, y_, output_node_name)
saver = tf.train.Saver()
train(x, keep_prob, y_, train_step, loss, accuracy, merged_summary_op, saver)
export_model([input_node_name, keep_prob_node_name], output_node_name)
if __name__ == '__main__':
main()
ERROR
ValueError Traceback (most recent call last)
<ipython-input-2-2015e0ea466d> in <module>()
136
137 if __name__ == '__main__':
--> 138 main()
<ipython-input-2-2015e0ea466d> in main()
131 saver = tf.train.Saver()
132
--> 133 train(x, keep_prob, y_, train_step, loss, accuracy, merged_summary_op, saver)
134
135 export_model([input_node_name, keep_prob_node_name], output_node_name)
<ipython-input-2-2015e0ea466d> in train(x, keep_prob, y_, train_step, loss, accuracy, merged_summary_op, saver)
67 print("training start...")
68
---> 69 mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
70
71 init_op = tf.global_variables_initializer()
/anaconda3/envs/nlpTFnltk/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/datasets/mnist.py in read_data_sets(train_dir, fake_data, one_hot, dtype, reshape, validation_size)
247 raise ValueError(
248 'Validation size should be between 0 and {}. Received: {}.'
--> 249 .format(len(train_images), validation_size))
250
251 validation_images = train_images[:validation_size]
ValueError: Validation size should be between 0 and 0. Received: 5000.

You're using the MNIST tutorial code, which is calling read_data_sets from here; note that validation_size of 5000 comes from that function's default parameters. It's expecting to get data from the following files:
TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
TEST_LABELS = 't10k-labels-idx1-ubyte.gz'
Normally it would try to download those files if it doesn't find them, but the fact that you're getting a validation_size of 0 suggests it isn't doing so. This wouldn't help you anyway, since you don't want to use the MNIST data.
Even if you rename your train and test files to match the above filenames, your code won't work because the MNIST code is also calling extract_labels, which has a default parameter num_classes=10 while you want this to be 50. Your best bet is probably to get rid of the MNIST import completely and read about how to set up an input pipeline; it's not difficult compared to the stuff you've done already.

Dimensions must be equal error in bayesian network

I have bayesian network code to train mnist dataset like this:
import edward as ed
import tensorflow as tf
from edward.models import Normal
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
n_nodes_hl1 = 500
n_nodes_hl2 = 500
n_nodes_hl3 = 500
n_classes = 10
batch_size = 100
x_ = tf.placeholder('float', [None, 784])
y_ = tf.placeholder('float', shape=(batch_size))
# def neural_network_model(data):
w_h1 = Normal(loc=tf.zeros([784, n_nodes_hl1]), scale=tf.ones([784, n_nodes_hl1]))
w_h2 = Normal(loc=tf.zeros([n_nodes_hl1, n_nodes_hl2]), scale=tf.ones([n_nodes_hl1, n_nodes_hl2]))
w_h3 = Normal(loc=tf.zeros([n_nodes_hl2, n_nodes_hl3]), scale=tf.ones([n_nodes_hl2, n_nodes_hl3]))
w_o = Normal(loc=tf.zeros([n_nodes_hl3, n_classes]), scale=tf.ones([n_nodes_hl3, n_classes]))
b_h1 = Normal(loc=tf.zeros([n_nodes_hl1]), scale=tf.ones([n_nodes_hl1]))
b_h2 = Normal(loc=tf.zeros([n_nodes_hl2]), scale=tf.ones([n_nodes_hl2]))
b_h3 = Normal(loc=tf.zeros([n_nodes_hl3]), scale=tf.ones([n_nodes_hl3]))
b_o = Normal(loc=tf.zeros([n_classes]), scale=tf.ones([n_classes]))
y_pre = Normal(tf.matmul(x_, w_o) + b_o, scale=1.0)
qw_h1 = Normal(loc=tf.Variable(tf.random_normal([784, n_nodes_hl1])),
scale=tf.Variable(tf.random_normal([784, n_nodes_hl1])))
qw_h2 = Normal(loc=tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
scale=tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])))
qw_h3 = Normal(loc=tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
scale=tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])))
qw_o = Normal(loc=tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])),
scale=tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])))
qb_h1 = Normal(loc=tf.Variable(tf.random_normal([n_nodes_hl1])), scale=tf.Variable(tf.random_normal([n_nodes_hl1])))
qb_h2 = Normal(loc=tf.Variable(tf.random_normal([n_nodes_hl2])), scale=tf.Variable(tf.random_normal([n_nodes_hl2])))
qb_h3 = Normal(loc=tf.Variable(tf.random_normal([n_nodes_hl3])), scale=tf.Variable(tf.random_normal([n_nodes_hl3])))
qb_o = Normal(loc=tf.Variable(tf.random_normal([n_classes])), scale=tf.Variable(tf.random_normal([n_classes])))
y = Normal(tf.matmul(x_, qw_o) + qb_o, scale=1.0)
inference = ed.KLqp({w_h1: qw_h1, b_h1: qb_h1,
w_h2: qw_h2, b_h2: qb_h2,
w_h3: qw_h3, b_h3: qb_h3,
w_o: qw_o, b_o: qb_o, }, data={y_pre: y_})
inference.initialize()
sess = tf.Session()
sess.run(tf.global_variables_initializer())
hm_epochs = 10
with sess:
for epoch in range(hm_epochs):
epoch_loss = 0
for _ in range(int(mnist.train.num_examples / batch_size)):
epoch_x, epoch_y = mnist.train.next_batch(batch_size)
inference.update(feed_dict={x_: epoch_x, y_: epoch_y})
I make it by combine neural network from pythonprogramming.net tutorial and bayesian network from this link. But, there is error:
ValueError: Dimensions must be equal, but are 784 and 500 for 'MatMul'
(op: 'MatMul') with input shapes: [?,784], [500,10].
So, what is the meaning of error above? How to solve it?

Tensor' object is not iterable

I have the following message 'Tensor' object is not iterable." when trying to run my code, can someone tell me where is my mistake?
All the problem started because I need to normalize the the data that I am feeding. Therefore the problems comes from this part of my code:
for epoch in range(training_epochs):
avg_cost = 0.
total_batch = int(total_len / batch_size)
# Loop over all batches
for i in range(total_batch):
batch_x = tf.train.batch(X_train_norm, batch_size)
batch_y = tf.train.batch(Y_train, batch_size)
# Run optimization op (backprop) and cost op (to get loss value)
_, c, p = sess.run([optimizer, cost, pred], feed_dict={x: batch_x, y: batch_y})
# Compute average loss
avg_cost += c / total_batch
# sample prediction
label_value = batch_y
estimate = p
err = label_value - estimate
print("num batch:", total_batch)
Here is my whole code:
import tensorflow as tf
import numpy as np
X_train = np.genfromtxt('data/train500X.csv', delimiter=',', dtype=float)
Y_train = np.genfromtxt('/data/train500Y.csv', delimiter=',', dtype=float)
X_test = np.genfromtxt('data/test100X.csv', delimiter=',', dtype=float,usecols=(14, 7, 33, 13, 32, 60, 16, 50, 18, 61, 17, 34, 26, 59, 85, 53))
Y_test = np.genfromtxt('data/test100Y.csv', delimiter=',', dtype=float)
X_train_norm = tf.nn.l2_normalize(X_train, 0, epsilon=1e-12)
# Parameters
learning_rate = 0.001
training_epochs = 5000
batch_size = 50
display_step = 1
# Network Parameters
n_hidden_1 = 100 # 1st layer number of features
n_hidden_2 = 200 # 2nd layer number of features
n_hidden_3 = 200 # 3rd layer number of features
n_hidden_4 = 256 # 4th layer number of features
n_out = 1
n_input = X_train.shape[1]
total_len = X_train.shape[0]
# tf Graph input
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None])
# Create model
def multilayer_perceptron(x, weights, biases):
# Hidden layer with RELU activation
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
layer_1 = tf.nn.relu(layer_1)
# Hidden layer with RELU activation
layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
layer_2 = tf.nn.relu(layer_2)
# Hidden layer with RELU activation
layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3'])
layer_3 = tf.nn.relu(layer_3)
# Hidden layer with RELU activation
layer_4 = tf.add(tf.matmul(layer_3, weights['h4']), biases['b4'])
layer_4 = tf.nn.relu(layer_4)
# Output layer with linear activation
out_layer = tf.matmul(layer_4, weights['out']) + biases['out']
return out_layer
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], 0, 0.1)),
'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], 0, 0.1)),
'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3], 0, 0.1)),
'h4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4], 0, 0.1)),
'out': tf.Variable(tf.random_normal([n_hidden_4, n_out], 0, 0.1))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1], 0, 0.1)),
'b2': tf.Variable(tf.random_normal([n_hidden_2], 0, 0.1)),
'b3': tf.Variable(tf.random_normal([n_hidden_3], 0, 0.1)),
'b4': tf.Variable(tf.random_normal([n_hidden_4], 0, 0.1)),
'out': tf.Variable(tf.random_normal([n_out], 0, 0.1))
}
# Construct model
pred = multilayer_perceptron(x, weights, biases)
# Define loss and optimizer
# cost = tf.reduce_mean(tf.square(pred - y))
cost = tf.reduce_mean(tf.square(tf.transpose(pred) - y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Launch the graph
with tf.Session() as sess:
sess.run(tf.initialize_all_variables())
for epoch in range(training_epochs):
avg_cost = 0.
total_batch = int(total_len / batch_size)
# Loop over all batches
for i in range(total_batch):
batch_x = tf.train.batch(X_train_norm, batch_size)
batch_y = tf.train.batch(Y_train, batch_size)
# Run optimization op (backprop) and cost op (to get loss value)
_, c, p = sess.run([optimizer, cost, pred], feed_dict={x: batch_x, y: batch_y})
# Compute average loss
avg_cost += c / total_batch
# sample prediction
label_value = batch_y
estimate = p
err = label_value - estimate
print("num batch:", total_batch)
# Display logs per epoch step
if epoch % display_step == 0:
print("Epoch:", '%04d' % (epoch + 1), "cost=", \
"{:.9f}".format(avg_cost))
print("[*]----------------------------")
for i in range(3):
print("label value:", label_value[i], \
"estimated value:", estimate[i])
print("[*]============================")
print("Optimization Finished!")
Thank you in advance for your help.

I found out that if I put the result in a session run will give me what I needed
batch_x_norm = tf.Session().run(tf.nn.l2_normalize(batch_x, 0, epsilon=1e-12))

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

tensorflow multi-gpu mnist example, loss does not decrease - python

The problem is that, I should use tf.constant_initializer(0.1) for the weights instead of tf.constant_initializer(0)

Related

Tensorflow: Same input data, different output

Generate Histogram of TensorFlow Predictions

How to solve: "ValueError: Validation size should be between 0 and 0. Received: 5000."?

Dimensions must be equal error in bayesian network

Tensor' object is not iterable

Categories

Resources