Related
I'm using tensorflow 1.10.0. I've been following the tutorial for saving and loading a simple trained MLP model. Saving of data works perfectly and creates following files:
train.ckpt.data-00000-of-00001
train.ckpt.index
train.ckpt.meta
When I'm trying to load train_opt or accmetric variable using:
import tensorflow as tf
with tf.Session() as sess:
load_mod = tf.train.import_meta_graph('/home/akshay/train.ckpt.meta')
load_mod.restore(sess, tf.train.latest_checkpoint('/home/akshay/'))
print (tf.get_default_graph().get_tensor_by_name('train_opt:0'))
I get following error:
Traceback (most recent call last):
File "recover_tftrain.py", line 6, in <module>
print (tf.get_default_graph().get_tensor_by_name('accmetric:0'))
File "/home/arpita/anaconda2/lib/python2.7/site-
packages/tensorflow/python/framework/ops.py", line 3515, in get_tensor_by_name
return self.as_graph_element(name, allow_tensor=True, allow_operation=False)
File "/home/arpita/anaconda2/lib/python2.7/site-
packages/tensorflow/python/framework/ops.py", line 3339, in as_graph_element
return self._as_graph_element_locked(obj, allow_tensor, allow_operation)
File "/home/arpita/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 3381, in _as_graph_element_locked
"graph." % (repr(name), repr(op_name)))
KeyError: "The name 'accmetric:0' refers to a Tensor which does not exist.
The operation, 'accmetric', does not exist in the graph."
However, the loss variable loads perfectly:
Tensor("loss:0", shape=(), dtype=float32)
Are there only some specific variables that can be loaded? Or is there any issue of scope?
Complete code:
from create_batches import Batch
import extractData
import tensorflow as tf
# prepare input data and output labels for neural network
datafile = '/home/akshay/Desktop/datafile.csv'
labelfile = '/home/akshay/Desktop/labelfile.csv'
num_input = 2000
num_hidden1 = 200
num_hidden2 = 200
num_hidden3 = 200
num_output = 25
batch_size = 200
epochs = 25
batch = Batch(extractData.create_data(datafile), extractData.create_labels(labelfile), batch_size)
# create tensorflow networks
vowel_inp = tf.placeholder(dtype = tf.float32, shape = [None, 40000], name = "text_inp")
label_oup = tf.placeholder(dtype = tf.int32, shape = [None], name = "label_oup")
vowel_flat = tf.contrib.layers.flatten(vowel_inp)
# fully connected layers
hidden_1 = tf.layers.dense(inputs = vowel_flat, units = num_hidden1, name = "hidden1", activation = tf.nn.sigmoid)
hidden_2 = tf.layers.dense(inputs = hidden_1, units = num_hidden2, name = "hidden2", activation = tf.nn.sigmoid)
hidden_3 = tf.layers.dense(inputs = hidden_2, units = num_hidden3, name = "hidden3", activation = tf.nn.sigmoid)
train_oup = tf.layers.dense(inputs = hidden_3, units = num_output, name = "output")
# define a cost function
xentropy = tf.losses.sparse_softmax_cross_entropy(labels = label_oup, logits = train_oup)
# define a loss function
loss = tf.reduce_mean(xentropy, name = "loss")
# define an optimizer
train_opt = tf.train.AdagradOptimizer(learning_rate = 0.001).minimize(loss, name="train_opt")
# define accuracy metric
acc, acc_metric_update = tf.metrics.accuracy(label_oup, tf.argmax(train_oup, 1), name="accmetric")
loss_val, acc_val = 0, 0
sess = tf.Session()
sess.run(tf.local_variables_initializer())
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
for j in range(epochs):
batch.reset()
for i in range(int(2000/batch_size)):
x, y = batch.getBatch()
y = y.reshape(batch_size)
feed_dict = {vowel_inp: x, label_oup: y}
loss_val, _, acc_val = sess.run([loss, train_opt, acc_metric_update], feed_dict=feed_dict)
if j%25==0:
print ('Epoch:', j, 'Accuracy Val:', acc_val)
print ("Final score:",sess.run(acc))
#save the model
print ('Model saved in: ', saver.save(sess, '/home/akshay/train.ckpt'))
sess.close()
I wish to log the predictions every N epochs\iterations and generate a histogram for each class. My question is how do I log the predictions into an array, including the label in order to generate the histograms?
How do I make sure it happens of every N epochs\iterations?
I have edited the post to add the code so you will be able to see what I am talking about. The last 2 code chunks should somehow be used for what I requested.
Thanks in advance!
import tensorflow as tf
import numpy as np
import math
from random import random
from array import array
from ROOT import TFile, TTree, TH1D, TH2D, TBranch, vector
NUM_EXAMPLES = 1.6e4
TRAIN_SPLIT = .8
MINI_BATCH_SIZE = 1000
#NUM_EPOCHS = 3500
F_PATH = "/home/cauchy/Documents/Machine_Learning"
F_TEST = []
F_TEST += ["d3pd-ckt12rmd2030pp-G_ww_qqqq_%d%d00.root" % (1,2)]
F_TEST += ["d3pd-ckt12rmd2030pp-pyj%d.root" % (4)]
F_TEST += ["d3pd-ckt12rmd2030pp-pyj%d.root" % (5)]
F_TEST += ["d3pd-ckt12rmd2030pp-pyj%d.root" % (6)]
F_TEST += ["d3pd-ckt12rmd2030pp-pyj%d.root" % (7)]
#CALIBRATION_TARGET = "pt" # you can use pt,m,eta
INPUTS = ['m', 'grootau21', 'ysfilt', 'ungrngtrk'] # Removed pt
PT_MIN = 450 #for file 1200
PT_MAX = 730 #for file 1200
F_OUTPUT = "G1200_signaltobackground_from_pt_mass_ysfilt_grootau21_ungrngtrk.root"
N_INPUTS = len(INPUTS)
#============== inputs / target ====================================
jet_features = []
target = []
#=================== branches for training and validation ===========
pt = []
m = []
grootau21 =[]
ysfilt = []
ungrngtrk = []
#weight = []
#================ Prepare the dataset ========================
# I need to change the data to include the multiplication by the weight (constant)
for fi in F_TEST: #Should it include background AND signal files? Yes.
current_e = 0
f = TFile(F_PATH + '/' + fi, 'read')
t = TTree()
f.GetObject("dibjet", t) # Changed from "Tree" to "dibjet"
for entry in t:
current_e += 1
if current_e > NUM_EXAMPLES: # NUM_EXAMPLES should change for the different files
break
if (t.jet1_pt > PT_MAX or t.jet1_pt < PT_MIN):
continue
tmp = []
if 'pt' in INPUTS: tmp += [t.jet1_pt / MAX_PT] #for file 1200
if 'm' in INPUTS: tmp += [t.jet1_m / 500] #for file 1200
if 'grootau21' in INPUTS: tmp += [t.jet1_grootau21]
if 'ysfilt' in INPUTS: tmp += [t.jet1_ysfilt]
if 'ungrngtrk' in INPUTS: tmp += [t.jet1_ungrngtrk / 110] #for file 1200
# We need only look at the class {background, signal} of the entry in terms of target
jet_features += [tmp]
# One-hot encoder
if fi == 'd3pd-ckt12rmd2030pp-G_ww_qqqq_1200.root': target += [[1, 0]]
else: target += [[0, 1]]
pt += [t.jet1_pt]
m += [t.jet1_m]
grootau21 += [t.jet1_grootau21]
ysfilt += [t.jet1_ysfilt]
ungrngtrk += [t.jet1_ungrngtrk]
#weight += [t.weight]
######################################
###### prepare inputs for NN #########
trainset = list(zip(jet_features, target)) # remove ref_target?
np.random.shuffle(trainset)
jet_features, target = zip(*trainset) # What does this line do? Rearranges jetmoments\target...
total_sample = len(target)
train_size = int(total_sample*TRAIN_SPLIT)
all_x = np.float32((jet_features)) # Converts the list type? Why double paranthesis?
all_y = np.float32(target)
train_x = all_x[:train_size] # Create training\testing partitions?
test_x = all_x[train_size:]
train_y = all_y[:train_size]
test_y = all_y[train_size:]
# Define important parameters and variable to work with the tensors
learning_rate = 0.3
training_epochs = 500
cost_history = np.empty(shape=[1], dtype=float)
n_dim = N_INPUTS
#print("n_dim", n_dim)
n_class = 2
model_path = "/home/cauchy/Documents/TensorFlow/Cuts_W" # Forgot what this path is used for
# Define the number of hidden layers and number of neurons for each layer
n_hidden_1 = 10
n_hidden_2 = 10
n_hidden_3 = 10
n_hidden_4 = 10
x = tf.placeholder(tf.float32, [None, n_dim])
W = tf.Variable(tf.zeros([n_dim, n_class]))
b = tf.Variable(tf.zeros([n_class]))
y_ = tf.placeholder(tf.float32, [None, n_class]) # Should we use a vector instead with 1 for signal and 0 for background?
# Define the model
def multilayer_perceptron(x, weights, biases):
# Hidden layer with sigmoid activation
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
layer_1 = tf.nn.sigmoid(layer_1)
# Hidden layer with sigmoid activation
layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
layer_2 = tf.nn.sigmoid(layer_2)
# Hidden layer with sigmoid activation
layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3'])
layer_3 = tf.nn.sigmoid(layer_3)
# Hidden layer with ReLU activation
layer_4 = tf.add(tf.matmul(layer_3, weights['h4']), biases['b4'])
layer_4 = tf.nn.relu(layer_4)
# Output layer with linear activation
out_layer = tf.matmul(layer_4, weights['out']) + biases['out']
return out_layer
# Define the weights and the biases for each layer
weights = {
'h1': tf.Variable(tf.truncated_normal([n_dim, n_hidden_1])),
'h2': tf.Variable(tf.truncated_normal([n_hidden_1, n_hidden_2])),
'h3': tf.Variable(tf.truncated_normal([n_hidden_2, n_hidden_3])),
'h4': tf.Variable(tf.truncated_normal([n_hidden_3, n_hidden_4])),
'out': tf.Variable(tf.truncated_normal([n_hidden_4, n_class]))
}
biases = {
'b1': tf.Variable(tf.truncated_normal([n_hidden_1])),
'b2': tf.Variable(tf.truncated_normal([n_hidden_2])),
'b3': tf.Variable(tf.truncated_normal([n_hidden_3])),
'b4': tf.Variable(tf.truncated_normal([n_hidden_4])),
'out': tf.Variable(tf.truncated_normal([n_class]))
}
# Initialize all the variables
init = tf.global_variables_initializer()
saver = tf.train.Saver()
# Call your model defined
y = multilayer_perceptron(x, weights, biases)
# Define the cost function and optimizer
cost_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
training_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)
sess = tf.Session
sess.run(init)
# Calculate the cost and the accuracy for each epoch
mse_history = [] # mean squared error
accuracy_history = []
for epoch in range(training_epochs):
sess.run(training_step, feed_dict={x: train_x, y_: train_y})
cost = sess.run(cost_function, feed_dict={x: train_x, y_: train_y})
cost_history = np.append(cost_history, cost)
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# print("Accuracy: ", (sess.run(accuracy, feed_dict={x:test_x, y_:test_y})))
pred_y = sess.run(y, feed_dict={x: test_x})
mse = tf.reduce_mean(tf.square(pred_y - test_y))
mse_ = sess.run(mse)
mse_history.append(mse_)
accuracy = (sess.run(accuracy, feed_dict={x: train_x, y_: train_y}))
accuracy_history.append(accuracy)
print('epoch: ', epoch, ' - ','cost: ', cost, " - MSE: ", mse_, "- Train Accuracy: ", accuracy)
save_path = saver.save(sess, model_path)
print("Model saved in file: %s" % save_path)
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print("Test Accuracy: ", (sess.run(accuracy, feed_dict={x: test_x, y_: test_y})))
# Print the final mean square error
pred_y = sess.run(y, feed_dict={x: test_x})
mse = tf.reduce_mean(tf.square(pred_y - test_y))
print("MSE: $.4f" % sess.run(mse))
predictions = {
# Generate predictions (for PREDICT and EVAL mode)
"classes": tf.argmax(input=logits, axis=1),
# Add `softmax_tensor` to the graph. It is used for PREDICT and by the
# `logging_hook`.
"probabilities": tf.nn.softmax(logits, name="softmax_tensor")
}
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
# Set up logging for predictions
# Log the values in the "Softmax" tensor with label "probabilities"
tensors_to_log = {"probabilities": "softmax_tensor"}
logging_hook = tf.train.LoggingTensorHook(
tensors=tensors_to_log, every_n_iter=50)
I'm trying 2 class classification of images by neural network using Tensorflow.
I want to extract 1000 pixels randomly.
However, I am in trouble with error:
"logits = inference(images_placeholder, keep_prob)
File "train5.py", line 83, in inference
list = random.sample(x_image(IMAGE_PIXELS),SAMPLE_PIXELS)
TypeError: 'Tensor' object is not callable"
Please tell me what should I do.
I will attach a code below.
import sys
sys.path.append('/usr/local/opt/opencv3/lib/python3.5.4/site-packages')
import cv2
import numpy as np
import tensorflow as tf
import tensorflow.python.platform
import tensorboard as tb
import os
import math
import time
import random
start_time = time.time()
# TensorBoard output information directory
log_dir = '/tmp/data1' #tensorboard --logdir=/tmp/data1
#directory delete and reconstruction
if tf.gfile.Exists(log_dir):
tf.gfile.DeleteRecursively(log_dir)
tf.gfile.MakeDirs(log_dir)
# Reserve memory
config = tf.ConfigProto(
gpu_options=tf.GPUOptions(allow_growth=True))
sess = sess = tf.Session(config=config)
NUM_CLASSES = 2
IMAGE_SIZE_x = 1024
IMAGE_SIZE_y = 768
IMAGE_CHANNELS = 1
IMAGE_PIXELS = IMAGE_SIZE_x*IMAGE_SIZE_y*IMAGE_CHANNELS
SAMPLE_PIXELS = 1000
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('train', 'train.txt', 'File name of train data')
flags.DEFINE_string('test', 'test.txt', 'File name of train data')
flags.DEFINE_string('image_dir', 'trdata', 'Directory of images')
flags.DEFINE_string('train_dir', '/tmp/data', 'Directory to put the training data.')
flags.DEFINE_integer('max_steps', 20000, 'Number of steps to run trainer.')
flags.DEFINE_integer('batch_size', 10, 'Batch size'
'Must divide evenly into the dataset sizes.')
flags.DEFINE_float('learning_rate', 1e-5, 'Initial learning rate.')
def inference(images_placeholder, keep_prob):
""" Function to create predictive model
argument:
images_placeholder: image placeholder
keep_prob: dropout rate placeholder
Return:
y_conv:
"""
# Initialie with normal distribution with weight of 0.1
def weight_variable(shape):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial)
# Initialized with normal distribution with bias of 0.1
def bias_variable(shape):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial)
# Reshape input
x_image = images_placeholder
# ramdom sumpling pixels
list = random.sample(x_image(IMAGE_PIXELS),SAMPLE_PIXELS)
x_list = [samples[i] for i in list]
# Input
with tf.name_scope('fc1') as scope:
W_fc1 = weight_variable([x_list,10])
b_fc1 = bias_variable([10])
h_fc1 = tf.nn.relu(tf.matmul(x_image,W_fc1) + b_fc1)
# Affine1
with tf.name_scope('fc2') as scope:
W_fc2 = weight_variable([10,10])
b_fc2 = bias_variable([10])
h_fc2 = tf.nn.relu(tf.matmul(h_fc1,W_fc2) + b_fc2)
# Affine2
with tf.name_scope('fc3') as scope:
W_fc3 = weight_variable([10,10])
b_fc3 = bias_variable([10])
h_fc3 = tf.nn.relu(tf.matmul(h_fc2,W_fc3) + b_fc3)
# Affine3
with tf.name_scope('fc4') as scope:
W_fc4 = weight_variable([10,10])
b_fc4 = bias_variable([10])
h_fc4 = tf.nn.relu(tf.matmul(h_fc3,W_fc4) + b_fc4)
# Affine4
with tf.name_scope('fc5') as scope:
W_fc5 = weight_variable([10,2])
b_fc5 = bias_variable([2])
# softmax regression
with tf.name_scope('softmax') as scope:
y_out=tf.nn.softmax(tf.matmul(h_fc4, W_fc5) + b_fc5)
# return
return y_out
def loss(logits, labels):
""" loss function
引数:
logits: logit tensor, float - [batch_size, NUM_CLASSES]
labels: labrl tensor, int32 - [batch_size, NUM_CLASSES]
返り値:
cross_entropy:tensor, float
"""
# cross entropy
cross_entropy = -tf.reduce_sum(labels*tf.log(tf.clip_by_value(logits,1e-10,1.0)))
# TensorBoard
tf.summary.scalar("cross_entropy", cross_entropy)
return cross_entropy
def training(loss, learning_rate):
#Adam
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
return train_step
def accuracy(logits, labels):
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
tf.summary.scalar("accuracy", accuracy)
return accuracy
if __name__ == '__main__':
f = open(FLAGS.train, 'r')
# array data
train_image = []
train_label = []
for line in f:
# Separate space and remove newlines
line = line.rstrip()
l = line.split()
# Load data and resize
img = cv2.imread(FLAGS.image_dir + '/' + l[0])
img = cv2.resize(img, (IMAGE_SIZE_x, IMAGE_SIZE_y))
#transrate grayscale
img_gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# transrate one row and 0-1 float
train_image.append(img_gry.flatten().astype(np.float32)/255.0)
# Prepare with label 1-of-k method
tmp = np.zeros(NUM_CLASSES)
tmp[int(l[1])] = 1
train_label.append(tmp)
# transrate numpy
train_image = np.asarray(train_image)
train_label = np.asarray(train_label)
f.close()
f = open(FLAGS.test, 'r')
test_image = []
test_label = []
for line in f:
line = line.rstrip()
l = line.split()
img = cv2.imread(FLAGS.image_dir + '/' + l[0])
img = cv2.resize(img, (IMAGE_SIZE_x, IMAGE_SIZE_y))
#transrate grayscale
img_gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# transrate one row and 0-1 float
test_image.append(img_gry.flatten().astype(np.float32)/255.0)
tmp = np.zeros(NUM_CLASSES)
tmp[int(l[1])] = 1
test_label.append(tmp)
test_image = np.asarray(test_image)
test_label = np.asarray(test_label)
f.close()
with tf.Graph().as_default():
# Put the image Tensor
images_placeholder = tf.placeholder("float", shape=(None, IMAGE_PIXELS))
# Put the label Tensor
labels_placeholder = tf.placeholder("float", shape=(None, NUM_CLASSES))
# Put dropout rate Tensor
keep_prob = tf.placeholder("float")
# Load inference() and make model
logits = inference(images_placeholder, keep_prob)
# Load loss() and calculate loss
loss_value = loss(logits, labels_placeholder)
# Load training() and train
train_op = training(loss_value, FLAGS.learning_rate)
# calculate accuracy
acc = accuracy(logits, labels_placeholder)
# save
saver = tf.train.Saver()
# Make Session
sess = tf.Session()
# Initialize variable
sess.run(tf.global_variables_initializer())
# TensorBoard
summary_op = tf.summary.merge_all()
summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
# Start training
for step in range(FLAGS.max_steps):
for i in range(int(len(train_image)/FLAGS.batch_size)):
batch = FLAGS.batch_size*i
sess.run(train_op, feed_dict={
images_placeholder: train_image[batch:batch+FLAGS.batch_size],
labels_placeholder: train_label[batch:batch+FLAGS.batch_size],
keep_prob: 0.5})
# Accuracy calculation for every steps
train_accuracy = sess.run(acc, feed_dict={
images_placeholder: train_image,
labels_placeholder: train_label,
keep_prob: 1.0})
print("step %d, training accuracy %g" %(step, train_accuracy))
# Added value to be displayed in Tensorflow every 1step
summary_str = sess.run(summary_op, feed_dict={
images_placeholder: train_image,
labels_placeholder: train_label,
keep_prob: 1.0})
summary_writer.add_summary(summary_str, step)
# Display accuracy on test data after training
print(" test accuracy %g"%sess.run(acc, feed_dict={
images_placeholder: test_image,
labels_placeholder: test_label,
keep_prob: 1.0}))
duration = time.time() - start_time
print('%.3f sec' %duration)
# Save model
save_path = saver.save(sess, os.getcwd() + "\\model.ckpt")
The error is this:
images_placeholder = tf.placeholder("float", shape=(None, IMAGE_PIXELS))
...
x_image = images_placeholder
list = random.sample(x_image(IMAGE_PIXELS),SAMPLE_PIXELS)
x_image, just like images_placeholder is a variable node, so x_image(...) doesn't make sense and obviously leads to an error "TypeError: 'Tensor' object is not callable".
I assume you're trying to sample SAMPLE_PIXELS from each image in a batch. Note that random.sample won't work here, because x_image is a symbolic variable, its value is only known during the session. You have to use tf.boolean_mask with a random mask in order to select random pixels from the image.
I am trying to parallelize my code to have my tensorflow model run on multiple GPUs. For some reason, the code I wrote to parallelize the training works for a standard deep neural net, but throws errors when using a convolutional neural net.
Here is my code to compute the average gradients:
def average_gradients(tower_grads):
average_grads = []
for grad_and_vars in zip(*tower_grads):
# Note that each grad_and_vars looks like the following:
# ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
grads = []
for g, _ in grad_and_vars:
# Add 0 dimension to the gradients to represent the tower.
expanded_g = tf.expand_dims(g, 0)
# Append on a 'tower' dimension which we will average over below.
grads.append(expanded_g)
# Average over the 'tower' dimension.
grad = tf.concat(axis=0, values=grads)
grad = tf.reduce_mean(grad, 0)
# Keep in mind that the Variables are redundant because they are shared
# across towers. So .. we will just return the first tower's pointer to
# the Variable.
v = grad_and_vars[0][1]
grad_and_var = (grad, v)
average_grads.append(grad_and_var)
return average_grads
This is my deep neural net architecture: (this works)
def neuralNet(data):
hl_1 = {'weights':tf.get_variable('Weights1',[TF_SHAPE,n_nodes_hl1],initializer=tf.random_normal_initializer()),
'biases':tf.get_variable('Biases1',[n_nodes_hl1],initializer=tf.random_normal_initializer())}
hl_2 = {'weights':tf.get_variable('Weights2',[n_nodes_hl1, n_nodes_hl2],initializer=tf.random_normal_initializer()),
'biases':tf.get_variable('Biases2',[n_nodes_hl2],initializer=tf.random_normal_initializer())}
hl_3 = {'weights':tf.get_variable('Weights3',[n_nodes_hl2, n_nodes_hl3],initializer=tf.random_normal_initializer()),
'biases':tf.get_variable('Biases3',[n_nodes_hl3],initializer=tf.random_normal_initializer())}
hl_4 = {'weights':tf.get_variable('Weights4',[n_nodes_hl3, n_nodes_hl4],initializer=tf.random_normal_initializer()),
'biases':tf.get_variable('Biases4',[n_nodes_hl4],initializer=tf.random_normal_initializer())}
hl_5 = {'weights':tf.get_variable('Weights5',[n_nodes_hl4, n_nodes_hl5],initializer=tf.random_normal_initializer()),
'biases':tf.get_variable('Biases5',[n_nodes_hl5],initializer=tf.random_normal_initializer())}
output_layer = {'weights':tf.get_variable('Weights-outputlayer',[n_nodes_hl5, n_classes],initializer=tf.random_normal_initializer()),
'biases':tf.get_variable('Biases-outputlayer',[n_classes],initializer=tf.random_normal_initializer())}
l1 = tf.add(tf.matmul(data, hl_1['weights']), hl_1['biases'])
l1 = tf.nn.sigmoid(l1, name='op1')
l2 = tf.add(tf.matmul(l1, hl_2['weights']), hl_2['biases'])
l2 = tf.nn.sigmoid(l2, name='op2')
l3 = tf.add(tf.matmul(l2, hl_3['weights']), hl_3['biases'])
l3 = tf.nn.sigmoid(l3, name='op3')
l4 = tf.add(tf.matmul(l3, hl_4['weights']), hl_4['biases'])
l4 = tf.nn.sigmoid(l4, name='op4')
l5 = tf.add(tf.matmul(l4, hl_5['weights']), hl_5['biases'])
l5 = tf.nn.sigmoid(l5, name='op5')
dropout = tf.nn.dropout(l5,keep_prob, name='op6')
ol = tf.add(tf.matmul(dropout, output_layer['weights']), output_layer['biases'], name='op7')
return ol
This is my convnet: (this does not work)
def conv2d(x,W):
return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')
def maxpool2d(x):
return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding="SAME")
def convNeuralNet(x):
weights = {'w_conv1':tf.get_variable('w_conv1',[7,7,1,2],initializer=tf.random_normal_initializer()),
'w_conv2':tf.get_variable('w_conv2',[7,7,2,4],initializer=tf.random_normal_initializer()),
'w_conv3':tf.get_variable('w_conv3',[7,7,4,8],initializer=tf.random_normal_initializer()),
'w_conv4':tf.get_variable('w_conv4',[7,7,8,16],initializer=tf.random_normal_initializer()),
'w_conv5':tf.get_variable('w_conv5',[7,7,16,32],initializer=tf.random_normal_initializer()),
'w_conv6':tf.get_variable('w_conv6',[7,7,32,64],initializer=tf.random_normal_initializer()),
'w_conv7':tf.get_variable('w_conv7',[7,7,64,128],initializer=tf.random_normal_initializer()),
'w_conv8':tf.get_variable('w_conv8',[7,7,128,256],initializer=tf.random_normal_initializer()),
'w_conv9':tf.get_variable('w_conv9',[7,7,256,512],initializer=tf.random_normal_initializer()),
'w_fc1':tf.get_variable('w_fc1',[512,1024],initializer=tf.random_normal_initializer()),
'w_fc2':tf.get_variable('w_fc2',[1024,2048],initializer=tf.random_normal_initializer()),
'w_fc3':tf.get_variable('w_fc3',[2048,4096],initializer=tf.random_normal_initializer()),
'out':tf.get_variable('w_out',[4096,n_classes],initializer=tf.random_normal_initializer())}
biases = {'b_conv1':tf.get_variable('b_conv1',[2],initializer=tf.random_normal_initializer()),
'b_conv2':tf.get_variable('b_conv2',[4],initializer=tf.random_normal_initializer()),
'b_conv3':tf.get_variable('b_conv3',[8],initializer=tf.random_normal_initializer()),
'b_conv4':tf.get_variable('b_conv4',[16],initializer=tf.random_normal_initializer()),
'b_conv5':tf.get_variable('b_conv5',[32],initializer=tf.random_normal_initializer()),
'b_conv6':tf.get_variable('b_conv6',[64],initializer=tf.random_normal_initializer()),
'b_conv7':tf.get_variable('b_conv7',[128],initializer=tf.random_normal_initializer()),
'b_conv8':tf.get_variable('b_conv8',[256],initializer=tf.random_normal_initializer()),
'b_conv9':tf.get_variable('b_conv9',[512],initializer=tf.random_normal_initializer()),
'b_fc1':tf.get_variable('b_fc1',[1024],initializer=tf.random_normal_initializer()),
'b_fc2':tf.get_variable('b_fc2',[2048],initializer=tf.random_normal_initializer()),
'b_fc3':tf.get_variable('b_fc3',[4096],initializer=tf.random_normal_initializer()),
'out':tf.get_variable('b_out',[n_classes],initializer=tf.random_normal_initializer())}
x = tf.reshape(x,shape=[-1,7,len_puzzle,1])
conv1 = conv2d(x, weights['w_conv1'])
conv1 = maxpool2d(conv1)
conv2 = conv2d(conv1, weights['w_conv2'])
conv2 = maxpool2d(conv2)
conv3 = conv2d(conv2, weights['w_conv3'])
conv3 = maxpool2d(conv3)
conv4 = conv2d(conv3, weights['w_conv4'])
conv4 = maxpool2d(conv4)
conv5 = conv2d(conv4, weights['w_conv5'])
conv5 = maxpool2d(conv5)
conv6 = conv2d(conv5, weights['w_conv6'])
conv6 = maxpool2d(conv6)
conv7 = conv2d(conv6, weights['w_conv7'])
conv7 = maxpool2d(conv7)
conv8 = conv2d(conv7, weights['w_conv8'])
conv8 = maxpool2d(conv8)
conv9 = conv2d(conv8, weights['w_conv9'])
conv9 = maxpool2d(conv9)
fc1 = tf.reshape(conv9, [-1,512])
fc1 = tf.nn.sigmoid(tf.add(tf.matmul(fc1,weights['w_fc1']),biases['b_fc1']))
fc2 = tf.nn.sigmoid(tf.add(tf.matmul(fc1,weights['w_fc2']),biases['b_fc2']))
fc3 = tf.nn.sigmoid(tf.add(tf.matmul(fc2,weights['w_fc3']),biases['b_fc3']))
last = tf.nn.dropout(fc3,keep_prob)
output = tf.add(tf.matmul(last, weights['out']), biases['out'], name='op7')
return output
This is the code which runs the session:
def train(x):
tower_grads = []
opt = tf.train.AdamOptimizer(learning_rate)
for i in xrange(2):
with tf.device('/gpu:%d' % i):
with tf.variable_scope('NN',reuse=i>0):
prediction = convNeuralNet(x)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction,labels=y))
tf.summary.scalar('cross_entropy',cost)
grads = opt.compute_gradients(cost)
tower_grads.append(grads)
print grads
print len(grads)
#scope.reuse_variables()
grads = average_gradients(tower_grads)
apply_gradient_op = opt.apply_gradients(grads)
train_op = tf.group(apply_gradient_op)
correct = tf.equal(tf.argmax(prediction,1),tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct,'float'))
tf.summary.scalar('accuracy',accuracy)
num_epochs = ne
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)) as sess:
saver = tf.train.Saver()
# UNCOMMENT THIS WHEN RESTARTING FROM Checkpoint
#saver.restore(sess, tf.train.latest_checkpoint(os.getcwd()+'/models/base/.'))
sess.run(tf.global_variables_initializer())
merged_summary = tf.summary.merge_all()
for epoch in range(num_epochs):
epoch_loss = 0
for i in range(int(real_X_9.shape[0])/batch_size):#mnist.train.num_examples/batch_size)): # X.shape[0]
randidx = np.random.choice(real_X_9.shape[0], batch_size, replace=False)
epoch_x,epoch_y = real_X_9[randidx,:],real_y_9[randidx,:] #mnist.train.next_batch(batch_size) # X,y
j,c = sess.run([train_op,cost],feed_dict={x:epoch_x,y:epoch_y,keep_prob:TRAIN_KEEP_PROB})
if i == 0:
[ta] = sess.run([accuracy],feed_dict={x:epoch_x,y:epoch_y,keep_prob:TRAIN_KEEP_PROB})
print 'Train Accuracy', ta
epoch_loss += c
print '\n','Epoch', epoch + 1, 'completed out of', num_epochs, '\nLoss:',epoch_loss
#saver.save(sess, os.getcwd()+'/models/base/baseDNN7')
#saver.export_meta_graph(os.getcwd()+'/models/base/baseDNN7.meta')
print '\n','Train Accuracy', accuracy.eval(feed_dict={x:real_X_9, y:real_y_9, keep_prob:TRAIN_KEEP_PROB})
print '\n','Test Accuracy', accuracy.eval(feed_dict={x:test_real_X, y:test_real_y, keep_prob:1.0}) #X, y #mnist.test.images, mnist.test.labels
train(x)
This is the error:
Traceback (most recent call last):
File "CNN_gpu.py", line 393, in <module>
train(x)
File "CNN_gpu.py", line 311, in train
grads = average_gradients(tower_grads)
expanded_g = tf.expand_dims(g, 0)
File "/share/sw/free/tensorflow.1/1.1.0/lib/python2.7/site-packages/tensorflow/python/ops/array_ops.py", line 170, in expand_dims
return gen_array_ops._expand_dims(input, axis, name)
File "/share/sw/free/tensorflow.1/1.1.0/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 900, in _expand_dims
result = _op_def_lib.apply_op("ExpandDims", input=input, dim=dim, name=name)
File "/share/sw/free/tensorflow.1/1.1.0/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 509, in apply_op
(input_name, err))
ValueError: Tried to convert 'input' to a tensor and failed. Error: None values not supported.
I'm really confused. Parallelization across multiple GPUs should work regardless of the type of neural net being used.
Any help here would be appreciated.
I'm trying to write my own mnist example which could use all the two gpu of one machine.
It is a simple multi-layer perceptron.
Here is my code. You can run it directly.
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
import tensorflow as tf
learning_rate = 0.001
training_steps = 100000
batch_size = 100
display_step = 100
n_hidden_1 = 256
n_hidden_2 = 256
n_input = 784
n_classes = 10
def _variable_on_cpu(name, shape, initializer):
with tf.device('/cpu:0'):
dtype = tf.float32
var = tf.get_variable(name, shape, initializer=initializer, dtype=dtype)
return var
def build_model():
def multilayer_perceptron(x, weights, biases):
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
layer_1 = tf.nn.relu(layer_1)
layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
layer_2 = tf.nn.relu(layer_2)
out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
return out_layer
with tf.variable_scope('aaa'):
weights = {
'h1': _variable_on_cpu('h1',[n_input, n_hidden_1],tf.constant_initializer(0.0)),
'h2': _variable_on_cpu('h2',[n_hidden_1, n_hidden_2],tf.constant_initializer(0.0)),
'out': _variable_on_cpu('out_w',[n_hidden_2, n_classes],tf.constant_initializer(0.0))
}
biases = {
'b1': _variable_on_cpu('b1',[n_hidden_1],tf.constant_initializer(0.0)),
'b2': _variable_on_cpu('b2',[n_hidden_2],tf.constant_initializer(0.0)),
'out': _variable_on_cpu('out_b',[n_classes],tf.constant_initializer(0.0))
}
pred = multilayer_perceptron(x, weights, biases)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
return cost
def average_gradients(tower_grads):
average_grads = []
for grad_and_vars in zip(*tower_grads):
grads = []
for g,_ in grad_and_vars:
expanded_g = tf.expand_dims(g, 0)
grads.append(expanded_g)
grad = tf.concat(axis=0, values=grads)
grad = tf.reduce_mean(grad, 0)
v = grad_and_vars[0][1]
grad_and_var = (grad, v)
average_grads.append(grad_and_var)
return average_grads
with tf.Graph().as_default(), tf.device('/cpu:0'):
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_classes])
tower_grads = []
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
with tf.variable_scope(tf.get_variable_scope()):
for i in xrange(2):
with tf.device('/gpu:%d' % i):
cost = build_model()
tf.get_variable_scope().reuse_variables()
grads = optimizer.compute_gradients(cost)
tower_grads.append(grads)
grads = average_gradients(tower_grads)
apply_gradient_op = optimizer.apply_gradients(grads)
train_op = apply_gradient_op
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
for step in range(training_steps):
image_batch, label_batch = mnist.train.next_batch(batch_size)
_, cost_print = sess.run([train_op, cost],
{x:image_batch,
y:label_batch})
if step % display_step == 0:
print("step=%04d" % (step+1)+ " cost=" + str(cost_print))
print("Optimization Finished!")
sess.close()
The print info looks like:
step=0001 cost=2.30258
step=0101 cost=2.30246
step=0201 cost=2.30128
step=0301 cost=2.30376
step=0401 cost=2.29817
step=0501 cost=2.2992
step=0601 cost=2.3104
step=0701 cost=2.29995
step=0801 cost=2.29802
step=0901 cost=2.30524
step=1001 cost=2.29673
step=1101 cost=2.30016
step=1201 cost=2.31057
step=1301 cost=2.29815
step=1401 cost=2.29669
step=1501 cost=2.30345
step=1601 cost=2.29811
step=1701 cost=2.30867
step=1801 cost=2.30757
step=1901 cost=2.29716
step=2001 cost=2.30394
The loss doesn't decrease. I don't know how to fix it.
By the way, GPU-Util is about 26% and 26%. How to increase the GPU-Util?
The problem is that,
I should use tf.constant_initializer(0.1) for the weights instead of tf.constant_initializer(0)