Spyder freezes when running a session in TensorFlow - Python

I am trying to build a model that classifies images from the Belgian traffic sign dataset, but Spyder freezes when I try to run the optimizer and train operation in sess.run().
I have attached the code below.
graph = tf.Graph()
with graph.as_default():
    images_X = tf.compat.v1.placeholder(tf.float32, shape=[None, 32, 32, 3])
    labels_X = tf.compat.v1.placeholder(tf.int32, shape=[None])

    #biasInit = tf.constant_initializer(0.1, dtype=tf.float32)
    # Initializer
    biasInit = tf.initializers.GlorotUniform()

    # conv layer 1 - num_filters = 128, kernel size = [6,6]
    conv_1 = Conv2D(filters=128, kernel_size=[6, 6], bias_initializer=biasInit)(images_X)
    # batch normalization
    bn_1 = BatchNormalization(center=True, scale=True)(conv_1)
    # max pooling
    pool_1 = MaxPooling2D(pool_size=(2, 2))(bn_1)

    # conv layer 2
    conv_2 = Conv2D(filters=256, kernel_size=[6, 6], strides=(2, 2),
                    bias_initializer=biasInit)(pool_1)
    # batch normalization 2
    bn_2 = BatchNormalization(center=True, scale=True)(conv_2)
    pool_2 = MaxPooling2D(pool_size=(2, 2))(bn_2)

    # flatten
    images_flat = Flatten()(pool_2)

    # dense layer - units = 512
    fc_1 = Dense(units=512, activation='relu')(images_flat)
    bn_3 = BatchNormalization(center=True, scale=True)(fc_1)
    dropout = Dropout(0.25)(bn_3)

    # logits will be of the size [None, 62]
    logits = Dense(units=62, activation='relu')(dropout)

    # converting the logits - [None, 62] - to labels - [None]
    predicted_labels = tf.argmax(logits, axis=1)
    loss_op = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                                            labels=labels_X))
    update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        # Create an optimizer, which acts as the training op.
        train = tf.compat.v1.train.AdamOptimizer(learning_rate=0.10).minimize(loss_op)
    init_op = tf.compat.v1.global_variables_initializer()

print("images_flat: ", images_flat)
print("logits: ", logits)
print("loss: ", loss_op)
print("predicted_labels: ", predicted_labels)

# Create a session to run the graph we created.
session = tf.compat.v1.Session(graph=graph,
                               config=tf.compat.v1.ConfigProto(log_device_placement=True))
session.run(init_op)

for i in range(10):
    _, loss_value = session.run([train, loss_op],
                                feed_dict={images_X: images_array, labels_X: labels_array})
    print("Loss: ", loss_value)
When I run the program line by line, it runs fine until the last for loop; as soon as it reaches that loop, memory usage climbs to 99% and the whole PC freezes.
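For reference, a mini-batched version of the training loop keeps per-step memory bounded instead of feeding the entire dataset in one sess.run() call. A minimal sketch, assuming images_array and labels_array are in-memory NumPy arrays; batch_size is a hypothetical value:

import numpy as np

batch_size = 128  # hypothetical; tune to available memory
num_samples = images_array.shape[0]
for epoch in range(10):
    perm = np.random.permutation(num_samples)
    for start in range(0, num_samples, batch_size):
        idx = perm[start:start + batch_size]
        # Each sess.run() now only materializes activations for one batch.
        _, loss_value = session.run([train, loss_op],
                                    feed_dict={images_X: images_array[idx],
                                               labels_X: labels_array[idx]})
    print("Loss: ", loss_value)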

Related

How to visualize mean edit distance in Tensorboard using Keras callback?

So far, I have been experimenting with TensorFlow and Keras. I took the code from image_ocr.py, which allowed me to train a printed-text OCR model. I want to watch the training progress as it goes, and I have successfully visualized the accuracy and loss of the training model. However, from what I have heard, an OCR RNN should not be validated on accuracy; instead, the mean edit distance is used to validate the accuracy of the words. In this case, I have been trying to get the variables mean_ed and mean_norm_ed from the class VizCallback visualized in TensorBoard. I have tried the method from this link, but it still does not work. Can anyone help me with visualizing the mean edit distance variables? Here are the code snippets from my code:
class VizCallback(keras.callbacks.Callback):

    def __init__(self, run_name, test_func, text_img_gen, num_display_words=6):
        self.test_func = test_func
        self.output_dir = os.path.join(OUTPUT_DIR, run_name)
        self.text_img_gen = text_img_gen
        self.num_display_words = num_display_words
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)

    def on_train_begin(self, logs={}):
        self.med = []
        self.nmed = []

    def show_edit_distance(self, num, logs={}):
        num_left = num
        mean_norm_ed = 0.0
        mean_ed = 0.0
        while num_left > 0:
            word_batch = next(self.text_img_gen)[0]
            num_proc = min(word_batch['the_input'].shape[0], num_left)
            decoded_res = decode_batch(self.test_func, word_batch['the_input'][0:num_proc])
            for j in range(num_proc):
                edit_dist = editdistance.eval(decoded_res[j], word_batch['source_str'][j])
                mean_ed += float(edit_dist)
                mean_norm_ed += float(edit_dist) / len(word_batch['source_str'][j])
            num_left -= num_proc
        mean_norm_ed = mean_norm_ed / num
        mean_ed = mean_ed / num

        # Create scalar summaries for both mean edit distance and normalized mean edit distance
        tf_med_ph = tf.placeholder(tf.float32, shape=None, name='med_summary')
        tf_nmed_ph = tf.placeholder(tf.float32, shape=None, name='nmed_summary')
        tf_med = tf.summary.scalar('med', tf_med_ph)
        tf_nmed = tf.summary.scalar('nmed', tf_nmed_ph)
        performance_summaries = tf.summary.merge([tf_med, tf_nmed])

        # Create a session for displaying the summary
        config = tf.ConfigProto(allow_soft_placement=True)
        session = tf.InteractiveSession(config=config)
        summ_writer = tf.summary.FileWriter(os.path.join('summaries', 'first'), session.graph)

        # Execute the summaries defined above
        summ = session.run(performance_summaries,
                           feed_dict={tf_med_ph: mean_ed, tf_nmed_ph: mean_norm_ed})
        # Write the obtained summaries to the file, so it can be displayed in the TensorBoard
        summ_writer.add_summary(summ, epoch)
        session.close()

        print('\nOut of %d samples: Mean edit distance: %.3f Mean normalized edit distance: %0.3f'
              % (num, mean_ed, mean_norm_ed))

    def on_epoch_end(self, epoch, logs={}):
        self.model.save_weights(os.path.join(self.output_dir, 'weights%02d.h5' % (epoch)))
        self.show_edit_distance(256)
        word_batch = next(self.text_img_gen)[0]
        res = decode_batch(self.test_func, word_batch['the_input'][0:self.num_display_words])
        if word_batch['the_input'][0].shape[0] < 256:
            cols = 2
        else:
            cols = 1
        for i in range(self.num_display_words):
            plt.subplot(self.num_display_words // cols, cols, i + 1)
            if K.image_data_format() == 'channels_first':
                the_input = word_batch['the_input'][i, 0, :, :]
            else:
                the_input = word_batch['the_input'][i, :, :, 0]
            plt.imshow(the_input.T, cmap='Greys_r')
            plt.xlabel('Truth = \'%s\'\nDecoded = \'%s\'' % (word_batch['source_str'][i], res[i]))
        fig = plt.gcf()
        fig.set_size_inches(10, 13)
        plt.savefig(os.path.join(self.output_dir, 'e%02d.png' % (epoch)))
        plt.close()
def train(run_name, start_epoch, stop_epoch, img_w):
    # Input parameters
    img_h = 64
    words_per_epoch = 16000
    val_split = 0.2
    val_words = int(words_per_epoch * (val_split))

    # Network parameters
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 512
    minibatch_size = 32

    if K.image_data_format() == 'channels_first':
        input_shape = (1, img_w, img_h)
    else:
        input_shape = (img_w, img_h, 1)

    fdir = os.path.dirname(get_file('wordlists.tgz',
                                    origin='http://test.com/wordlist.tgz', untar=True))
    img_gen = TextImageGenerator(monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'),
                                 bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'),
                                 minibatch_size=minibatch_size,
                                 img_w=img_w,
                                 img_h=img_h,
                                 downsample_factor=(pool_size ** 2),
                                 val_split=words_per_epoch - val_words)
    act = 'relu'
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)

    conv_to_rnn_dims = (img_w // (pool_size ** 2), (img_h // (pool_size ** 2)) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs
    # GRU seems to work as well, if not better than LSTM:
    gru_1 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    inner = Dense(img_gen.get_output_size(), kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)
    Model(inputs=input_data, outputs=y_pred).summary()

    labels = Input(name='the_labels', shape=[img_gen.absolute_max_string_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # Keras doesn't currently support loss funcs with extra parameters,
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,),
                      name='ctc')([y_pred, labels, input_length, label_length])

    # clipnorm seems to speed up convergence
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
    model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out)

    # Make tensorboard instance
    init_op = tf.initialize_all_variables()
    sess = tf.Session()
    sess.run(init_op)
    tbname = "tensorboard-of-{}".format(int(time.time()))
    tensorboard = keras.callbacks.TensorBoard(
        log_dir="logs/{}".format(tbname),
        histogram_freq=0,
        write_images=True)

    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd,
                  metrics=['accuracy'])
    if start_epoch > 0:
        weight_file = os.path.join(OUTPUT_DIR, os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1)))
        model.load_weights(weight_file)

    # captures output of softmax so we can decode the output during visualization
    test_func = K.function([input_data], [y_pred])
    viz_cb = VizCallback(run_name, test_func, img_gen.next_val())
    model.fit_generator(generator=img_gen.next_train(),
                        steps_per_epoch=(words_per_epoch - val_words) // minibatch_size,
                        epochs=stop_epoch,
                        validation_data=img_gen.next_val(),
                        validation_steps=val_words // minibatch_size,
                        callbacks=[tensorboard, viz_cb, img_gen],
                        initial_epoch=start_epoch)
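For context on the metric itself: editdistance.eval, used in show_edit_distance above, computes the Levenshtein distance between the decoded string and the ground truth. A tiny sketch of the two values being tracked, with made-up strings:

import editdistance

truth, decoded = "kitten", "sitting"
ed = editdistance.eval(decoded, truth)   # 3 edits: two substitutions, one insertion
norm_ed = float(ed) / len(truth)         # normalized by the ground-truth length
print(ed, norm_ed)                       # 3 0.5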
Any help would be much appreciated. Thank you!
P.S. I am using TensorFlow 1.9.0 and Python 3.6.8.
UPDATE
Now it is just a matter of passing the variable performance_summaries from the VizCallback class to the metrics in the train function. Any help here?
You could modify show_edit_distance to add the summaries every time the function is called:
def show_edit_distance(self, num, epoch):
    ...
    summary = tf.Summary()
    summary.value.add(tag='mean_ed', simple_value=mean_ed)
    summ_writer.add_summary(summary, epoch)

    summary = tf.Summary()
    summary.value.add(tag='mean_norm_ed', simple_value=mean_norm_ed)
    summ_writer.add_summary(summary, epoch)
    ...
Note that you will need an extra argument epoch:
def on_epoch_end(self, epoch, logs={}):
    ...
    self.show_edit_distance(256, epoch)
    ...
The Tensorboard callback should automatically pick up these summaries, as they're being added to the GraphKeys.SUMMARIES collection.
NOTE: Unfortunately, I couldn't test the solution. Please let me know if there is something I am missing.
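For completeness, the proto-based pattern above can be exercised on its own, without the placeholder ops or the extra session from the original code. A minimal sketch for TF 1.x, with a hypothetical log directory and dummy values:

import tensorflow as tf

writer = tf.summary.FileWriter('logs/edit_distance')  # hypothetical logdir
for epoch, (med, nmed) in enumerate([(4.2, 0.31), (3.1, 0.24)]):  # dummy values
    summary = tf.Summary()
    summary.value.add(tag='mean_ed', simple_value=med)
    summary.value.add(tag='mean_norm_ed', simple_value=nmed)
    writer.add_summary(summary, epoch)
writer.flush()  # the scalars then appear in TensorBoard under these tags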

Shape incompatible in Keras

I have the following code. When I try to merge([user_latent, item_latent], mode = 'concat'), I get
Shape (2, ?, ?) and () are incompatible. From searching, it seems to be a TensorFlow API issue, but I am not sure how I can change it, since I am not using tf.concat().
def get_model(num_users, num_items, layers=[20, 10], reg_layers=[0, 0]):
    assert len(layers) == len(reg_layers)
    num_layer = len(layers)  # Number of layers in the MLP

    # Input variables
    user_input = Input(shape=(1,), dtype='int32', name='user_input')
    item_input = Input(shape=(1,), dtype='int32', name='item_input')

    MLP_Embedding_User = Embedding(input_dim=num_users, output_dim=layers[0]/2, name='user_embedding',
                                   init=init_normal, W_regularizer=l2(reg_layers[0]), input_length=1)
    MLP_Embedding_Item = Embedding(input_dim=num_items, output_dim=layers[0]/2, name='item_embedding',
                                   init=init_normal, W_regularizer=l2(reg_layers[0]), input_length=1)

    # Crucial to flatten an embedding vector!
    user_latent = Flatten()(MLP_Embedding_User(user_input))
    item_latent = Flatten()(MLP_Embedding_Item(item_input))

    # The 0-th layer is the concatenation of embedding layers
    vector = merge([user_latent, item_latent], mode='concat')

    # MLP layers
    for idx in xrange(1, num_layer):
        layer = Dense(layers[idx], W_regularizer=l2(reg_layers[idx]), activation='relu', name='layer%d' % idx)
        vector = layer(vector)

    # Final prediction layer
    prediction = Dense(1, activation='sigmoid', init='lecun_uniform', name='prediction')(vector)

    model = Model(input=[user_input, item_input], output=prediction)
    return model
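For what it's worth, the Keras 2 functional API removed merge(...) in favor of concatenate, which is one likely source of this incompatibility. A minimal sketch of the replacement (untested against this exact model):

from keras.layers import concatenate

# Keras 2 replacement for merge([user_latent, item_latent], mode='concat'):
vector = concatenate([user_latent, item_latent])

Note that the Keras 1 Embedding arguments init and W_regularizer were likewise renamed in Keras 2 (embeddings_initializer and embeddings_regularizer), so they would need the same treatment.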

TensorFlow tf.train.Saver() not working on tf.contrib.layers.fully_connected()

So I wrote this generalised TensorFlow code and want to save and restore models, but apparently the error is that there are no variables to save. I did everything as given in this official example. Ignore the __init__ method except for the last line, since it only takes the parameters to train the model with; there are also no syntax errors. The error it produces is given below the code.
class Neural_Network(object):

    def __init__(self, numberOfLayers, nodes, activations, learningRate,
                 optimiser='GradientDescent', regularizer=None,
                 dropout=0.5, initializer=tf.contrib.layers.xavier_initializer()):
        self.numberOfLayers = numberOfLayers
        self.nodes = nodes
        self.activations = activations
        self.learningRate = learningRate
        self.regularizer = regularizer
        self.dropout = dropout
        self.initializer = initializer
        if(optimiser == 'GradientDescent'):
            self.optimiser = tf.train.GradientDescentOptimizer(self.learningRate)
        elif(optimiser == 'AdamOptimiser'):
            self.optimiser = tf.train.AdamOptimizer(self.learningRate)
        self.saver = tf.train.Saver()

    def create_Neural_Net(self, numberOfFeatures):
        self.numberOfFeatures = numberOfFeatures
        self.X = tf.placeholder(dtype=tf.float32, shape=(None, self.numberOfFeatures), name='Input_Dataset')
        #self.output = None
        for i in range(0, self.numberOfLayers):
            if(i == 0):
                layer = tf.contrib.layers.fully_connected(self.X, self.nodes[i],
                                                          activation_fn=self.activations[i],
                                                          weights_initializer=self.initializer,
                                                          biases_initializer=self.initializer)
            elif(i == self.numberOfLayers-1):
                self.output = tf.contrib.layers.fully_connected(layer, self.nodes[i],
                                                                activation_fn=self.activations[i],
                                                                weights_initializer=self.initializer,
                                                                biases_initializer=self.initializer)
            else:
                layer = tf.contrib.layers.fully_connected(layer, self.nodes[i],
                                                          activation_fn=self.activations[i],
                                                          weights_initializer=self.initializer,
                                                          biases_initializer=self.initializer)

    def train_Neural_Net(self, dataset, labels, epochs):
        entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.output, labels=labels, name='cross_entropy')
        loss = tf.reduce_mean(entropy, name='loss')
        hypothesis = tf.nn.softmax(self.output)
        correct_preds = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(labels, 1))
        accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
        train_op = self.optimiser.minimize(loss)
        self.loss = []
        self.accuracy = []
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(0, epochs):
                _, l, acc = sess.run([train_op, loss, accuracy], feed_dict={self.X: dataset})
                print('Loss in epoch ' + str(i) + ' is: ' + str(l))
                self.loss.append(l)
                self.accuracy.append(acc)
            self.saver.save(sess, './try.ckpt')
        return self.loss, self.accuracy
And I ran this code as:
nn = Neural_Network(2, [20,3], [tf.nn.relu, tf.nn.relu], 0.001, optimiser = 'AdamOptimiser')
nn.create_Neural_Net(4)
nn.train_Neural_Net(dataset, labels, 1000)
The error it gives is:
ValueError: No variables to save
So what is wrong in this code? And how can I fix it?
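One known cause of ValueError: No variables to save is constructing tf.train.Saver() before any variables exist in the graph, which matches this code: the saver is built in __init__, but the fully connected layers (and their variables) are only created later in create_Neural_Net. A sketch of the reordering, assuming everything else stays the same and the saver line is removed from __init__:

def create_Neural_Net(self, numberOfFeatures):
    self.numberOfFeatures = numberOfFeatures
    self.X = tf.placeholder(dtype=tf.float32,
                            shape=(None, self.numberOfFeatures),
                            name='Input_Dataset')
    # ... build the fully connected layers exactly as before ...
    # Create the Saver only after the variables it should save exist:
    self.saver = tf.train.Saver()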

How do I correctly reshape my data in TensorFlow?

I have been trying to make the CNN from the TensorFlow CNN tutorial work with 3D data, but I keep getting this one dimension error. I apologize in advance, because this is my first time asking a question on Stack Overflow, so it might look a little bit messy. Here is the code:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

IMG_SIZE_PX = 50
SLICE_COUNT = 20
n_classes = 2
keep_rate = 0.8

tf.logging.set_verbosity(tf.logging.INFO)

def neural_network(features, labels, mode):
    input_layer = tf.reshape(features["x"], shape=[-1, IMG_SIZE_PX, IMG_SIZE_PX, SLICE_COUNT, 1])
    conv1 = tf.layers.conv3d(inputs=input_layer, filters=32, kernel_size=[1,2,2,2,1], strides=[1,2,2,2,1],
                             padding="same", activation=tf.nn.relu)
    pool1 = tf.layers.max_pooling3d(inputs=conv1, pool_size=[1,2,2,2,1], strides=[1,2,2,2,1], padding="same")
    conv2 = tf.layers.conv3d(inputs=pool1, filters=64, kernel_size=[1,2,2,2,1], strides=[1,2,2,2,1],
                             padding="same", activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling3d(inputs=conv2, pool_size=[1,2,2,2,1], strides=[1,2,2,2,1], padding="same")
    pool2_flat = tf.reshape(pool2, [-1, 50000])
    dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
    dropout = tf.layers.dropout(inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)
    logits = tf.layers.dense(inputs=dropout, units=2)

    predictions = {
        "classes": tf.argmax(input=logits, axis=1),
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=2)
    loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels, logits=logits)

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    eval_metric_ops = {"accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])}
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

# main
def main(unused_argv):
    training_data = train_data[0]
    training_labels = train_data[1]
    eval_data = train_data[0]
    eval_labels = train_data[1]

    lung_classifier = tf.estimator.Estimator(model_fn=neural_network,
                                             model_dir="SOME_DIR")

    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=50)

    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": np.array(training_data)},
        y=np.array(training_labels),
        batch_size=50,
        num_epochs=None,
        shuffle=True)
    lung_classifier.train(input_fn=train_input_fn, steps=20000, hooks=[logging_hook])

    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": eval_data},
        y=eval_labels,
        num_epochs=1,
        shuffle=False)
    eval_results = lung_classifier.evaluate(input_fn=eval_input_fn)
    print(eval_results)

if __name__ == "__main__":
    with tf.device("/gpu:0"):
        tf.app.run()
I get this Error Message:
ValueError: Dimension size must be evenly divisible by 50000 but is 50 for
'Reshape' (op: 'Reshape') with input shapes: [50], [5] and with input
tensors computed as partial shapes: input[1] = [?,50,50,20,1].
The error message is pretty long, so I am only showing where it originates:
input_layer = tf.reshape(features["x"],shape = [-1,IMG_SIZE_PX, IMG_SIZE_PX, SLICE_COUNT, 1])
It seems that your features["x"] has dimensions 50x5, and you are trying to reshape it to batch_size x 50 x 50 x 20 x 1, which is not possible.
Ensure that you are reading your data correctly by printing out its shape.
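As a quick check (a sketch using the names from the question):

x = np.array(training_data)
print(x.shape)
# The reshape to [-1, 50, 50, 20, 1] only works if each example carries
# 50 * 50 * 20 = 50000 values, e.g. an overall shape of (num_samples, 50, 50, 20)
# or (num_samples, 50000).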

TensorFlow CNN on multiple GPUs

I am trying to parallelize my code so that my TensorFlow model runs on multiple GPUs. For some reason, the code I wrote to parallelize the training works for a standard deep neural net, but throws errors when using a convolutional neural net.
Here is my code to compute the average gradients:
def average_gradients(tower_grads):
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Note that each grad_and_vars looks like the following:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        grads = []
        for g, _ in grad_and_vars:
            # Add 0 dimension to the gradients to represent the tower.
            expanded_g = tf.expand_dims(g, 0)
            # Append on a 'tower' dimension which we will average over below.
            grads.append(expanded_g)
        # Average over the 'tower' dimension.
        grad = tf.concat(axis=0, values=grads)
        grad = tf.reduce_mean(grad, 0)
        # Keep in mind that the Variables are redundant because they are shared
        # across towers. So .. we will just return the first tower's pointer to
        # the Variable.
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads
This is my deep neural net architecture: (this works)
def neuralNet(data):
    hl_1 = {'weights': tf.get_variable('Weights1', [TF_SHAPE, n_nodes_hl1], initializer=tf.random_normal_initializer()),
            'biases': tf.get_variable('Biases1', [n_nodes_hl1], initializer=tf.random_normal_initializer())}
    hl_2 = {'weights': tf.get_variable('Weights2', [n_nodes_hl1, n_nodes_hl2], initializer=tf.random_normal_initializer()),
            'biases': tf.get_variable('Biases2', [n_nodes_hl2], initializer=tf.random_normal_initializer())}
    hl_3 = {'weights': tf.get_variable('Weights3', [n_nodes_hl2, n_nodes_hl3], initializer=tf.random_normal_initializer()),
            'biases': tf.get_variable('Biases3', [n_nodes_hl3], initializer=tf.random_normal_initializer())}
    hl_4 = {'weights': tf.get_variable('Weights4', [n_nodes_hl3, n_nodes_hl4], initializer=tf.random_normal_initializer()),
            'biases': tf.get_variable('Biases4', [n_nodes_hl4], initializer=tf.random_normal_initializer())}
    hl_5 = {'weights': tf.get_variable('Weights5', [n_nodes_hl4, n_nodes_hl5], initializer=tf.random_normal_initializer()),
            'biases': tf.get_variable('Biases5', [n_nodes_hl5], initializer=tf.random_normal_initializer())}
    output_layer = {'weights': tf.get_variable('Weights-outputlayer', [n_nodes_hl5, n_classes], initializer=tf.random_normal_initializer()),
                    'biases': tf.get_variable('Biases-outputlayer', [n_classes], initializer=tf.random_normal_initializer())}

    l1 = tf.add(tf.matmul(data, hl_1['weights']), hl_1['biases'])
    l1 = tf.nn.sigmoid(l1, name='op1')
    l2 = tf.add(tf.matmul(l1, hl_2['weights']), hl_2['biases'])
    l2 = tf.nn.sigmoid(l2, name='op2')
    l3 = tf.add(tf.matmul(l2, hl_3['weights']), hl_3['biases'])
    l3 = tf.nn.sigmoid(l3, name='op3')
    l4 = tf.add(tf.matmul(l3, hl_4['weights']), hl_4['biases'])
    l4 = tf.nn.sigmoid(l4, name='op4')
    l5 = tf.add(tf.matmul(l4, hl_5['weights']), hl_5['biases'])
    l5 = tf.nn.sigmoid(l5, name='op5')
    dropout = tf.nn.dropout(l5, keep_prob, name='op6')
    ol = tf.add(tf.matmul(dropout, output_layer['weights']), output_layer['biases'], name='op7')
    return ol
This is my convnet: (this does not work)
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding='SAME')

def maxpool2d(x):
    return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding="SAME")

def convNeuralNet(x):
    weights = {'w_conv1': tf.get_variable('w_conv1', [7,7,1,2], initializer=tf.random_normal_initializer()),
               'w_conv2': tf.get_variable('w_conv2', [7,7,2,4], initializer=tf.random_normal_initializer()),
               'w_conv3': tf.get_variable('w_conv3', [7,7,4,8], initializer=tf.random_normal_initializer()),
               'w_conv4': tf.get_variable('w_conv4', [7,7,8,16], initializer=tf.random_normal_initializer()),
               'w_conv5': tf.get_variable('w_conv5', [7,7,16,32], initializer=tf.random_normal_initializer()),
               'w_conv6': tf.get_variable('w_conv6', [7,7,32,64], initializer=tf.random_normal_initializer()),
               'w_conv7': tf.get_variable('w_conv7', [7,7,64,128], initializer=tf.random_normal_initializer()),
               'w_conv8': tf.get_variable('w_conv8', [7,7,128,256], initializer=tf.random_normal_initializer()),
               'w_conv9': tf.get_variable('w_conv9', [7,7,256,512], initializer=tf.random_normal_initializer()),
               'w_fc1': tf.get_variable('w_fc1', [512,1024], initializer=tf.random_normal_initializer()),
               'w_fc2': tf.get_variable('w_fc2', [1024,2048], initializer=tf.random_normal_initializer()),
               'w_fc3': tf.get_variable('w_fc3', [2048,4096], initializer=tf.random_normal_initializer()),
               'out': tf.get_variable('w_out', [4096, n_classes], initializer=tf.random_normal_initializer())}
    biases = {'b_conv1': tf.get_variable('b_conv1', [2], initializer=tf.random_normal_initializer()),
              'b_conv2': tf.get_variable('b_conv2', [4], initializer=tf.random_normal_initializer()),
              'b_conv3': tf.get_variable('b_conv3', [8], initializer=tf.random_normal_initializer()),
              'b_conv4': tf.get_variable('b_conv4', [16], initializer=tf.random_normal_initializer()),
              'b_conv5': tf.get_variable('b_conv5', [32], initializer=tf.random_normal_initializer()),
              'b_conv6': tf.get_variable('b_conv6', [64], initializer=tf.random_normal_initializer()),
              'b_conv7': tf.get_variable('b_conv7', [128], initializer=tf.random_normal_initializer()),
              'b_conv8': tf.get_variable('b_conv8', [256], initializer=tf.random_normal_initializer()),
              'b_conv9': tf.get_variable('b_conv9', [512], initializer=tf.random_normal_initializer()),
              'b_fc1': tf.get_variable('b_fc1', [1024], initializer=tf.random_normal_initializer()),
              'b_fc2': tf.get_variable('b_fc2', [2048], initializer=tf.random_normal_initializer()),
              'b_fc3': tf.get_variable('b_fc3', [4096], initializer=tf.random_normal_initializer()),
              'out': tf.get_variable('b_out', [n_classes], initializer=tf.random_normal_initializer())}

    x = tf.reshape(x, shape=[-1, 7, len_puzzle, 1])

    conv1 = conv2d(x, weights['w_conv1'])
    conv1 = maxpool2d(conv1)
    conv2 = conv2d(conv1, weights['w_conv2'])
    conv2 = maxpool2d(conv2)
    conv3 = conv2d(conv2, weights['w_conv3'])
    conv3 = maxpool2d(conv3)
    conv4 = conv2d(conv3, weights['w_conv4'])
    conv4 = maxpool2d(conv4)
    conv5 = conv2d(conv4, weights['w_conv5'])
    conv5 = maxpool2d(conv5)
    conv6 = conv2d(conv5, weights['w_conv6'])
    conv6 = maxpool2d(conv6)
    conv7 = conv2d(conv6, weights['w_conv7'])
    conv7 = maxpool2d(conv7)
    conv8 = conv2d(conv7, weights['w_conv8'])
    conv8 = maxpool2d(conv8)
    conv9 = conv2d(conv8, weights['w_conv9'])
    conv9 = maxpool2d(conv9)

    fc1 = tf.reshape(conv9, [-1, 512])
    fc1 = tf.nn.sigmoid(tf.add(tf.matmul(fc1, weights['w_fc1']), biases['b_fc1']))
    fc2 = tf.nn.sigmoid(tf.add(tf.matmul(fc1, weights['w_fc2']), biases['b_fc2']))
    fc3 = tf.nn.sigmoid(tf.add(tf.matmul(fc2, weights['w_fc3']), biases['b_fc3']))
    last = tf.nn.dropout(fc3, keep_prob)
    output = tf.add(tf.matmul(last, weights['out']), biases['out'], name='op7')
    return output
This is the code which runs the session:
def train(x):
    tower_grads = []
    opt = tf.train.AdamOptimizer(learning_rate)
    for i in xrange(2):
        with tf.device('/gpu:%d' % i):
            with tf.variable_scope('NN', reuse=i > 0):
                prediction = convNeuralNet(x)
                cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
                tf.summary.scalar('cross_entropy', cost)
                grads = opt.compute_gradients(cost)
                tower_grads.append(grads)
                print grads
                print len(grads)
                #scope.reuse_variables()
    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads)
    train_op = tf.group(apply_gradient_op)

    correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
    tf.summary.scalar('accuracy', accuracy)

    num_epochs = ne
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)) as sess:
        saver = tf.train.Saver()
        # UNCOMMENT THIS WHEN RESTARTING FROM Checkpoint
        #saver.restore(sess, tf.train.latest_checkpoint(os.getcwd()+'/models/base/.'))
        sess.run(tf.global_variables_initializer())
        merged_summary = tf.summary.merge_all()
        for epoch in range(num_epochs):
            epoch_loss = 0
            for i in range(int(real_X_9.shape[0])/batch_size):  #mnist.train.num_examples/batch_size)) # X.shape[0]
                randidx = np.random.choice(real_X_9.shape[0], batch_size, replace=False)
                epoch_x, epoch_y = real_X_9[randidx,:], real_y_9[randidx,:]  #mnist.train.next_batch(batch_size) # X,y
                j, c = sess.run([train_op, cost], feed_dict={x: epoch_x, y: epoch_y, keep_prob: TRAIN_KEEP_PROB})
                if i == 0:
                    [ta] = sess.run([accuracy], feed_dict={x: epoch_x, y: epoch_y, keep_prob: TRAIN_KEEP_PROB})
                    print 'Train Accuracy', ta
                epoch_loss += c
            print '\n', 'Epoch', epoch + 1, 'completed out of', num_epochs, '\nLoss:', epoch_loss
            #saver.save(sess, os.getcwd()+'/models/base/baseDNN7')
            #saver.export_meta_graph(os.getcwd()+'/models/base/baseDNN7.meta')
        print '\n', 'Train Accuracy', accuracy.eval(feed_dict={x: real_X_9, y: real_y_9, keep_prob: TRAIN_KEEP_PROB})
        print '\n', 'Test Accuracy', accuracy.eval(feed_dict={x: test_real_X, y: test_real_y, keep_prob: 1.0})  #X, y #mnist.test.images, mnist.test.labels

train(x)
This is the error:
Traceback (most recent call last):
  File "CNN_gpu.py", line 393, in <module>
    train(x)
  File "CNN_gpu.py", line 311, in train
    grads = average_gradients(tower_grads)
    expanded_g = tf.expand_dims(g, 0)
  File "/share/sw/free/tensorflow.1/1.1.0/lib/python2.7/site-packages/tensorflow/python/ops/array_ops.py", line 170, in expand_dims
    return gen_array_ops._expand_dims(input, axis, name)
  File "/share/sw/free/tensorflow.1/1.1.0/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 900, in _expand_dims
    result = _op_def_lib.apply_op("ExpandDims", input=input, dim=dim, name=name)
  File "/share/sw/free/tensorflow.1/1.1.0/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 509, in apply_op
    (input_name, err))
ValueError: Tried to convert 'input' to a tensor and failed. Error: None values not supported.
I'm really confused. Parallelization across multiple GPUs should work regardless of the type of neural net being used.
Any help here would be appreciated.
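One plausible cause, hedged since the full script is not runnable here: opt.compute_gradients returns (None, var) pairs for any variable the loss does not depend on, and tf.expand_dims(g, 0) then fails with exactly this "None values not supported" error inside average_gradients. A defensive sketch that skips such pairs:

def average_gradients(tower_grads):
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Skip variables for which no tower produced a gradient.
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars if g is not None]
        if not grads:
            continue
        grad = tf.reduce_mean(tf.concat(axis=0, values=grads), 0)
        # Variables are shared across towers; keep the first tower's handle.
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads

If gradients come back as None in the first place, it is also worth checking that every variable created inside the 'NN' scope actually feeds into the cost on each tower.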
