I have implemented a CNN for detecting human activity using accelrometer data, my model is working really fine but when i visualize my grapgh on tensorboard, everythin seems to be diconnected. Right now i am not using Namescopes but even without it grpagh should make some sense right?
EDIT After implementing answer given by #user1735003 , this is the output. What i still don't understand is why i'm getting all the nodes at left
What i have implemented is: i have two convolution layer and two max-pooling layers and on top of that i have two hidden layers with 1024 and 512 neurons.
so Here is my code:
#Weights
def init_weights(shape):
init_random_dist = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(init_random_dist)
#Bias
def init_bias(shape):
init_bias = tf.constant(0.1,shape=shape)
return tf.Variable(init_bias)
def conv1d(x,weights):
#x is input accelration data and W is corresponding weight
return tf.nn.conv1d(value=x,filters = weights,stride=1,padding='VALID')
def convolution_layer(input_x,shape):
w1 = init_weights(shape)
b = init_bias([shape[2]])
return tf.nn.relu(conv1d(input_x,weights=w1)+b)
def normal_full_layer(input_layer,size):
input_size = int(input_layer.get_shape()[1])
W = init_weights([input_size, size])
b = init_bias([size])
return tf.matmul(input_layer, W) +b
x = tf.placeholder(tf.float32,shape=[None ,window_size,3]) #input tensor with 3 input channels
y = tf.placeholder(tf.float32,shape=[None,6]) #Labels
con_layer_1 = convolution_layer(x,shape=[4,3,32])#filter of shape [filter_width, in_channels, out_channels]
max_pool_1=tf.layers.max_pooling1d(inputs=con_layer_1,pool_size=2,strides=2,padding='Valid')
con_layer_2 = convolution_layer(max_pool_1,shape=[4,32,64])
max_pool_2 = tf.layers.max_pooling1d(inputs=con_layer_2,pool_size=2,strides=2,padding='Valid')
flat = tf.reshape(max_pool_2,[-1,max_pool_2.get_shape()[1]*max_pool_2.get_shape()[2]])
fully_conected = tf.nn.relu(normal_full_layer(flat,1024))
second_hidden_layer = tf.nn.relu(normal_full_layer(fully_conected,512))
hold_prob = tf.placeholder(tf.float32)
full_one_dropout = tf.nn.dropout(second_hidden_layer,keep_prob=hold_prob)
y_pred = normal_full_layer(full_one_dropout,6)
pred_softmax = tf.nn.softmax(y_pred)
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_pred))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
train = optimizer.minimize(cross_entropy)
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
filename="./summary_log11/run"
summary_writer = tf.summary.FileWriter(filename, graph_def=sess.graph_def)
for i in range(5000):
batch_x,batch_y = next_batch(100,X_train,y_train)
sess.run(train, feed_dict={x: batch_x, y: batch_y, hold_prob: 0.5})
# PRINT OUT A MESSAGE EVERY 100 STEPS
if i%100 == 0:
print('Currently on step {}'.format(i))
print('Accuracy is:')
# Test the Train Model
matches = tf.equal(tf.argmax(y_pred,1),tf.argmax(y,1))
acc = tf.reduce_mean(tf.cast(matches,tf.float32))
print(sess.run(acc,feed_dict={x:X_test,y:y_test,hold_prob:1.0}))
print('\n')
Try organizing your nodes into scopes. That will help Tensorboard to figure out your graph hierarchy. For example,
with tf.variable_scope('input'):
x = tf.placeholder(tf.float32,shape=[None ,window_size,3]) #input tensor with 3 input channels
y = tf.placeholder(tf.float32,shape=[None,6]) #Labels
with tf.variable_scope('net'):
con_layer_1 = convolution_layer(x,shape=[4,3,32])#filter of shape [filter_width, in_channels, out_channels]
max_pool_1=tf.layers.max_pooling1d(inputs=con_layer_1,pool_size=2,strides=2,padding='Valid')
con_layer_2 = convolution_layer(max_pool_1,shape=[4,32,64])
max_pool_2 = tf.layers.max_pooling1d(inputs=con_layer_2,pool_size=2,strides=2,padding='Valid')
flat = tf.reshape(max_pool_2,[-1,max_pool_2.get_shape()[1]*max_pool_2.get_shape()[2]])
fully_conected = tf.nn.relu(normal_full_layer(flat,1024))
second_hidden_layer = tf.nn.relu(normal_full_layer(fully_conected,512))
hold_prob = tf.placeholder(tf.float32)
full_one_dropout = tf.nn.dropout(second_hidden_layer,keep_prob=hold_prob)
y_pred = normal_full_layer(full_one_dropout,6)
pred_softmax = tf.nn.softmax(y_pred)
with tf.variable_scope('loss'):
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_pred))
with tf.variable_scope('optimizer'):
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
train = optimizer.minimize(cross_entropy)
Since you didn't explicitly name your tf operations it was done automatically by tensorflow, e.g. ReLu operators were named ReLu_1, ReLu_2, ... . According to tensorboard documentation:
One last structural simplification is series collapsing. Sequential motifs--that is, nodes whose names differ by a number at the end and have isomorphic structures--are collapsed into a single stack of nodes, as shown below. For networks with long sequences, this greatly simplifies the view.
As you can see at the right side of your graph, all add_[0-7], MatMul_[0-5] and Relu_[0-5] nodes were grouped together because they have similar names, this doesn't mean that nodes are disconnected in your graph, it's just the tensorboard's node grouping policy.
If you want to avoid this then give your operations the names that are more different than just by a number at the end. Or use tf.name_scope() as you mentioned, e.g.:
with tf.name_scope("conv1"):
con_layer_1 = convolution_layer(x,shape=[4,3,32])
max_pool_1=tf.layers.max_pooling1d(inputs=con_layer_1,pool_size=2,strides=2,padding='Valid')
with tf.name_scope("conv2"):
con_layer_2 = convolution_layer(max_pool_1,shape=[4,32,64])
max_pool_2 = tf.layers.max_pooling1d(inputs=con_layer_2,pool_size=2,strides=2,padding='Valid')
# etc.
Related
I am new to tensorflow and more advanced machine learning, so I tried to get a better grasp of RNNs by implementing one by hand instead of using tf.contrib.rnn.RNNCell. My first problem was that I needed to unroll the net for backpropogation so I looped through my sequence and I needed to keep consistent weights and biases, so I couldn't reinitialize a dense layer with tf.layers.dense each time, but I also needed to have my layer connected to the current timestep of my sequence and I couldn't find a way to change what a dense layer was connected to. To work around this I tried to implement my own version of tf.layers.dense, and this worked fine until I got the error: NotImplementedError("Trying to update a Tensor " ...) when I tried to optimize my custom dense layers.
My code:
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
import random
# -----------------
# WORD PARAMETERS
# -----------------
target_string = ['Hello ','Hello ','World ','World ', '!']
number_input_words = 1
# --------------------------
# TRAINING HYPERPARAMETERS
# --------------------------
training_steps = 4000
batch_size = 9
learning_rate = 0.01
display_step = 150
hidden_cells = 20
# ----------------------
# PREPARE DATA AS DICT
# ----------------------
# TODO AUTOMATICALLY CREATE DICT
dictionary = {'Hello ': 0, 'World ': 1, '!': 2}
reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
vocab_size = len(dictionary)
# ------------
# LSTM MODEL
# ------------
class LSTM:
def __init__(self, sequence_length, number_input_words, hidden_cells, mem_size_x, mem_size_y, learning_rate):
self.sequence = tf.placeholder(tf.float32, (sequence_length, vocab_size), 'sequence')
self.memory = tf.zeros([mem_size_x, mem_size_y])
# sequence_length = self.sequence.shape[0]
units = [vocab_size, 5,4,2,6, vocab_size]
weights = [tf.random_uniform((units[i-1], units[i])) for i in range(len(units))[1:]]
biases = [tf.random_uniform((1, units[i])) for i in range(len(units))[1:]]
self.total_loss = 0
self.outputs = []
for word in range(sequence_length-1):
sequence_w = tf.reshape(self.sequence[word], [1, vocab_size])
layers = []
for i in range(len(weights)):
if i == 0:
layers.append(tf.matmul(sequence_w, weights[0]) + biases[0])
else:
layers.append(tf.matmul(layers[i-1], weights[i]) + biases[i])
percentages = tf.nn.softmax(logits=layers[-1])
self.outputs.append(percentages)
self.total_loss += tf.losses.absolute_difference(tf.reshape(self.sequence[word+1], (1, vocab_size)), tf.reshape(percentages, (1, vocab_size)))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
self.train_operation = optimizer.minimize(loss=self.total_loss, var_list=weights+biases, global_step=tf.train.get_global_step())
lstm = LSTM(len(target_string), number_input_words, hidden_cells, 10, 5, learning_rate)
# ---------------
# START SESSION
# ---------------
with tf.Session() as sess:
sess.run(tf.local_variables_initializer())
sess.run(tf.global_variables_initializer())
sequence = []
for i in range(len(target_string)):
x = [0]*vocab_size
x[dictionary[target_string[i]]] = 1
sequence.append(x)
print(sequence)
for x in range(1000):
sess.run(lstm.train_operation, feed_dict={lstm.sequence: sequence})
prediction, loss = sess.run((lstm.outputs, lstm.total_loss), feed_dict= {lstm.sequence: sequence})
print(prediction)
print(loss)
Any answers that tell me either how to either connect tf.layers.dense to different variables each time or tell me how to get around my NotImplementedError would be greatly appreciated. I apologize if this question is lengthy or just badly worded, i'm still new to stackoverflow.
EDIT:
I've updated the LSTM class part of my code to:
(Inside def init)
self.sequence = [tf.placeholder(tf.float32, (batch_size, vocab_size), 'sequence') for _ in range(sequence_length-1)]
self.total_loss = 0
self.outputs = []
rnn_cell = rnn.BasicLSTMCell(hidden_cells)
h = tf.zeros((batch_size, hidden_cells))
for i in range(sequence_length-1):
current_sequence = self.sequence[i]
h = rnn_cell(current_sequence, h)
self.outputs.append(h)
But I still get an error on the line: h = rnn_cell(current_sequence, h) about not being able to iterate over tensors. I'm not trying to iterate over any tensors, and if I am I don't mean to.
So there's a standard way of approaching this issue (this is the best approach I know from my knowledge) Instead of trying to create a new list of dense layers. Do the following. Before that lets assume your hidden layer size is h_dim and number of steps to unroll is num_unroll and batch size batch_size
In a for loop, you calculate the output of the RNNCell for each unrolled input
h = tf.zeros(...)
outputs= []
for ui in range(num_unroll):
out, state = rnn_cell(x[ui],state)
outputs.append(out)
Now concat all the outputs to a single tensor of size, [batch_size*num_unroll, h_dim]
Send this through a single dense layer of size [h_dim, num_classes]
logits = tf.matmul(tf.concat(outputs,...), w) + b
predictions = tf.nn.softmax(logits)
You have the logits for all the unrolled inputs now. Now it's just a matter of reshaping the tensor to a [batch_size, num_unroll, num_classes] tensor.
Edited (Feeding in Data): The data will be presented in the form of a list of num_unroll many placeholders. So,
x = [tf.placeholder(shape=[batch_size,3]...) for ui in range(num_unroll)]
Now say you have data like below,
Hello world bye
Bye hello world
Here batch size is 2, sequence length is 3. Once converted to one hot encoding, you're data looks like below (shape [time_steps, batch_size, 3].
data = [ [ [1,0,0], [0,0,1] ], [ [0,1,0], [1,0,0] ], [ [0,0,1], [0,1,0] ] ]
Now feed data in, in the following format.
feed_dict = {}
for ui in range(3):
feed_dict[x[ui]] = data[ui]
I am trying to create a simple 3D U-net for image segmentation, just to learn how to use the layers. Therefore I do a 3D convolution with stride 2 and then a transpose deconvolution to get back the same image size. I am also overfitting to a small set (test set) just to see if my network is learning.
I created the same net in Keras and it works just fine. Now I want to create in tensorflow but I been having trouble with it.
The cost changes slightly but no matter what I do (reduce learning rate, add more epochs, add more layers, change batch size...) the output is always the same. I believe the net is not updating the weights. I am sure I am doing something wrong but I can find what it is. Any help would be greatly appreciate it.
Here is my code:
def forward_propagation(X):
if ( mode == 'train'): print(" --------- Net --------- ")
# Convolutional Layer 1
with tf.variable_scope('CONV1'):
Z1 = tf.layers.conv3d(X, filters = 16, kernel =[3,3,3], strides = [ 2, 2, 2], padding='SAME', name = 'S2/conv3d')
A1 = tf.nn.relu(Z1, name = 'S2/ReLU')
if ( mode == 'train'): print("Convolutional Layer 1 S2 " + str(A1.get_shape()))
# DEConvolutional Layer 1
with tf.variable_scope('DeCONV1'):
output_deconv1 = tf.stack([X.get_shape()[0] , X.get_shape()[1], X.get_shape()[2], X.get_shape()[3], 1])
dZ1 = tf.nn.conv3d_transpose(A1, filters = 1, kernel =[3,3,3], strides = [2, 2, 2], padding='SAME', name = 'S2/conv3d_transpose')
dA1 = tf.nn.relu(dZ1, name = 'S2/ReLU')
if ( mode == 'train'): print("Deconvolutional Layer 1 S1 " + str(dA1.get_shape()))
return dA1
def compute_cost(output, target, method = 'dice_hard_coe'):
with tf.variable_scope('COST'):
if (method == 'sigmoid_cross_entropy') :
# Make them vectors
output = tf.reshape( output, [-1, output.get_shape().as_list()[0]] )
target = tf.reshape( target, [-1, target.get_shape().as_list()[0]] )
loss = tf.nn.sigmoid_cross_entropy_with_logits(logits = output, labels = target)
cost = tf.reduce_mean(loss)
return cost
and the main function for the model:
def model(X_h5, Y_h5, learning_rate = 0.009,
num_epochs = 100, minibatch_size = 64, print_cost = True):
ops.reset_default_graph() # to be able to rerun the model without overwriting tf variables
#tf.set_random_seed(1) # to keep results consistent (tensorflow seed)
#seed = 3 # to keep results consistent (numpy seed)
(m, n_D, n_H, n_W, num_channels) = X_h5["test_data"].shape #TTT
num_labels = Y_h5["test_mask"].shape[4] #TTT
img_size = Y_h5["test_mask"].shape[1] #TTT
costs = [] # To keep track of the cost
accuracies = [] # To keep track of the accuracy
# Create Placeholders of the correct shape
X, Y = create_placeholders(n_H, n_W, n_D, minibatch_size)
# Forward propagation: Build the forward propagation in the tensorflow graph
nn_output = forward_propagation(X)
prediction = tf.nn.sigmoid(nn_output)
# Cost function: Add cost function to tensorflow graph
cost_method = 'sigmoid_cross_entropy'
cost = compute_cost(nn_output, Y, cost_method)
# Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer that minimizes the cost.
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)
# Initialize all the variables globally
init = tf.global_variables_initializer()
# Start the session to compute the tensorflow graph
with tf.Session() as sess:
print('------ Training ------')
# Run the initialization
tf.local_variables_initializer().run(session=sess)
sess.run(init)
# Do the training loop
for i in range(num_epochs*m):
# ----- TRAIN -------
current_epoch = i//m
patient_start = i-(current_epoch * m)
patient_end = patient_start + minibatch_size
current_X_train = np.zeros((minibatch_size, n_D, n_H, n_W,num_channels))
current_X_train[:,:,:,:,:] = np.array(X_h5["test_data"][patient_start:patient_end,:,:,:,:]) #TTT
current_X_train = np.nan_to_num(current_X_train) # make nan zero
current_Y_train = np.zeros((minibatch_size, n_D, n_H, n_W, num_labels))
current_Y_train[:,:,:,:,:] = np.array(Y_h5["test_mask"][patient_start:patient_end,:,:,:,:]) #TTT
current_Y_train = np.nan_to_num(current_Y_train) # make nan zero
feed_dict = {X: current_X_train, Y: current_Y_train}
_ , temp_cost = sess.run([optimizer, cost], feed_dict=feed_dict)
# ----- TEST -------
# Print the cost every 1/5 epoch
if ((i % (num_epochs*m/5) )== 0):
# Calculate the predictions
test_predictions = np.zeros(Y_h5["test_mask"].shape)
for j in range(0, X_h5["test_data"].shape[0], minibatch_size):
patient_start = j
patient_end = patient_start + minibatch_size
current_X_test = np.zeros((minibatch_size, n_D, n_H, n_W, num_channels))
current_X_test[:,:,:,:,:] = np.array(X_h5["test_data"][patient_start:patient_end,:,:,:,:])
current_X_test = np.nan_to_num(current_X_test) # make nan zero
current_Y_test = np.zeros((minibatch_size, n_D, n_H, n_W, num_labels))
current_Y_test[:,:,:,:,:] = np.array(Y_h5["test_mask"][patient_start:patient_end,:,:,:,:])
current_Y_test = np.nan_to_num(current_Y_test) # make nan zero
feed_dict = {X: current_X_test, Y: current_Y_test}
_, current_prediction = sess.run([cost, prediction], feed_dict=feed_dict)
test_predictions[j:j + minibatch_size,:,:,:,:] = current_prediction
costs.append(temp_cost)
print ("[" + str(current_epoch) + "|" + str(num_epochs) + "] " + "Cost : " + str(costs[-1]))
display_progress(X_h5["test_data"], Y_h5["test_mask"], test_predictions, 5, n_H, n_W)
# plot the cost
plt.plot(np.squeeze(costs))
plt.ylabel('cost')
plt.xlabel('epochs')
plt.show()
return
I call the model with:
model(hdf5_data_file, hdf5_mask_file, num_epochs = 500, minibatch_size = 1, learning_rate = 1e-3)
These are the results that I am currently getting:
Edit:
I have tried reducing the learning rate and it doesn't help. I also tried using tensorboard debug and the weights are not being updated:
I am not sure why this is happening.
I Created the same simple model in keras and it works fine. I am not sure what I am doing wrong in tensorflow.
Not sure if you are still looking for help, as I am answering this question half a year later your posted date. :) I've listed my observations and also some suggestions for you to try below. It my primary observation is right... then you probably just need a coffee break / a night of good sleep.
primary observation:
tf.reshape( output, [-1, output.get_shape().as_list()[0]] ) seems wrong. If you prefer to flatten the vector, it should be something like tf.reshape(output,[-1,np.prod(image_shape_list)]).
other observations:
With such a shallow network, I doubt the network have enough spatial resolution to differentiate tumor voxels from non-tumor voxels. Can you show the keras implementation and the performance compared to a pure tf implementation? I would probably go with 2+ layers, let's .
say with 3 layers, with a stride of 2 per layer, and an input image width of 256, you will end with a width of 32 at your deepest encoder layer. (If you have a limited GPU memory, downsample the input image.)
if changing the loss computation does not work, as #bremen_matt mentioned, reduce LR to say maybe 1e-5.
after the basic architecture tweaks and you "feel" that the network is sort of learning and not stuck, try augmenting the training data, add dropout, batch norm during training, and then maybe fancy up your loss by adding a discriminator.
Getting to know Tensorflow, I built a toy network for classification. It consists of 15 input nodes for features identical to the one-hot encoding of the corresponding class label (with indexing beginning at 1) - so the data to be loaded from an input CSV may look like this:
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2
...
0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,15
The network has only one hidden layer and an output layer, the latter containing probabilities for a given class. Here's my problem: during training the network assings a growing probability for whatever was fed in as the very first input.
Here are the relevant lines of code (some lines are omitted):
# number_of_p : number of samples
# number_of_a : number of attributes (features) -> 15
# number_of_s : number of styles (labels) -> 15
# function for generating hidden layers
# nodes is a list of nodes in each layer (len(nodes) = number of hidden layers)
def hidden_generation(nodes):
hidden_nodes = [number_of_a] + nodes + [number_of_s]
number_of_layers = len(hidden_nodes) - 1
print(hidden_nodes)
hidden_layer = list()
for i in range (0,number_of_layers):
hidden_layer.append(tf.zeros([hidden_nodes[i],batch_size]))
hidden_weights = list()
for i in range (0,number_of_layers):
hidden_weights.append(tf.Variable(tf.random_normal([hidden_nodes[i+1], hidden_nodes[i]])))
hidden_biases = list()
for i in range (0,number_of_layers):
hidden_biases.append(tf.Variable(tf.zeros([hidden_nodes[i+1],batch_size])))
return hidden_layer, hidden_weights, hidden_biases
#loss function
def loss(labels, logits):
cross_entropy = tf.losses.softmax_cross_entropy(
onehot_labels = labels, logits = logits)
return tf.reduce_mean(cross_entropy, name = 'xentropy_mean')
hidden_layer, hidden_weights, hidden_biases = hidden_generation(hidden_layers)
with tf.Session() as training_sess:
training_sess.run(tf.global_variables_initializer())
training_sess.run(a_iterator.initializer, feed_dict = {a_placeholder_feed: training_set.data})
current_a = training_sess.run(next_a)
training_sess.run(s_iterator.initializer, feed_dict = {s_placeholder_feed: training_set.target})
current_s = training_sess.run(next_s)
s_one_hot = training_sess.run(tf.one_hot((current_s - 1), number_of_s))
for i in range (1,len(hidden_layers)+1):
hidden_layer[i] = tf.tanh(tf.matmul(hidden_weights[i-1], (hidden_layer[i-1])) + hidden_biases[i-1])
output = tf.nn.softmax(tf.transpose(tf.matmul(hidden_weights[-1],hidden_layer[-1]) + hidden_biases[-1]))
optimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.1)
# using the AdamOptimizer does not help, nor does choosing a much bigger and smaller learning rate
train = optimizer.minimize(loss(s_one_hot, output))
training_sess.run(train)
for i in range (0, (number_of_p)):
current_a = training_sess.run(next_a)
current_s = training_sess.run(next_s)
s_one_hot = training_sess.run(tf.transpose(tf.one_hot((current_s - 1), number_of_s)))
# (no idea why I have to declare those twice for the datastream to move)
training_sess.run(train)
I assume the loss function is being declared at the wrong place and always references the same vectors. However, replacing the loss function did not help me by now.
I will gladly provide the rest of the code if anyone is kind enough to help me.
EDIT: I've already discovered and fixed one major (and dumb) mistake: weights go before values node values in tf.matmul.
You do not want to be declaring the training op over and over again. That is unnecessary and like you pointed out is slower. You are not feeding your current_a into the neural net. So you are not going to be getting new outputs, also how you are using iterators isn't correct which could also be the cause of the problem.
with tf.Session() as training_sess:
training_sess.run(tf.global_variables_initializer())
training_sess.run(a_iterator.initializer, feed_dict = {a_placeholder_feed: training_set.data})
current_a = training_sess.run(next_a)
training_sess.run(s_iterator.initializer, feed_dict = {s_placeholder_feed: training_set.target})
current_s = training_sess.run(next_s)
s_one_hot = training_sess.run(tf.one_hot((current_s - 1), number_of_s))
for i in range (1,len(hidden_layers)+1):
hidden_layer[i] = tf.tanh(tf.matmul(hidden_weights[i-1], (hidden_layer[i-1])) + hidden_biases[i-1])
output = tf.nn.softmax(tf.transpose(tf.matmul(hidden_weights[-1],hidden_layer[-1]) + hidden_biases[-1]))
optimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.1)
# using the AdamOptimizer does not help, nor does choosing a much bigger and smaller learning rate
train = optimizer.minimize(loss(s_one_hot, output))
training_sess.run(train)
for i in range (0, (number_of_p)):
current_a = training_sess.run(next_a)
current_s = training_sess.run(next_s)
s_one_hot = training_sess.run(tf.transpose(tf.one_hot((current_s - 1), number_of_s)))
# (no idea why I have to declare those twice for the datastream to move)
training_sess.run(train)
Here is some pseudocode to help you get the correct data flow. I would do the one hot encoding prior to this just to make things easier for loading the data during training.
train_dataset = tf.data.Dataset.from_tensor_slices((inputs, targets))
train_dataset = train_dataset.batch(batch_size)
train_dataset = train_dataset.repeat(num_epochs)
iterator = train_dataset.make_one_shot_iterator()
next_inputs, next_targets = iterator.get_next()
# Define Training procedure
global_step = tf.Variable(0, name="global_step", trainable=False)
loss = Neural_net_function(next_inputs, next_targets)
optimizer = tf.train.AdamOptimizer(learning_rate)
grads_and_vars = optimizer.compute_gradients(loss)
train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
with tf.Session() as training_sess:
for i in range(number_of_training_samples * num_epochs):
taining_sess.run(train_op)
Solved it! Backpropagation works properly when the training procedure is redeclared for every new dataset.
for i in range (0, (number_of_p)):
current_a = training_sess.run(next_a)
current_s = training_sess.run(next_s)
s_one_hot = training_sess.run(tf.transpose(tf.one_hot((current_s - 1), number_of_s)))
optimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.1)
train = optimizer.minimize(loss(s_one_hot, output))
training_sess.run(train)
...makes training considerably slower, but it works.
This is the main part of my code.
I'm confused on function shuffle_batch and feed_dict.
In my code below, the features and labels I put into the function are "list".(I also tried "array" before.But it seems doesn't matter.)
What I want to do is make my testing data(6144,26) and training data(1024,13) into batch:(100,26) and (100,13),then set them as the feed_dict for the placeholders.
My questions are:
1.The outputs of the function tf.train.batch_shuffle are Tensors.But I can not put tensors in the feed_dict,right?
2.When I compiled the last two rows,error says,got shape [6144, 26], but wanted [6144] .I know it may be a dimension error,but how can I fix it.
Thanks a lot.
import tensorflow as tf
import scipy.io as sio
#import signal matfile
#[('label', (8192, 13), 'double'), ('clipped_DMT', (8192, 26), 'double')]
file = sio.loadmat('DMTsignal.mat')
#get array(clipped_DMT)
data_cDMT = file['clipped_DMT']
#get array(label)
data_label = file['label']
with tf.variable_scope('split_cDMT'):
cDMT_test_list = []
cDMT_training_list = []
for i in range(0,8192):
if i % 4 == 0:
cDMT_test_list.append(data_cDMT[i])
else:
cDMT_training_list.append(data_cDMT[i])
with tf.variable_scope('split_label'):
label_test_list = []
label_training_list = []
for i in range(0,8192):
if i % 4 == 0:
label_test_list.append(data_label[i])
else:
label_training_list.append(data_label[i])
#set parameters
n_features = cDMT_training.shape[1]
n_labels = label_training.shape[1]
learning_rate = 0.8
hidden_1 = 256
hidden_2 = 128
training_steps = 1000
BATCH_SIZE = 100
#set Graph input
with tf.variable_scope('cDMT_Inputs'):
X = tf.placeholder(tf.float32,[None, n_features],name = 'Input_Data')
with tf.variable_scope('labels_Inputs'):
Y = tf.placeholder(tf.float32,[None, n_labels],name = 'Label_Data')
#set variables
#Initialize both W and b as tensors full of zeros
with tf.variable_scope('layerWeights'):
h1 = tf.Variable(tf.random_normal([n_features,hidden_1]))
h2 = tf.Variable(tf.random_normal([hidden_1,hidden_2]))
w_out = tf.Variable(tf.random_normal([hidden_2,n_labels]))
with tf.variable_scope('layerBias'):
b1 = tf.Variable(tf.random_normal([hidden_1]))
b2 = tf.Variable(tf.random_normal([hidden_2]))
b_out = tf.Variable(tf.random_normal([n_labels]))
#create model
def neural_net(x):
layer_1 = tf.add(tf.matmul(x,h1),b1)
layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1,h2),b2))
out_layer = tf.add(tf.matmul(layer_2,w_out),b_out)
return out_layer
nn_out = neural_net(X)
#loss and optimizer
with tf.variable_scope('Loss'):
loss = tf.reduce_mean(tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits = nn_out,labels = Y)))
with tf.name_scope('Train'):
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
with tf.name_scope('Accuracy'):
correct_prediction = tf.equal(tf.argmax(nn_out,1),tf.argmax(Y,1))
#correct_prediction = tf.metrics.accuracy (labels = Y, predictions =nn_out)
acc = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
# Initialize
init = tf.global_variables_initializer()
# start computing & training
with tf.Session() as sess:
sess.run(init)
for step in range(training_steps):
#set batch
cmt_train_bat,label_train_bat = sess.run(tf.train.shuffle_batch([cDMT_training_list,label_training_list],batch_size = BATCH_SIZE,capacity=50000,min_after_dequeue=10000))
cmt_test_bat,label_test_bat = sess.run(tf.train.shuffle_batch([cDMT_test_list,label_test_list],batch_size = BATCH_SIZE,capacity=50000,min_after_dequeue=10000))
From the Session.run doc:
The optional feed_dict argument allows the caller to override the
value of tensors in the graph. Each key in feed_dict can be one of the
following types:
If the key is a tf.Tensor, the value may be a Python scalar, string,
list, or numpy ndarray that can be converted to the same dtype as that
tensor. Additionally, if the key is a tf.placeholder, the shape of the
value will be checked for compatibility with the placeholder.
...
So you are right: for X and Y (which are placeholders) you can't feed a tensor and tf.train.shuffle_batch is not designed to work with placeholders.
You can follow one of two ways:
get rid of placeholders and use tf.TFRecordReader in combination with tf.train.shuffle_batch, as suggested here. This way you'll have only tensors in your model and you won't need to "feed" anything additionally.
batch and shuffle the data yourself in numpy and feed into placeholders. This takes just several lines of code, so I find it easier, though both paths are valid.
Take also into account performance considerations.
I am using a recurrent neural network in tensorflow with BasicLSTMCells. Basically, I have an input sequence of word ids, I convert each id to word embeddings, pass the word embeddings one at a time through the rnn, and then make a prediction for a single word after reading the whole sequence. My embedding matrix is of dimension V x H where V is the size of my vocabulary and H is the number of hidden units in my rnn. In order to make a prediction for the next word, I multiply my hidden vector by a weight matrix of size H x V and then compute a softmax. With the setup I described, everything seems to work as expected. I'm able to train on some examples and make reasonable predictions.
However, I've noticed that if I try to use the transpose of the embedding matrix, which will be a matrix of size H x V, instead of a separate matrix for the softmax layer, tensorflow raises a value error claiming that the dimensions of something it's not specifying don't have the same rank. I've verified that the dimensions of my embedding matrix (well, its transpose) are the same as those of the separate softmax matrix I'm creating. Changing just the one line of code from using my embedding matrix vs a separate softmax weight matrix causes the error.
I created a relatively small program to demonstrate what I'm trying to do and to show what causes the error. I was not able to make the error occur on a smaller network when I tried with just a single hidden layer network.
import sys
import time
import tensorflow as tf
from tensorflow.models.rnn import rnn
from tensorflow.models.rnn import rnn_cell
from tensorflow.models.rnn.rnn_cell import BasicLSTMCell
import numpy as np
INPUT_LENGTH = 17
BATCH_SIZE = 20
VOCAB_SIZE = 11
NUM_EPOCHS = 1000
HIDDEN_UNITS = 100
class Model(object):
def __init__(self, is_training):
initializer = tf.random_uniform_initializer(-1.0, 1.0)
self._target = tf.placeholder(tf.float32, [BATCH_SIZE, VOCAB_SIZE])
self._input_data=tf.placeholder(tf.int32,[BATCH_SIZE, INPUT_LENGTH])
self.embedding = tf.get_variable("embedding",
[VOCAB_SIZE, HIDDEN_UNITS],
initializer=initializer)
self.inputs = tf.split(1, INPUT_LENGTH,
tf.nn.embedding_lookup(self.embedding, self._input_data))
self.inputs2 = [tf.squeeze(input_, [1]) for input_ in self.inputs]
cell = rnn_cell.BasicLSTMCell(num_units=HIDDEN_UNITS)
initial_state = cell.zero_state(BATCH_SIZE, tf.float32)
outputs, states = rnn.rnn(cell, self.inputs2,
initial_state=initial_state)
self._outputs = outputs[-1]
self.soft_w = tf.get_variable("softmax_w",
[HIDDEN_UNITS, VOCAB_SIZE],
initializer=initializer)
prod = tf.matmul(self._outputs, self.soft_w)
#uncommenting out the following line causes the error
# prod = tf.matmul(self._outputs, self.embedding, False, True)
soft_b = tf.get_variable("softmax_b", [VOCAB_SIZE],
initializer=initializer)
self._logits = tf.nn.bias_add(prod,soft_b)
self._loss = tf.nn.softmax_cross_entropy_with_logits(self._logits,
self._target)
if not is_training:
return
learning_rate = .010001
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
self._train_op = optimizer.minimize(self._loss)
def train(self, sess, inputs, targets):
t = np.zeros((BATCH_SIZE, VOCAB_SIZE))
for i, target in enumerate(targets):
t[i,target] = 1.0
inputs = np.array(inputs)
inputs = inputs.reshape(BATCH_SIZE,INPUT_LENGTH)
fd = {self._target:t,
self._input_data:inputs}
o = sess.run([self._train_op, self._loss, self._outputs, self.embedding, self.soft_w], feed_dict = fd)
print o[2].shape
print o[3].shape
print o[4].shape
sys.exit()
return np.mean(o[1])
#this just generates dummy data
def read_data_rows(count):
ret = []
for i in range(count):
inputs = [4] * INPUT_LENGTH
output = 1
ret.append((inputs, output))
return ret
def main():
start = time.time()
tf.set_random_seed(1)
print "creating model",time.time()-start
m = Model(is_training=True)
with tf.Session() as sess:
print "initializing variables", time.time()-start
tf.initialize_all_variables().run()
for epoch in range(NUM_EPOCHS):
train_rows = read_data_rows(100)
for row_num in range(0, len(train_rows), BATCH_SIZE):
qs = []
ans = []
batch = train_rows[row_num:row_num+BATCH_SIZE]
for b in batch:
qs.append(b[0])
ans.append(b[1])
m.train(sess, qs, ans)
if __name__ == "__main__":
main()
The error I see is ValueError: Shapes TensorShape([Dimension(100)]) and TensorShape([Dimension(17), Dimension(100)]) must have the same rank
when uncommenting the line I mentioned above. What is the cause of the error I'm seeing? Why is the embedding matrix not treated the same way as the matrix self.soft_w?
The 0.6.0 (and earlier) release of TensorFlow had a bug in the implementation of gradients for tf.nn.embedding_lookup() and tf.gather() when the indices argument (self._input_data in your code) had more than one dimension.
Upgrading to the latest source release should fix this error. Otherwise, this commit has the relevant change (to array_grad.py) that will enable your program to work.