Implementing stack denoising autoencoder with tensorflow - python

I was trying to implement a stack denoising autoencoder in tensorflow. Here is the code I got. It worked with one layer, but when I tried to stack it(by changing the list of parameter n_neuron). It doesn't work anymore. I was trying to debug it for a long time but still couldn't get the answer.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
#Reading MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels
#parameters
examples_to_show = 10 #finally display 10 pic
mnist_width =28
n_visible = mnist_width * mnist_width #input layer
n_neuron = [n_visible,500] #n_visible is input layer size, the numbers after are hidden size neuorn unit nunmbers
corruption_level = 0.3
batch_size=128
train_epochs=10
hidden_size=len(n_neuron)-1
Z=[None]*hidden_size #Estimated output
cost=[None]*hidden_size
train_op=[None]*hidden_size #trainning operation
# X as input for each layer
X = tf.placeholder("float", name='X') #demensinonality of input is not defined
# set dictionary for all the parameter in the hidden layer
weights_encoder=dict()
weights_decoder=dict()
biases_encoder=dict()
biases_decoder=dict()
for i in range(hidden_size): #initialize variables for each hidden layer
W_init_max = 4 * np.sqrt(6. / (n_neuron[i] + n_neuron[i+1])) #initialize variables with random values
W_init = tf.random_uniform(shape=[n_neuron[i], n_neuron[i+1]],
minval=-W_init_max,
maxval=W_init_max)
weights_encoder[i]=tf.Variable(W_init)
weights_decoder[i]=tf.transpose(weights_encoder[i]) #decoder weights are tied with encoder size
biases_encoder[i]=tf.Variable(tf.random_normal([n_neuron[i+1]]))
biases_decoder[i]=tf.Variable(tf.random_normal([n_neuron[i]]))
def model(input, W, b, W_prime, b_prime): # One layer model. Output is the estimated output
Y = tf.nn.sigmoid(tf.matmul(input, W) + b) # hidden state
Z = tf.nn.sigmoid(tf.matmul(Y, W_prime) + b_prime) # reconstructed input
return Z
def corruption(input): #corruption of the input
mask=np.random.binomial(1, 1 - corruption_level,input.shape ) #mask with several zeros at certain position
corrupted_input=input*mask
return corrupted_input
def encode(input,W,b,n): #W,b weights_encoder and biases_encoder, X is the input, n indicates how many layer encode(i.e: n=0: input layer. n=1: first hidden layer etc.)
if n==0:
Y = input #input layer no encode needed
else:
for i in range(n): #encode the input layer by layer
Y=tf.nn.sigmoid(tf.add(tf.matmul(input, W[i]), b[i]))
input = Y #output become input for next layer encode
Y = Y.eval() #convert tensor.object to ndarray
return Y
def decode(input,W_prime,b_prime,n):
if n == 0: #when it is zero, no decode needed, original output
Y = input # input layer
else:
for i in range(n):
Y = tf.nn.sigmoid(tf.add(tf.matmul(input, W_prime[n-i-1]), b_prime[n-i-1]))
input = Y
Y = Y.eval() # convert tensor.object to ndarray
return Y
#build the graph
for i in range(hidden_size): #how many layers need to be trained
Z[i]= model(X, weights_encoder[i], biases_encoder[i], weights_decoder[i], biases_decoder[i])
#create cost function
cost[i] = tf.reduce_mean(tf.square(tf.subtract(X,Z[i])))
train_op[i]=tf.train.GradientDescentOptimizer(0.02).minimize(cost[i])
# Launch the graph in a session
with tf.Session() as sess:
# you need to initialize all variables!
tf.global_variables_initializer().run()
for j in range(hidden_size): #j start from 0
encoded_trX = encode(trX, weights_encoder, biases_encoder, j) #Encode the original input to the certain layer
encoded_teX = encode(teX, weights_encoder, biases_encoder, j) #Also encode the test data to the certain layer
for i in range(train_epochs):
for start, end in zip(range(0, len(trX),batch_size), range(batch_size, len(trX)+1, batch_size)): #Give all the batches
input_= encoded_trX[start:end] #take one batch as input to train
sess.run(train_op[j], feed_dict={X: corruption(input_)}) #trainning step, feed the corrupted input
print("Layer:",j,i, sess.run(cost[j], feed_dict={X: encoded_teX})) #calculate the loss after one epoch. Cost should be calculated with uncorrupted data
print("One layer Optimization Finished!")
print("All parameters optimized")
#applying encode and decode over test set
output=tf.constant(decode(encode(teX[:examples_to_show], weights_encoder, biases_encoder, hidden_size), weights_decoder, biases_decoder, hidden_size)) #put the test data into the whole neuron network
final_result=sess.run(output)
# Compare original images with their reconstructions
f,a = plt.subplots(2, 10, figsize=(10, 2))
for i in range(examples_to_show):
a[0][i].imshow(np.reshape(mnist.test.images[i], (28, 28)))
a[1][i].imshow(np.reshape(final_result[i], (28, 28)))
f.show()
plt.draw()
plt.waitforbuttonpress()

Can you try this?
n_neuron = [n_visible,500,400] #n_visible is input layer size, the numbers after are hidden size neuorn unit nunmbers
This works perfectly for me on my computer. If it does not work for you, please let us know what error you get.

Related

Have tf.layers.dense connected to different inputs OR have tf.train.optimizer optimize tensors?

I am new to tensorflow and more advanced machine learning, so I tried to get a better grasp of RNNs by implementing one by hand instead of using tf.contrib.rnn.RNNCell. My first problem was that I needed to unroll the net for backpropogation so I looped through my sequence and I needed to keep consistent weights and biases, so I couldn't reinitialize a dense layer with tf.layers.dense each time, but I also needed to have my layer connected to the current timestep of my sequence and I couldn't find a way to change what a dense layer was connected to. To work around this I tried to implement my own version of tf.layers.dense, and this worked fine until I got the error: NotImplementedError("Trying to update a Tensor " ...) when I tried to optimize my custom dense layers.
My code:
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
import random
# -----------------
# WORD PARAMETERS
# -----------------
target_string = ['Hello ','Hello ','World ','World ', '!']
number_input_words = 1
# --------------------------
# TRAINING HYPERPARAMETERS
# --------------------------
training_steps = 4000
batch_size = 9
learning_rate = 0.01
display_step = 150
hidden_cells = 20
# ----------------------
# PREPARE DATA AS DICT
# ----------------------
# TODO AUTOMATICALLY CREATE DICT
dictionary = {'Hello ': 0, 'World ': 1, '!': 2}
reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
vocab_size = len(dictionary)
# ------------
# LSTM MODEL
# ------------
class LSTM:
def __init__(self, sequence_length, number_input_words, hidden_cells, mem_size_x, mem_size_y, learning_rate):
self.sequence = tf.placeholder(tf.float32, (sequence_length, vocab_size), 'sequence')
self.memory = tf.zeros([mem_size_x, mem_size_y])
# sequence_length = self.sequence.shape[0]
units = [vocab_size, 5,4,2,6, vocab_size]
weights = [tf.random_uniform((units[i-1], units[i])) for i in range(len(units))[1:]]
biases = [tf.random_uniform((1, units[i])) for i in range(len(units))[1:]]
self.total_loss = 0
self.outputs = []
for word in range(sequence_length-1):
sequence_w = tf.reshape(self.sequence[word], [1, vocab_size])
layers = []
for i in range(len(weights)):
if i == 0:
layers.append(tf.matmul(sequence_w, weights[0]) + biases[0])
else:
layers.append(tf.matmul(layers[i-1], weights[i]) + biases[i])
percentages = tf.nn.softmax(logits=layers[-1])
self.outputs.append(percentages)
self.total_loss += tf.losses.absolute_difference(tf.reshape(self.sequence[word+1], (1, vocab_size)), tf.reshape(percentages, (1, vocab_size)))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
self.train_operation = optimizer.minimize(loss=self.total_loss, var_list=weights+biases, global_step=tf.train.get_global_step())
lstm = LSTM(len(target_string), number_input_words, hidden_cells, 10, 5, learning_rate)
# ---------------
# START SESSION
# ---------------
with tf.Session() as sess:
sess.run(tf.local_variables_initializer())
sess.run(tf.global_variables_initializer())
sequence = []
for i in range(len(target_string)):
x = [0]*vocab_size
x[dictionary[target_string[i]]] = 1
sequence.append(x)
print(sequence)
for x in range(1000):
sess.run(lstm.train_operation, feed_dict={lstm.sequence: sequence})
prediction, loss = sess.run((lstm.outputs, lstm.total_loss), feed_dict= {lstm.sequence: sequence})
print(prediction)
print(loss)
Any answers that tell me either how to either connect tf.layers.dense to different variables each time or tell me how to get around my NotImplementedError would be greatly appreciated. I apologize if this question is lengthy or just badly worded, i'm still new to stackoverflow.
EDIT:
I've updated the LSTM class part of my code to:
(Inside def init)
self.sequence = [tf.placeholder(tf.float32, (batch_size, vocab_size), 'sequence') for _ in range(sequence_length-1)]
self.total_loss = 0
self.outputs = []
rnn_cell = rnn.BasicLSTMCell(hidden_cells)
h = tf.zeros((batch_size, hidden_cells))
for i in range(sequence_length-1):
current_sequence = self.sequence[i]
h = rnn_cell(current_sequence, h)
self.outputs.append(h)
But I still get an error on the line: h = rnn_cell(current_sequence, h) about not being able to iterate over tensors. I'm not trying to iterate over any tensors, and if I am I don't mean to.
So there's a standard way of approaching this issue (this is the best approach I know from my knowledge) Instead of trying to create a new list of dense layers. Do the following. Before that lets assume your hidden layer size is h_dim and number of steps to unroll is num_unroll and batch size batch_size
In a for loop, you calculate the output of the RNNCell for each unrolled input
h = tf.zeros(...)
outputs= []
for ui in range(num_unroll):
out, state = rnn_cell(x[ui],state)
outputs.append(out)
Now concat all the outputs to a single tensor of size, [batch_size*num_unroll, h_dim]
Send this through a single dense layer of size [h_dim, num_classes]
logits = tf.matmul(tf.concat(outputs,...), w) + b
predictions = tf.nn.softmax(logits)
You have the logits for all the unrolled inputs now. Now it's just a matter of reshaping the tensor to a [batch_size, num_unroll, num_classes] tensor.
Edited (Feeding in Data): The data will be presented in the form of a list of num_unroll many placeholders. So,
x = [tf.placeholder(shape=[batch_size,3]...) for ui in range(num_unroll)]
Now say you have data like below,
Hello world bye
Bye hello world
Here batch size is 2, sequence length is 3. Once converted to one hot encoding, you're data looks like below (shape [time_steps, batch_size, 3].
data = [ [ [1,0,0], [0,0,1] ], [ [0,1,0], [1,0,0] ], [ [0,0,1], [0,1,0] ] ]
Now feed data in, in the following format.
feed_dict = {}
for ui in range(3):
feed_dict[x[ui]] = data[ui]

Tensorflow - replace MNIST on other dataset

I have a problem with using diffrent dataset then default from tensorflow.
I have code using MNIST dataset to recognize digits. In this application there is generated graph, which is imported later by android app.
Now I would like to recognize digits and math's operators (basic one: +, -, *, /).
I found script to generate data I need. I have two .pickle files.
But even with the dataset which suits for me, still I don't know how to import this dataset to my app with tensorflow.
I would be grateful for help with this or maybe to give me other (maybe easier) solution.
EDIT
I did some changes in the code which were adviced by gabriele.
Now I have error:
(x, label) = train_pickle_reader('train.pickle')
ValueError: too many values to unpack (expected 2)
I found the description of the dataset I used:
Extracts trace groups from inkml files.
Converts extracted trace groups into images. Images are square shaped bitmaps with only black (value 0) and white (value 1) pixels. Black color denotes patterns (ROI).
Labels those images (according to inkml files).
Flattens images to one-dimensional vectors.
Converts labels to one-hot format.
Dumps training and testing sets separately into outputs folder.
Below there is code in python:
import tensorflow as tf
import pickle
def train_pickle_reader(filename):
with open(filename, 'rb') as f:
x = pickle.load(f)
# assuming x is already of the form (all_train_input, all_train_labels):
return x
def test_pickle_reader(filename):
with open(filename, 'rb') as f:
x = pickle.load(f)
# assuming x is already of the form (all_train_input, all_train_labels):
return x
# Function to create a weight neuron using a random number. Training will assign a real weight later
def weight_variable(shape, name):
initial = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(initial, name=name)
# Function to create a bias neuron. Bias of 0.1 will help to prevent any 1 neuron from being chosen too often
def biases_variable(shape, name):
initial = tf.constant(0.1, shape=shape)
return tf.Variable(initial, name=name)
# Function to create a convolutional neuron. Convolutes input from 4d to 2d. This helps streamline inputs
def conv_2d(x, W, name):
return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME', name=name)
# Function to create a neuron to represent the max input. Helps to make the best prediction for what comes next
def max_pool(x, name):
return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)
# A way to input images (as 784 element arrays of pixel values 0 - 1)
x_input = tf.placeholder(dtype=tf.float32, shape=[None, 784], name='x_input')
# A way to input labels to show model what the correct answer is during training
y_input = tf.placeholder(dtype=tf.float32, shape=[None, 10], name='y_input')
# First convolutional layer - reshape/resize images
# A weight variable that examines batches of 5x5 pixels, returns 32 features (1 feature per bit value in 32 bit float)
W_conv1 = weight_variable([5, 5, 1, 32], 'W_conv1')
# Bias variable to add to each of the 32 features
b_conv1 = biases_variable([32], 'b_conv1')
# Reshape each input image into a 28 x 28 x 1 pixel matrix
x_image = tf.reshape(x_input, [-1, 28, 28, 1], name='x_image')
# Flattens filter (W_conv1) to [5 * 5 * 1, 32], multiplies by [None, 28, 28, 1] to associate each 5x5 batch with the
# 32 features, and adds biases
h_conv1 = tf.nn.relu(conv_2d(x_image, W_conv1, name='conv1') + b_conv1, name='h_conv1')
# Takes windows of size 2x2 and computes a reduction on the output of h_conv1 (computes max, used for better prediction)
# Images are reduced to size 14 x 14 for analysis
h_pool1 = max_pool(h_conv1, name='h_pool1')
# Second convolutional layer, reshape/resize images
# Does mostly the same as above but converts each 32 unit output tensor from layer 1 to a 64 feature tensor
W_conv2 = weight_variable([5, 5, 32, 64], 'W_conv2')
b_conv2 = biases_variable([64], 'b_conv2')
h_conv2 = tf.nn.relu(conv_2d(h_pool1, W_conv2, name='conv2') + b_conv2, name='h_conv2')
# Images at this point are reduced to size 7 x 7 for analysis
h_pool2 = max_pool(h_conv2, name='h_pool2')
# First dense layer, performing calculation based on previous layer output
# Each image is 7 x 7 at the end of the previous section and outputs 64 features, we want 32 x 32 neurons = 1024
W_dense1 = weight_variable([7 * 7 * 64, 1024], name='W_dense1')
# bias variable added to each output feature
b_dense1 = biases_variable([1024], name='b_dense1')
# Flatten each of the images into size [None, 7 x 7 x 64]
h_pool_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64], name='h_pool_flat')
# Multiply weights by the outputs of the flatten neuron and add biases
h_dense1 = tf.nn.relu(tf.matmul(h_pool_flat, W_dense1, name='matmul_dense1') + b_dense1, name='h_dense1')
# Dropout layer prevents overfitting or recognizing patterns where none exist
# Depending on what value we enter into keep_prob, it will apply or not apply dropout layer
keep_prob = tf.placeholder(dtype=tf.float32, name='keep_prob')
# Dropout layer will be applied during training but not testing or predicting
h_drop1 = tf.nn.dropout(h_dense1, keep_prob, name='h_drop1')
# Readout layer used to format output
# Weight variable takes inputs from each of the 1024 neurons from before and outputs an array of 10 elements
W_readout1 = weight_variable([1024, 10], name='W_readout1')
# Apply bias to each of the 10 outputs
b_readout1 = biases_variable([10], name='b_readout1')
# Perform final calculation by multiplying each of the neurons from dropout layer by weights and adding biases
y_readout1 = tf.add(tf.matmul(h_drop1, W_readout1, name='matmul_readout1'), b_readout1, name='y_readout1')
# Softmax cross entropy loss function compares expected answers (labels) vs actual answers (logits)
cross_entropy_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_input, logits=y_readout1))
# Adam optimizer aims to minimize loss
train_step = tf.train.AdamOptimizer(0.0001).minimize(cross_entropy_loss)
# Compare actual vs expected outputs to see if highest number is at the same index, true if they match and false if not
correct_prediction = tf.equal(tf.argmax(y_input, 1), tf.argmax(y_readout1, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Used to save the graph and weights
saver = tf.train.Saver()
# Run in with statement so session only exists within it
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
# Save the graph shape and node names to pbtxt file
tf.train.write_graph(sess.graph_def, '.', 'advanced_mnist.pbtxt', False)
(x, label) = train_pickle_reader('train.pickle')
batch_size = 64 # the batch size you want to use
num_batches = len(x)//batch_size
# Train the model, running through data 20000 times in batches of 50
# Print out step # and accuracy every 100 steps and final accuracy at the end of training
# Train by running train_step and apply dropout by setting keep_prob to 0.5
for i in range(20000):
for j in range(num_batches):
x_batch = x[j * batch_size: (j + 1) * batch_size]
label_batch = label[j * batch_size: (j + 1)*batch_size]
train_step.run(feed_dict={x_input: x_batch, y_input: label_batch, keep_prob: 0.5})
# Save the session with graph shape and node weights
saver.save(sess, 'advanced_mnist.ckpt')
# Make a prediction
(x, labels) = test_pickle_reader('test.pickle')
print(sess.run(y_readout1, feed_dict={x_input: x, keep_prob: 1.0}))
In your code, after instantiating a tf.Session(), the line batch = mnist_data.train.next_batch(50) calls a built in function which returns a tuple of the kind (input, label). In order to feed the network with your data, here you need to define some function returning i.e. a numpy array having the input data and the associated label. For example, assuming you have a pickle file containing your training data, your code should look something like:
def pikle_reader(filename):
with open(filename, 'r') as f:
x = pickle.load(f)
# assuming x is already of the form (all_train_input, all_train_labels):
return x
[...]
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
[...]
# get your data:
(x, label) = pikle_reader(filename)
batch_size = 64 # the batch size you want to use
num_batches = len(x)//batch_size
for i in range(20000): # number of epochs
for j in range(num_batches):
x_batch = x[j*batch_size: (j+1)*batch_size]
label_batch = label[j* batch_size: (j+1)batch_size]
train_step.run(feed_dict={x_input: x_batch, y_input: label_batch, keep_prob: 0.5})
Here, feed_dict feeds the placeholders x_input with the values in x_batch and the placeholder y_input with label_batch. Then in the session the code will run the train_step operation.
Instead, when you want to make a prediction the code is basically the same:
(x, label) = pikle_reader(test_data_filename)
print(sess.run(y_readout1, feed_dict={x_input: x, keep_prob: 1.0}))

Why everything is disconnected in my Tensorboard graph?

I have implemented a CNN for detecting human activity using accelrometer data, my model is working really fine but when i visualize my grapgh on tensorboard, everythin seems to be diconnected. Right now i am not using Namescopes but even without it grpagh should make some sense right?
EDIT After implementing answer given by #user1735003 , this is the output. What i still don't understand is why i'm getting all the nodes at left
What i have implemented is: i have two convolution layer and two max-pooling layers and on top of that i have two hidden layers with 1024 and 512 neurons.
so Here is my code:
#Weights
def init_weights(shape):
init_random_dist = tf.truncated_normal(shape, stddev=0.1)
return tf.Variable(init_random_dist)
#Bias
def init_bias(shape):
init_bias = tf.constant(0.1,shape=shape)
return tf.Variable(init_bias)
def conv1d(x,weights):
#x is input accelration data and W is corresponding weight
return tf.nn.conv1d(value=x,filters = weights,stride=1,padding='VALID')
def convolution_layer(input_x,shape):
w1 = init_weights(shape)
b = init_bias([shape[2]])
return tf.nn.relu(conv1d(input_x,weights=w1)+b)
def normal_full_layer(input_layer,size):
input_size = int(input_layer.get_shape()[1])
W = init_weights([input_size, size])
b = init_bias([size])
return tf.matmul(input_layer, W) +b
x = tf.placeholder(tf.float32,shape=[None ,window_size,3]) #input tensor with 3 input channels
y = tf.placeholder(tf.float32,shape=[None,6]) #Labels
con_layer_1 = convolution_layer(x,shape=[4,3,32])#filter of shape [filter_width, in_channels, out_channels]
max_pool_1=tf.layers.max_pooling1d(inputs=con_layer_1,pool_size=2,strides=2,padding='Valid')
con_layer_2 = convolution_layer(max_pool_1,shape=[4,32,64])
max_pool_2 = tf.layers.max_pooling1d(inputs=con_layer_2,pool_size=2,strides=2,padding='Valid')
flat = tf.reshape(max_pool_2,[-1,max_pool_2.get_shape()[1]*max_pool_2.get_shape()[2]])
fully_conected = tf.nn.relu(normal_full_layer(flat,1024))
second_hidden_layer = tf.nn.relu(normal_full_layer(fully_conected,512))
hold_prob = tf.placeholder(tf.float32)
full_one_dropout = tf.nn.dropout(second_hidden_layer,keep_prob=hold_prob)
y_pred = normal_full_layer(full_one_dropout,6)
pred_softmax = tf.nn.softmax(y_pred)
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_pred))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
train = optimizer.minimize(cross_entropy)
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
filename="./summary_log11/run"
summary_writer = tf.summary.FileWriter(filename, graph_def=sess.graph_def)
for i in range(5000):
batch_x,batch_y = next_batch(100,X_train,y_train)
sess.run(train, feed_dict={x: batch_x, y: batch_y, hold_prob: 0.5})
# PRINT OUT A MESSAGE EVERY 100 STEPS
if i%100 == 0:
print('Currently on step {}'.format(i))
print('Accuracy is:')
# Test the Train Model
matches = tf.equal(tf.argmax(y_pred,1),tf.argmax(y,1))
acc = tf.reduce_mean(tf.cast(matches,tf.float32))
print(sess.run(acc,feed_dict={x:X_test,y:y_test,hold_prob:1.0}))
print('\n')
Try organizing your nodes into scopes. That will help Tensorboard to figure out your graph hierarchy. For example,
with tf.variable_scope('input'):
x = tf.placeholder(tf.float32,shape=[None ,window_size,3]) #input tensor with 3 input channels
y = tf.placeholder(tf.float32,shape=[None,6]) #Labels
with tf.variable_scope('net'):
con_layer_1 = convolution_layer(x,shape=[4,3,32])#filter of shape [filter_width, in_channels, out_channels]
max_pool_1=tf.layers.max_pooling1d(inputs=con_layer_1,pool_size=2,strides=2,padding='Valid')
con_layer_2 = convolution_layer(max_pool_1,shape=[4,32,64])
max_pool_2 = tf.layers.max_pooling1d(inputs=con_layer_2,pool_size=2,strides=2,padding='Valid')
flat = tf.reshape(max_pool_2,[-1,max_pool_2.get_shape()[1]*max_pool_2.get_shape()[2]])
fully_conected = tf.nn.relu(normal_full_layer(flat,1024))
second_hidden_layer = tf.nn.relu(normal_full_layer(fully_conected,512))
hold_prob = tf.placeholder(tf.float32)
full_one_dropout = tf.nn.dropout(second_hidden_layer,keep_prob=hold_prob)
y_pred = normal_full_layer(full_one_dropout,6)
pred_softmax = tf.nn.softmax(y_pred)
with tf.variable_scope('loss'):
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=y_pred))
with tf.variable_scope('optimizer'):
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
train = optimizer.minimize(cross_entropy)
Since you didn't explicitly name your tf operations it was done automatically by tensorflow, e.g. ReLu operators were named ReLu_1, ReLu_2, ... . According to tensorboard documentation:
One last structural simplification is series collapsing. Sequential motifs--that is, nodes whose names differ by a number at the end and have isomorphic structures--are collapsed into a single stack of nodes, as shown below. For networks with long sequences, this greatly simplifies the view.
As you can see at the right side of your graph, all add_[0-7], MatMul_[0-5] and Relu_[0-5] nodes were grouped together because they have similar names, this doesn't mean that nodes are disconnected in your graph, it's just the tensorboard's node grouping policy.
If you want to avoid this then give your operations the names that are more different than just by a number at the end. Or use tf.name_scope() as you mentioned, e.g.:
with tf.name_scope("conv1"):
con_layer_1 = convolution_layer(x,shape=[4,3,32])
max_pool_1=tf.layers.max_pooling1d(inputs=con_layer_1,pool_size=2,strides=2,padding='Valid')
with tf.name_scope("conv2"):
con_layer_2 = convolution_layer(max_pool_1,shape=[4,32,64])
max_pool_2 = tf.layers.max_pooling1d(inputs=con_layer_2,pool_size=2,strides=2,padding='Valid')
# etc.

tensorflow shuffle_batch and feed_dict error

This is the main part of my code.
I'm confused on function shuffle_batch and feed_dict.
In my code below, the features and labels I put into the function are "list".(I also tried "array" before.But it seems doesn't matter.)
What I want to do is make my testing data(6144,26) and training data(1024,13) into batch:(100,26) and (100,13),then set them as the feed_dict for the placeholders.
My questions are:
1.The outputs of the function tf.train.batch_shuffle are Tensors.But I can not put tensors in the feed_dict,right?
2.When I compiled the last two rows,error says,got shape [6144, 26], but wanted [6144] .I know it may be a dimension error,but how can I fix it.
Thanks a lot.
import tensorflow as tf
import scipy.io as sio
#import signal matfile
#[('label', (8192, 13), 'double'), ('clipped_DMT', (8192, 26), 'double')]
file = sio.loadmat('DMTsignal.mat')
#get array(clipped_DMT)
data_cDMT = file['clipped_DMT']
#get array(label)
data_label = file['label']
with tf.variable_scope('split_cDMT'):
cDMT_test_list = []
cDMT_training_list = []
for i in range(0,8192):
if i % 4 == 0:
cDMT_test_list.append(data_cDMT[i])
else:
cDMT_training_list.append(data_cDMT[i])
with tf.variable_scope('split_label'):
label_test_list = []
label_training_list = []
for i in range(0,8192):
if i % 4 == 0:
label_test_list.append(data_label[i])
else:
label_training_list.append(data_label[i])
#set parameters
n_features = cDMT_training.shape[1]
n_labels = label_training.shape[1]
learning_rate = 0.8
hidden_1 = 256
hidden_2 = 128
training_steps = 1000
BATCH_SIZE = 100
#set Graph input
with tf.variable_scope('cDMT_Inputs'):
X = tf.placeholder(tf.float32,[None, n_features],name = 'Input_Data')
with tf.variable_scope('labels_Inputs'):
Y = tf.placeholder(tf.float32,[None, n_labels],name = 'Label_Data')
#set variables
#Initialize both W and b as tensors full of zeros
with tf.variable_scope('layerWeights'):
h1 = tf.Variable(tf.random_normal([n_features,hidden_1]))
h2 = tf.Variable(tf.random_normal([hidden_1,hidden_2]))
w_out = tf.Variable(tf.random_normal([hidden_2,n_labels]))
with tf.variable_scope('layerBias'):
b1 = tf.Variable(tf.random_normal([hidden_1]))
b2 = tf.Variable(tf.random_normal([hidden_2]))
b_out = tf.Variable(tf.random_normal([n_labels]))
#create model
def neural_net(x):
layer_1 = tf.add(tf.matmul(x,h1),b1)
layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1,h2),b2))
out_layer = tf.add(tf.matmul(layer_2,w_out),b_out)
return out_layer
nn_out = neural_net(X)
#loss and optimizer
with tf.variable_scope('Loss'):
loss = tf.reduce_mean(tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits = nn_out,labels = Y)))
with tf.name_scope('Train'):
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
with tf.name_scope('Accuracy'):
correct_prediction = tf.equal(tf.argmax(nn_out,1),tf.argmax(Y,1))
#correct_prediction = tf.metrics.accuracy (labels = Y, predictions =nn_out)
acc = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
# Initialize
init = tf.global_variables_initializer()
# start computing & training
with tf.Session() as sess:
sess.run(init)
for step in range(training_steps):
#set batch
cmt_train_bat,label_train_bat = sess.run(tf.train.shuffle_batch([cDMT_training_list,label_training_list],batch_size = BATCH_SIZE,capacity=50000,min_after_dequeue=10000))
cmt_test_bat,label_test_bat = sess.run(tf.train.shuffle_batch([cDMT_test_list,label_test_list],batch_size = BATCH_SIZE,capacity=50000,min_after_dequeue=10000))
From the Session.run doc:
The optional feed_dict argument allows the caller to override the
value of tensors in the graph. Each key in feed_dict can be one of the
following types:
If the key is a tf.Tensor, the value may be a Python scalar, string,
list, or numpy ndarray that can be converted to the same dtype as that
tensor. Additionally, if the key is a tf.placeholder, the shape of the
value will be checked for compatibility with the placeholder.
...
So you are right: for X and Y (which are placeholders) you can't feed a tensor and tf.train.shuffle_batch is not designed to work with placeholders.
You can follow one of two ways:
get rid of placeholders and use tf.TFRecordReader in combination with tf.train.shuffle_batch, as suggested here. This way you'll have only tensors in your model and you won't need to "feed" anything additionally.
batch and shuffle the data yourself in numpy and feed into placeholders. This takes just several lines of code, so I find it easier, though both paths are valid.
Take also into account performance considerations.

Tensorflow backprop through rnn ValueError

I am using a recurrent neural network in tensorflow with BasicLSTMCells. Basically, I have an input sequence of word ids, I convert each id to word embeddings, pass the word embeddings one at a time through the rnn, and then make a prediction for a single word after reading the whole sequence. My embedding matrix is of dimension V x H where V is the size of my vocabulary and H is the number of hidden units in my rnn. In order to make a prediction for the next word, I multiply my hidden vector by a weight matrix of size H x V and then compute a softmax. With the setup I described, everything seems to work as expected. I'm able to train on some examples and make reasonable predictions.
However, I've noticed that if I try to use the transpose of the embedding matrix, which will be a matrix of size H x V, instead of a separate matrix for the softmax layer, tensorflow raises a value error claiming that the dimensions of something it's not specifying don't have the same rank. I've verified that the dimensions of my embedding matrix (well, its transpose) are the same as those of the separate softmax matrix I'm creating. Changing just the one line of code from using my embedding matrix vs a separate softmax weight matrix causes the error.
I created a relatively small program to demonstrate what I'm trying to do and to show what causes the error. I was not able to make the error occur on a smaller network when I tried with just a single hidden layer network.
import sys
import time
import tensorflow as tf
from tensorflow.models.rnn import rnn
from tensorflow.models.rnn import rnn_cell
from tensorflow.models.rnn.rnn_cell import BasicLSTMCell
import numpy as np
INPUT_LENGTH = 17
BATCH_SIZE = 20
VOCAB_SIZE = 11
NUM_EPOCHS = 1000
HIDDEN_UNITS = 100
class Model(object):
def __init__(self, is_training):
initializer = tf.random_uniform_initializer(-1.0, 1.0)
self._target = tf.placeholder(tf.float32, [BATCH_SIZE, VOCAB_SIZE])
self._input_data=tf.placeholder(tf.int32,[BATCH_SIZE, INPUT_LENGTH])
self.embedding = tf.get_variable("embedding",
[VOCAB_SIZE, HIDDEN_UNITS],
initializer=initializer)
self.inputs = tf.split(1, INPUT_LENGTH,
tf.nn.embedding_lookup(self.embedding, self._input_data))
self.inputs2 = [tf.squeeze(input_, [1]) for input_ in self.inputs]
cell = rnn_cell.BasicLSTMCell(num_units=HIDDEN_UNITS)
initial_state = cell.zero_state(BATCH_SIZE, tf.float32)
outputs, states = rnn.rnn(cell, self.inputs2,
initial_state=initial_state)
self._outputs = outputs[-1]
self.soft_w = tf.get_variable("softmax_w",
[HIDDEN_UNITS, VOCAB_SIZE],
initializer=initializer)
prod = tf.matmul(self._outputs, self.soft_w)
#uncommenting out the following line causes the error
# prod = tf.matmul(self._outputs, self.embedding, False, True)
soft_b = tf.get_variable("softmax_b", [VOCAB_SIZE],
initializer=initializer)
self._logits = tf.nn.bias_add(prod,soft_b)
self._loss = tf.nn.softmax_cross_entropy_with_logits(self._logits,
self._target)
if not is_training:
return
learning_rate = .010001
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
self._train_op = optimizer.minimize(self._loss)
def train(self, sess, inputs, targets):
t = np.zeros((BATCH_SIZE, VOCAB_SIZE))
for i, target in enumerate(targets):
t[i,target] = 1.0
inputs = np.array(inputs)
inputs = inputs.reshape(BATCH_SIZE,INPUT_LENGTH)
fd = {self._target:t,
self._input_data:inputs}
o = sess.run([self._train_op, self._loss, self._outputs, self.embedding, self.soft_w], feed_dict = fd)
print o[2].shape
print o[3].shape
print o[4].shape
sys.exit()
return np.mean(o[1])
#this just generates dummy data
def read_data_rows(count):
ret = []
for i in range(count):
inputs = [4] * INPUT_LENGTH
output = 1
ret.append((inputs, output))
return ret
def main():
start = time.time()
tf.set_random_seed(1)
print "creating model",time.time()-start
m = Model(is_training=True)
with tf.Session() as sess:
print "initializing variables", time.time()-start
tf.initialize_all_variables().run()
for epoch in range(NUM_EPOCHS):
train_rows = read_data_rows(100)
for row_num in range(0, len(train_rows), BATCH_SIZE):
qs = []
ans = []
batch = train_rows[row_num:row_num+BATCH_SIZE]
for b in batch:
qs.append(b[0])
ans.append(b[1])
m.train(sess, qs, ans)
if __name__ == "__main__":
main()
The error I see is ValueError: Shapes TensorShape([Dimension(100)]) and TensorShape([Dimension(17), Dimension(100)]) must have the same rank
when uncommenting the line I mentioned above. What is the cause of the error I'm seeing? Why is the embedding matrix not treated the same way as the matrix self.soft_w?
The 0.6.0 (and earlier) release of TensorFlow had a bug in the implementation of gradients for tf.nn.embedding_lookup() and tf.gather() when the indices argument (self._input_data in your code) had more than one dimension.
Upgrading to the latest source release should fix this error. Otherwise, this commit has the relevant change (to array_grad.py) that will enable your program to work.

Categories

Resources