I am trying to implement a Fully Convolutional network (5 layers) in TensorFlow.
But after a short time of training, all my logits collapse to 0.
Has anyone had the same problem before?
Here is how I implemented my CONV-ReLU-maxPOOL layer:
def conv_relu_layer(in_data, nb_filters, filter_shape):
    nb_in_channels = int(in_data.shape[3])
    conv_shape = [filter_shape[0], filter_shape[1],
                  nb_in_channels, nb_filters]
    weights = tf.Variable(
        tf.truncated_normal(conv_shape, mean=0., stddev=.05))
    bias = tf.Variable(
        tf.truncated_normal([nb_filters], mean=0., stddev=1.))
    output = tf.nn.conv2d(in_data, weights, [1, 1, 1, 1], padding="SAME")
    output += bias
    output = tf.nn.relu(output)
    return output
def conv_relu_pool_layer(in_data, nb_filters, filter_shape, pool_shape,
                         pooling=tf.nn.max_pool):
    conv_out = conv_relu_layer(in_data, nb_filters, filter_shape)
    ksize = [1, pool_shape[0], pool_shape[1], 1]
    strides = [1, pool_shape[0], pool_shape[1], 1]
    return pooling(conv_out, ksize=ksize, strides=strides, padding="SAME")
Here is my network:
def create_network_5C(in_data, name="5C"):
    c1 = conv_relu_pool_layer(in_data, 64, [5, 5], [2, 2])
    c2 = conv_relu_pool_layer(c1, 128, [5, 5], [2, 2])
    c3 = conv_relu_pool_layer(c2, 256, [5, 5], [2, 2])
    c4 = conv_relu_pool_layer(c3, 64, [5, 5], [2, 2])
    return conv_relu_layer(c4, 2, [5, 5])
The loss function:
def loss(logits, labels, num_classes):
    with tf.name_scope('loss'):
        logits = tf.reshape(logits, (-1, num_classes))
        epsilon = tf.constant(value=1e-4)
        labels = tf.to_float(tf.reshape(labels, (-1, num_classes)))
        softmax = tf.nn.softmax(logits) + epsilon
        cross_entropy = -tf.reduce_sum(labels * tf.log(softmax),
                                       reduction_indices=[1])
        cross_entropy_mean = tf.reduce_mean(cross_entropy)
        tf.add_to_collection('losses', cross_entropy_mean)
        loss = tf.add_n(tf.get_collection('losses'))
    return loss
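For reference, an equivalent formulation that I believe is numerically more stable (just a sketch, not the code whose behaviour I describe above) folds the softmax and the log into a single op:

# Sketch: compute the cross-entropy directly from the raw logits, so the
# log-of-softmax is handled in a numerically stable way by TensorFlow.
def loss_from_logits(logits, labels, num_classes):
    with tf.name_scope('loss'):
        logits = tf.reshape(logits, (-1, num_classes))
        labels = tf.to_float(tf.reshape(labels, (-1, num_classes)))
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=labels, logits=logits)
        return tf.reduce_mean(cross_entropy)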
My main routine:
batch_size = 5

# Load data
x = tf.placeholder(tf.float32, [None, 416, 416, 3], name="x")
y = tf.placeholder(tf.float32, [None, 416, 416, 1], name="y")

# Contrast normalization and computation
x_gcn = tf.map_fn(lambda img: tf.image.per_image_standardization(img), x)
logits = create_network_5C(x_gcn)

# Bring the labels to the same dimensions as the output
y_p = tf.nn.avg_pool(tf.sign(y),
                     ksize=[1, 16, 16, 1], strides=[1, 16, 16, 1], padding="SAME")
y_rshp = tf.reshape(y_p, [batch_size, 416 // 16, 416 // 16])
y_bin = tf.cast(y_rshp > .5, tf.int32)
y_1hot = tf.one_hot(y_bin, 2)

# Compute the error
error = loss(logits, y_1hot, 2)
optimizer = tf.train.AdamOptimizer(learning_rate=args.eta).minimize(error)

# Run the session
init_op = tf.global_variables_initializer()
with tf.Session() as session:
    session.run(init_op)
    err, _ = session.run([error, optimizer],
                         feed_dict={x: image_batch,
                                    y: label_batch})
I note that if I reduce my network to only 2 layers, the logits don't drop to 0, but it doesn't learn anything either. If I reduce it to 3 layers, they still drop to 0, but only after many iterations (whereas with 5 layers they drop to 0 within a few batches).
Could this be linked to what is called "vanishing gradients"?
If it's relevant, my setup is: Ubuntu 16.04 - Python 3.6.4 - TensorFlow 1.6.0.
[EDIT] My problem really looks like dying ReLUs, as mentioned here: StackOverflow: FCN training error, but my data is normalized (roughly between -2 and +2), and I have already tried changing the mean and stddev used to initialize my weights and biases.
[EDIT 2] I tried replacing the ReLUs with leaky ReLU or softplus; in both cases the logits get stuck under 0.1 and the loss stays between 0.6 and 0.7.
Using leaky ReLU was actually enough; I then just needed to let it train for a huge amount of time.
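For completeness, a minimal sketch of the substitution that worked (the leakiness factor of 0.1 is an arbitrary choice of mine, and the initializers mirror the ones above):

# Sketch: same conv layer as conv_relu_layer, but with a leaky ReLU so units
# keep a small gradient when their pre-activation is negative.
def conv_leaky_relu_layer(in_data, nb_filters, filter_shape, alpha=0.1):
    nb_in_channels = int(in_data.shape[3])
    conv_shape = [filter_shape[0], filter_shape[1], nb_in_channels, nb_filters]
    weights = tf.Variable(tf.truncated_normal(conv_shape, mean=0., stddev=.05))
    bias = tf.Variable(tf.truncated_normal([nb_filters], mean=0., stddev=1.))
    output = tf.nn.conv2d(in_data, weights, [1, 1, 1, 1], padding="SAME") + bias
    return tf.nn.leaky_relu(output, alpha=alpha)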
I'm trying to create a 2-layer LSTM (incl. dropout) but get an error message that 'inputs must be a sequence'.
I use embeddings as the input and I'm not sure how to change these to be a sequence. Any explanations are greatly appreciated.
This is my graph definition:
with tf.name_scope('Placeholders'):
    input_x = tf.placeholder(tf.int32, [None, n_steps], name='input_x')
    input_y = tf.placeholder(tf.float32, [None, n_classes], name='input_y')
    dropout_keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')

with tf.name_scope('Embedding_layer'):
    embeddings_var = tf.Variable(tf.random_uniform([vocab_size, EMBEDDING_DIM], -1.0, 1.0), trainable=True)
    embedded_chars = tf.nn.embedding_lookup(embeddings_var, input_x)
    print(embedded_chars, 'embed')

def get_a_cell(lstm_size, keep_prob):
    lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size)
    drop = tf.nn.rnn_cell.DropoutWrapper(lstm, output_keep_prob=dropout_keep_prob)
    return drop

with tf.name_scope('lstm'):
    cell = tf.nn.rnn_cell.MultiRNNCell(
        [get_a_cell(num_hidden, dropout_keep_prob) for _ in range(num_layers)]
    )
    lstm_outputs, state = tf.nn.static_rnn(cell=cell, inputs=embedded_chars, dtype=tf.float32)

with tf.name_scope('Fully_connected'):
    W = tf.Variable(tf.truncated_normal([num_hidden, n_classes], stddev=0.1))
    b = tf.Variable(tf.constant(0.1, shape=n_classes))
    output = tf.nn.xw_plus_b(lstm_outputs, W, b)
    predictions = tf.argmax(output, 1, name='predictions')

with tf.name_scope('Loss'):
    # Cross-entropy loss and optimizer initialization
    loss1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=output, labels=input_y))
    global_step = tf.Variable(0, name="global_step", trainable=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss1, global_step=global_step)

with tf.name_scope('Accuracy'):
    # Accuracy metrics
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(tf.nn.softmax(output)), input_y), tf.float32))

with tf.name_scope('num_correct'):
    correct_predictions = tf.equal(predictions, tf.argmax(input_y, 1))
    num_correct = tf.reduce_sum(tf.cast(correct_predictions, 'float'), name='num_correct')
EDIT:
When changing static_rnn to dynamic_rnn, the error message changes to the following, failing on the bias (b) variable:
TypeError: 'int' object is not iterable
After I changed the bias term to this:
b = tf.Variable(tf.random_normal([n_classes]))
I get a new error message:
ValueError: Shape must be rank 2 but is rank 3 for 'Fully_connected/xw_plus_b/MatMul' (op: 'MatMul') with input shapes: [?,27,128], [128,6].
Assuming you use tf.nn.dynamic_rnn (in the case of tf.nn.static_rnn, the first problem arises because you don't give the input in the right format: tf.nn.static_rnn expects a sequence of tensors, i.e. a Python list of seq_len tensors, each of shape [batch_size x dim], and not a single tensor of shape [batch_size x seq_len x dim], whereas tf.nn.dynamic_rnn handles such a tensor directly as its input).
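If you did want to keep tf.nn.static_rnn, a minimal sketch of that conversion (my illustration, reusing the names from the question) would be:

# Sketch: turn the [batch_size, seq_len, dim] embedding tensor into a list of
# seq_len tensors of shape [batch_size, dim], which static_rnn expects.
inputs_as_sequence = tf.unstack(embedded_chars, axis=1)
lstm_outputs, state = tf.nn.static_rnn(cell=cell, inputs=inputs_as_sequence,
                                       dtype=tf.float32)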
I invite you to read the documentation of tf.nn.dynamic_rnn: for your classification problem you probably don't want to use lstm_outputs but state, which basically contains the last output of your RNN. lstm_outputs contains the outputs of all time steps, whereas here you are only interested in the last one (unless you want to do something like attention for classification, in which case you need all the outputs).
To get the last output you basically need to do this:
lstm_outputs, state = tf.nn.dynamic_rnn(cell=cell,inputs=embedded_chars, dtype=tf.float32)
last_output = state[-1].h
state[-1] takes the state of the last (topmost) cell, and its h field contains the last output; you then pass last_output to your feed-forward network.
Full code
(working, but it computes the wrong accuracy; see the comments)
n_classes = 6
n_steps = 27
num_hidden = 128
dropout_keep_prob = 0.5
vocab_size = 10000
EMBEDDING_DIM = 300
num_layers = 2

with tf.name_scope('Placeholders'):
    input_x = tf.placeholder(tf.int32, [None, n_steps], name='input_x')
    input_y = tf.placeholder(tf.float32, [None, n_classes], name='input_y')
    dropout_keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')

with tf.name_scope('Embedding_layer'):
    embeddings_var = tf.Variable(tf.random_uniform([vocab_size, EMBEDDING_DIM], -1.0, 1.0), trainable=True)
    embedded_chars = tf.nn.embedding_lookup(embeddings_var, input_x)
    print(embedded_chars, 'embed')

def get_a_cell(lstm_size, keep_prob):
    lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size)
    drop = tf.nn.rnn_cell.DropoutWrapper(lstm, output_keep_prob=dropout_keep_prob)
    return drop

with tf.name_scope('lstm'):
    cell = tf.nn.rnn_cell.MultiRNNCell(
        [get_a_cell(num_hidden, dropout_keep_prob) for _ in range(num_layers)]
    )
    lstm_outputs, state = tf.nn.dynamic_rnn(cell=cell, inputs=embedded_chars, dtype=tf.float32)
    last_output = state[-1].h

with tf.name_scope('Fully_connected'):
    W = tf.Variable(tf.truncated_normal([num_hidden, n_classes], stddev=0.1))
    b = tf.Variable(tf.constant(0.1, shape=[n_classes]))
    output = tf.nn.xw_plus_b(last_output, W, b)
    predictions = tf.argmax(output, 1, name='predictions')

with tf.name_scope('Loss'):
    # Cross-entropy loss and optimizer initialization
    loss1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=output, labels=input_y))
    global_step = tf.Variable(0, name="global_step", trainable=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss1, global_step=global_step)

with tf.name_scope('Accuracy'):
    # Accuracy metrics
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(tf.nn.softmax(output)), input_y), tf.float32))

with tf.name_scope('num_correct'):
    correct_predictions = tf.equal(predictions, tf.argmax(input_y, 1))
    num_correct = tf.reduce_sum(tf.cast(correct_predictions, 'float'), name='num_correct')
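Regarding the "wrong accuracy" caveat above, a hedged side note: comparing tf.round(softmax) with the one-hot labels scores every class position separately, so a more conventional single-label accuracy could simply reuse the correct_predictions tensor that is already defined, e.g.:

# Sketch: accuracy as the fraction of examples whose argmax prediction
# matches the argmax of the one-hot label.
accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32), name='accuracy')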
I've been trying to get a CNN working for the Omniglot dataset (105 x 105 x 1 images) via two tutorials I found: CNN tutorial 1 and CNN tutorial 2, both working on the usual MNIST dataset (28 x 28 x 1 images).
I'm still struggling with a shape conflict in the implementation (after a week of on-and-off debugging). No one has been able to help so far, but in the meantime I have debugged far enough that I think I can give a better description of the shape error.
The majority of my code is as follows (just skipping a few irrelevant bits here and there). My placeholders are defined like this:
x = tf.placeholder(tf.float32, shape=(None, 105, 105, 1) ) # placeholder for train data
y = tf.placeholder(tf.float32, shape=(None, 20) ) # placeholder for labels
lr = tf.placeholder(tf.float32,shape=(), name="learnRate") # for varying learning rates during training
The label dimension is 20 because there are 20 different characters per alphabet, so each label is a one-hot-encoded vector of length 20.
From here on my model is defined as follows (where I commented the resulting dimensions of each output):
# weight and bias dimension definitions
self.ConFltSize = 3
self.ConOutSize = 7
self.weights = {
    'wc1': tf.Variable(tf.random_normal([self.ConFltSize, self.ConFltSize, 1, 32], stddev=0.01, name='W0')),
    'wc2': tf.Variable(tf.random_normal([self.ConFltSize, self.ConFltSize, 32, 64], stddev=0.01, name='W1')),
    'wc3': tf.Variable(tf.random_normal([self.ConFltSize, self.ConFltSize, 64, 128], stddev=0.01, name='W2')),
    'wd1': tf.Variable(tf.random_normal([self.ConOutSize * self.ConOutSize * 128, 128], stddev=0.01, name='W3')),
    'out': tf.Variable(tf.random_normal([128, self.InLabels.shape[1]], stddev=0.01, name='W4')),
}
self.biases = {
    'bc1': tf.Variable(tf.random_normal([32], stddev=0.01, name='B0')),
    'bc2': tf.Variable(tf.random_normal([64], stddev=0.01, name='B1')),
    'bc3': tf.Variable(tf.random_normal([128], stddev=0.01, name='B2')),
    'bd1': tf.Variable(tf.random_normal([128], stddev=0.01, name='B3')),
    'out': tf.Variable(tf.random_normal([self.InLabels.shape[1]], stddev=0.01, name='B4')),
}

# Model definition + shaping results
#   x       = provide the input data
#   weights = dictionary variables for weights
#   biases  = dictionary variables for biases
def Architecture(self, x, weights, biases):
    conv1 = self.conv(x, weights['wc1'], biases['bc1'])        # convolution layer 1
    conv1 = self.maxPool(conv1)                                # max pool layer 1
    # out shape -> [None, 53, 53, 32]
    conv2 = self.conv(conv1, weights['wc2'], biases['bc2'])    # convolution layer 2
    conv2 = self.maxPool(conv2)                                # max pool layer 2
    # out shape -> [None, 27, 27, 64]
    conv3 = self.conv(conv2, weights['wc3'], biases['bc3'])    # convolution layer 3
    conv3 = self.maxPool(conv3)                                # max pool layer 3
    # out shape -> [None, 14, 14, 128]
    flayer = tf.reshape(conv3, [-1, weights['wd1'].shape[0]])  # flatten the output from the conv layers
    # for 7 x 7 x 128 this is -> [None, 6272]
    flayer = tf.add(tf.matmul(flayer, weights['wd1']), biases['bd1'])  # fully connected layer 1
    flayer = tf.nn.relu(flayer)
    # out shape -> [None, 128]
    out = tf.add(tf.matmul(flayer, weights['out']), biases['out'])  # last set of output weights * vals + bias
    # out shape -> [None, 20]
    return out  # net input to output layer
Now, in my main program, I feed input data to my model in batches, basically with:
out = self.Architecture(x, self.weights, self.biases)  # Implement network architecture, and get output tensor (net input to output layer)

# normalize, softmax and entropy the net input, in comparison with provided labels
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=out, labels=y))  # cost function
optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr).minimize(cost)  # gradient descent optimizer
pred = tf.equal(tf.argmax(out, 1), tf.argmax(y, 1))  # output true / false if predicted value matches label
accuracy = tf.reduce_mean(tf.cast(pred, tf.float32))  # percentage value of correct predictions

for i in range(iters):
    [BX, _, BY, _] = batch.split(trainX, trainY, Bsize)  # random split in batch size
    # data shapes: BX -> [160, 105, 105, 1], BY -> [160, 20]

    # Code bombs out after feeding with input data
    opt = sess.run(optimizer, feed_dict={lr: learnr, x: BX, y: BY})
The exception I then get from the sess.run command is:
'logits and labels must be broadcastable: logits_size=[640,20] labels_size=[160,20]\n\t [[Node: softmax_cross_entropy_with_logits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](Add_1, softmax_cross_entropy_with_logits/Reshape_1)]]'
From this I interpret that the softmax is getting logits of shape [640, 20] while the labels have shape [160, 20]... I do not understand how or where the data could end up shaped as [640, 20].
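One way I am trying to narrow it down (purely a diagnostic sketch, reusing the names from Architecture above, placed inside it right after the tf.reshape) is to print the static shapes on either side of the flatten:

# Diagnostic sketch: conv3 should be [None, 14, 14, 128]; since
# 14 * 14 * 128 = 25088 = 4 * 6272, reshaping to [-1, 6272] would let the
# -1 absorb an extra factor of 4 into the batch dimension.
print(conv3.get_shape().as_list())
print(flayer.get_shape().as_list())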
Please show me if I'm missing something or misinterpreting the error.
Hi, I'm new to neural networks and I'm currently working with TensorFlow.
First I did the MNIST tutorial, which worked quite well. Now I want to go deeper by building my own network for CIFAR-10 in Google Colab. For this purpose I wrote the following code:
def conv2d(input, size, inputDim, outputCount):
    with tf.variable_scope("conv2d"):
        ## -> This area causes problems <- ##
        ########## variant1
        weight = tf.Variable(tf.truncated_normal([size, size, inputDim, outputCount], stddev=0.1), name="weight")
        bias = tf.Variable(tf.constant(0.1, shape=[outputCount]), name="bias")
        ########## variant2
        weight = tf.get_variable("weight", tf.truncated_normal([size, size, inputDim, outputCount], stddev=0.1))
        bias = tf.get_variable("bias", tf.constant(0.1, shape=[outputCount]))
        ##################
        conv = tf.nn.relu(tf.nn.conv2d(input, weight, strides=[1, 1, 1, 1], padding='SAME') + bias)
        return conv
def maxPool(conv2d):....
def fullyConnect(input, inputSize, outputCount, relu):
    with tf.variable_scope("fullyConnect"):
        ## -> This area causes problems <- ##
        ########## variant1
        weight = tf.Variable(tf.truncated_normal([inputSize, outputCount], stddev=0.1), name="weight")
        bias = tf.Variable(tf.constant(0.1, shape=[outputCount]), name="bias")
        ########## variant2
        weight = tf.get_variable("weight", tf.truncated_normal([inputSize, outputCount], stddev=0.1))
        bias = tf.get_variable("bias", tf.constant(0.1, shape=[outputCount]))
        ##################
        fullyIn = tf.reshape(input, [-1, inputSize])
        fullyCon = fullyIn
        if relu:
            fullyCon = tf.nn.relu(tf.matmul(fullyIn, weight) + bias)
        return fullyCon
# Model definition
def getVGG16A(grafic, width, height, dim):
    with tf.name_scope("VGG16A"):
        img = tf.reshape(grafic, [-1, width, height, dim])

        with tf.name_scope("Layer1"):
            with tf.variable_scope("Layer1"):
                with tf.variable_scope("conv1"):
                    l1_c = conv2d(img, 3, dim, 64)
                with tf.variable_scope("mp1"):
                    l1_mp = maxPool(l1_c)  # 32 > 16

        with tf.name_scope("Layer2"):
            with tf.variable_scope("Layer2"):
                with tf.variable_scope("conv1"):
                    l2_c = conv2d(l1_mp, 3, 64, 128)
                with tf.variable_scope("mp1"):
                    l2_mp = maxPool(l2_c)  # 16 > 8

        with tf.name_scope("Layer6"):
            with tf.variable_scope("Layer6"):
                with tf.variable_scope("fully1"):
                    L6_fc1 = fullyConnect(l2_mp, 8 * 8 * 128, 1024, True)
                with tf.variable_scope("fully2"):
                    L6_fc2 = fullyConnect(L6_fc1, 1024, 1024, True)

                keep_prob = tf.placeholder(tf.float32)
                drop = tf.nn.dropout(L6_fc2, keep_prob)

                with tf.variable_scope("fully3"):
                    L6_fc3 = fullyConnect(drop, 1024, 3, False)

    return L6_fc3, keep_prob
x = tf.placeholder(tf.float32, [None, 3072])  # input
y_ = tf.placeholder(tf.float32, [None, 3])    # output

# Build the graph for the deep net
y_conv, keep_prob = getVGG16A(x, 32, 32, 3)  # create Model

cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
train_step = tf.train.AdamOptimizer(1e-3).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for batch in getBatchData(prep_filter_dataBatch1, 2):  # a self-written method for custom batch return
        train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.8})
    print('test accuracy %g' % accuracy.eval(feed_dict={
        x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))
For the definition of the TensorFlow variables I first used variant1 (tf.Variable).
This caused the graphics memory to overflow after repeated execution.
Then I used variant2 (tf.get_variable). If I have understood the documentation correctly, this should reuse already existing variables if they exist.
But as soon as I do this I get the following error message:
TypeError: Tensor objects are not iterable when eager execution is not enabled. To iterate over this tensor use tf.map_fn.
I've been looking the whole day, but I haven't found an explanation for this.
Now I hope that there is someone here who can explain to me why this is not possible, or where I can find further information. The error message is getting me nowhere. I don't just want a solution; I want and need to understand this, because I plan to write my bachelor thesis in the field of CNNs.
Why can I use tf.Variable but not tf.get_variable, which should do the same?
Thanks for the help,
best regards, Pascal :)
I found my mistake: I forgot the keyword argument initializer.
The correct line looks like this:
weight = tf.get_variable("weight",initializer=tf.truncated_normal([size, size, inputDim, outputCount], stddev=anpassung))
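For completeness, a minimal sketch of both variables with this fix (the stddev and constant values mirror the ones used in the question; variable reuse is still governed by the enclosing variable scopes):

# Sketch: tf.get_variable takes an initializer keyword argument
# (a Tensor or an initializer object), not a positional Tensor.
weight = tf.get_variable(
    "weight",
    initializer=tf.truncated_normal([size, size, inputDim, outputCount], stddev=0.1))
bias = tf.get_variable(
    "bias",
    initializer=tf.constant(0.1, shape=[outputCount]))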
I am interested in sequence tagging for NER. I followed the code at "https://github.com/monikkinom/ner-lstm/blob/master/model.py" to build my model, as shown below:
X = tf.placeholder(tf.float32, shape=[None, timesteps , num_input])
Y = tf.placeholder("float", [None, timesteps, num_classes])
y_true = tf.reshape(tf.stack(Y), [-1, num_classes])
The inputs are:
X: (batch_size, max_sent_length, word_embed_dim)
Y: (batch_size, max_sent_length, number_of_labels)
Then I pass the values to a bidirectional LSTM unit:
def BiRNN(x):
    x = tf.unstack(tf.transpose(x, perm=[1, 0, 2]))

    def rnn_cell():
        cell = tf.nn.rnn_cell.LSTMCell(rnn_size, forget_bias=1, state_is_tuple=True)
        return cell

    fw_cell = rnn_cell()
    bw_cell = rnn_cell()
    output, _, _ = tf.nn.static_bidirectional_rnn(fw_cell, bw_cell, x, dtype=tf.float32)
    weight, bias = weight_and_bias(2 * rnn_size, num_classes)
    output = tf.reshape(tf.transpose(tf.stack(output), perm=[1, 0, 2]), [-1, 2 * rnn_size])
    return tf.matmul(output, weight) + bias
where rnn_size = 128.
Then I do the following calculations:
logits = BiRNN(X)
logits = tf.reshape(tf.stack(logits), [-1, timesteps,num_classes])
prediction = tf.reshape(logits, [-1, num_classes])
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y_true))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
train_op = optimizer.minimize(cost)
I used batch_size = 64 and trained for 30 epochs.
But my model predicts only one label every time. I am not able to pinpoint the problem in my code. Please help.
Please check the dimensions of the tensors y_true, output (in both places), logits and prediction, and verify whether they match your expectations.
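For example, a minimal way to do that check (a sketch, using the names from the question's code) is to print the static shapes right after each tensor is built:

# Sketch: y_true, logits (after its reshape) and prediction should all end in
# num_classes; the leading dimension is batch_size * timesteps
# (shown statically as None because the batch size is unknown).
for name, t in [('y_true', y_true), ('logits', logits), ('prediction', prediction)]:
    print(name, t.get_shape().as_list())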
I'm quite new to TensorFlow and I can't quite work out how to shape my tensors so that the output is a single number. Basically, my recurrent network should guess the next number; instead, each prediction returns a list of five numbers. I guess one or more of my tensors are misshaped.
My input data is formatted as roughly 2000 lists with 5 features each, like this:
[
    np.array([
        [1], [2], [3], [4], [5]
    ])
]
This is the code for the RNN:
cell_units = 400
batch_size = 5
no_of_epochs = 500

data = tf.placeholder(tf.float32, [None, 5, 1])
target = tf.placeholder(tf.float32, [None, 1, 1])

weight = tf.Variable(tf.random_normal([cell_units, 5, 1]))
bias = tf.Variable(tf.random_normal([1, 1]))

cell = tf.contrib.rnn.BasicRNNCell(num_units=cell_units)
output, states = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)
output = tf.transpose(output, [1, 0, 2])

activation = tf.matmul(output, weight) + bias

cost = tf.reduce_mean(
    tf.log(tf.square(activation - target))
)
optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(cost)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    no_of_batches = int(len(train_x) / batch_size)
    for i in range(no_of_epochs):
        start = 0
        for j in range(no_of_batches):
            inp = train_x[start:start + batch_size]
            out = train_y[start:start + batch_size]
            start += batch_size
            sess.run(optimizer, {data: inp, target: out})
tf.nn.dynamic_rnn expects inputs of shape [batch_size, max_time, ...]. In your example batch_size is dynamic (i.e., unknown) and max_time is 5 (i.e., the number of time steps). Naturally, the RNN's output contains 5 entries, one per input step: [None, 5, cell_units].
As @Ishant Mrinal suggested, you can select the last output step.
weight = tf.Variable(tf.random_normal([cell_units, 1]))
bias = tf.Variable(tf.random_normal([1, 1]))

cell = tf.contrib.rnn.BasicRNNCell(num_units=cell_units)
output, states = tf.nn.dynamic_rnn(cell, data, dtype=tf.float32)

# Get the last step (4th index).
output = tf.squeeze(tf.transpose(output, [0, 2, 1])[:, :, 4])  # Shape of [batch_size, cell_units].
activation = tf.matmul(output, weight) + bias
activation now has shape [batch_size, 1].
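One follow-up detail worth flagging (my addition, not part of the original answer): with activation now of shape [batch_size, 1], the target placeholder can drop its extra time dimension so the subtraction in the cost doesn't broadcast, e.g.:

# Sketch: target shaped [batch_size, 1] to match activation; a plain MSE
# is used here purely for illustration.
target = tf.placeholder(tf.float32, [None, 1])
cost = tf.reduce_mean(tf.square(activation - target))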