I am using two tutorials to figure out how to take a CVS file of format:
feature1,feature2....feature20,label
feature1,feature2....feature20,label
...
and train a neural network on it. What I do in the code below is read in the CVS file and group 100 lines at a time into batches: x_batch and y_batch. Next, i try to have the NN learn in batches. However, I get the following error:
"ValueError: Cannot feed value of shape (99,) for Tensor 'Placeholder_1:0', which has shape '(?, 4)'"
I am wondering what i am doing wrong and what another approach might be.
import tensorflow as tf
filename_queue = tf.train.string_input_producer(["VOL_TRAIN.csv"])
line_reader = tf.TextLineReader(skip_header_lines=1)
_, csv_row = line_reader.read(filename_queue)
# Type information and column names based on the decoded CSV.
[[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[""]]
record_defaults = [[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0]]
in1,in2,in3,in4,in5,in6,in7,in8,in9,in10,in11,in12,in13,in14,in15,in16,in17,in18,in19,in20,out = \
tf.decode_csv(csv_row, record_defaults=record_defaults)
# Turn the features back into a tensor.
features = tf.pack([in1,in2,in3,in4,in5,in6,in7,in8,in9,in10,in11,in12,in13,in14,in15,in16,in17,in18,in19,in20])
# Parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100
display_step = 1
num_examples= 33500
# Network Parameters
n_hidden_1 = 256 # 1st layer number of features
n_hidden_2 = 256 # 2nd layer number of features
n_input = 20 # MNIST data input (img shape: 28*28)
n_classes = 4 # MNIST total classes (0-9 digits)
# tf Graph input
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_classes])
# Create model
def multilayer_perceptron(x, weights, biases):
# Hidden layer with RELU activation
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
layer_1 = tf.nn.relu(layer_1)
# Hidden layer with RELU activation
layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
layer_2 = tf.nn.relu(layer_2)
# Output layer with linear activation
out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
return out_layer
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1])),
'b2': tf.Variable(tf.random_normal([n_hidden_2])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
# Construct model
pred = multilayer_perceptron(x, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
with tf.Session() as sess:
#tf.initialize_all_variables().run()
sess.run(init)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
for epoch in range(training_epochs):
avg_cost = 0.
total_batch = int(num_examples/batch_size)
# Loop over all batches
for i in range(total_batch):
batch_x = []
batch_y = []
for iteration in range(1, batch_size):
example, label = sess.run([features, out])
batch_x.append(example)
batch_y.append(label)
# Run optimization op (backprop) and cost op (to get loss value)
_, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
y: batch_y})
# Compute average loss
avg_cost += c / total_batch
# Display logs per epoch step
if epoch % display_step == 0:
print ("Epoch:", '%04d' % (epoch+1), "cost=", \
"{:.9f}".format(avg_cost))
print ("Optimization Finished!")
coord.request_stop()
coord.join(threads)
Your placeholder y specifies you input an array of unknown length, with arrays of length "n_classes" (which is 4). In your feed_dict you give the array batch_y, which is an array of length 99 (your batch_size) with numbers.
What you want to do is change your batch_y variable to have one-hot vectors as input. Please let me know if this works!
Related
I'm trying to approximate noisy data from the sin(2x) function using a multilayer perceptron:
# Get data
datasets = gen_datasets()
# Add noise
datasets["ysin_train"] = add_noise(datasets["ysin_train"])
datasets["ysin_test"] = add_noise(datasets["ysin_test"])
# Extract wanted data
patterns_train = datasets["x_train"]
targets_train = datasets["ysin_train"]
patterns_test = datasets["x_test"]
targets_test = datasets["ysin_test"]
# Reshape to fit model
patterns_train = patterns_train.reshape(62, 1)
targets_train = targets_train.reshape(62, 1)
patterns_test = patterns_test.reshape(62, 1)
targets_test = targets_test.reshape(62, 1)
# Parameters
learning_rate = 0.001
training_epochs = 10000
batch_size = patterns_train.shape[0]
display_step = 1
# Network Parameters
n_hidden_1 = 2
n_hidden_2 = 2
n_input = 1
n_classes = 1
# tf Graph input
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1])),
'b2': tf.Variable(tf.random_normal([n_hidden_2])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
# Create model
def multilayer_perceptron(x):
# Hidden fully connected layer with 2 neurons
layer_1 = tf.sigmoid(tf.add(tf.matmul(x, weights['h1']), biases['b1']))
# Hidden fully connected layer with 2 neurons
layer_2 = tf.sigmoid(tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']))
# Output fully connected layer
out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
return out_layer
# Construct model
logits = multilayer_perceptron(X)
# Define loss and optimizer
loss_op = tf.reduce_mean(tf.losses.absolute_difference(labels = Y, predictions = logits, reduction=tf.losses.Reduction.NONE))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
# Initializing the variables
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
# Training Cycle
for epoch in range(training_epochs):
_ = sess.run(train_op, feed_dict={X: patterns_train,
Y: targets_train})
c = sess.run(loss_op, feed_dict={X: patterns_test,
Y: targets_test})
if epoch % display_step == 0:
print("Epoch: {0: 4} cost={1:9}".format(epoch+1, c))
print("Optimization finished!")
outputs = sess.run(logits, feed_dict={X: patterns_test})
print("outputs: {0}".format(outputs.T))
plt.plot(patterns_test, outputs, "r.", label="outputs")
plt.plot(patterns_test, targets_test, "b.", label="targets")
plt.legend()
plt.show()
When I plot this at the end, I get a straight line, as if I have a linear network. Take a look at the plot:
This is a correct minimization of the error for a linear network. But I shouldn't have a linear betwork because I'm using the sigmoid function in my multilayer_perceptron() function! Why is my network behaving like this?
The default value of stddev=1.0 in tf.random_normal, which you use for weight & bias initialization, is huge. Try an explicit value of stddev=0.01 for the weights; as for the biases, common practice is to initialize them to zero.
As an initial approach, I would also try a higher learning_rate of 0.01 (or maybe not - see answer in a related question here)
I wrote a convolutional neural network in tensorflow to perform on the mnist dataset. Everything works just fine, but i want to save the model with the tf.train.Saver(). How am i gonna do it?
Here is my code:
from __future__ import print_function
import tensorflow as tf
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
# Parameters
learning_rate = 0.001
training_iters = 200000
batch_size = 128
display_step = 10
# Network Parameters
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
dropout = 0.75 # Dropout, probability to keep units
# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
# Conv2D wrapper, with bias and relu activation
x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
x = tf.nn.bias_add(x, b)
return tf.nn.relu(x)
def maxpool2d(x, k=2):
# MaxPool2D wrapper
return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1],
padding='SAME')
# Create model
def conv_net(x, weights, biases, dropout):
# Reshape input picture
x = tf.reshape(x, shape=[-1, 28, 28, 1])
# Convolution Layer
conv1 = conv2d(x, weights['wc1'], biases['bc1'])
# Max Pooling (down-sampling)
conv1 = maxpool2d(conv1, k=2)
# Convolution Layer
conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
# Max Pooling (down-sampling)
conv2 = maxpool2d(conv2, k=2)
# Fully connected layer
# Reshape conv2 output to fit fully connected layer input
fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]])
fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
fc1 = tf.nn.relu(fc1)
# Apply Dropout
fc1 = tf.nn.dropout(fc1, dropout)
# Output, class prediction
out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
return out
# Store layers weight & bias
weights = {
# 5x5 conv, 1 input, 32 outputs
'wc1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
# 5x5 conv, 32 inputs, 64 outputs
'wc2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
# fully connected, 7*7*64 inputs, 1024 outputs
'wd1': tf.Variable(tf.random_normal([7*7*64, 1024])),
# 1024 inputs, 10 outputs (class prediction)
'out': tf.Variable(tf.random_normal([1024, n_classes]))
}
biases = {
'bc1': tf.Variable(tf.random_normal([32])),
'bc2': tf.Variable(tf.random_normal([64])),
'bd1': tf.Variable(tf.random_normal([1024])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
# Construct model
pred = conv_net(x, weights, biases, keep_prob)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred,
labels=y))
optimizer =
tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 1
# Keep training until reach max iterations
while step * batch_size < training_iters:
batch_x, batch_y = mnist.train.next_batch(batch_size)
# Run optimization op (backprop)
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y,
keep_prob: dropout})
if step % display_step == 0:
# Calculate batch loss and accuracy
loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x,
y: batch_y,
keep_prob: 1.})
print("Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
"{:.6f}".format(loss) + ", Training Accuracy= " + \
"{:.5f}".format(acc))
step += 1
print("Optimization Finished!")
# Calculate accuracy for 256 mnist test images
print("Testing Accuracy:", \
sess.run(accuracy, feed_dict={x: mnist.test.images[:256],
y: mnist.test.labels[:256],
keep_prob: 1.}))
Simplest way to save and restore:
To save:
saver = tf.train.Saver(max_to_keep=1)
with tf.Session() as sess:
# train your model, then:
savePath = saver.save(sess, 'someDir/my_model.ckpt')
To restore:
with tf.Session() as sess:
saver = tf.train.import_meta_graph('someDir/my_model.ckpt.meta')
saver.restore(sess, pathModel + 'someDir/my_model.ckpt')
# access a variable from the saved Graph, and so on:
someVar = sess.run('varName:0')
This should do it
Consider saving results of Neural Network training to database.
The root of the idea is described here:
A neural network scoring engine in PL/SQL for recognizing handwritten digits
https://db-blog.web.cern.ch/blog/luca-canali/2016-07-neural-network-scoring-engine-plsql-recognizing-handwritten-digits
The code repository for this example is located here:
https://github.com/LucaCanali/Miscellaneous/tree/master/PLSQL_Neural_Network
By doing so it's possible to train the network once and then use database procedures to use it.
I have a dataframe of shape (38307, 26) with timestamp as index:
I'm trying to implement a LSTM classifier but I'm struggling to feed it into the DataFlow
The final arrays I'm trying to feed are of shape '(X_train = (38307, 25), y_train = (38307, 2))'
I have added the code in case
# Parametres
learning_rate = 0.001
training_epochs = 100
batch_size = 128
display_step = 10
# Network Parameters
n_input = 25 # features= 25
n_steps = 28 # timesteps
n_hidden = 128 # hidden layer num of features
n_classes = 2 # Binary classification
# TF Graph input
x = tf.placeholder("float32", [None, n_steps, n_input])
y = tf.placeholder("float32", [None, n_classes])
# TF Weights
weights = {
'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
}
biases = {
'out': tf.Variable(tf.random_normal([n_classes]))
}
pred = RNN(x, weights, biases)
# Initialize the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 1
# Keep training until reach max iterations
for epoch in range(training_epochs):
avg_cost = 0
total_batch = int(len(X_train)/batch_size)
X_batches = np.array_split(X_train, total_batch)
Y_batches = np.array_split(y_train, total_batch)
#Loop over all batches
for i in range(total_batch):
batch_x, batch_y = X_batches[i], Y_batches[i]
# batch_y.shape = (batch_y.shape[0]), 1)
# Run optimization op (backprop) and cost op(to get loss value)
_, c = sess.run([optimizer, cost], feed_dict={x: batch_x,
y: batch_y})
# Compute average loss
avg_cost += c / total_batch
#Display logs per epoch step
if epoch % display_step == 0:
print(("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost)))
print('Optimization finished')
# Store session for analysis with TensorBoard
writer = tf.summary.FileWriter("/tmp/test", sess.graph)
#Test model
print("Accuracy:", accuracy.eval({x: X_test, y: y_test}))
global result
result = tf.argmax(pred, 1).eval({x: X_test, y: y_test})
EDIT the RNN function:
def RNN(x, weights, biases):
# Prepare data shape to match 'rnn' function requirements
# Current data input shape: (batch_size, n_steps, n_input)
# Required Shape: 'n_steps' tensors list of shape (batch size, n_input)
# Permuting batch_size and n_steps
x = tf.transpose(x, [1, 0, 2])
# Reshaping to (n_steps*batch_size, n_input)
x = tf.reshape(x, [-1, n_input])
# Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.split(0, n_steps, x)
# x = tf.split(x, n_steps, 0) # Syntax change this version
# LSTM tensorflow using rnn from tensorflow.contrib
lstm_cell = rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0)
# Get LSTM cell output
outputs, states = rnn.rnn(lstm_cell, x, dtype=tf.float32)
# Linear activation, using rnn inner loop last output
return tf.matmul(outputs[-1], weights['out']) + biases['out']
Unfortunately, the most important part of your code, is hidden in the RNN function.
Some tips to help you out: I guess you are trying to build a dynamic RNN... (is that correct? ) In that case, a common mistake I see is that people confuse the time major and batch major setting of these RNNs. In other words, is you input data [batch,time,variables], or [time,batch,variables].
More about this can be found here: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.nn.dynamic_rnn.md
I am trying to train a single layer perceptron (basing my code on this) on the following data file in tensor flow:
1,1,0.05,-1.05
1,1,0.1,-1.1
....
where the last column is the label (function of 3 parameters) and the first three columns are the function argument. The code that reads the data and trains the model (I simplify it for readability):
import tensorflow as tf
... # some basics to read the data
example, label = read_file_format(filename_queue)
... # model construction and parameter setting
n_hidden_1 = 4 # 1st layer number of features
n_input = 3
n_output = 1
...
# calls a function which produces a prediction
pred = multilayer_perceptron(x, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
for epoch in range(training_epochs):
_, c = sess.run([optimizer, cost], feed_dict={x: example.reshape(1,3), y: label.reshape(-1,1)})
# Display logs per epoch step
if epoch % display_step == 0:
print("Epoch:", '%04d' % (epoch+1), "Cost:",c)
but when I run it, something seems to be very wrong:
('Epoch:', '0001', 'Cost:', nan)
('Epoch:', '0002', 'Cost:', nan)
....
('Epoch:', '0015', 'Cost:', nan)
This is the complete code for the multilaye_perceptron function, etc:
# Parameters
learning_rate = 0.001
training_epochs = 15
display_step = 1
# Network Parameters
n_hidden_1 = 4 # 1st layer number of features
n_input = 3
n_output = 1
# tf Graph input
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_output])
# Create model
def multilayer_perceptron(x, weights, biases):
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
layer_1 = tf.nn.relu(layer_1)
# Output layer with linear activation
out_layer = tf.matmul(layer_1, weights['out']) + biases['out']
return out_layer
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
'out': tf.Variable(tf.random_normal([n_hidden_1, n_output]))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1])),
'out': tf.Variable(tf.random_normal([n_output]))
}
Is this one example at a time? I would go batches and increase batch size to 128 or similar, as long as you are getting nans.
When I am getting nans it is usually either of the three:
- batch size too small (in your case then just 1)
- log(0) somewhere
- learning rate too high and uncapped gradients
I have an implementation of the AlexNet. I'm interested in extracting the vector of features of a trained model before the fully-connected classification layers
I want to first train the model (below I included the evaluation methods for training and testing).
How do I get a list of final output feature vectors (during the forward pass) for all the images in the training/test set before they get classified?
Here is the code (full version available https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3%20-%20Neural%20Networks/alexnet.py) :
weights = {
'wc1': tf.Variable(tf.random_normal([3, 3, 1, 64])),
'wc2': tf.Variable(tf.random_normal([3, 3, 64, 128])),
'wc3': tf.Variable(tf.random_normal([3, 3, 128, 256])),
'wd1': tf.Variable(tf.random_normal([4*4*256, 1024])),
'wd2': tf.Variable(tf.random_normal([1024, 1024])),
'out': tf.Variable(tf.random_normal([1024, 10]))
}
biases = {
'bc1': tf.Variable(tf.random_normal([64])),
'bc2': tf.Variable(tf.random_normal([128])),
'bc3': tf.Variable(tf.random_normal([256])),
'bd1': tf.Variable(tf.random_normal([1024])),
'bd2': tf.Variable(tf.random_normal([1024])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
def alex_net(_X, _weights, _biases, _dropout):
# Reshape input picture
_X = tf.reshape(_X, shape=[-1, 28, 28, 1])
# Convolution Layer
conv1 = conv2d('conv1', _X, _weights['wc1'], _biases['bc1'])
# Max Pooling (down-sampling)
pool1 = max_pool('pool1', conv1, k=2)
# Apply Normalization
norm1 = norm('norm1', pool1, lsize=4)
# Apply Dropout
norm1 = tf.nn.dropout(norm1, _dropout)
# Convolution Layer
conv2 = conv2d('conv2', norm1, _weights['wc2'], _biases['bc2'])
...
# right before feeding the fully connected, classification layers
# I'm interested in the vector after the weights
# are applied during the forward pass of a trained model.
dense1 = tf.reshape(norm3, [-1, _weights['wd1'].get_shape().as_list()[0]])
# Relu activation
dense1 = tf.nn.relu(tf.matmul(dense1, _weights['wd1']) + _biases['bd1'], name='fc1')
# Relu activation
dense2 = tf.nn.relu(tf.matmul(dense1, _weights['wd2']) + _biases['bd2'], name='fc2')
# Output, class prediction
out = tf.matmul(dense2, _weights['out']) + _biases['out']
return out
pred = alex_net(x, weights, biases, keep_prob)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 1
# Keep training until reach max iterations
summary_writer = tf.train.SummaryWriter('/tmp/tensorflow_logs', graph_def=sess.graph_def)
while step * batch_size < training_iters:
batch_xs, batch_ys = mnist.train.next_batch(batch_size)
# Fit training using batch data
sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys, keep_prob: dropout})
if step % display_step == 0:
# Calculate batch accuracy
acc = sess.run(accuracy, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.})
# Calculate batch loss
loss = sess.run(cost, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.})
print "Iter " + str(step*batch_size) + ", Minibatch Loss= " \
+ "{:.6f}".format(loss) + ", Training Accuracy= " + "{:.5f}".format(acc)
step += 1
print "Optimization Finished!"
# Calculate accuracy for 256 mnist test images
print "Testing Accuracy:", sess.run(accuracy, feed_dict={x: mnist.test.images[:256],
y: mnist.test.labels[:256],
keep_prob: 1.})
It sounds like you want the value of dense2 from alex_net()? If so, you will need to return that from alex_net() in addition to out, so
return out
becomes
return dense2, out
and
pred = alex_net(x, weights, biases, keep_prob)
becomes
before_classification_layer, pred = alex_net(...)
Then you can fetch before_classification_layer when calling sess.run() whenever you want that value. See tf.Session.run in https://www.tensorflow.org/versions/0.6.0/api_docs/python/client.html#Session.run. Note that the fetches may be a list, so to avoid evaluating your graph twice in your example code, you can do
# Calculate batch accuracy and loss
acc, loss = sess.run([accuracy, cost], feed_dict={...})
instead of
# Calculate batch accuracy
acc = sess.run(accuracy, feed_dict={...})
# Calculate batch loss
loss = sess.run(cost, feed_dict={...})
(Adding before_classification_layer to that list when desired.)