Cost function in classification gives NaN as a result - python

I am quite new to both Python and machine learning, and I am trying to build an ANN for a binary classification model that determines whether something is faulty or not. When I run the code below, the cost function comes out as NaN. To compute an accuracy percentage, I need the cost to be an actual number. Is there an obvious error in my code, or is there another way to write it that circumvents this problem? Any help would be much appreciated. Thanks.
import csv
import tensorflow as tf
import numpy as np
import pandas as pd
import urllib.request as request
import matplotlib.pyplot as plt
train_data = pd.read_csv("C:/Python35/train_data.csv", sep=',', header = None)
test_data = pd.read_csv("C:/Python35/test_data.csv", sep=',', header = None)
X_train = np.asarray(train_data)
X_test = np.asarray(test_data)
train_label = pd.read_csv("C:/Python35/train_label.csv", sep=',', header = None)
test_label = pd.read_csv("C:/Python35/test_label.csv", sep=',', header = None)
y_train = np.asarray(train_label)
y_test = np.asarray(test_label)
labels_train = (np.arange(2) == y_train[:,None]).astype(np.float32)
labels_test = (np.arange(2) == y_test[:,None]).astype(np.float32)
inputs = tf.placeholder(tf.float32, shape=(None, X_train.shape[1]), name='inputs')
label = tf.placeholder(tf.float32, shape=(None, 2), name='labels')
hid1_size = 128
w1 = tf.Variable(tf.random_normal([hid1_size, X_train.shape[1]], stddev=0.01), name='w1')
b1 = tf.Variable(tf.constant(0.1, shape=(hid1_size, 1)), name='b1')
y1 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(w1, tf.transpose(inputs)), b1)), keep_prob=0.5)
hid2_size = 256
w2 = tf.Variable(tf.random_normal([hid2_size, hid1_size], stddev=0.01), name='w2')
b2 = tf.Variable(tf.constant(0.1, shape=(hid2_size, 1)), name='b2')
y2 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(w2, y1), b2)), keep_prob=0.5)
wo = tf.Variable(tf.random_normal([2, hid2_size], stddev=0.01), name='wo')
bo = tf.Variable(tf.random_normal([2, 1]), name='bo')
yo = tf.transpose(tf.add(tf.matmul(wo, y2), bo))
lr = tf.placeholder(tf.float32, shape=(), name='learning_rate')
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=yo, labels=label))
optimizer = tf.train.GradientDescentOptimizer(lr).minimize(loss)
pred = tf.nn.softmax(yo)
pred_label = tf.argmax(pred, 1)
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(label, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
init = tf.global_variables_initializer()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.InteractiveSession(config=config)
sess.run(init)
for learning_rate in [0.05, 0.01]:
    for epoch in range(50):
        avg_cost = 0.0
        for i in range(X_train.shape[0]):
            _, c = sess.run([optimizer, loss],
                            feed_dict={lr: learning_rate,
                                       inputs: X_train[i, None],
                                       label: labels_train[i, None].reshape(-1,2)})
            avg_cost += c
        avg_cost /= X_train.shape[0]
        if epoch % 10 == 0:
            print("Epoch: {:3d} Train Cost: {:.4f}".format(epoch, avg_cost))
acc_train = accuracy.eval(feed_dict={inputs: X_train, label: labels_train.reshape(-1,2)})
print("Train accuracy: {:3.2f}%".format(acc_train*100.0))
acc_test = accuracy.eval(feed_dict={inputs: X_test, label: labels_test.reshape(-1,2)})
print("Test accuracy: {:3.2f}%".format(acc_test*100.0))

Related

Classification Module from Capsule Network Code

Referring to the Capsule Network Code, I am using just the classification module from it. The following is the complete classification code that I extracted from the link.
from __future__ import division, print_function, unicode_literals
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
tf.reset_default_graph()
np.random.seed(42)
tf.set_random_seed(42)
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")
X = tf.placeholder(shape=[None, 28, 28, 1], dtype=tf.float32, name="X")
caps1_n_maps = 32
caps1_n_caps = caps1_n_maps * 6 * 6 # 1152 primary capsules
caps1_n_dims = 8
conv1_params = {
    "filters": 256,
    "kernel_size": 9,
    "strides": 1,
    "padding": "valid",
    "activation": tf.nn.relu,
}
conv2_params = {
    "filters": caps1_n_maps * caps1_n_dims, # 256 convolutional filters
    "kernel_size": 9,
    "strides": 2,
    "padding": "valid",
    "activation": tf.nn.relu
}
conv1 = tf.layers.conv2d(X, name="conv1", **conv1_params)
conv2 = tf.layers.conv2d(conv1, name="conv2", **conv2_params)
caps1_raw = tf.reshape(conv2, [-1, caps1_n_caps, caps1_n_dims],name="caps1_raw")
def squash(s, axis=-1, epsilon=1e-7, name=None):
    with tf.name_scope(name, default_name="squash"):
        squared_norm = tf.reduce_sum(tf.square(s), axis=axis, keep_dims=True)
        safe_norm = tf.sqrt(squared_norm + epsilon)
        squash_factor = squared_norm / (1. + squared_norm)
        unit_vector = s / safe_norm
        return squash_factor * unit_vector
caps1_output = squash(caps1_raw, name="caps1_output")
caps2_n_caps = 10
caps2_n_dims = 16
init_sigma = 0.1
W_init = tf.random_normal(
    shape=(1, caps1_n_caps, caps2_n_caps, caps2_n_dims, caps1_n_dims),
    stddev=init_sigma, dtype=tf.float32, name="W_init")
W = tf.Variable(W_init, name="W")
batch_size = tf.shape(X)[0]
W_tiled = tf.tile(W, [batch_size, 1, 1, 1, 1], name="W_tiled")
caps1_output_expanded = tf.expand_dims(caps1_output, -1,
                                       name="caps1_output_expanded")
caps1_output_tile = tf.expand_dims(caps1_output_expanded, 2,
                                   name="caps1_output_tile")
caps1_output_tiled = tf.tile(caps1_output_tile, [1, 1, caps2_n_caps, 1, 1],
                             name="caps1_output_tiled")
caps2_predicted = tf.matmul(W_tiled, caps1_output_tiled,
                            name="caps2_predicted")
raw_weights = tf.zeros([batch_size, caps1_n_caps, caps2_n_caps, 1, 1],
                       dtype=np.float32, name="raw_weights")
# ROUND 1
routing_weights = tf.nn.softmax(raw_weights, dim=2, name="routing_weights")
weighted_predictions = tf.multiply(routing_weights, caps2_predicted,
                                   name="weighted_predictions")
weighted_sum = tf.reduce_sum(weighted_predictions, axis=1, keep_dims=True,
                             name="weighted_sum")
caps2_output_round_1 = squash(weighted_sum, axis=-2,
                              name="caps2_output_round_1")
# ROUND 2
caps2_output_round_1_tiled = tf.tile(
    caps2_output_round_1, [1, caps1_n_caps, 1, 1, 1],
    name="caps2_output_round_1_tiled")
agreement = tf.matmul(caps2_predicted, caps2_output_round_1_tiled,
                      transpose_a=True, name="agreement")
raw_weights_round_2 = tf.add(raw_weights, agreement,
                             name="raw_weights_round_2")
routing_weights_round_2 = tf.nn.softmax(raw_weights_round_2,
                                        dim=2,
                                        name="routing_weights_round_2")
weighted_predictions_round_2 = tf.multiply(routing_weights_round_2,
                                           caps2_predicted,
                                           name="weighted_predictions_round_2")
weighted_sum_round_2 = tf.reduce_sum(weighted_predictions_round_2,
                                     axis=1, keep_dims=True,
                                     name="weighted_sum_round_2")
caps2_output_round_2 = squash(weighted_sum_round_2,
                              axis=-2,
                              name="caps2_output_round_2")
caps2_output = caps2_output_round_2
#ESTIMATE CLASS PROBABILITIES
def safe_norm(s, axis=-1, epsilon=1e-7, keep_dims=False, name=None):
    with tf.name_scope(name, default_name="safe_norm"):
        squared_norm = tf.reduce_sum(tf.square(s), axis=axis,
                                     keep_dims=keep_dims)
        return tf.sqrt(squared_norm + epsilon)
y_proba = safe_norm(caps2_output, axis=-2, name="y_proba")
y_proba_argmax = tf.argmax(y_proba, axis=2, name="y_proba")
y_pred = tf.squeeze(y_proba_argmax, axis=[1,2], name="y_pred")
y = tf.placeholder(shape=[None], dtype=tf.int64, name="y")
m_plus = 0.9
m_minus = 0.1
lambda_ = 0.5
T = tf.one_hot(y, depth=caps2_n_caps, name="T")
caps2_output_norm = safe_norm(caps2_output, axis=-2, keep_dims=True,
                              name="caps2_output_norm")
present_error_raw = tf.square(tf.maximum(0., m_plus - caps2_output_norm),
                              name="present_error_raw")
present_error = tf.reshape(present_error_raw, shape=(-1, 10),
                           name="present_error")
absent_error_raw = tf.square(tf.maximum(0., caps2_output_norm - m_minus),
                             name="absent_error_raw")
absent_error = tf.reshape(absent_error_raw, shape=(-1, 10),
                          name="absent_error")
L = tf.add(T * present_error, lambda_ * (1.0 - T) * absent_error,
           name="L")
margin_loss = tf.reduce_mean(tf.reduce_sum(L, axis=1), name="margin_loss")
loss = tf.add(margin_loss, 0, name="loss")
correct = tf.equal(y, y_pred, name="correct")
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")
optimizer = tf.train.AdamOptimizer()
training_op = optimizer.minimize(loss, name="training_op")
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_epochs = 10
batch_size = 50
restore_checkpoint = True
n_iterations_per_epoch = mnist.train.num_examples // batch_size
n_iterations_validation = mnist.validation.num_examples // batch_size
best_loss_val = np.infty
checkpoint_path = "./my_capsule_network"
with tf.Session() as sess:
    if restore_checkpoint and tf.train.checkpoint_exists(checkpoint_path):
        saver.restore(sess, checkpoint_path)
    else:
        init.run()
    for epoch in range(n_epochs):
        for iteration in range(1, n_iterations_per_epoch + 1):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            # Run the training operation and measure the loss:
            _, loss_train = sess.run(
                [training_op, loss],
                feed_dict={X: X_batch.reshape([-1, 28, 28, 1]),
                           y: y_batch})
            print("\rIteration: {}/{} ({:.1f}%) Loss: {:.5f}".format(
                      iteration, n_iterations_per_epoch,
                      iteration * 100 / n_iterations_per_epoch,
                      loss_train),
                  end="")
        # At the end of each epoch,
        # measure the validation loss and accuracy:
        loss_vals = []
        acc_vals = []
        for iteration in range(1, n_iterations_validation + 1):
            X_batch, y_batch = mnist.validation.next_batch(batch_size)
            loss_val, acc_val = sess.run(
                [loss, accuracy],
                feed_dict={X: X_batch.reshape([-1, 28, 28, 1]),
                           y: y_batch})
            loss_vals.append(loss_val)
            acc_vals.append(acc_val)
            print("\rEvaluating the model: {}/{} ({:.1f}%)".format(
                      iteration, n_iterations_validation,
                      iteration * 100 / n_iterations_validation),
                  end=" " * 10)
        loss_val = np.mean(loss_vals)
        acc_val = np.mean(acc_vals)
        print("\rEpoch: {} Val accuracy: {:.4f}% Loss: {:.6f}{}".format(
            epoch + 1, acc_val * 100, loss_val,
            " (improved)" if loss_val < best_loss_val else ""))
        # And save the model if it improved:
        if loss_val < best_loss_val:
            save_path = saver.save(sess, checkpoint_path)
            best_loss_val = loss_val
n_iterations_test = mnist.test.num_examples // batch_size
with tf.Session() as sess:
    saver.restore(sess, checkpoint_path)
    loss_tests = []
    acc_tests = []
    for iteration in range(1, n_iterations_test + 1):
        X_batch, y_batch = mnist.test.next_batch(batch_size)
        loss_test, acc_test = sess.run(
            [loss, accuracy],
            feed_dict={X: X_batch.reshape([-1, 28, 28, 1]),
                       y: y_batch})
        loss_tests.append(loss_test)
        acc_tests.append(acc_test)
        print("\rEvaluating the model: {}/{} ({:.1f}%)".format(
                  iteration, n_iterations_test,
                  iteration * 100 / n_iterations_test),
              end=" " * 10)
    loss_test = np.mean(loss_tests)
    acc_test = np.mean(acc_tests)
    print("\rFinal test accuracy: {:.4f}% Loss: {:.6f}".format(
        acc_test * 100, loss_test))
Since the reconstruction module is not required, I set the reconstruction part of the final loss function to 0 so that the optimizer works only on the classification loss.
loss = tf.add(margin_loss, 0, name="loss")
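(Incidentally, adding a zero constant only renames the tensor; an equivalent and arguably clearer form would be loss = tf.identity(margin_loss, name="loss").)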
Now the classification module runs, but the accuracy is pathetically low even on the MNIST dataset: I get around 10% validation accuracy. Can somebody explain what the problem is here? On MNIST, capsule networks perform well in classification, as discussed in the paper.
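For reference, a quick sanity check (a hypothetical diagnostic, not part of the original code): roughly 10% accuracy on MNIST usually means the network predicts a single class for everything, which is easy to see by inspecting the distribution of y_pred over one validation batch inside the training session:

# Run inside the tf.Session() after the variables are initialized/restored.
X_batch, y_batch = mnist.validation.next_batch(batch_size)
preds = sess.run(y_pred, feed_dict={X: X_batch.reshape([-1, 28, 28, 1])})
print(np.bincount(preds, minlength=10))   # counts per predicted digit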
Regards

ValueError: Cannot feed value of shape (24500, 50, 50, 1) for Tensor 'Placeholder_34:0', which has shape '(?, 2500)'

This is the cat-vs-dog problem from the Kaggle competition. My code looks correct to me, but a ValueError keeps coming up. I think I have given the input the correct size, but the error still occurs.
Please help me find the mistake.
Here's my full code:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
from random import shuffle
import tensorflow as tf
from tqdm import tqdm
TRAIN_DIR = 'C:\\Users\Kashif\PycharmProjects\DeepLearning-Tensorflow (Sentdex)\Learnings\Cat_VS_Dog\TrainingData'
TEST_DIR = 'C:\\Users\Kashif\PycharmProjects\DeepLearning-Tensorflow (Sentdex)\Learnings\Cat_VS_Dog\TestingData'
IMG_SIZE = 50
MODEL_NAME = 'dogvscat-{}-{}.model'.format(LR, '2conv-basic')  # NOTE: LR (the learning-rate tag) is not defined at this point in the snippet
def label_img(img):
    word_label = img.split('.')[-3]
    if word_label == 'cat': return [1,0]
    elif word_label == 'dog': return [0,1]
def create_train_data():
    training_data = []
    for img in tqdm(os.listdir(TRAIN_DIR)):
        label = label_img(img)
        path = os.path.join(TRAIN_DIR,img)
        img = cv2.imread(path,cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (IMG_SIZE,IMG_SIZE))
        training_data.append([np.array(img),np.array(label)])
    shuffle(training_data)
    np.save('train_data.npy', training_data)
    return training_data
def process_test_data():
    testing_data=[]
    for img in tqdm(os.listdir(TEST_DIR)):
        path = os.path.join(TEST_DIR, img)
        img_num = img.split('.')[0]
        img = cv2.resize(cv2.imread(path, cv2.IMREAD_GRAYSCALE), (IMG_SIZE, IMG_SIZE))
        testing_data.append([np.array(img), img_num])
    np.save('test_data.npy', testing_data)
    return testing_data
train_data = create_train_data()
learning_rate = 0.01
epochs = 10
batch_size = 128
n_classes = 2
drop_out = 0.8
filter_h_w = 5
depth_in = 1
depth_out_1 = 32
depth_out_2 = 64
x = tf.placeholder('float', [None, IMG_SIZE * IMG_SIZE])
y = tf.placeholder('float', [None, n_classes])
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def maxpool2d(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
def conv_nural_network(x):
    weights = {
        'W_conv1': tf.Variable(tf.random_normal([filter_h_w, filter_h_w, depth_in, depth_out_1])),
        'W_conv2': tf.Variable(tf.random_normal([filter_h_w, filter_h_w, depth_out_1, depth_out_2])),
        'W_fc': tf.Variable(tf.random_normal([ int(IMG_SIZE/4) * int(IMG_SIZE/4) * depth_out_2, 1024])),
        'out': tf.Variable(tf.random_normal([1024, n_classes]))
    }
    biases = {
        'b_conv1': tf.Variable(tf.random_normal([depth_out_1])),
        'b_conv2': tf.Variable(tf.random_normal([depth_out_2])),
        'b_fc': tf.Variable(tf.random_normal([1024])),
        'out': tf.Variable(tf.random_normal([n_classes]))
    }
    x = tf.reshape(x, shape=[-1, IMG_SIZE, IMG_SIZE, 1])
    conv1 = tf.nn.relu(conv2d(x, weights['W_conv1']) + biases['b_conv1'])
    conv1 = maxpool2d(conv1)
    conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['b_conv2'])
    conv2 = maxpool2d(conv2)
    fc = tf.reshape(conv2, [-1, int(IMG_SIZE/4) * int(IMG_SIZE/4) * depth_out_2])
    fc = tf.nn.relu(tf.matmul(fc, weights['W_fc']) + biases['b_fc'])
    fc = tf.nn.dropout(fc, drop_out)
    output = tf.matmul(fc, weights['out']) + biases['out']
    return output
train = train_data[:-500]
test = train_data[-500:]
train_X = np.array([i[0] for i in train]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
train_y = [i[1] for i in train]
test_X = np.array([i[0] for i in test]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
test_y = [i[1] for i in test]
def train_neural_network(x):
    prediction = conv_nural_network(x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)
    init = tf.global_variables_initializer()
    loss_trace = []
    accuracy_trace = []
    with tf.Session() as sess:
        sess.run(init)
        for i in range(epochs):
            sess.run(optimizer, feed_dict={x: train_X, y: train_y})
            loss = sess.run(cost_function, feed_dict={x: train_X, y: train_y})  # NOTE: cost_function is never defined; presumably this should be `cost`
            accuracy = np.mean(np.argmax(sess.run(prediction,feed_dict={x:train_X,y:train_y}),axis=1) == np.argmax(train_y,axis=1))
            loss_trace.append(loss)
            accuracy_trace.append(accuracy)
            print('Epoch:', (i + 1), 'loss:', loss, 'accuracy:', accuracy)
        print('Final training result:', 'loss:', loss, 'accuracy:', accuracy)
        loss_test = sess.run(cost_function, feed_dict={x: test_X, y: test_y})
        test_pred = np.argmax(sess.run(prediction, feed_dict={x: test_X, y: test_y}), axis=1)
        accuracy_test = np.mean(test_pred == np.argmax(test_y, axis=1))
        print('Results on test dataset:', 'loss:', loss_test, 'accuracy:', accuracy_test)
train_neural_network(x)
The error below comes after that. A ValueError is raised, but I don't know where I have fed input of the wrong shape.
ValueError Traceback (most recent call last)
<ipython-input-91-7682c5a4d0ec> in <module>
25
26
---> 27 train_neural_network(x)
<ipython-input-91-7682c5a4d0ec> in train_neural_network(x)
11 sess.run(init)
12 for i in range(epochs):
---> 13 sess.run(optimizer, feed_dict={x: train_X, y: train_y})
14 loss = sess.run(cost_function, feed_dict={x: train_X, y: train_y})
15 accuracy = np.mean(np.argmax(sess.run(prediction,feed_dict={x:train_X,y:train_y}),axis=1) == np.argmax(train_y,axis=1))
ValueError: Cannot feed value of shape (24500, 50, 50, 1) for Tensor 'Placeholder_34:0', which has shape '(?, 2500)'
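The mismatch is between the placeholder x = tf.placeholder('float', [None, IMG_SIZE * IMG_SIZE]) (shape (?, 2500)) and the 4-D arrays train_X / test_X (shape (24500, 50, 50, 1)). A minimal sketch of two possible fixes (assumptions about the intent; pick one, not both):

# Option 1: declare the placeholder with the image shape, so the
# tf.reshape inside conv_nural_network becomes a no-op.
x = tf.placeholder('float', [None, IMG_SIZE, IMG_SIZE, 1])

# Option 2: keep x as [None, IMG_SIZE * IMG_SIZE] and flatten the
# arrays before feeding them.
train_X = np.array([i[0] for i in train]).reshape(-1, IMG_SIZE * IMG_SIZE)
test_X = np.array([i[0] for i in test]).reshape(-1, IMG_SIZE * IMG_SIZE)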

Projector in Tensorboard python application

I have the following code and sample data, which work exactly the way I want:
import numpy as np
import pandas as pd
import sklearn
import sklearn.preprocessing
import datetime
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector
valid_set_size_percentage = 3
test_set_size_percentage = 3
seq_len = 5 # choose sequence length
df = pd.read_csv("Test.csv", encoding = 'utf-16',sep=',',index_col = 0)
df.head()
def normalize_data(df):
    cols = list(df_stock.columns.values)
    min_max_scaler = sklearn.preprocessing.MinMaxScaler()
    df = pd.DataFrame(min_max_scaler.fit_transform(df.values))
    df.columns = cols
    return df
def load_data(stock, seq_len):
    data_raw = stock.as_matrix() # convert to numpy array
    data = []
    print(data_raw.shape)
    for index in range(len(data_raw) - seq_len):
        data.append(data_raw[index: index + seq_len])
    data = np.array(data);
    valid_set_size = int(np.round(valid_set_size_percentage/100*data.shape[0]));
    test_set_size = int(np.round(test_set_size_percentage/100*data.shape[0]));
    train_set_size = data.shape[0] - (valid_set_size + test_set_size);
    x_train = data[:train_set_size,:-1,:]
    y_train = data[:train_set_size,-1,:4]
    x_valid = data[train_set_size:train_set_size+valid_set_size,:-1,:]
    y_valid = data[train_set_size:train_set_size+valid_set_size,-1,:4]
    x_test = data[train_set_size+valid_set_size:,:-1,:]
    y_test = data[train_set_size+valid_set_size:,-1,:4]
    return [x_train, y_train, x_valid, y_valid, x_test, y_test]
df_stock = df.copy()
cols = list(df_stock.columns.values)
print('df_stock.columns.values = ', cols)
df_stock_norm = df_stock.copy()
df_stock_norm = normalize_data(df_stock_norm)
x_train, y_train, x_valid, y_valid, x_test, y_test = load_data(df_stock_norm, seq_len)
print(y_train[:2])
print('x_train.shape = ',x_train.shape)
print('y_train.shape = ', y_train.shape)
print('Inputs = ',x_train.shape[2])
print('Outputs = ', y_train.shape[1])
print('x_valid.shape = ',x_valid.shape)
print('y_valid.shape = ', y_valid.shape)
print('x_test.shape = ', x_test.shape)
print('y_test.shape = ',y_test.shape)
index_in_epoch = 0;
perm_array = np.arange(x_train.shape[0])
np.random.shuffle(perm_array)
def get_next_batch(batch_size):
    global index_in_epoch, x_train, perm_array
    if index_in_epoch > x_train.shape[0]:
        start = 0 # start next epoch
        index_in_epoch = 0#batch_size
    start = index_in_epoch
    index_in_epoch += batch_size
    end = index_in_epoch
    return x_train[perm_array[start:end]], y_train[perm_array[start:end]]
n_steps = seq_len -1
n_inputs = x_train.shape[2]
n_neurons = 100
n_outputs = y_train.shape[-1]
n_layers = 2
learning_rate = 0.001
batch_size =10
n_epochs = 100
train_set_size = x_train.shape[0]
test_set_size = x_test.shape[0]
tf.reset_default_graph()
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None,n_outputs])
layers = [tf.contrib.rnn.LSTMCell(num_units=n_neurons,
                                  activation=tf.nn.leaky_relu, use_peepholes = True)
          for layer in range(n_layers)]
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)
rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)
stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])
stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])
outputs = outputs[:,n_steps-1,:] # keep only last output of sequence
loss = tf.reduce_mean(tf.squared_difference(outputs, y)) # loss function = mean squared error
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for iteration in range(int(n_epochs*train_set_size/batch_size)):
        x_batch, y_batch = get_next_batch(batch_size) # fetch the next training batch
        sess.run(training_op, feed_dict={X: x_batch, y: y_batch})
        if iteration % int(1*train_set_size/batch_size) == 0:
            mse_train = loss.eval(feed_dict={X: x_train, y: y_train})
            mse_valid = loss.eval(feed_dict={X: x_valid, y: y_valid})
            mse_test = loss.eval(feed_dict={X: x_test, y: y_test})
            print('%.2f epochs: MSE train/valid/test = %.3f/%.3f/%.3f'%(
                iteration*batch_size/train_set_size, mse_train, mse_valid,mse_test))
            try:
                save_path = saver.save(sess, "modelfile\\model"+str(iteration)+".ckpt")
            except Exception as e:
                print(e)
                if not os.path.exists("modelfile\\"):
                    os.makedirs("modelfile\\")
                save_path = saver.save(sess, "modelfile\\model"+str(iteration)+".ckpt")
(The original post linked a screenshot of the sample data here.)
I would like to add the TensorBoard Projector to my code, but I could not figure out how to do it. I want to visualize the different inputs I am giving for my training. I am supplying the following columns and trying to predict the OHLC values:
'o', 'h', 'l', 'c', 'rel1', 'rel2', 'rel3', 'rel4', 'rel5', 'rel6', 'rel7', 'rel8'
I want to visualize the above columns in the Projector to see how they relate to each other in producing the output.
Please let me know what I can do to achieve this.
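For the Projector, each row of the embedding variable needs a matching line in metadata.tsv. For the twelve input columns, a minimal (hypothetical) metadata file would be one label per line:

o
h
l
c
rel1
rel2
rel3
rel4
rel5
rel6
rel7
rel8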
EDITED:
I have tried the following, but I cannot see the Projector tab:
tf.reset_default_graph()
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None,n_outputs])
symbols = tf.placeholder(tf.int32, [None, 1], name='stock_labels')
embed_matrix = tf.Variable(
    tf.random_uniform([1, n_inputs], 0.0, 1.0),
    name="embed_matrix"
)
stacked_symbols = tf.tile(symbols, [batch_size,n_steps], name='stacked_stock_labels')
stacked_embeds = tf.nn.embedding_lookup(embed_matrix, stacked_symbols)
# stacked_embeds = tf.nn.embedding_lookup(embed_matrix)
# After concat, inputs.shape = (batch_size, num_steps, input_size + embed_size)
inputs_with_embed = tf.concat([X, stacked_embeds], axis=2, name="inputs_with_embed")
embed_matrix_summ = tf.summary.histogram("embed_matrix", embed_matrix)
I also edited the following lines in the session code:
merged_sum = tf.summary.merge_all()
global_step = 0
# Set up the logs folder
writer = tf.summary.FileWriter('logs')
writer.add_graph(sess.graph)
projector_config = projector.ProjectorConfig()
# You can add multiple embeddings. Here we add only one.
added_embed = projector_config.embeddings.add()
added_embed.tensor_name = embed_matrix.name
# Link this tensor to its metadata file (e.g. labels).
shutil.copyfile("logs\\metadata.tsv",
                "logs\\metadata1.tsv")
added_embed.metadata_path = "metadata.tsv"
# The next line writes a projector_config.pbtxt in the LOG_DIR. TensorBoard will
# read this file during startup.
projector.visualize_embeddings(writer, projector_config)
sess.run(tf.global_variables_initializer())
if iteration % int(1*train_set_size/batch_size) == 0:
    global_step += 1
    mse_train = loss.eval(feed_dict={X: x_train, y: y_train})
    mse_valid = loss.eval(feed_dict={X: x_valid, y: y_valid})
    mse_test = loss.eval(feed_dict={X: x_test, y: y_test})
    _,train_merge = sess.run([outputs,merged_sum], feed_dict={X: x_train, y: y_train})
    writer.add_summary(train_merge, global_step=global_step)
Here is the metadata.tsv file.
Please let me know what I missed.
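For comparison, here is a minimal, self-contained sketch of the Projector wiring (LOG_DIR and the 12x16 shape are assumptions for illustration). Two details matter: the embedding must be a saved tf.Variable whose rows are the points to plot, and its checkpoint must live in the same log directory that TensorBoard reads. Note that a [1, n_inputs] matrix like embed_matrix above yields only a single point, so there would be nothing meaningful to project:

import os
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector

LOG_DIR = 'logs'   # the directory passed to tensorboard --logdir
# One row per input column (12 columns, 16-dimensional embedding assumed).
embedding_var = tf.Variable(tf.random_uniform([12, 16]), name='col_embedding')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(LOG_DIR, sess.graph)
    config = projector.ProjectorConfig()
    emb = config.embeddings.add()
    emb.tensor_name = embedding_var.name
    emb.metadata_path = 'metadata.tsv'   # the per-row labels shown earlier
    projector.visualize_embeddings(writer, config)  # writes projector_config.pbtxt
    tf.train.Saver([embedding_var]).save(sess, os.path.join(LOG_DIR, 'embed.ckpt'))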

Tensorflow: Same input data, different output

After training the model, I save it and then load it to run some tests. But every time I reload the model, I get a different accuracy and different results for exactly the same input data. After training, the printed accuracy is always good (0.8 ~ 0.9), but when I reload the model it drops to something like 0.1 ~ 0.5; I don't know whether that is related to the problem, but it is weird.
import tensorflow as tf
import numpy as np
import json
n_nodes_hl1 = 1600
n_nodes_hl2 = 800
n_nodes_hl3 = 400
n_nodes_hl4 = 200
n_classes = 4
batch_size = 50
input_lenght = 65
x = tf.placeholder('float', [None, input_lenght])
y = tf.placeholder('float')
def train_network(x):
    prediction = neural_network_model(x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.00001).minimize(cost)
    hm_epochs = 20000
    saver = tf.train.Saver()
    init_op = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init_op)
    epoch = 0
    for epoch in range(hm_epochs):
        epoch_cost = 0
        i = 0
        while i < len(train_x):
            start = i
            end = i + batch_size
            batch_x = np.array(train_x[start:end])
            batch_y = np.array(train_y[start:end])
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
            epoch_cost += c
            i += batch_size
    save_path = saver.save(sess, "drive/My Drive/datasets/tensorflow/model")
    correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
    print("accuracy:", accuracy.eval({x: test_x, y: test_y}, session=sess))
    sess.close()
def group_test_train(features_data, labels_data, test_size):
    featureset = []
    for i in range(test_size):
        featureset += [[features_data[i], labels_data[i]]]
    featureset = np.array(featureset)
    np.random.shuffle(featureset)
    train_x = list(featureset[:, 0][:test_size // 2])
    train_y = list(featureset[:, 1][:test_size // 2])
    test_x = list(featureset[:, 0][test_size // 2:])
    test_y = list(featureset[:, 1][test_size // 2:])
    return train_x, train_y, test_x, test_y
def neural_network_model(data):
    hidden1 = {'weights': tf.Variable(tf.random_uniform([input_lenght, n_nodes_hl1], -1, 1)),
               'biases': tf.Variable(tf.random_normal([n_nodes_hl1]))}
    hidden2 = {'weights': tf.Variable(tf.random_uniform([n_nodes_hl1, n_nodes_hl2], -1, 1)),
               'biases': tf.Variable(tf.random_normal([n_nodes_hl2]))}
    hidden3 = {'weights': tf.Variable(tf.random_uniform([n_nodes_hl2, n_nodes_hl3], -1, 1)),
               'biases': tf.Variable(tf.random_normal([n_nodes_hl3]))}
    hidden4 = {'weights': tf.Variable(tf.random_uniform([n_nodes_hl3, n_nodes_hl4], -1, 1)),
               'biases': tf.Variable(tf.random_normal([n_nodes_hl4]))}
    l_output = {'weights': tf.Variable(tf.random_uniform([n_nodes_hl4, n_classes], -1, 1)),
                'biases': tf.Variable(tf.random_normal([n_classes]))}
    l1 = tf.add(tf.matmul(data, hidden1['weights']), hidden1['biases'])
    l1 = tf.nn.relu(l1)
    l2 = tf.add(tf.matmul(l1, hidden2['weights']), hidden2['biases'])
    l2 = tf.nn.relu(l2)
    l3 = tf.add(tf.matmul(l2, hidden3['weights']), hidden3['biases'])
    l3 = tf.nn.relu(l3)
    l4 = tf.add(tf.matmul(l3, hidden4['weights']), hidden4['biases'])
    l4 = tf.nn.relu(l4)
    output = tf.add(tf.matmul(l4, l_output['weights']), l_output['biases'])
    return output
version = 'end'
with open('drive/My Drive/datasets/json/' + 'data-'+ version +'.json') as json_file:
    x_, y_ = json.load(json_file)
train_x, train_y, test_x, test_y = group_test_train(x_, y_, len(x_))
train_network(x)
Every time I run the part below, the accuracy and the output change.
prediction = neural_network_model(x)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.00001).minimize(cost)
init_op = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_op)
new_saver = tf.train.import_meta_graph('drive/My Drive/datasets/tensorflow/model.meta')
new_saver.restore(sess, tf.train.latest_checkpoint('drive/My Drive/datasets/tensorflow/'))
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
print("accuracy:", accuracy.eval({x: train_x, y: train_y}, session=sess))

Implement inference bayesian network using session tensorflow

I am new to machine learning. I have a final project on prediction using two algorithms, an Artificial Neural Network and a Bayesian Neural Network, and I want to compare the prediction results of the ANN and the BNN. I have finished the ANN program, but I have a problem with the BNN. I am following the tutorial from this link: bayesian neural network tutorial. This is my ANN sample code for training and evaluating the model.
keep_prob = tf.placeholder("float", name="keep_prob")
x = tf.placeholder(tf.float32, [None, n_input], name="x")
y = tf.placeholder(tf.float32, name="y")
training_epochs = 5000
display_step = 1000
batch_size = 5
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=predictions, labels=y), name="cost_function")
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001, name="Adam").minimize(cost)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in tqdm(range(training_epochs)):
        avg_cost = 0.0
        total_batch = int(len(x_train) / batch_size)
        x_batches = np.array_split(x_train, total_batch)
        y_batches = np.array_split(y_train, total_batch)
        for i in range(total_batch):
            batch_x, batch_y = x_batches[i], y_batches[i]
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y, keep_prob: 0.8})
            avg_cost += c / total_batch
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))
    print("Optimization Finished!")
    correct_prediction = tf.equal(tf.argmax(predictions, 1), tf.argmax(y, 1), name="corr_pred")
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"), name="accuracy")
    # print('Accuracy: ', sess.run(accuracy, feed_dict={x: x_test, y: y_test}))
    print("Accuracy:", accuracy.eval({x: x_test, y: y_test, keep_prob: 1.0}))
and this is my BNN code:
# Importing required libraries
from math import floor
import edward as ed
import numpy as np
import pandas as pd
import tensorflow as tf
from edward.models import Normal, NormalWithSoftplusScale
from fancyimpute import KNN
from sklearn import preprocessing
# Read data
features_dummies_nan = pd.read_csv('csv/features_dummies_with_label.csv', sep=',')
# Function: impute missing value by KNN
def impute_missing_values_by_KNN():
    home_data = features_dummies_nan[[col for col in features_dummies_nan.columns if 'hp' in col]]
    away_data = features_dummies_nan[[col for col in features_dummies_nan.columns if 'ap' in col]]
    label_data = features_dummies_nan[[col for col in features_dummies_nan.columns if 'label' in col]]
    home_filled = pd.DataFrame(KNN(3).complete(home_data))
    home_filled.columns = home_data.columns
    home_filled.index = home_data.index
    away_filled = pd.DataFrame(KNN(3).complete(away_data))
    away_filled.columns = away_data.columns
    away_filled.index = away_data.index
    data_frame_out = pd.concat([home_filled, away_filled, label_data], axis=1)
    return data_frame_out
features_dummies = impute_missing_values_by_KNN()
target = features_dummies.loc[:, 'label'].values
data = features_dummies.drop('label', axis=1)
data = data.values
perm = np.random.permutation(len(features_dummies))
data = data[perm]
target = target[perm]
train_size = 0.9
train_cnt = floor(features_dummies.shape[0] * train_size)
x_train = data[0:train_cnt] # data_train
y_train = target[0:train_cnt] # target_train
x_test = data[train_cnt:] # data_test
y_test = target[train_cnt:] # target_test
keep_prob = tf.placeholder("float", name="keep_prob")
n_input = data.shape[1] # D
n_classes = 3
n_hidden_1 = 100 # H0
n_hidden_2 = 100 # H1
n_hidden_3 = 100 # H2
def neural_network(X, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out):
    hidden1 = tf.nn.relu(tf.matmul(X, W_0) + b_0)
    hidden2 = tf.nn.relu(tf.matmul(hidden1, W_1) + b_1)
    hidden3 = tf.nn.relu(tf.matmul(hidden2, W_2) + b_2)
    output = tf.matmul(hidden3, W_out) + b_out
    return tf.reshape(output, [-1])
scaler = preprocessing.StandardScaler().fit(x_train)
data_train_scaled = scaler.transform(x_train)
data_test_scaled = scaler.transform(x_test)
W_0 = Normal(loc=tf.zeros([n_input, n_hidden_1]), scale=5.0 * tf.ones([n_input, n_hidden_1]))
W_1 = Normal(loc=tf.zeros([n_hidden_1, n_hidden_2]), scale=5.0 * tf.ones([n_hidden_1, n_hidden_2]))
W_2 = Normal(loc=tf.zeros([n_hidden_2, n_hidden_3]), scale=5.0 * tf.ones([n_hidden_2, n_hidden_3]))
W_out = Normal(loc=tf.zeros([n_hidden_3, 1]), scale=5.0 * tf.ones([n_hidden_3, 1]))
b_0 = Normal(loc=tf.zeros(n_hidden_1), scale=5.0 * tf.ones(n_hidden_1))
b_1 = Normal(loc=tf.zeros(n_hidden_2), scale=5.0 * tf.ones(n_hidden_2))
b_2 = Normal(loc=tf.zeros(n_hidden_3), scale=5.0 * tf.ones(n_hidden_3))
b_out = Normal(loc=tf.zeros(1), scale=5.0 * tf.ones(1))
qW_0 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_input, n_hidden_1])),
                               scale=tf.Variable(tf.random_normal([n_input, n_hidden_1])))
qW_1 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
                               scale=tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])))
qW_2 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])),
                               scale=tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])))
qW_out = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_3, 1])),
                                 scale=tf.Variable(tf.random_normal([n_hidden_3, 1])))
qb_0 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_1])),
                               scale=tf.Variable(tf.random_normal([n_hidden_1])))
qb_1 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_2])),
                               scale=tf.Variable(tf.random_normal([n_hidden_2])))
qb_2 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_3])),
                               scale=tf.Variable(tf.random_normal([n_hidden_3])))
qb_out = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([1])),
                                 scale=tf.Variable(tf.random_normal([1])))
sigma_y = 1.0
x = tf.placeholder(tf.float32, [None, n_input])
y = Normal(loc=neural_network(x, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out), scale=sigma_y)
inference = ed.KLqp({W_0: qW_0, b_0: qb_0,
                     W_1: qW_1, b_1: qb_1,
                     W_2: qW_2, b_2: qb_2,
                     W_out: qW_out, b_out: qb_out}, data={x: x_train, y: y_train})
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.05
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                           1000, 0.3, staircase=True)
optimizer = tf.train.AdamOptimizer(learning_rate)
inference.run(n_iter=5000, optimizer=optimizer, global_step=global_step)
But I want to compare the results of the two algorithms, so I want some variables to be the same between the ANN and the BNN, for example the number of epochs. I would then adapt my ANN code above to this BNN code section:
sigma_y = 1.0
x = tf.placeholder(tf.float32, [None, n_input])
y = Normal(loc=neural_network(x, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out), scale=sigma_y)
inference = ed.KLqp({W_0: qW_0, b_0: qb_0,
                     W_1: qW_1, b_1: qb_1,
                     W_2: qW_2, b_2: qb_2,
                     W_out: qW_out, b_out: qb_out}, data={x: x_train, y: y_train})
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.05
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                           1000, 0.3, staircase=True)
optimizer = tf.train.AdamOptimizer(learning_rate)
inference.run(n_iter=5000, optimizer=optimizer, global_step=global_step)
There are several things I don't understand. The ANN has y = tf.placeholder(tf.float32, name="y"), but in the BNN it is y = Normal(loc=neural_network(x, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out), scale=sigma_y). Also, there is a scale in the BNN but not in the ANN. So, can I adapt my ANN train-and-test code to the BNN code above? I want to run the BNN inference like sess.run() in the ANN, so that I can compute the BNN's prediction accuracy. Can I do that?
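For what it's worth, here is a hedged sketch of how the evaluation could look, based on Edward's documented pattern of sampling from the learned posterior (treat names and details as assumptions). Since y is modeled here as a Normal over the network output rather than softmax class probabilities, the direct analogue of the ANN evaluation is a posterior-predictive estimate:

# After inference.run() has finished, sample weight configurations from
# the posterior, average the network outputs, and compare with the
# held-out targets.
n_samples = 50
outputs = []
for _ in range(n_samples):
    outputs.append(neural_network(tf.cast(data_test_scaled, tf.float32),
                                  qW_0.sample(), qW_1.sample(), qW_2.sample(),
                                  qW_out.sample(), qb_0.sample(), qb_1.sample(),
                                  qb_2.sample(), qb_out.sample()))
y_post = tf.reduce_mean(tf.stack(outputs), axis=0)

sess = ed.get_session()   # reuse the session created by inference.run()
y_hat = sess.run(y_post)
print("test MSE:", np.mean((y_hat - y_test) ** 2))
# For integer class labels, rounding gives a rough accuracy estimate:
# print("accuracy:", np.mean(np.rint(y_hat) == y_test))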
