Implement inference bayesian network using session tensorflow - python

I am a new with machine learning. I have a final project about prediction using two algorithms, Artificial Neural Network and Bayesian Neural Network. I want to compare the prediction result between ANN and BNN. I have finished the ANN program, but I have a problem with the BNN. I try a tutorial from this link: bayesian neural network tutorial. This is my ANN sample code to train and evaluate the model.
keep_prob = tf.placeholder("float", name="keep_prob")
x = tf.placeholder(tf.float32, [None, n_input], name="x")
y = tf.placeholder(tf.float32, name="y")
training_epochs = 5000
display_step = 1000
batch_size = 5
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=predictions, labels=y), name="cost_function")
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001, name="Adam").minimize(cost)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for epoch in tqdm(range(training_epochs)):
avg_cost = 0.0
total_batch = int(len(x_train) / batch_size)
x_batches = np.array_split(x_train, total_batch)
y_batches = np.array_split(y_train, total_batch)
for i in range(total_batch):
batch_x, batch_y = x_batches[i], y_batches[i]
_, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y, keep_prob: 0.8})
avg_cost += c / total_batch
if epoch % display_step == 0:
print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))
print("Optimization Finished!")
correct_prediction = tf.equal(tf.argmax(predictions, 1), tf.argmax(y, 1), name="corr_pred")
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"), name="accuracy")
# print('Accuracy: ', sess.run(accuracy, feed_dict={x: x_test, y: y_test}))
print("Accuracy:", accuracy.eval({x: x_test, y: y_test, keep_prob: 1.0}))
and this is my BNN code:
# Importing required libraries
from math import floor
import edward as ed
import numpy as np
import pandas as pd
import tensorflow as tf
from edward.models import Normal, NormalWithSoftplusScale
from fancyimpute import KNN
from sklearn import preprocessing
# Read data
features_dummies_nan = pd.read_csv('csv/features_dummies_with_label.csv', sep=',')
# Function: impute missing value by KNN
def impute_missing_values_by_KNN():
home_data = features_dummies_nan[[col for col in features_dummies_nan.columns if 'hp' in col]]
away_data = features_dummies_nan[[col for col in features_dummies_nan.columns if 'ap' in col]]
label_data = features_dummies_nan[[col for col in features_dummies_nan.columns if 'label' in col]]
home_filled = pd.DataFrame(KNN(3).complete(home_data))
home_filled.columns = home_data.columns
home_filled.index = home_data.index
away_filled = pd.DataFrame(KNN(3).complete(away_data))
away_filled.columns = away_data.columns
away_filled.index = away_data.index
data_frame_out = pd.concat([home_filled, away_filled, label_data], axis=1)
return data_frame_out
features_dummies = impute_missing_values_by_KNN()
target = features_dummies.loc[:, 'label'].values
data = features_dummies.drop('label', axis=1)
data = data.values
perm = np.random.permutation(len(features_dummies))
data = data[perm]
target = target[perm]
train_size = 0.9
train_cnt = floor(features_dummies.shape[0] * train_size)
x_train = data[0:train_cnt] # data_train
y_train = target[0:train_cnt] # target_train
x_test = data[train_cnt:] # data_test
y_test = target[train_cnt:] # target_test
keep_prob = tf.placeholder("float", name="keep_prob")
n_input = data.shape[1] # D
n_classes = 3
n_hidden_1 = 100 # H0
n_hidden_2 = 100 # H1
n_hidden_3 = 100 # H2
def neural_network(X, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out):
hidden1 = tf.nn.relu(tf.matmul(X, W_0) + b_0)
hidden2 = tf.nn.relu(tf.matmul(hidden1, W_1) + b_1)
hidden3 = tf.nn.relu(tf.matmul(hidden2, W_2) + b_2)
output = tf.matmul(hidden3, W_out) + b_out
return tf.reshape(output, [-1])
scaler = preprocessing.StandardScaler().fit(x_train)
data_train_scaled = scaler.transform(x_train)
data_test_scaled = scaler.transform(x_test)
W_0 = Normal(loc=tf.zeros([n_input, n_hidden_1]), scale=5.0 * tf.ones([n_input, n_hidden_1]))
W_1 = Normal(loc=tf.zeros([n_hidden_1, n_hidden_2]), scale=5.0 * tf.ones([n_hidden_1, n_hidden_2]))
W_2 = Normal(loc=tf.zeros([n_hidden_2, n_hidden_3]), scale=5.0 * tf.ones([n_hidden_2, n_hidden_3]))
W_out = Normal(loc=tf.zeros([n_hidden_3, 1]), scale=5.0 * tf.ones([n_hidden_3, 1]))
b_0 = Normal(loc=tf.zeros(n_hidden_1), scale=5.0 * tf.ones(n_hidden_1))
b_1 = Normal(loc=tf.zeros(n_hidden_2), scale=5.0 * tf.ones(n_hidden_2))
b_2 = Normal(loc=tf.zeros(n_hidden_3), scale=5.0 * tf.ones(n_hidden_3))
b_out = Normal(loc=tf.zeros(1), scale=5.0 * tf.ones(1))
qW_0 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_input, n_hidden_1])),
scale=tf.Variable(tf.random_normal([n_input, n_hidden_1])))
qW_1 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
scale=tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])))
qW_2 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])),
scale=tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])))
qW_out = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_3, 1])),
scale=tf.Variable(tf.random_normal([n_hidden_3, 1])))
qb_0 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_1])),
scale=tf.Variable(tf.random_normal([n_hidden_1])))
qb_1 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_2])),
scale=tf.Variable(tf.random_normal([n_hidden_2])))
qb_2 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_3])),
scale=tf.Variable(tf.random_normal([n_hidden_3])))
qb_out = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([1])),
scale=tf.Variable(tf.random_normal([1])))
sigma_y = 1.0
x = tf.placeholder(tf.float32, [None, n_input])
y = Normal(loc=neural_network(x, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out), scale=sigma_y)
inference = ed.KLqp({W_0: qW_0, b_0: qb_0,
W_1: qW_1, b_1: qb_1,
W_2: qW_2, b_2: qb_2,
W_out: qW_out, b_out: qb_out}, data={x: x_train, y: y_train})
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.05
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
1000, 0.3, staircase=True)
optimizer = tf.train.AdamOptimizer(learning_rate)
inference.run(n_iter=5000, optimizer=optimizer, global_step=global_step)
But, I want to compare two algorithms result. So, I want to make some variables will be same between ANN and BNN, for example sum of epoch. Then I want to adapt my ANN code above for this BNN code section.
sigma_y = 1.0
x = tf.placeholder(tf.float32, [None, n_input])
y = Normal(loc=neural_network(x, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out), scale=sigma_y)
inference = ed.KLqp({W_0: qW_0, b_0: qb_0,
W_1: qW_1, b_1: qb_1,
W_2: qW_2, b_2: qb_2,
W_out: qW_out, b_out: qb_out}, data={x: x_train, y: y_train})
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.05
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
1000, 0.3, staircase=True)
optimizer = tf.train.AdamOptimizer(learning_rate)
inference.run(n_iter=5000, optimizer=optimizer, global_step=global_step)
I have several things that I don't understand. There is y = tf.placeholder(tf.float32, name="y") in ANN but in BNN is y = Normal(loc=neural_network(x, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out), scale=sigma_y). Then, there is scale in BNN but not in ANN. So, can I adapt my ANN train and test sample code to BNN sample code above? I want to make inference on BNN run like in sess.run() on ANN so I can count the BNN prediction accuracy result. Can I do that?

Related

Classification Module from Capsule Network Code

Refer to the Capsule Network Code, I am using just the classification module from the mentioned code, So following is the complete classification code that I extracted from the link.
from __future__ import division, print_function, unicode_literals
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
tf.reset_default_graph()
np.random.seed(42)
tf.set_random_seed(42)
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")
X = tf.placeholder(shape=[None, 28, 28, 1], dtype=tf.float32, name="X")
caps1_n_maps = 32
caps1_n_caps = caps1_n_maps * 6 * 6 # 1152 primary capsules
caps1_n_dims = 8
conv1_params = {
"filters": 256,
"kernel_size": 9,
"strides": 1,
"padding": "valid",
"activation": tf.nn.relu,
}
conv2_params = {
"filters": caps1_n_maps * caps1_n_dims, # 256 convolutional filters
"kernel_size": 9,
"strides": 2,
"padding": "valid",
"activation": tf.nn.relu
}
conv1 = tf.layers.conv2d(X, name="conv1", **conv1_params)
conv2 = tf.layers.conv2d(conv1, name="conv2", **conv2_params)
caps1_raw = tf.reshape(conv2, [-1, caps1_n_caps, caps1_n_dims],name="caps1_raw")
def squash(s, axis=-1, epsilon=1e-7, name=None):
with tf.name_scope(name, default_name="squash"):
squared_norm = tf.reduce_sum(tf.square(s), axis=axis,keep_dims=True)
safe_norm = tf.sqrt(squared_norm + epsilon)
squash_factor = squared_norm / (1. + squared_norm)
unit_vector = s / safe_norm
return squash_factor * unit_vector
caps1_output = squash(caps1_raw, name="caps1_output")
caps2_n_caps = 10
caps2_n_dims = 16
init_sigma = 0.1
W_init = tf.random_normal(
shape=(1, caps1_n_caps, caps2_n_caps, caps2_n_dims, caps1_n_dims),
stddev=init_sigma, dtype=tf.float32, name="W_init")
W = tf.Variable(W_init, name="W")
batch_size = tf.shape(X)[0]
W_tiled = tf.tile(W, [batch_size, 1, 1, 1, 1], name="W_tiled")
caps1_output_expanded = tf.expand_dims(caps1_output, -1,
name="caps1_output_expanded")
caps1_output_tile = tf.expand_dims(caps1_output_expanded, 2,
name="caps1_output_tile")
caps1_output_tiled = tf.tile(caps1_output_tile, [1, 1, caps2_n_caps, 1, 1],
name="caps1_output_tiled")
caps2_predicted = tf.matmul(W_tiled, caps1_output_tiled,
name="caps2_predicted")
raw_weights = tf.zeros([batch_size, caps1_n_caps, caps2_n_caps, 1, 1],
dtype=np.float32, name="raw_weights")
#ROUND 1
routing_weights = tf.nn.softmax(raw_weights, dim=2, name="routing_weights")
weighted_predictions = tf.multiply(routing_weights, caps2_predicted,
name="weighted_predictions")
weighted_sum = tf.reduce_sum(weighted_predictions, axis=1, keep_dims=True,
name="weighted_sum")
caps2_output_round_1 = squash(weighted_sum, axis=-2,
name="caps2_output_round_1")
#ROUND 2
caps2_output_round_1_tiled = tf.tile(
caps2_output_round_1, [1, caps1_n_caps, 1, 1, 1],
name="caps2_output_round_1_tiled")
agreement = tf.matmul(caps2_predicted, caps2_output_round_1_tiled,
transpose_a=True, name="agreement")
raw_weights_round_2 = tf.add(raw_weights, agreement,
name="raw_weights_round_2")
routing_weights_round_2 = tf.nn.softmax(raw_weights_round_2,
dim=2,
name="routing_weights_round_2")
weighted_predictions_round_2 = tf.multiply(routing_weights_round_2,
caps2_predicted,
name="weighted_predictions_round_2")
weighted_sum_round_2 = tf.reduce_sum(weighted_predictions_round_2,
axis=1, keep_dims=True,
name="weighted_sum_round_2")
caps2_output_round_2 = squash(weighted_sum_round_2,
axis=-2,
name="caps2_output_round_2")
caps2_output = caps2_output_round_2
#ESTIMATE CLASS PROBABILITIES
def safe_norm(s, axis=-1, epsilon=1e-7, keep_dims=False, name=None):
with tf.name_scope(name, default_name="safe_norm"):
squared_norm = tf.reduce_sum(tf.square(s), axis=axis,
keep_dims=keep_dims)
return tf.sqrt(squared_norm + epsilon)
y_proba = safe_norm(caps2_output, axis=-2, name="y_proba")
y_proba_argmax = tf.argmax(y_proba, axis=2, name="y_proba")
y_pred = tf.squeeze(y_proba_argmax, axis=[1,2], name="y_pred")
y = tf.placeholder(shape=[None], dtype=tf.int64, name="y")
m_plus = 0.9
m_minus = 0.1
lambda_ = 0.5
T = tf.one_hot(y, depth=caps2_n_caps, name="T")
caps2_output_norm = safe_norm(caps2_output, axis=-2, keep_dims=True,
name="caps2_output_norm")
present_error_raw = tf.square(tf.maximum(0., m_plus - caps2_output_norm),
name="present_error_raw")
present_error = tf.reshape(present_error_raw, shape=(-1, 10),
name="present_error")
absent_error_raw = tf.square(tf.maximum(0., caps2_output_norm - m_minus),
name="absent_error_raw")
absent_error = tf.reshape(absent_error_raw, shape=(-1, 10),
name="absent_error")
L = tf.add(T * present_error, lambda_ * (1.0 - T) * absent_error,
name="L")
margin_loss = tf.reduce_mean(tf.reduce_sum(L, axis=1), name="margin_loss")
loss = tf.add(margin_loss, 0, name="loss")
correct = tf.equal(y, y_pred, name="correct")
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")
optimizer = tf.train.AdamOptimizer()
training_op = optimizer.minimize(loss, name="training_op")
init = tf.global_variables_initializer()
saver = tf.train.Saver()
n_epochs = 10
batch_size = 50
restore_checkpoint = True
n_iterations_per_epoch = mnist.train.num_examples // batch_size
n_iterations_validation = mnist.validation.num_examples // batch_size
best_loss_val = np.infty
checkpoint_path = "./my_capsule_network"
with tf.Session() as sess:
if restore_checkpoint and tf.train.checkpoint_exists(checkpoint_path):
saver.restore(sess, checkpoint_path)
else:
init.run()
for epoch in range(n_epochs):
for iteration in range(1, n_iterations_per_epoch + 1):
X_batch, y_batch = mnist.train.next_batch(batch_size)
# Run the training operation and measure the loss:
_, loss_train = sess.run(
[training_op, loss],
feed_dict={X: X_batch.reshape([-1, 28, 28, 1]),
y: y_batch})
print("\rIteration: {}/{} ({:.1f}%) Loss: {:.5f}".format(
iteration, n_iterations_per_epoch,
iteration * 100 / n_iterations_per_epoch,
loss_train),
end="")
# At the end of each epoch,
# measure the validation loss and accuracy:
loss_vals = []
acc_vals = []
for iteration in range(1, n_iterations_validation + 1):
X_batch, y_batch = mnist.validation.next_batch(batch_size)
loss_val, acc_val = sess.run(
[loss, accuracy],
feed_dict={X: X_batch.reshape([-1, 28, 28, 1]),
y: y_batch})
loss_vals.append(loss_val)
acc_vals.append(acc_val)
print("\rEvaluating the model: {}/{} ({:.1f}%)".format(
iteration, n_iterations_validation,
iteration * 100 / n_iterations_validation),
end=" " * 10)
loss_val = np.mean(loss_vals)
acc_val = np.mean(acc_vals)
print("\rEpoch: {} Val accuracy: {:.4f}% Loss: {:.6f}{}".format(
epoch + 1, acc_val * 100, loss_val,
" (improved)" if loss_val < best_loss_val else ""))
# And save the model if it improved:
if loss_val < best_loss_val:
save_path = saver.save(sess, checkpoint_path)
best_loss_val = loss_val
n_iterations_test = mnist.test.num_examples // batch_size
with tf.Session() as sess:
saver.restore(sess, checkpoint_path)
loss_tests = []
acc_tests = []
for iteration in range(1, n_iterations_test + 1):
X_batch, y_batch = mnist.test.next_batch(batch_size)
loss_test, acc_test = sess.run(
[loss, accuracy],
feed_dict={X: X_batch.reshape([-1, 28, 28, 1]),
y: y_batch})
loss_tests.append(loss_test)
acc_tests.append(acc_test)
print("\rEvaluating the model: {}/{} ({:.1f}%)".format(
iteration, n_iterations_test,
iteration * 100 / n_iterations_test),
end=" " * 10)
loss_test = np.mean(loss_tests)
acc_test = np.mean(acc_tests)
print("\rFinal test accuracy: {:.4f}% Loss: {:.6f}".format(
acc_test * 100, loss_test))
Since reconstruction module is not required so in the final loss function I have set loss = 0 for the reconstruction part so that optimizer only work on the classification loss.
loss = tf.add(margin_loss, 0, name="loss")
Now the classification module is working but the accuracy is pathetically low even for MNIST dataset, I got around 10% validation accuracy, Can somebody explain to me what is the problem here because on MNIST the Capsule Networks perform well in classification as discussed in Paper?
Regards

Tensorflow: Same input data, different output

After training the model, I save it and load to make some tests. But every time I reload the model I get a different accuracy and results with the exactly same input data. After training the model I print the accuracy and it always gets a nice value (0.8 ~ 0.9), but when I reload it goes down to something like (0.1 ~ 0.5) - I dont know if it is something related to the problem btw thats weird.
import tensorflow as tf
import numpy as np
import json
n_nodes_hl1 = 1600
n_nodes_hl2 = 800
n_nodes_hl3 = 400
n_nodes_hl4 = 200
n_classes = 4
batch_size = 50
input_lenght = 65
x = tf.placeholder('float', [None, input_lenght])
y = tf.placeholder('float')
def train_network(x):
prediction = neural_network_model(x)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.00001).minimize(cost)
hm_epochs = 20000
saver = tf.train.Saver()
init_op = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_op)
epoch = 0
for epoch in range(hm_epochs):
epoch_cost = 0
i = 0
while i < len(train_x):
start = i
end = i + batch_size
batch_x = np.array(train_x[start:end])
batch_y = np.array(train_y[start:end])
_, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
epoch_cost += c
i += batch_size
save_path = saver.save(sess, "drive/My Drive/datasets/tensorflow/model")
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
print("accuracy:", accuracy.eval({x: test_x, y: test_y}, session=sess))
sess.close()
def group_test_train(features_data, labels_data, test_size):
featureset = []
for i in range(test_size):
featureset += [[features_data[i], labels_data[i]]]
featureset = np.array(featureset)
np.random.shuffle(featureset)
train_x = list(featureset[:, 0][:test_size // 2])
train_y = list(featureset[:, 1][:test_size // 2])
test_x = list(featureset[:, 0][test_size // 2:])
test_y = list(featureset[:, 1][test_size // 2:])
return train_x, train_y, test_x, test_y
def neural_network_model(data):
hidden1 = {'weights': tf.Variable(tf.random_uniform([input_lenght, n_nodes_hl1], -1, 1)),
'biases': tf.Variable(tf.random_normal([n_nodes_hl1]))
}
hidden2 = {'weights': tf.Variable(tf.random_uniform([n_nodes_hl1, n_nodes_hl2], -1, 1)),
'biases': tf.Variable(tf.random_normal([n_nodes_hl2]))
}
hidden3 = {'weights': tf.Variable(tf.random_uniform([n_nodes_hl2, n_nodes_hl3], -1, 1)),
'biases': tf.Variable(tf.random_normal([n_nodes_hl3]))
}
hidden4 = {'weights': tf.Variable(tf.random_uniform([n_nodes_hl3, n_nodes_hl4], -1, 1)),
'biases': tf.Variable(tf.random_normal([n_nodes_hl4]))
}
l_output = {'weights': tf.Variable(tf.random_uniform([n_nodes_hl4, n_classes], -1, 1)),
'biases': tf.Variable(tf.random_normal([n_classes]))
}
l1 = tf.add(tf.matmul(data, hidden1['weights']), hidden1['biases'])
l1 = tf.nn.relu(l1)
l2 = tf.add(tf.matmul(l1, hidden2['weights']), hidden2['biases'])
l2 = tf.nn.relu(l2)
l3 = tf.add(tf.matmul(l2, hidden3['weights']), hidden3['biases'])
l3 = tf.nn.relu(l3)
l4 = tf.add(tf.matmul(l3, hidden4['weights']), hidden4['biases'])
l4 = tf.nn.relu(l4)
output = tf.add(tf.matmul(l4, l_output['weights']), l_output['biases'])
return output
version = 'end'
with open('drive/My Drive/datasets/json/' + 'data-'+ version +'.json') as json_file:
x_, y_ = json.load(json_file)
train_x, train_y, test_x, test_y = group_test_train(x_, y_, len(x_) )
train_network(x)
Every time I run this part down bellow the accuracy changes and the output as well.
prediction = neural_network_model(x)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.00001).minimize(cost)
init_op = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init_op)
new_saver = tf.train.import_meta_graph('drive/My Drive/datasets/tensorflow/model.meta')
new_saver.restore(sess, tf.train.latest_checkpoint('drive/My Drive/datasets/tensorflow/'))
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
print("accuracy:", accuracy.eval({x: train_x, y: train_y}, session=sess))

why get train accuracy not test accuracy in tensorboard

I want to see test accuracy in tensorboard, but it seems I get accuracy with training data. I print test accuracy on console, and it is showing about 70%, but in tensorboard, the curve showed accuracy is growing and finally almost 100%.
This is my code:
def train_crack_captcha_cnn(is_train, checkpoint_dir):
global max_acc
X = tf.placeholder(tf.float32, [None, dr.ROWS, dr.COLS, dr.CHANNELS])
Y = tf.placeholder(tf.float32, [None, 1, 1, 2])
output, end_points = resnet_v2_50(X, num_classes = 2)
global_steps = tf.Variable(1, trainable=False)
learning_rate = tf.train.exponential_decay(0.001, global_steps, 100, 0.9)
with tf.device('/device:GPU:0'):
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=output))
# optimizer 为了加快训练 learning_rate应该开始大,然后慢慢衰
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss, global_step=global_steps)
predict = tf.argmax(output, axis = 3)
l = tf.argmax(Y, axis = 3)
correct_pred = tf.equal(predict, l)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
## tensorboard
tf.summary.scalar('test_accuracy', accuracy)
tf.summary.scalar("loss", loss)
tf.summary.scalar("learning_rate", learning_rate)
saver = tf.train.Saver()
with tf.Session(config=tf.ConfigProto(allow_soft_placement = True)) as sess:
if is_train:
writer = tf.summary.FileWriter("/tmp/cnn_log/log", graph = sess.graph)
sess.run(tf.global_variables_initializer())
step_value = sess.run(global_steps)
while step_value < 100000:
step_value = sess.run(global_steps)
merged = tf.summary.merge_all()
batch_x, batch_y = get_next_batch()
result, _, _loss= sess.run([merged, optimizer, loss], feed_dict={X: batch_x, Y: batch_y})
writer.add_summary(result, step_value)
print('step : {} loss : {}'.format(step_value, _loss))
# 每100 step计算一次准确率
if step_value % 20 == 0:
acc = sess.run(accuracy, feed_dict={X: validation, Y: validation_labels})
print('accuracy : {}'.format(acc))
# 如果准确率大于max_acc,保存模型,完成训练
if acc > max_acc:
max_acc = float(acc) #转换类型防止变为同一个引用
saver.save(sess, checkpoint_dir + "/" + str(step_value) + '-' + str(acc) + "/model.ckpt", global_step=global_steps)
##### predict #####
# predict_y = sess.run(output, feed_dict={X: test})
# data = pd.DataFrame([i for i in range(1, len(predict_y) + 1)], columns = ['id'])
# predict_y = np.argmax(predict_y, axis = 3)
# predict_y = np.reshape(predict_y,(-1))
# print(predict_y)
# predict_y = pd.Series(predict_y, name='label')
# data['label'] = predict_y
# data.to_csv("gender_submission.csv" + str(step), index=False)
##### end #####
writer.close()
else:
ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
if ckpt and ckpt.model_checkpoint_path:
saver.restore(sess, ckpt.model_checkpoint_path)
acc = sess.run(accuracy, feed_dict={X: validation, Y: validation_labels})
print('accuracy : {}'.format(acc))
I add accuracy into tensorboard like this:
tf.summary.scalar('test_accuracy', accuracy)
and every 20 step, I get one accuracy about test data, and print the result to console, which is not the same with data shown on tensorboard.
Why?

Cost function in classification gives NaN as a result

I am quite new to both python and machine learning and I am trying to create an ANN for a binary classification model in order to determine whether something is faulty or not. When I run the code down below, the cost function seem to come up as nan. Unfortunately, in order to come up with an accuracy percentage, I need the cost to be an actual number. Is there an obvious error in my code or is there another way to circumvent this problem by writing it differently? Any help will be much appreciated. Thanks.
import csv
import tensorflow as tf
import numpy as np
import pandas as pd
import urllib.request as request
import matplotlib.pyplot as plt
train_data = pd.read_csv("C:/Python35/train_data.csv", sep=',', header = None)
test_data = pd.read_csv("C:/Python35/test_data.csv", sep=',', header = None)
X_train = np.asarray(train_data)
X_test = np.asarray(test_data)
train_label = pd.read_csv("C:/Python35/train_label.csv", sep=',', header = None)
test_label = pd.read_csv("C:/Python35/test_label.csv", sep=',', header = None)
y_train = np.asarray(train_label)
y_test = np.asarray(test_label)
labels_train = (np.arange(2) == y_train[:,None]).astype(np.float32)
labels_test = (np.arange(2) == y_test[:,None]).astype(np.float32)
inputs = tf.placeholder(tf.float32, shape=(None, X_train.shape[1]), name='inputs')
label = tf.placeholder(tf.float32, shape=(None, 2), name='labels')
hid1_size = 128
w1 = tf.Variable(tf.random_normal([hid1_size, X_train.shape[1]], stddev=0.01), name='w1')
b1 = tf.Variable(tf.constant(0.1, shape=(hid1_size, 1)), name='b1')
y1 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(w1, tf.transpose(inputs)), b1)), keep_prob=0.5)
hid2_size = 256
w2 = tf.Variable(tf.random_normal([hid2_size, hid1_size], stddev=0.01), name='w2')
b2 = tf.Variable(tf.constant(0.1, shape=(hid2_size, 1)), name='b2')
y2 = tf.nn.dropout(tf.nn.relu(tf.add(tf.matmul(w2, y1), b2)), keep_prob=0.5)
wo = tf.Variable(tf.random_normal([2, hid2_size], stddev=0.01), name='wo')
bo = tf.Variable(tf.random_normal([2, 1]), name='bo')
yo = tf.transpose(tf.add(tf.matmul(wo, y2), bo))
lr = tf.placeholder(tf.float32, shape=(), name='learning_rate')
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=yo, labels=label))
optimizer = tf.train.GradientDescentOptimizer(lr).minimize(loss)
pred = tf.nn.softmax(yo)
pred_label = tf.argmax(pred, 1)
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(label, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
init = tf.global_variables_initializer()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.InteractiveSession(config=config)
sess.run(init)
for learning_rate in [0.05, 0.01]:
for epoch in range(50):
avg_cost = 0.0
for i in range(X_train.shape[0]):
_, c = sess.run([optimizer, loss], feed_dict={lr:learning_rate,
inputs: X_train[i, None],
label: labels_train[i, None].reshape(-1,2)})
avg_cost += c
avg_cost /= X_train.shape[0]
if epoch % 10 == 0:
print("Epoch: {:3d} Train Cost: {:.4f}".format(epoch, avg_cost))
acc_train = accuracy.eval(feed_dict={inputs: X_train, label: labels_train.reshape(-1,2)})
print("Train accuracy: {:3.2f}%".format(acc_train*100.0))
acc_test = accuracy.eval(feed_dict={inputs: X_test, label: labels_test.reshape(-1,2)})
print("Test accuracy: {:3.2f}%".format(acc_test*100.0))

Tensorflow starts fast and slows down during training [duplicate]

This question already has an answer here:
TensorFlow: slow performance when getting gradients at inputs
(1 answer)
Closed 6 years ago.
Im an electrical engineering student and im trying to model an industrial plant based on the power in a resistor inside a boiller, the temperature of the water in the boiller and the water flow passing through the boiller using python 3.5 and tensorflow.
The matter is that im a beginner at python and tensorflow and i wrote this code that works, but the trainning starts fast and rapidly start to slow down, and by the middle of the trainning its starts to taking ages between steps.
I just need some help on the optimization, and of course, any tips are welcome!
Thank you very much!
Here is the code:
import numpy as np
import tensorflow as tf
input_vec_size = 3
step_size = 0.05
batch_size = 3
test_size = 16
train_end = 1905
eval_end = 290
predict_end = 1396
n_cores = 4
def read_my_file_format(filename_queue):
line_reader = tf.TextLineReader(skip_header_lines=1)
_, csv_row = line_reader.read(filename_queue)
record_defaults = [[0.0], [0.0], [0.0], [0.0]]
time, power_in, temperature, flow = \
tf.decode_csv(csv_row, record_defaults=record_defaults)
features = tf.pack([
power_in,
temperature
])
return features, flow
def input_pipeline(directory, batch_size, n_cores, buffer_size, num_epochs=None):
filename_queue = tf.train.string_input_producer(
tf.train.match_filenames_once(directory),
shuffle=True)
features, flow = read_my_file_format(filename_queue)
x, y = tf.train.batch(
[features, flow], batch_size=batch_size, allow_smaller_final_batch=True)
def init_weights(shape):
return tf.Variable(tf.random_normal(shape, stddev=0.001))
def init_bias(shape): #inicializa bias
initial = tf.constant(0.001, shape=shape) #variancia 0.1
return tf.Variable(initial)
def model(X, w_h, w_h2, w_o, B, B2, B3, p_keep_input, p_keep_hidden):
X = tf.nn.dropout(X, p_keep_input)
h = tf.nn.relu(tf.matmul(X, w_h)+B)
h = tf.nn.dropout(h, p_keep_hidden)
h2 = tf.nn.relu(tf.matmul(h, w_h2)+B2)
h2 = tf.nn.dropout(h2, p_keep_hidden)
return tf.matmul(h2, w_o)+B3
X = tf.placeholder("float", [None, input_vec_size])
Y = tf.placeholder("float", [None, 1])
p_keep_hidden = tf.placeholder("float")
p_keep_input = tf.placeholder("float")
w_h = init_weights([input_vec_size, fclayer_size])
w_h2= init_weights([fclayer_size, fclayer_size])
w_o= init_weights([fclayer_size, 1])
B = init_bias([fclayer_size])
B2 = init_bias([fclayer_size])
B3 = init_bias([1])
py_x = model(X, w_h, w_h2, w_o, B, B2, B3, p_keep_input, p_keep_hidden)
predict_op = py_x[0]
cost = tf.reduce_mean(tf.square(predict_op - Y))
train_op = tf.train.MomentumOptimizer(step_size, 0.5).minimize(cost)
saver = tf.train.Saver()
directory = "./train/*.csv"
x, y = input_pipeline(directory, batch_size, n_cores, buffer_size, num_epochs=None)
directory_eval = "./eval/*.csv"
xe, ye = input_pipeline(directory_eval, test_size, n_cores, buffer_size, num_epochs=None)
directory_predict = "./predict/*.csv"
xp, yp = input_pipeline(directory_predict, test_size, n_cores, buffer_size, num_epochs=None)
with tf.Session() as sess:
tf.initialize_all_variables().run()
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
print("==================================TREINAMENTO=================================")
for iteraction in range(int(train_end/batch_size)):
trX, trY = sess.run([x,y])
for i in range(0, batch_size):
features, features_past, features_past2 = sess.run(tf.unpack(trX[i])), sess.run(tf.unpack(trX[i-1])), sess.run(tf.unpack(trX[i-2]))
power_in_i = features[0] - 4
temperature_i = features[1]
temperature_i1 = features_past[1]
temperature_i2 = features_past2[1]
trX_now = tf.pack([power_in_i, (temperature_i-temperature_i1), (temperature_i-temperature_i2)])
trX_now = sess.run(trX_now)
X_Batch, Y_Batch = trX_now.reshape([-1, input_vec_size]), trY[i].reshape([-1, 1])
sess.run(train_op, feed_dict={X: X_Batch,
Y: Y_Batch, p_keep_input: 0.95, p_keep_hidden: 0.7})
if(i%batch_size == 0):
predict_train = sess.run(tf.reshape(predict_op, [-1, 1]), feed_dict={X: X_Batch, p_keep_input: 1.0, p_keep_hidden: 1.0})
train_cost = sess.run(cost, feed_dict={py_x: predict_train, Y: Y_Batch})
print("Train Batch:", iteraction,"Sample:", batch_size*iteraction, "X:", X_Batch, "Y:", Y_Batch, "y_:",
predict_train, "Cost:", train_cost)
saver.save(sess, "./model.ckpt")
print('Variaveis salvas com sucesso')
coord.request_stop()
coord.join(threads)
sess.close()
print('=============================Fim do Treinamento=============================')
with tf.Session() as sess:
tf.initialize_all_variables().run()
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
print("==============================VALIDAÇAO==================================")
saver.restore(sess, "./model.ckpt")
print("Model restored.")
for iteraction in range(int(eval_end/test_size)):
teX, teY = sess.run([xe, ye])
for i in range(0, test_size):
features, features_past, features_past2 = sess.run(tf.unpack(teX[i])), sess.run(tf.unpack(teX[i - 1])), sess.run(tf.unpack(teX[i-2]))
power_in_i = features[0] - 4
temperature_i = features[1]
temperature_i1 = features_past[1]
teX_now = tf.pack([power_in_i, (temperature_i - temperature_i1), (temperature_i-temperature_i2)])
teX_now = sess.run(teX_now)
X_Batch, Y_Batch = teX_now.reshape([-1, input_vec_size]), teY[i].reshape([-1, 1])
predict_eval = sess.run(tf.reshape(predict_op, [-1, 1]), feed_dict={X: X_Batch, p_keep_input: 1.0, p_keep_hidden: 1.0})
eval_cost = sess.run(cost, feed_dict={py_x: predict_eval, Y: Y_Batch})
print("Eval Batch:", iteraction,"Sample:", batch_size*iteraction, "X:", X.eval(feed_dict={X: X_Batch}), "Y:", Y_Batch, "y_:",
predict_eval, "Cost:", eval_cost)
coord.request_stop()
coord.join(threads)
sess.close()
print('=============================FIM DA VALIDAÇAO=============================')
with tf.Session() as sess:
tf.initialize_all_variables().run()
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
print("==============================PREDIÇÃO==================================")
saver.restore(sess, "./model.ckpt")
print("Model restored.")
predict_batch_mean = 0
predict_mean = 0
for iteraction in range(int(predict_end / test_size)):
tpX, tpY = sess.run([xp, yp])
for i in range(0, test_size):
features, features_past, features_past2 = sess.run(tf.unpack(tpX[i])), sess.run(tf.unpack(tpX[i - 1])), sess.run(tf.unpack(tpX[i-2]))
power_in_i = features[0]- 4
temperature_i = features[1]
temperature_i1 = features_past[1]
tpX_now = tf.pack([power_in_i, (temperature_i - temperature_i1), (temperature_i-temperature_i2)])
tpX_now = sess.run(tpX_now)
X_Batch, Y_Batch = tpX_now.reshape([-1, input_vec_size]), tpY[i].reshape([-1, 1])
prediction = sess.run(tf.reshape(predict_op, [-1, 1]), feed_dict={X: X_Batch, p_keep_input: 1.0, p_keep_hidden: 1.0})
print("Predict Batch:", iteraction,"Sample:", batch_size*iteraction, "X:", X.eval(feed_dict={X: X_Batch}), "y_:",
prediction)
predict_batch_mean = (predict_batch_mean + prediction)/i
predict_mean = (predict_mean + predict_batch_mean)/iteraction
print("Predicted Flow:", predict_mean)
coord.request_stop()
coord.join(threads)
sess.close()
My quick guess is that you are creating a lot of new nodes in each iteration through your training: those tf.packs and tf.reshapes are just making your graph bigger and bigger.
Construct the graph once outside the training loop, and I'll bet everything gets happy.
Pandas saved me this time, and im already in love with it!
After some learning, there is the working code. Now its fast, aside of the "not that good" prediction accuracy yet.
Heres the code:
import numpy as np
import pandas as pd
import tensorflow as tf
#VARIAVEIS
input_vec_size = 6
layer1_size = 512
fclayer_size = 1024
step_size = 0.02
test_size = 16
train_end = 1905
eval_end = 290
predict_end = 1396
#READS TRAIN FILE
def read_data(directory):
data=pd.read_csv(directory, sep=',',header=None)
return data
#Batch Maker
def get_batch(data, i, data_size):
j = i + (input_vec_size - 1 - data_size)*(i//(data_size - input_vec_size + 1)) + input_vec_size - 1
# print(j, i//(data_size - 5))
features = [(data[1][j] - 4) / 16,
(data[2][j] - data[2][j - 1])*10,
(data[2][j] - data[2][j - 2])*10,
(data[2][j] - data[2][j - 3])*10,
(data[2][j] - data[2][j - 4])*10,
(data[2][j] - data[2][j - 5])*10]
features = np.reshape(features, [-1, input_vec_size])
flow = data[3][j]/1500
flow = np.reshape(flow, [-1,1])
return features, flow
#Inicializaçao de variaveis
def init_weights(shape):
return tf.Variable(tf.random_normal(shape, stddev=0.001))
def init_bias(shape): #inicializa bias
initial = tf.constant(0.001, shape=shape) #variancia 0.1
return tf.Variable(initial)
#Definindo Modelo DNN
def model(X, w_h, w_h2, w_o, B, B2, p_keep_input, p_keep_hidden):
X = tf.nn.dropout(X, p_keep_input)
h = tf.nn.relu(tf.matmul(X, w_h)+B)
h = tf.nn.dropout(h, p_keep_hidden)
h2 = tf.nn.relu(tf.matmul(h, w_h2))
h2 = tf.nn.dropout(h2, p_keep_hidden)
return tf.matmul(h2, w_o)
#PLaceholders
X = tf.placeholder("float", [None, input_vec_size])
Y = tf.placeholder("float", [None, 1])
p_keep_hidden = tf.placeholder("float")
p_keep_input = tf.placeholder("float")
#Estados iniciais das variaveis
w_h = init_weights([input_vec_size, layer1_size])
w_h2= init_weights([layer1_size, fclayer_size])
w_o= init_weights([fclayer_size, 1])
B = init_bias([layer1_size])
B2 = init_bias([fclayer_size])
#Modelo
py_x = model(X, w_h, w_h2, w_o, B, B2, p_keep_input, p_keep_hidden)
#Operaçao de previsão
predict_op = tf.reshape(py_x[0], [-1,1])
#Funçao custo
cost = tf.reduce_mean(tf.square(predict_op - Y))
#Operação de treinamento
train_op = tf.train.AdadeltaOptimizer(step_size).minimize(cost)
#Utilizado para salvar as variaveis apos o treinamento
saver = tf.train.Saver()
with tf.Session() as sess:
tf.initialize_all_variables().run()
directory = '~/PycharmProjects/modelagemELT430/train/G2.csv'
data = read_data(directory)
for i in range(0, 10*(train_end - input_vec_size + 1)):
features, flow = get_batch(data, i, train_end)
# features = sess.run(features)
sess.run(train_op, feed_dict={X: features,
Y: flow, p_keep_input: 0.9, p_keep_hidden: 0.6})
predict_train = sess.run(predict_op,
feed_dict={X: features, p_keep_input: 1.0, p_keep_hidden: 1.0})
train_cost = sess.run(cost, feed_dict={py_x: predict_train, Y: flow})
print("Train Sample:", i, "X:", features, "Y:", flow*1500, "y_:",
predict_train*1500, "Cost:", train_cost)
saver.save(sess, "./model.ckpt")
print('Variaveis salvas com sucesso')
sess.close()
print('=============================Fim do Treinamento=============================')
with tf.Session() as sess:
tf.initialize_all_variables().run()
directory = '~/PycharmProjects/modelagemELT430/eval/G2E.csv'
data = read_data(directory)
print("==============================VALIDAÇAO==================================")
saver.restore(sess, "./model.ckpt")
print("Model restored.")
for i in range(0, eval_end - input_vec_size + 1):
features, flow = get_batch(data, i, eval_end)
predict_eval = sess.run(predict_op,
feed_dict={X: features, p_keep_input: 1.0, p_keep_hidden: 1.0})
eval_cost = sess.run(cost, feed_dict={py_x: predict_eval, Y: flow})
print("Eval Sample:", i, "X:", features, "Y:",flow*1500, "y_:",predict_eval*1500, "Cost:", eval_cost)
sess.close()
print('============================Fim da Validação=================================')
with tf.Session() as sess:
tf.initialize_all_variables().run()
directory = '~/PycharmProjects/modelagemELT430/predict/G2P.csv'
data = read_data(directory)
print("==============================Predição==================================")
saver.restore(sess, "./model.ckpt")
print("Model restored.")
for i in range(0, predict_end - input_vec_size + 1):
features, flow = get_batch(data, i, predict_end)
predict = sess.run(predict_op,
feed_dict={X: features, p_keep_input: 1.0, p_keep_hidden: 1.0})
eval_cost = sess.run(cost, feed_dict={py_x: predict, Y: flow})
print("Predict Sample:", i, "X:", features, "y_:",predict*1500)
sess.close()
print('============================Fim da Prediçao=================================')

Categories

Resources