Tensorflow different result using estimators and same generic model - python

I am working with the dbpedia training set (small_train, small_test). Using estimators with bag_of_words_model gives an accuracy of 0.70
Tensorflow version 0.9
Why does the following code behave differently from the estimators?
In the estimators, graph_action's _run_with_monitors does the same thing by running the train_op for the given number of steps:
outputs = session.run(tensors, feed_dict=feed_dict)
However, the same model implemented as below gives an accuracy of 0.67:
with tf.Graph().as_default():
sess = tf.Session()
keys_placeholder = tf.placeholder(tf.int64, shape=(None,),name='keys')
keys = tf.identity(keys_placeholder)
labels_placeholder = tf.placeholder(tf.int64, shape=(None,),name='labels')
input_placeholder = tf.placeholder(tf.int64, shape=(None, 10),name='input')
inputs = {'key': keys_placeholder.name, 'inputs_': input_placeholder.name}
tf.add_to_collection('inputs', json.dumps(inputs))
target = tf.one_hot(labels_placeholder, 15, 1, 0)
word_vectors = learn.ops.categorical_variable(input_placeholder, n_classes=n_words,
embedding_size=EMBEDDING_SIZE, name='words')
features = tf.reduce_max(word_vectors, reduction_indices=1)
prediction, loss = learn.models.logistic_regression(features, target)
prediction = tf.argmax(prediction, 1)
train_op = tf.contrib.layers.optimize_loss(
loss, tf.contrib.framework.get_global_step(),
optimizer='Adam', learning_rate=0.01)
outputs = {'key': keys.name, 'prediction': prediction.name}
tf.add_to_collection('outputs', json.dumps(outputs))
init = tf.initialize_all_variables()
for step in xrange(100):
fill_feed_dict = {
keys_placeholder: np.array(range(len(x_train))),
labels_placeholder: y_train,
input_placeholder: x_train }
start_time = time.time()
_, loss_value = sess.run([train_op, loss],
duration = time.time() - start_time
print("Done Training")
y_predicted = sess.run(prediction, feed_dict={input_placeholder: x_test})
score = metrics.accuracy_score(y_test, y_predicted)
print('Accuracy: {0:f}'.format(score))
I am reading the data as follows:
global n_words
training_set = pandas.read_csv('dbpedia_data/dbpedia_csv/train_small.csv', header=None)
testing_set = pandas.read_csv('dbpedia_data/dbpedia_csv/test_small.csv', header=None)
x_train = training_set[2]
y_train = training_set[0]
x_test = testing_set[2]
y_test = testing_set[0]
# Process vocabulary
vocab_processor = learn.preprocessing.VocabularyProcessor(MAX_DOCUMENT_LENGTH)
x_train = np.array(list(vocab_processor.fit_transform(x_train)))
x_test = np.array(list(vocab_processor.transform(x_test)))
n_words = len(vocab_processor.vocabulary_)


Using ray tune `tune.run` with pytorch returns different optimal hyperparameters combination

I've initialized two identical ANNs with PyTorch (identical in both structure and initial parameters), and I've noticed that hyperparameter tuning with Ray Tune returns different results for the two ANNs, even though I didn't use any random initialization.
Could someone explain what I'm doing wrong? The code is attached below:
ANN Initialization:
class Featrues_model(nn.Module):
    """Two stacked fully connected layers with no activation in between.

    Args:
        n_inputs: number of features in each input sample.
        dim_hidden: width of the hidden layer.
        n_outputs: number of values produced per sample.
    """

    def __init__(self, n_inputs, dim_hidden, n_outputs):
        # nn.Module has to be initialised before sub-modules are assigned;
        # without this call the assignments below raise AttributeError.
        super().__init__()
        self.fc1 = nn.Linear(n_inputs, dim_hidden)
        self.fc2 = nn.Linear(dim_hidden, n_outputs)

    def forward(self, X):
        """Pass X through fc1 and then fc2 and return the result."""
        hidden = self.fc1(X)
        return self.fc2(hidden)
features_model_v1 = Featrues_model(len(list_input_variables),5,6)
features_model_v2 = Featrues_model(len(list_input_variables),5,6)
Hyperparameter settings
config = {
"lr": tune.choice([1e-2, 1e-5]),
"weight_decay": tune.choice([1e-2, 1e-5]),
"batch_size": tune.choice([16,64]),
"epochs": tune.choice([10,50])
Train & Validation Dataframe
trainset = df_final.copy()
test_abs = int(len(trainset) * 0.8)
train_subset, val_subset = random_split(
trainset, [test_abs, len(trainset) - test_abs]
df_train = df_final.iloc[train_subset.indices]
df_val = df_final.iloc[val_subset.indices]
Train function design
def setting_model(config, df_train, df_val, model):
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=config["lr"], weight_decay=config["weight_decay"])
BATCH_SIZE = config["batch_size"]
for epoch in range(config["epochs"]):
train_epoch_loss = 0
train_epoch_acc = 0
step = 0
for i in tqdm(range(0, df_train.shape[0], BATCH_SIZE)):
batch_X = np.array(
batch_X = torch.Tensor([x for x in batch_X])
batch_Y = np.array(
batch_Y = torch.Tensor([int(y) for y in batch_Y])
batch_Y = batch_Y.type(torch.int64)
outputs = model.forward(batch_X)
train_loss = criterion(outputs, batch_Y)
train_acc = multi_acc(outputs, batch_Y)
train_epoch_loss += train_loss.item()
train_epoch_acc += train_acc.item()
step += 1
# print statistics
print(f"Epochs: {epoch}")
print(f"Train Loss: {train_epoch_loss/len(df_train)}")
print(f"Train Acc: {train_epoch_acc/step}")
# Validation loss
with torch.no_grad():
X_val = np.array(
X_val = torch.Tensor([x for x in X_val])
Y_val = np.array(
Y_val = torch.Tensor([int(y) for y in Y_val])
Y_val = Y_val.type(torch.int64)
outputs = model.forward(X_val)
_, predicted = torch.max(outputs.data, 1)
total = Y_val.size(0)
correct = (predicted == Y_val).sum().item()
loss = criterion(outputs, Y_val)
tune.report(loss=(loss.numpy()), accuracy=correct / total)
print(f"Validation Loss: {loss.numpy()/len(df_val)}")
print(f"Validation Acc: {correct / total:.3f}")
print("Finished Training")
Hyperparameter tuning
result_v1 = tune.run(
partial(setting_model, df_train=df_train, df_val=df_val, model=features_model_v1),
result_v2 = tune.run(
partial(setting_model, df_train=df_train, df_val=df_val, model=features_model_v2),
{'lr': 1e-05, 'weight_decay': 1e-05, 'epochs': 1}
{'lr': 0.01, 'weight_decay': 1e-05, 'epochs': 1}
The issue is the use of torch.random under the hood. Since you are not directly providing a weight matrix for your layers, pytorch initializes it for you. Luckily, you can have a reproducible experiment by setting
torch.manual_seed(x) # where x is an integer
One should use only a few random seeds; otherwise you might overfit on the random seed (see the lottery ticket hypothesis at https://arxiv.org/abs/1803.03635).

How can I make a prediction with the model below? I want to feed an image into the trained model and get the output.

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import torch
import torchvision
from torchvision import datasets
from torchvision import transforms as T # for simplifying the transforms
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, sampler, random_split
from torchvision import models
!pip install timm # kaggle doesnt have it installed by default
import timm
from timm.loss import LabelSmoothingCrossEntropy
import sys
from tqdm import tqdm
import time
import copy
def get_classes(data_dir):
    """Return the ``classes`` attribute of an ImageFolder built on data_dir."""
    return datasets.ImageFolder(data_dir).classes
def get_data_loaders(data_dir, batch_size, train = False):
if train:
transform = T.Compose([
T.RandomApply(torch.nn.ModuleList([T.ColorJitter()]), p=0.25),
T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), # imagenet means
T.RandomErasing(p=0.2, value='random')
train_data = datasets.ImageFolder(os.path.join(data_dir, "train/"), transform = transform)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=4)
return train_loader, len(train_data)
# val/test
transform = T.Compose([ # We dont need augmentation for test transforms
T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), # imagenet means
val_data = datasets.ImageFolder(os.path.join(data_dir, "validation/"), transform=transform)
test_data = datasets.ImageFolder(os.path.join(data_dir, "train/"), transform=transform)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=True, num_workers=4)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True, num_workers=4)
return val_loader, test_loader, len(val_data), len(test_data)
dataset_path = "/kaggle/input/dfdc-faces-of-the-train-sample"
(train_loader, train_data_len) = get_data_loaders(dataset_path, 128, train=True)
(val_loader, test_loader, valid_data_len, test_data_len) = get_data_loaders(dataset_path, 32, train=False)
classes = get_classes("/kaggle/input/dfdc-faces-of-the-train-sample/train")
print(classes, len(classes))
dataloaders = {
"train": train_loader,
"validation": val_loader
dataset_sizes = {
"train": train_data_len,
"validation": valid_data_len
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = torch.hub.load('facebookresearch/deit:main', 'deit_tiny_patch16_224', pretrained=True)
for param in model.parameters(): #freeze model
param.requires_grad = False
n_inputs = model.head.in_features
model.head = nn.Sequential(
nn.Linear(n_inputs, 512),
nn.Linear(512, len(classes))
model = model.to(device)
criterion = LabelSmoothingCrossEntropy()
criterion = criterion.to(device)
optimizer = optim.Adam(model.head.parameters(), lr=0.001)
# lr scheduler
exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.97)
def train_model(model, criterion, optimizer, scheduler, num_epochs=1):
since = time.time()
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
for epoch in range(num_epochs):
print(f'Epoch {epoch}/{num_epochs - 1}')
for phase in ['train', 'validation']: # We do training and validation phase per epoch
if phase == 'train':
model.train() # model to training mode
model.eval() # model to evaluate
running_loss = 0.0
running_corrects = 0.0
for inputs, labels in tqdm(dataloaders[phase]):
inputs = inputs.to(device)
labels = labels.to(device)
with torch.set_grad_enabled(phase == 'train'): # no autograd makes validation go faster
outputs = model(inputs)
_, preds = torch.max(outputs, 1) # used for accuracy
loss = criterion(outputs, labels)
if phase == 'train':
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == labels.data)
if phase == 'train':
scheduler.step() # step at end of epoch
epoch_loss = running_loss / dataset_sizes[phase]
epoch_acc = running_corrects.double() / dataset_sizes[phase]
print("{} Loss: {:.4f} Acc: {:.4f}".format(phase, epoch_loss, epoch_acc))
if phase == 'validation' and epoch_acc > best_acc:
best_acc = epoch_acc
best_model_wts = copy.deepcopy(model.state_dict()) # keep the best validation accuracy model
time_elapsed = time.time() - since # slight error
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
print("Best Val Acc: {:.4f}".format(best_acc))
return model
model_ft = train_model(model, criterion, optimizer, exp_lr_scheduler)# now it is a lot faster
test_loss = 0.0
class_correct = list(0 for i in range(len(classes)))
class_total = list(0 for i in range(len(classes)))
for data, target in tqdm(test_loader):
data, target = data.to(device), target.to(device)
with torch.no_grad(): # turn off autograd for faster testing
output = model(data)
loss = criterion(output, target)
test_loss = loss.item() * data.size(0)
_, pred = torch.max(output, 1)
correct_tensor = pred.eq(target.data.view_as(pred))
correct = np.squeeze(correct_tensor.cpu().numpy())
if len(target) == 32:
for i in range(32):
label = target.data[i]
class_correct[label] += correct[i].item()
class_total[label] += 1
test_loss = test_loss / test_data_len
print('Test Loss: {:.4f}'.format(test_loss))
for i in range(len(classes)):
if class_total[i] > 0:
print("Test Accuracy of %5s: %2d%% (%2d/%2d)" % (
classes[i], 100*class_correct[i]/class_total[i], np.sum(class_correct[i]), np.sum(class_total[i])
print("Test accuracy of %5s: NA" % (classes[i]))
print("Test Accuracy of %2d%% (%2d/%2d)" % (
100*np.sum(class_correct)/np.sum(class_total), np.sum(class_correct), np.sum(class_total)
I copied this code, and likewise the data, from Kaggle. The dataset is available on Kaggle under the same name as the one used in the dataset path in the code above. The code works fine and trains the model, but I don't know how to make a prediction.
from PIL import Image
import cv2
path_to_model = 'checkpoint.pt'
transform = T.Compose([ # We dont need augmentation for test transforms
T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), # imagenet means
PIL_image = Image.fromarray(np.uint8(img)).convert('RGB')
labels = labels.to(device)
logits = model(imge.to(device))
params = list(model.parameters())
sm = nn.Softmax()
#weight_softmax = model.linear1.weight.detach().cpu().numpy()
logits = sm(logits)
_,prediction = torch.max(logits,1)
confidence = logits[:,int(prediction.item())].item()*100
print('confidence of prediction:',logits[:,int(prediction.item())].item()*100)
I did it myself and I can predict now

Projector in Tensorboard python application

I have the following code and sample data, which work fine and do exactly what I want:
import numpy as np
import pandas as pd
import sklearn
import sklearn.preprocessing
import datetime
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector
valid_set_size_percentage = 3
test_set_size_percentage = 3
seq_len = 5 # choose sequence length
df = pd.read_csv("Test.csv", encoding = 'utf-16',sep=',',index_col = 0)
def normalize_data(df):
    """Return a min-max scaled copy of df.

    Every column is scaled with sklearn's MinMaxScaler (default settings).
    The result is a new DataFrame built from the scaled value array, so it
    has a fresh default index; the column labels are copied from df.

    Args:
        df: DataFrame whose values are all numeric.

    Returns:
        A DataFrame of the scaled values with the same column labels as df.
    """
    # Take the labels from the argument itself rather than from the global
    # df_stock, so the function works for any frame passed in.
    column_names = list(df.columns.values)
    min_max_scaler = sklearn.preprocessing.MinMaxScaler()
    scaled = pd.DataFrame(min_max_scaler.fit_transform(df.values))
    scaled.columns = column_names
    return scaled
def load_data(stock, seq_len, valid_pct=None, test_pct=None):
    """Cut stock into sliding windows and split them into train/valid/test.

    Each window holds seq_len consecutive rows. The first seq_len - 1 rows of
    a window are the input sequence; the first four columns of its last row
    are the target.

    Args:
        stock: DataFrame (or 2-D array-like) of rows ordered in time.
        seq_len: number of rows per window.
        valid_pct: percentage of windows used for validation. Defaults to the
            module-level valid_set_size_percentage.
        test_pct: percentage of windows used for testing. Defaults to the
            module-level test_set_size_percentage.

    Returns:
        [x_train, y_train, x_valid, y_valid, x_test, y_test]
    """
    if valid_pct is None:
        valid_pct = valid_set_size_percentage
    if test_pct is None:
        test_pct = test_set_size_percentage

    # DataFrame.as_matrix() was removed in pandas 1.0; np.asarray yields the
    # same value array and also accepts plain arrays.
    data_raw = np.asarray(stock)

    windows = np.array(
        [data_raw[i:i + seq_len] for i in range(len(data_raw) - seq_len)]
    )

    n_windows = windows.shape[0]
    valid_set_size = int(np.round(valid_pct / 100 * n_windows))
    test_set_size = int(np.round(test_pct / 100 * n_windows))
    train_set_size = n_windows - (valid_set_size + test_set_size)

    train_end = train_set_size
    valid_end = train_set_size + valid_set_size

    x_train = windows[:train_end, :-1, :]
    y_train = windows[:train_end, -1, :4]
    x_valid = windows[train_end:valid_end, :-1, :]
    y_valid = windows[train_end:valid_end, -1, :4]
    x_test = windows[valid_end:, :-1, :]
    y_test = windows[valid_end:, -1, :4]
    return [x_train, y_train, x_valid, y_valid, x_test, y_test]
df_stock = df.copy()
cols = list(df_stock.columns.values)
print('df_stock.columns.values = ', cols)
df_stock_norm = df_stock.copy()
df_stock_norm = normalize_data(df_stock_norm)
x_train, y_train, x_valid, y_valid, x_test, y_test = load_data(df_stock_norm, seq_len)
print('x_train.shape = ',x_train.shape)
print('y_train.shape = ', y_train.shape)
print('Inputs = ',x_train.shape[2])
print('Outputs = ', y_train.shape[1])
print('x_valid.shape = ',x_valid.shape)
print('y_valid.shape = ', y_valid.shape)
print('x_test.shape = ', x_test.shape)
print('y_test.shape = ',y_test.shape)
index_in_epoch = 0;
perm_array = np.arange(x_train.shape[0])
def get_next_batch(batch_size):
    """Return the next (inputs, targets) training mini-batch.

    Reads x_train, y_train and perm_array from module scope and advances the
    module-level cursor index_in_epoch by batch_size. Once the cursor has
    reached the end of x_train it is reset to 0 before the batch is taken.
    The last batch of a pass may hold fewer than batch_size samples.

    Args:
        batch_size: number of samples to advance the cursor by.

    Returns:
        Tuple (x_batch, y_batch) selected through perm_array[start:end].
    """
    global index_in_epoch, x_train, perm_array
    # Use >= rather than >: when the cursor lands exactly on the dataset
    # length, a strict comparison would let an empty slice through as a batch.
    if index_in_epoch >= x_train.shape[0]:
        index_in_epoch = 0  # start next epoch
    start = index_in_epoch
    index_in_epoch += batch_size
    end = index_in_epoch
    return x_train[perm_array[start:end]], y_train[perm_array[start:end]]
n_steps = seq_len -1
n_inputs = x_train.shape[2]
n_neurons = 100
n_outputs = y_train.shape[-1]
n_layers = 2
learning_rate = 0.001
batch_size =10
n_epochs = 100
train_set_size = x_train.shape[0]
test_set_size = x_test.shape[0]
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None,n_outputs])
layers = [tf.contrib.rnn.LSTMCell(num_units=n_neurons,
activation=tf.nn.leaky_relu, use_peepholes = True)
for layer in range(n_layers)]
multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)
rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)
stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])
stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])
outputs = outputs[:,n_steps-1,:] # keep only last output of sequence
loss = tf.reduce_mean(tf.squared_difference(outputs, y)) # loss function = mean squared error
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
saver = tf.train.Saver()
with tf.Session() as sess:
for iteration in range(int(n_epochs*train_set_size/batch_size)):
x_batch, y_batch = get_next_batch(batch_size) # fetch the next training batch
sess.run(training_op, feed_dict={X: x_batch, y: y_batch})
if iteration % int(1*train_set_size/batch_size) == 0:
mse_train = loss.eval(feed_dict={X: x_train, y: y_train})
mse_valid = loss.eval(feed_dict={X: x_valid, y: y_valid})
mse_test = loss.eval(feed_dict={X: x_test, y: y_test})
print('%.2f epochs: MSE train/valid/test = %.3f/%.3f/%.3f'%(
iteration*batch_size/train_set_size, mse_train, mse_valid,mse_test))
save_path = saver.save(sess, "modelfile\\model"+str(iteration)+".ckpt")
except Exception as e:
if not os.path.exists("modelfile\\"):
save_path = saver.save(sess, "modelfile\\model"+str(iteration)+".ckpt")
The following is my sample of what I am trying to execute:
Sample data: please click to see it.
I would like to add the TensorBoard Projector to my code, but I could not work out how to do it. I want to visualize the different inputs I am giving to my training. I am supplying the following columns and trying to predict the OHLC values:
'o', 'h', 'l', 'c', 'rel1', 'rel2', 'rel3', 'rel4', 'rel5', 'rel6', 'rel7', 'rel8'
I want to visualize the above columns in the projector to see how they relate to each other in producing the output.
Please let me know what I can do to achieve this.
I have tried the following, but I cannot see the Projector tab:
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None,n_outputs])
symbols = tf.placeholder(tf.int32, [None, 1], name='stock_labels')
embed_matrix = tf.Variable(
tf.random_uniform([1, n_inputs],0.0, 1.0),
stacked_symbols = tf.tile(symbols, [batch_size,n_steps], name='stacked_stock_labels')
stacked_embeds = tf.nn.embedding_lookup(embed_matrix, stacked_symbols)
# stacked_embeds = tf.nn.embedding_lookup(embed_matrix)
# After concat, inputs.shape = (batch_size, num_steps, input_size + embed_size)
inputs_with_embed = tf.concat([X, stacked_embeds], axis=2, name="inputs_with_embed")
embed_matrix_summ = tf.summary.histogram("embed_matrix", embed_matrix)
And edited the following lines in the session code:
merged_sum = tf.summary.merge_all()
global_step = 0
# Set up the logs folder
writer = tf.summary.FileWriter('logs')
projector_config = projector.ProjectorConfig()
# You can add multiple embeddings. Here we add only one.
added_embed = projector_config.embeddings.add()
added_embed.tensor_name = embed_matrix.name
# Link this tensor to its metadata file (e.g. labels).
added_embed.metadata_path = "metadata.tsv"
# The next line writes a projector_config.pbtxt in the LOG_DIR. TensorBoard will
# read this file during startup.
projector.visualize_embeddings(writer, projector_config)
if iteration % int(1*train_set_size/batch_size) == 0:
global_step += 1
mse_train = loss.eval(feed_dict={X: x_train, y: y_train})
mse_valid = loss.eval(feed_dict={X: x_valid, y: y_valid})
mse_test = loss.eval(feed_dict={X: x_test, y: y_test})
_,train_merge = sess.run([outputs,merged_sum], feed_dict={X: x_train, y: y_train})
writer.add_summary(train_merge, global_step=global_step)
Here is the metadata.tsv file
Please let me know what I missed.

Why do I get train accuracy instead of test accuracy in TensorBoard?

I want to see the test accuracy in TensorBoard, but it seems I get the accuracy on the training data instead. I print the test accuracy to the console, where it is about 70%, but in TensorBoard the curve shows the accuracy growing until it finally reaches almost 100%.
This is my code:
def train_crack_captcha_cnn(is_train, checkpoint_dir):
# Builds a resnet_v2_50 graph with 2 output classes, attaches loss/accuracy/learning-rate
# summaries, and runs it inside a tf.Session.
# NOTE(review): the indentation of this snippet has been lost, so the comments below describe
# individual statements only and do not assert which branch or loop each one belongs to.
global max_acc
X = tf.placeholder(tf.float32, [None, dr.ROWS, dr.COLS, dr.CHANNELS])
Y = tf.placeholder(tf.float32, [None, 1, 1, 2])
output, end_points = resnet_v2_50(X, num_classes = 2)
global_steps = tf.Variable(1, trainable=False)
learning_rate = tf.train.exponential_decay(0.001, global_steps, 100, 0.9)
with tf.device('/device:GPU:0'):
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=Y, logits=output))
# optimizer: to speed up training, learning_rate should start large and then decay slowly
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss, global_step=global_steps)
predict = tf.argmax(output, axis = 3)
l = tf.argmax(Y, axis = 3)
correct_pred = tf.equal(predict, l)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
## tensorboard
# NOTE(review): tf.summary.merge_all() further down collects this scalar together with loss and
# learning_rate, and the merged summary is evaluated with the batch_x/batch_y feed. The curve
# logged under 'test_accuracy' is therefore computed on get_next_batch() data, not on `validation`.
tf.summary.scalar('test_accuracy', accuracy)
tf.summary.scalar("loss", loss)
tf.summary.scalar("learning_rate", learning_rate)
saver = tf.train.Saver()
with tf.Session(config=tf.ConfigProto(allow_soft_placement = True)) as sess:
if is_train:
writer = tf.summary.FileWriter("/tmp/cnn_log/log", graph = sess.graph)
step_value = sess.run(global_steps)
while step_value < 100000:
step_value = sess.run(global_steps)
# NOTE(review): merge_all() follows the `while` line, so it appears to be re-invoked on every
# iteration - TODO confirm, and consider building it once before the loop.
merged = tf.summary.merge_all()
batch_x, batch_y = get_next_batch()
result, _, _loss= sess.run([merged, optimizer, loss], feed_dict={X: batch_x, Y: batch_y})
writer.add_summary(result, step_value)
print('step : {} loss : {}'.format(step_value, _loss))
# compute the accuracy periodically (the original comment said every 100 steps; the condition below uses 20)
if step_value % 20 == 0:
# This accuracy is evaluated on validation/validation_labels and only printed; it is not
# passed to writer.add_summary.
acc = sess.run(accuracy, feed_dict={X: validation, Y: validation_labels})
print('accuracy : {}'.format(acc))
# if the accuracy is greater than max_acc, save the model, finish training
if acc > max_acc:
max_acc = float(acc) # convert the type so it does not end up as the same reference
saver.save(sess, checkpoint_dir + "/" + str(step_value) + '-' + str(acc) + "/model.ckpt", global_step=global_steps)
##### predict #####
# predict_y = sess.run(output, feed_dict={X: test})
# data = pd.DataFrame([i for i in range(1, len(predict_y) + 1)], columns = ['id'])
# predict_y = np.argmax(predict_y, axis = 3)
# predict_y = np.reshape(predict_y,(-1))
# print(predict_y)
# predict_y = pd.Series(predict_y, name='label')
# data['label'] = predict_y
# data.to_csv("gender_submission.csv" + str(step), index=False)
##### end #####
# Restore the latest checkpoint recorded in checkpoint_dir, if one exists, then evaluate on
# validation/validation_labels.
ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
if ckpt and ckpt.model_checkpoint_path:
saver.restore(sess, ckpt.model_checkpoint_path)
acc = sess.run(accuracy, feed_dict={X: validation, Y: validation_labels})
print('accuracy : {}'.format(acc))
I add the accuracy to TensorBoard like this:
tf.summary.scalar('test_accuracy', accuracy)
and every 20 steps I compute the accuracy on the test data and print the result to the console, which is not the same as the data shown in TensorBoard.

Implementing inference for a Bayesian neural network using a TensorFlow session

I am new to machine learning. I have a final project on prediction using two algorithms, an Artificial Neural Network (ANN) and a Bayesian Neural Network (BNN). I want to compare the prediction results of the ANN and the BNN. I have finished the ANN program, but I have a problem with the BNN. I tried a tutorial from this link: bayesian neural network tutorial. This is my ANN sample code to train and evaluate the model:
keep_prob = tf.placeholder("float", name="keep_prob")
x = tf.placeholder(tf.float32, [None, n_input], name="x")
y = tf.placeholder(tf.float32, name="y")
training_epochs = 5000
display_step = 1000
batch_size = 5
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=predictions, labels=y), name="cost_function")
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001, name="Adam").minimize(cost)
with tf.Session() as sess:
for epoch in tqdm(range(training_epochs)):
avg_cost = 0.0
total_batch = int(len(x_train) / batch_size)
x_batches = np.array_split(x_train, total_batch)
y_batches = np.array_split(y_train, total_batch)
for i in range(total_batch):
batch_x, batch_y = x_batches[i], y_batches[i]
_, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y, keep_prob: 0.8})
avg_cost += c / total_batch
if epoch % display_step == 0:
print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))
print("Optimization Finished!")
correct_prediction = tf.equal(tf.argmax(predictions, 1), tf.argmax(y, 1), name="corr_pred")
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"), name="accuracy")
# print('Accuracy: ', sess.run(accuracy, feed_dict={x: x_test, y: y_test}))
print("Accuracy:", accuracy.eval({x: x_test, y: y_test, keep_prob: 1.0}))
and this is my BNN code:
# Importing required libraries
from math import floor
import edward as ed
import numpy as np
import pandas as pd
import tensorflow as tf
from edward.models import Normal, NormalWithSoftplusScale
from fancyimpute import KNN
from sklearn import preprocessing
# Read data
features_dummies_nan = pd.read_csv('csv/features_dummies_with_label.csv', sep=',')
# Function: impute missing value by KNN
def impute_missing_values_by_KNN():
    """Fill missing home/away feature values of features_dummies_nan with KNN(3).

    Columns whose name contains 'hp' and columns whose name contains 'ap' are
    completed separately with KNN(3).complete; columns whose name contains
    'label' are passed through unchanged. The three groups are concatenated
    column-wise and returned as one DataFrame.
    """
    source = features_dummies_nan

    def columns_containing(fragment):
        # Sub-frame of every column whose name includes the given fragment.
        return source[[name for name in source.columns if fragment in name]]

    def knn_filled(frame):
        # Completed copy of frame that keeps its original labels and index.
        completed = pd.DataFrame(KNN(3).complete(frame))
        completed.columns = frame.columns
        completed.index = frame.index
        return completed

    home_data = columns_containing('hp')
    away_data = columns_containing('ap')
    label_data = columns_containing('label')

    home_filled = knn_filled(home_data)
    away_filled = knn_filled(away_data)
    return pd.concat([home_filled, away_filled, label_data], axis=1)
features_dummies = impute_missing_values_by_KNN()
target = features_dummies.loc[:, 'label'].values
data = features_dummies.drop('label', axis=1)
data = data.values
perm = np.random.permutation(len(features_dummies))
data = data[perm]
target = target[perm]
train_size = 0.9
train_cnt = floor(features_dummies.shape[0] * train_size)
x_train = data[0:train_cnt] # data_train
y_train = target[0:train_cnt] # target_train
x_test = data[train_cnt:] # data_test
y_test = target[train_cnt:] # target_test
keep_prob = tf.placeholder("float", name="keep_prob")
n_input = data.shape[1] # D
n_classes = 3
n_hidden_1 = 100 # H0
n_hidden_2 = 100 # H1
n_hidden_3 = 100 # H2
def neural_network(X, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out):
    """Three ReLU hidden layers followed by a linear output layer.

    Each hidden layer computes relu(matmul(input, W_i) + b_i). The output
    layer computes matmul(hidden, W_out) + b_out, and the result is reshaped
    to one dimension with tf.reshape(..., [-1]).
    """
    activations = X
    for weights, bias in ((W_0, b_0), (W_1, b_1), (W_2, b_2)):
        activations = tf.nn.relu(tf.matmul(activations, weights) + bias)
    flat_output = tf.matmul(activations, W_out) + b_out
    return tf.reshape(flat_output, [-1])
scaler = preprocessing.StandardScaler().fit(x_train)
data_train_scaled = scaler.transform(x_train)
data_test_scaled = scaler.transform(x_test)
W_0 = Normal(loc=tf.zeros([n_input, n_hidden_1]), scale=5.0 * tf.ones([n_input, n_hidden_1]))
W_1 = Normal(loc=tf.zeros([n_hidden_1, n_hidden_2]), scale=5.0 * tf.ones([n_hidden_1, n_hidden_2]))
W_2 = Normal(loc=tf.zeros([n_hidden_2, n_hidden_3]), scale=5.0 * tf.ones([n_hidden_2, n_hidden_3]))
W_out = Normal(loc=tf.zeros([n_hidden_3, 1]), scale=5.0 * tf.ones([n_hidden_3, 1]))
b_0 = Normal(loc=tf.zeros(n_hidden_1), scale=5.0 * tf.ones(n_hidden_1))
b_1 = Normal(loc=tf.zeros(n_hidden_2), scale=5.0 * tf.ones(n_hidden_2))
b_2 = Normal(loc=tf.zeros(n_hidden_3), scale=5.0 * tf.ones(n_hidden_3))
b_out = Normal(loc=tf.zeros(1), scale=5.0 * tf.ones(1))
qW_0 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_input, n_hidden_1])),
scale=tf.Variable(tf.random_normal([n_input, n_hidden_1])))
qW_1 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
scale=tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])))
qW_2 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])),
scale=tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])))
qW_out = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_3, 1])),
scale=tf.Variable(tf.random_normal([n_hidden_3, 1])))
qb_0 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_1])),
qb_1 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_2])),
qb_2 = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([n_hidden_3])),
qb_out = NormalWithSoftplusScale(loc=tf.Variable(tf.random_normal([1])),
sigma_y = 1.0
x = tf.placeholder(tf.float32, [None, n_input])
y = Normal(loc=neural_network(x, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out), scale=sigma_y)
inference = ed.KLqp({W_0: qW_0, b_0: qb_0,
W_1: qW_1, b_1: qb_1,
W_2: qW_2, b_2: qb_2,
W_out: qW_out, b_out: qb_out}, data={x: x_train, y: y_train})
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.05
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
1000, 0.3, staircase=True)
optimizer = tf.train.AdamOptimizer(learning_rate)
inference.run(n_iter=5000, optimizer=optimizer, global_step=global_step)
However, I want to compare the results of the two algorithms, so I want some settings to be the same for the ANN and the BNN, for example the number of epochs. I therefore want to adapt my ANN code above to this BNN code section:
sigma_y = 1.0
x = tf.placeholder(tf.float32, [None, n_input])
y = Normal(loc=neural_network(x, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out), scale=sigma_y)
inference = ed.KLqp({W_0: qW_0, b_0: qb_0,
W_1: qW_1, b_1: qb_1,
W_2: qW_2, b_2: qb_2,
W_out: qW_out, b_out: qb_out}, data={x: x_train, y: y_train})
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.05
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
1000, 0.3, staircase=True)
optimizer = tf.train.AdamOptimizer(learning_rate)
inference.run(n_iter=5000, optimizer=optimizer, global_step=global_step)
There are several things that I don't understand. In the ANN there is y = tf.placeholder(tf.float32, name="y"), but in the BNN it is y = Normal(loc=neural_network(x, W_0, W_1, W_2, W_out, b_0, b_1, b_2, b_out), scale=sigma_y). Also, there is a scale in the BNN but not in the ANN. So, can I adapt my ANN train and test sample code to the BNN sample code above? I want to run inference on the BNN the way sess.run() is used for the ANN, so that I can compute the BNN prediction accuracy. Can I do that?

