I am trying to train a neural net on a binary dataset I have. The accuracy starts at 0.064, goes to 0.08 by the 10th epoch, and then converges to 0 by the 60th epoch. Can anyone please offer some advice as to what I am doing wrong in my implementation?
import tensorflow as tf
import numpy as np
import math
'''
input -> weight -> hidden layer 1 (activation function) -> weights -> hidden layer 2 -> ... -> weights -> output layer
^feed-forward
compare output to intended output = cost function (cross entropy)
optimization function (optimizer) -> minimize cost (AdamOptimizer, AdaGrad, and others)
^backpropagation
feed-forward + backpropagation = epoch (1 cycle)
'''
def nnModel(data, keep_prob):
hidden1 = {'weights':tf.Variable(tf.random_normal([len(trainX[0]),hl1Nodes])),
'biases':tf.Variable(tf.random_normal([hl1Nodes]))}
hidden2 = {'weights':tf.Variable(tf.random_normal([hl1Nodes,hl2Nodes])),
'biases':tf.Variable(tf.random_normal([hl2Nodes]))}
hidden3 = {'weights':tf.Variable(tf.random_normal([hl2Nodes,hl3Nodes])),
'biases':tf.Variable(tf.random_normal([hl3Nodes]))}
hidden4 = {'weights':tf.Variable(tf.random_normal([hl3Nodes,hl4Nodes])),
'biases':tf.Variable(tf.random_normal([hl4Nodes]))}
output = {'weights':tf.Variable(tf.random_normal([hl4Nodes,numClasses])),
'biases':tf.Variable(tf.random_normal([numClasses]))}
#(inputData * weight) + bias
l1 = tf.add(tf.matmul(data,hidden1['weights']),hidden1['biases'])
l1 = tf.nn.relu(l1)
l2 = tf.add(tf.matmul(l1,hidden2['weights']),hidden2['biases'])
l2 = tf.nn.relu(l2)
l3 = tf.add(tf.matmul(l2,hidden3['weights']),hidden3['biases'])
l3 = tf.nn.relu(l3)
l4 = tf.add(tf.matmul(l3,hidden4['weights']),hidden4['biases'])
l4 = tf.nn.relu(l4)
drop_out = tf.nn.dropout(l4, keep_prob)
theOutput = tf.matmul(drop_out, output['weights']) + output['biases']
return theOutput
def shuffle(a, b):
assert len(a) == len(b)
p = np.random.permutation(len(a))
return a[p], b[p]
def nnTrain(inputData, keep_prob, x1, y1, x2, y2):
prediction = nnModel(inputData, keep_prob)
print("8787****l",prediction)
print(y1)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction,labels=y))
optimizer = tf.train.AdagradOptimizer(.0008).minimize(cost)
numEpochs = 200
numRecords = x1.shape[0]
great20 = False
indexMult = math.floor(numRecords/10)
interval = math.floor(numEpochs/10)
trainToTestIndices = []
for i in range(10):
trainToTestIndices.append(i*indexMult)
trainToTestIndices.append(numRecords-1)
print(trainToTestIndices)
testX = x2 #[trainToTestIndices[0]:trainToTestIndices[1]][:]
testY = y2 #[trainToTestIndices[0]:trainToTestIndices[1]][:]
trainX = x1 #[trainToTestIndices[1]:][:]
trainY = y1 #[trainToTestIndices[1]:][:]
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
cntCrosses = 0
for epoch in range(numEpochs):
print("8787****l",prediction)
# k = 0
# while k < epoch:
#
# if epoch > 0 and epoch % 20 == 0:
# cntCrosses += 1
#
# testX = np.zeros((trainToTestIndices[cntCrosses+1]-trainToTestIndices[cntCrosses], x1.shape[1]))
# testY = np.zeros((trainToTestIndices[cntCrosses+1]-trainToTestIndices[cntCrosses], y1.shape[1]))
# trainX = np.zeros(((trainToTestIndices[cntCrosses]-trainToTestIndices[cntCrosses-1]+trainToTestIndices[-1]-trainToTestIndices[cntCrosses+1]+1), x1.shape[1]))
# trainY = np.zeros(((trainToTestIndices[cntCrosses]-trainToTestIndices[cntCrosses-1]+trainToTestIndices[-1]-trainToTestIndices[cntCrosses+1]+1), y1.shape[1]))
#
# testX[:(trainToTestIndices[cntCrosses+1]-trainToTestIndices[cntCrosses])][:] = x1[trainToTestIndices[cntCrosses]:trainToTestIndices[cntCrosses+1]][:]
# testY[:(trainToTestIndices[cntCrosses+1]-trainToTestIndices[cntCrosses])][:] = y1[trainToTestIndices[cntCrosses]:trainToTestIndices[cntCrosses+1]][:]
# trainX[:(trainToTestIndices[cntCrosses]-trainToTestIndices[cntCrosses-1])][:] = x1[trainToTestIndices[cntCrosses-1]:trainToTestIndices[cntCrosses]][:]
# trainY[:(trainToTestIndices[cntCrosses]-trainToTestIndices[cntCrosses-1])][:] = y1[trainToTestIndices[cntCrosses-1]:trainToTestIndices[cntCrosses]][:]
# trainX[(trainToTestIndices[cntCrosses]-trainToTestIndices[cntCrosses-1]):][:] = x1[trainToTestIndices[cntCrosses+1]:][:]
# trainY[(trainToTestIndices[cntCrosses]-trainToTestIndices[cntCrosses-1]):][:] = y1[trainToTestIndices[cntCrosses+1]:][:]
#
# great20 = True
epochCost = 0
# trainX2, trainY2 = shuffle(trainX, trainY)
# testX2, testY2 = shuffle(testX, testY)
i=0
while i < trainX.shape[0]:
start = i
end = i+numBatches
# if not great20:
batchX=np.array(trainX[start:end])
batchY=np.array(trainY[start:end])
# if great20:
# batchX=np.array(trainX2[start:end])
# batchY=np.array(trainY2[start:end])
_, c = sess.run([optimizer, cost], feed_dict = {x: batchX, y: batchY, keep_prob:0.50})
epochCost += c
i += numBatches
print('Epoch', epoch, 'completed out of', numEpochs, 'loss', epochCost)
correct = tf.equal(tf.argmax(prediction,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
# if great20:
print('Accuracy', accuracy.eval({x:testX, y:testY, keep_prob : 1.0}))
# correct = tf.equal(tf.argmax(prediction,1), tf.argmax(y,1))
# accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
# print('Accuracy', accuracy.eval({x:testX, y:testY}))
if __name__ == '__main__':
keep_prob = tf.placeholder(tf.float32)
trainX_ = np.loadtxt("newBTRAIN.csv", delimiter=',')
global trainX
global trainY
trainX = trainX_[125:,:]
trainY_ = np.loadtxt("yT.csv", delimiter=',')
trainY = trainY_[125:,:]
testX = trainX_[:125, :]
testY = trainY_[:125, :]
hl1Nodes = 500
hl2Nodes = 500
hl3Nodes = 500
hl4Nodes = 500
numClasses = trainY.shape[1]
print(numClasses)
numBatches = 100
x = tf.placeholder('float', [None, len(trainX[0])])
y = tf.placeholder('float')
nnTrain(x, keep_prob, trainX, trainY, testX, testY)
I have extracted CNN features of size 4096 from a pretrained VGG19. I am then using a shallower architecture to train a classifier with softmax and center losses. Unfortunately, the softmax loss function returns NaN. There is a detailed discussion available here; however, I am not able to resolve the problem with clipping because the labels and logits are in two different data formats (int64 and float32). I also changed the learning rate but still got the same error.
Can someone please let me know how to resolve this situation? (A sketch of clipping only the logits is included after the code.)
from __future__ import division
from __future__ import print_function
import csv
import numpy as np
import tensorflow as tf
from retrieval_model import setup_train_model
FLAGS = None
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
def get_name(read_file):
feat_lst = []
identifier_lst = []
with open(read_file, 'r') as csvfile:
read_file = csv.reader(csvfile, delimiter=',')
for row in read_file:
feat = row[:-1]
s_feat = [float(i) for i in feat]
identifier = row[-1]
feat_lst.append(s_feat)
identifier_lst.append(identifier)
return feat_lst, identifier_lst
def get_batch(batch_index, batch_size, labels, f_lst):
start_ind = batch_index * batch_size
end_ind = start_ind + batch_size
return f_lst[start_ind:end_ind], labels[start_ind:end_ind]
def creat_dict(orig_labels):
dict = {}
count = 0
for x in orig_labels:
n_label = dict.get(x, None)
if n_label is None:
dict[x] = count
count += 1
return dict
def main(_):
save_dir = 'model/one-branch-ckpt'
train_file = 'gtrain.csv'
img_feat, img_labels = get_name(train_file)
map_dict = creat_dict(img_labels)
img_labels = [map_dict.get(x) for x in img_labels]
im_feat_dim = 4096
batch_size = 50
max_num_epoch = 10
steps_per_epoch = len(img_feat) // batch_size
num_steps = steps_per_epoch * max_num_epoch
# Setup placeholders for input variables.
im_feat_plh = tf.placeholder(tf.float32, shape=[batch_size, im_feat_dim])
label_plh = tf.placeholder(tf.int64, shape=(batch_size), name='labels')
train_phase_plh = tf.placeholder(tf.bool)
# Setup training operation.
t_l = setup_train_model(im_feat_plh, train_phase_plh, label_plh, classes)
# Setup optimizer.
global_step = tf.Variable(0, trainable=False)
init_learning_rate = 0.0001
learning_rate = tf.train.exponential_decay(init_learning_rate, global_step,
steps_per_epoch, 0.794, staircase=True)
optim = tf.train.AdamOptimizer(init_learning_rate)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
train_step = optim.minimize(t_l, global_step=global_step)
# Setup model saver.
saver = tf.train.Saver(save_relative_paths=True,max_to_keep=1)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(num_steps):
im_feats, labels = get_batch(
i % steps_per_epoch, batch_size, img_labels, img_feat)
feed_dict = {
im_feat_plh: im_feats,
label_plh: labels,
train_phase_plh: True,
}
[_, loss_val] = sess.run([train_step, t_l], feed_dict=feed_dict)
if i % 100 == 0:
print('Epoch: %d Step: %d Loss: %f' % (i // steps_per_epoch, i, loss_val))
if i % steps_per_epoch == 0 and i > 0:
print('Saving checkpoint at step %d' % i)
saver.save(sess, save_dir, global_step=global_step)
if __name__ == '__main__':
np.random.seed(0)
tf.set_random_seed(0)
tf.app.run(main=main)
**************************retrieval_model********************************
def setup_train_model(im_feats, train_phase, im_labels, nrof_classes):
alfa = 0.9
# nrof_classes = 28783
i_embed = embedding_model(im_feats, train_phase, im_labels)
c_l = embedding_loss(i_embed, im_labels, alfa, nrof_classes)
loss = softmax_loss(i_embed, im_labels)
total_loss = loss + c_l
return total_loss
def add_fc(inputs, outdim, train_phase, scope_in):
fc = fully_connected(inputs, outdim, activation_fn=None, scope=scope_in + '/fc')
fc_bnorm = tf.layers.batch_normalization(fc, momentum=0.1, epsilon=1e-5,
training=train_phase, name=scope_in + '/bnorm')
fc_relu = tf.nn.relu(fc_bnorm, name=scope_in + '/relu')
fc_out = tf.layers.dropout(fc_relu, seed=0, training=train_phase, name=scope_in + '/dropout')
return fc_out
def embedding_loss(features, label, alfa, nrof_classes):
nrof_features = features.get_shape()[1]
centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32,
initializer=tf.constant_initializer(0), trainable=False)
label = tf.reshape(label, [-1])
centers_batch = tf.gather(centers, label)
diff = (1 - alfa) * (centers_batch - features)
#centers = tf.scatter_sub(centers, label, diff)
center_loss = tf.reduce_mean(tf.square(features - centers_batch))
#softmax_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=features))
#total_loss = softmax_loss + center_loss
return center_loss
def embedding_model(im_feats, train_phase, im_labels,
fc_dim=2048, embed_dim=512):
# Image branch.
im_fc1 = add_fc(im_feats, fc_dim, train_phase, 'im_embed_1')
im_fc2 = fully_connected(im_fc1, embed_dim, activation_fn=None,
scope='im_embed_2')
return tf.nn.l2_normalize(im_fc2, 1, epsilon=1e-10)
def softmax_loss(feat, im_labels):
label = tf.reshape(im_labels, [-1])
softmax = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=feat))
return softmax
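For reference, a minimal sketch of how clipping could be applied here despite the mixed dtypes: the clip goes on the float32 logits while the int64 labels are left untouched (the bound of 50.0 is an arbitrary illustrative choice, and clipped_softmax_loss is a hypothetical helper, not part of the code above):
def clipped_softmax_loss(feat, im_labels):
    label = tf.reshape(im_labels, [-1])                 # int64 class indices, unchanged
    safe_logits = tf.clip_by_value(feat, -50.0, 50.0)   # clip only the float32 logits
    return tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=safe_logits))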
I have created a custom class to be an ML model, and it is working fine, but I would like to normalize the inputs as they have a wide range of values (e.g. 0, 20000, 500, 10, 8). Currently, as a way of normalizing the inputs, I'm applying lambda x: np.log(x + 1) to each input (the +1 is so it doesn't error out when 0 is passed in). Would a normalization layer be better than my current approach? If so, how would I go about implementing it? My code for the model is below:
class FollowModel:
def __init__(self, input_shape, output_shape, hidden_layers, input_labels, learning_rate=0.001):
tf.reset_default_graph()
assert len(input_labels) == input_shape[1], 'Incorrect number of input labels!'
# Placeholders for input and output data
self.input_labels = input_labels
self.input_shape = input_shape
self.output_shape = output_shape
self.X = tf.placeholder(shape=input_shape, dtype=tf.float64, name='X')
self.y = tf.placeholder(shape=output_shape, dtype=tf.float64, name='y')
self.hidden_layers = hidden_layers
self.learning_rate = learning_rate
# Variables for two group of weights between the three layers of the network
self.W1 = tf.Variable(np.random.rand(input_shape[1], hidden_layers), dtype=tf.float64)
self.W2 = tf.Variable(np.random.rand(hidden_layers, output_shape[1]), dtype=tf.float64)
# Create the neural net graph
self.A1 = tf.sigmoid(tf.matmul(self.X, self.W1))
self.y_est = tf.sigmoid(tf.matmul(self.A1, self.W2))
# Define a loss function
self.deltas = tf.square(self.y_est - self.y) # want this to be 0
self.loss = tf.reduce_sum(self.deltas)
# Define a train operation to minimize the loss
self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.loss)
#initialize
self.model_init = tf.global_variables_initializer()
self.trained = False
def train(self, Xtrain, ytrain, Xtest, ytest, training_steps, batch_size, print_progress=True):
#intiialize session
self.trained = True
self.training_steps = training_steps
self.batch_size = batch_size
self.sess = tf.Session()
self.sess.run(self.model_init)
self.losses = []
self.accs = []
self.testing_accuracies = []
for i in range(training_steps*batch_size):
self.sess.run(self.optimizer, feed_dict={self.X: Xtrain, self.y: ytrain})
local_loss = self.sess.run(self.loss, feed_dict={self.X: Xtrain.values, self.y: ytrain.values})
self.losses.append(local_loss)
self.weights1 = self.sess.run(self.W1)
self.weights2 = self.sess.run(self.W2)
y_est_np = self.sess.run(self.y_est, feed_dict={self.X: Xtrain.values, self.y: ytrain.values})
correct = [estimate.argmax(axis=0) == target.argmax(axis=0)
for estimate, target in zip(y_est_np, ytrain.values)]
acc = 100 * sum(correct) / len(correct)
self.accs.append(acc)
if i % batch_size == 0:
batch_num = i / batch_size
if batch_num % 5 == 0:
self.testing_accuracies.append(self.test_accuracy(Xtest, ytest, False, True))
temp_table = pd.concat([Xtrain, ytrain], axis=1).sample(frac=1)
column_names = list(temp_table.columns.values)
X_columns, y_columns = column_names[0:len(column_names) - 2], column_names[len(column_names) - 2:]
Xtrain = temp_table[X_columns]
ytrain = temp_table[y_columns]
if print_progress: print('Step: %d, Accuracy: %.2f, Loss: %.2f' % (int(i/batch_size), acc, local_loss))
if print_progress: print("Training complete!\nloss: {}, hidden nodes: {}, steps: {}, epoch size: {}, total steps: {}".format(int(self.losses[-1]*100)/100, self.hidden_layers, training_steps, batch_size, training_steps*batch_size))
self.follow_accuracy = acc
return acc
def test_accuracy(self, Xtest, ytest, print_progress=True, return_accuracy=False):
if self.trained:
X = tf.placeholder(shape=Xtest.shape, dtype=tf.float64, name='X')
y = tf.placeholder(shape=ytest.shape, dtype=tf.float64, name='y')
W1 = tf.Variable(self.weights1)
W2 = tf.Variable(self.weights2)
A1 = tf.sigmoid(tf.matmul(X, W1))
y_est = tf.sigmoid(tf.matmul(A1, W2))
# Calculate the predicted outputs
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
y_est_np = sess.run(y_est, feed_dict={X: Xtest, y: ytest})
correctly_followed = 0
incorrectly_followed = 0
missed_follows = 0
correctly_skipped = 0
for estimate, actual in zip(y_est_np, ytest.values):
est = estimate.argmax(axis=0)
# print(estimate)
actual = actual.argmax(axis=0)
if est == 1 and actual == 0: incorrectly_followed += 1
elif est == 1 and actual == 1: correctly_followed += 1
elif est == 0 and actual == 1: missed_follows += 1
else: correctly_skipped += 1
# correct = [estimate.argmax(axis=0) == target.argmax(axis=0) for estimate, target in zip(y_est_np, ytest.values)]
total_followed = incorrectly_followed + correctly_followed
total_correct = correctly_followed + correctly_skipped
total_incorrect = incorrectly_followed + missed_follows
try: total_accuracy = int(total_correct * 10000 / (total_correct + total_incorrect)) / 100
except: total_accuracy = 0
total_skipped = correctly_skipped + missed_follows
try: follow_accuracy = int(correctly_followed * 10000 / total_followed) / 100
except: follow_accuracy = 0
try: skip_accuracy = int(correctly_skipped * 10000 / total_skipped) / 100
except: skip_accuracy = 0
if print_progress: print('Correctly followed {} / {} ({}%), correctly skipped {} / {} ({}%)'.format(
correctly_followed, total_followed, follow_accuracy, correctly_skipped, total_skipped, skip_accuracy))
self.follow_accuracy = follow_accuracy
if return_accuracy:
return total_accuracy
else:
print('The model is not trained!')
def make_prediction_on_normal_data(self, input_list):
assert len(input_list) == len(self.input_labels), 'Incorrect number of inputs (had {} should have {})'.format(len(input_list), len(self.input_labels))
# from ProcessData import normalize_list
# normalize_list(input_list)
input_array = np.array([input_list])
X = tf.placeholder(shape=(1, len(input_list)), dtype=tf.float64, name='X')
y = tf.placeholder(shape=(1, 2), dtype=tf.float64, name='y')
W1 = tf.Variable(self.weights1)
W2 = tf.Variable(self.weights2)
A1 = tf.sigmoid(tf.matmul(X, W1))
y_est = tf.sigmoid(tf.matmul(A1, W2))
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
y_est_np = sess.run(y_est, feed_dict={X: input_array, y: self.create_blank_outputs()})
predicted_value = y_est_np[0].argmax(axis=0)
return predicted_value
def make_prediction_on_abnormal_data(self, input_list):
from ProcessData import normalize_list
normalize_list(input_list)
return self.make_prediction_on_normal_data(input_list)
def create_blank_outputs(self):
blank_outputs = np.zeros(shape=(1,2), dtype=np.int)
for i in range(len(blank_outputs[0])):
blank_outputs[0][i] = float(blank_outputs[0][i])
return blank_outputs
I don't see why you would want to create a layer that does that. The common practice is to preprocess your inputs the way you are currently doing it.
Using the log operator is quite common for skewed data, but there are other preprocessing options, such as sklearn's MinMaxScaler and StandardScaler:
https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html
https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html
Those are just examples of two other ways to scale your data.
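For instance, a minimal sketch of both scalers (assuming your inputs are rows of a NumPy array X_train; fit on the training data only and reuse the fitted scaler at prediction time):
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler

X_train = np.array([[0., 20000., 500., 10., 8.],
                    [1., 15000., 300.,  5., 2.]])

minmax = MinMaxScaler()                    # rescales each column to [0, 1]
X_minmax = minmax.fit_transform(X_train)

standard = StandardScaler()                # zero mean, unit variance per column
X_std = standard.fit_transform(X_train)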
There is also batch normalization (BatchNorm), but it is not recommended as the first layer of the network, since the distribution of the input data is fixed and doesn't vary during training.
I wish to log the predictions every N epochs/iterations and generate a histogram for each class. My question is: how do I log the predictions into an array, together with their labels, in order to generate the histograms?
How do I make sure this happens every N epochs/iterations?
I have edited the post to add the code so you can see what I am talking about. The last two code chunks should somehow be used for what I am requesting (a sketch follows the code below).
Thanks in advance!
import tensorflow as tf
import numpy as np
import math
from random import random
from array import array
from ROOT import TFile, TTree, TH1D, TH2D, TBranch, vector
NUM_EXAMPLES = 1.6e4
TRAIN_SPLIT = .8
MINI_BATCH_SIZE = 1000
#NUM_EPOCHS = 3500
F_PATH = "/home/cauchy/Documents/Machine_Learning"
F_TEST = []
F_TEST += ["d3pd-ckt12rmd2030pp-G_ww_qqqq_%d%d00.root" % (1,2)]
F_TEST += ["d3pd-ckt12rmd2030pp-pyj%d.root" % (4)]
F_TEST += ["d3pd-ckt12rmd2030pp-pyj%d.root" % (5)]
F_TEST += ["d3pd-ckt12rmd2030pp-pyj%d.root" % (6)]
F_TEST += ["d3pd-ckt12rmd2030pp-pyj%d.root" % (7)]
#CALIBRATION_TARGET = "pt" # you can use pt,m,eta
INPUTS = ['m', 'grootau21', 'ysfilt', 'ungrngtrk'] # Removed pt
PT_MIN = 450 #for file 1200
PT_MAX = 730 #for file 1200
F_OUTPUT = "G1200_signaltobackground_from_pt_mass_ysfilt_grootau21_ungrngtrk.root"
N_INPUTS = len(INPUTS)
#============== inputs / target ====================================
jet_features = []
target = []
#=================== branches for training and validation ===========
pt = []
m = []
grootau21 =[]
ysfilt = []
ungrngtrk = []
#weight = []
#================ Prepare the dataset ========================
# I need to change the data to include the multiplication by the weight (constant)
for fi in F_TEST: #Should it include background AND signal files? Yes.
current_e = 0
f = TFile(F_PATH + '/' + fi, 'read')
t = TTree()
f.GetObject("dibjet", t) # Changed from "Tree" to "dibjet"
for entry in t:
current_e += 1
if current_e > NUM_EXAMPLES: # NUM_EXAMPLES should change for the different files
break
if (t.jet1_pt > PT_MAX or t.jet1_pt < PT_MIN):
continue
tmp = []
if 'pt' in INPUTS: tmp += [t.jet1_pt / PT_MAX] #for file 1200
if 'm' in INPUTS: tmp += [t.jet1_m / 500] #for file 1200
if 'grootau21' in INPUTS: tmp += [t.jet1_grootau21]
if 'ysfilt' in INPUTS: tmp += [t.jet1_ysfilt]
if 'ungrngtrk' in INPUTS: tmp += [t.jet1_ungrngtrk / 110] #for file 1200
# We need only look at the class {background, signal} of the entry in terms of target
jet_features += [tmp]
# One-hot encoder
if fi == 'd3pd-ckt12rmd2030pp-G_ww_qqqq_1200.root': target += [[1, 0]]
else: target += [[0, 1]]
pt += [t.jet1_pt]
m += [t.jet1_m]
grootau21 += [t.jet1_grootau21]
ysfilt += [t.jet1_ysfilt]
ungrngtrk += [t.jet1_ungrngtrk]
#weight += [t.weight]
######################################
###### prepare inputs for NN #########
trainset = list(zip(jet_features, target)) # remove ref_target?
np.random.shuffle(trainset)
jet_features, target = zip(*trainset) # What does this line do? Rearranges jetmoments\target...
total_sample = len(target)
train_size = int(total_sample*TRAIN_SPLIT)
all_x = np.float32((jet_features)) # Converts the list type? Why double paranthesis?
all_y = np.float32(target)
train_x = all_x[:train_size] # Create training\testing partitions?
test_x = all_x[train_size:]
train_y = all_y[:train_size]
test_y = all_y[train_size:]
# Define important parameters and variable to work with the tensors
learning_rate = 0.3
training_epochs = 500
cost_history = np.empty(shape=[1], dtype=float)
n_dim = N_INPUTS
#print("n_dim", n_dim)
n_class = 2
model_path = "/home/cauchy/Documents/TensorFlow/Cuts_W" # Forgot what this path is used for
# Define the number of hidden layers and number of neurons for each layer
n_hidden_1 = 10
n_hidden_2 = 10
n_hidden_3 = 10
n_hidden_4 = 10
x = tf.placeholder(tf.float32, [None, n_dim])
W = tf.Variable(tf.zeros([n_dim, n_class]))
b = tf.Variable(tf.zeros([n_class]))
y_ = tf.placeholder(tf.float32, [None, n_class]) # Should we use a vector instead with 1 for signal and 0 for background?
# Define the model
def multilayer_perceptron(x, weights, biases):
# Hidden layer with sigmoid activation
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
layer_1 = tf.nn.sigmoid(layer_1)
# Hidden layer with sigmoid activation
layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
layer_2 = tf.nn.sigmoid(layer_2)
# Hidden layer with sigmoid activation
layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3'])
layer_3 = tf.nn.sigmoid(layer_3)
# Hidden layer with ReLU activation
layer_4 = tf.add(tf.matmul(layer_3, weights['h4']), biases['b4'])
layer_4 = tf.nn.relu(layer_4)
# Output layer with linear activation
out_layer = tf.matmul(layer_4, weights['out']) + biases['out']
return out_layer
# Define the weights and the biases for each layer
weights = {
'h1': tf.Variable(tf.truncated_normal([n_dim, n_hidden_1])),
'h2': tf.Variable(tf.truncated_normal([n_hidden_1, n_hidden_2])),
'h3': tf.Variable(tf.truncated_normal([n_hidden_2, n_hidden_3])),
'h4': tf.Variable(tf.truncated_normal([n_hidden_3, n_hidden_4])),
'out': tf.Variable(tf.truncated_normal([n_hidden_4, n_class]))
}
biases = {
'b1': tf.Variable(tf.truncated_normal([n_hidden_1])),
'b2': tf.Variable(tf.truncated_normal([n_hidden_2])),
'b3': tf.Variable(tf.truncated_normal([n_hidden_3])),
'b4': tf.Variable(tf.truncated_normal([n_hidden_4])),
'out': tf.Variable(tf.truncated_normal([n_class]))
}
# Initialize all the variables
init = tf.global_variables_initializer()
saver = tf.train.Saver()
# Call your model defined
y = multilayer_perceptron(x, weights, biases)
# Define the cost function and optimizer
cost_function = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
training_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)
sess = tf.Session()
sess.run(init)
# Calculate the cost and the accuracy for each epoch
mse_history = [] # mean squared error
accuracy_history = []
for epoch in range(training_epochs):
sess.run(training_step, feed_dict={x: train_x, y_: train_y})
cost = sess.run(cost_function, feed_dict={x: train_x, y_: train_y})
cost_history = np.append(cost_history, cost)
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# print("Accuracy: ", (sess.run(accuracy, feed_dict={x:test_x, y_:test_y})))
pred_y = sess.run(y, feed_dict={x: test_x})
mse = tf.reduce_mean(tf.square(pred_y - test_y))
mse_ = sess.run(mse)
mse_history.append(mse_)
accuracy = (sess.run(accuracy, feed_dict={x: train_x, y_: train_y}))
accuracy_history.append(accuracy)
print('epoch: ', epoch, ' - ','cost: ', cost, " - MSE: ", mse_, "- Train Accuracy: ", accuracy)
save_path = saver.save(sess, model_path)
print("Model saved in file: %s" % save_path)
correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print("Test Accuracy: ", (sess.run(accuracy, feed_dict={x: test_x, y_: test_y})))
# Print the final mean square error
pred_y = sess.run(y, feed_dict={x: test_x})
mse = tf.reduce_mean(tf.square(pred_y - test_y))
print("MSE: $.4f" % sess.run(mse))
predictions = {
# Generate predictions (for PREDICT and EVAL mode)
"classes": tf.argmax(input=logits, axis=1),
# Add `softmax_tensor` to the graph. It is used for PREDICT and by the
# `logging_hook`.
"probabilities": tf.nn.softmax(logits, name="softmax_tensor")
}
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
# Set up logging for predictions
# Log the values in the "Softmax" tensor with label "probabilities"
tensors_to_log = {"probabilities": "softmax_tensor"}
logging_hook = tf.train.LoggingTensorHook(
tensors=tensors_to_log, every_n_iter=50)
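One possible way to do this with a plain Python list (rather than the Estimator LoggingTensorHook shown above): a sketch fitted to the training loop above, where y is the network output, x and y_ are the placeholders, and N is a hypothetical logging interval.
softmax_op = tf.nn.softmax(y)      # build the op once, outside the loop
N = 50                             # hypothetical logging interval (epochs)
logged = []                        # list of (epoch, probabilities, one-hot labels)
for epoch in range(training_epochs):
    sess.run(training_step, feed_dict={x: train_x, y_: train_y})
    if epoch % N == 0:
        probs = sess.run(softmax_op, feed_dict={x: test_x})
        logged.append((epoch, probs, test_y))
# Per-class histograms (numpy here; the same arrays could be filled into ROOT TH1Ds):
last_epoch, probs, labels = logged[-1]
for c in range(probs.shape[1]):
    class_scores = probs[labels.argmax(axis=1) == c, c]
    hist, edges = np.histogram(class_scores, bins=20, range=(0.0, 1.0))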
I'm trying to train a GAN using TensorFlow and PyTorch. I'm using the torchvision.datasets.ImageFolder(root, transform=None, target_transform=None, loader=) class to load the images. I have two classes: Negatives and Positives. How can I access a batch of negative and positive images from the dataloader while I'm iterating through it? Look at the dataloader iteration in the training loop (a sketch follows the code). Here is the code:
DATA_FOLDER = './tf_data/plasmodium_photos/'
IMAGE_PIXELS = 28*28*3
NOISE_SIZE = 10
BATCH_SIZE = 10
def noise(n_rows, n_cols):
return np.random.normal(size=(n_rows, n_cols))
def xavier_init(size):
in_dim = size[0] if len(size) == 1 else size[1]
stddev = 1. / np.sqrt(float(in_dim))
return tf.random_uniform(shape=size, minval=-stddev, maxval=stddev)
def images_to_vectors(images):
return images.reshape(images.shape[0], 2352)
def vectors_to_images(vectors):
return vectors.reshape(vectors.shape[0], 28, 28, 3)
def plasmodium_data():
compose = transforms.Compose(
[transforms.ToTensor(),
transforms.Normalize((.5, .5, .5), (.5, .5, .5))
])
return datasets.ImageFolder(DATA_FOLDER, transform=compose)
# Load data
data = plasmodium_data()
# Create loader with data, so that we can iterate over it
data_loader = DataLoader(data, batch_size=BATCH_SIZE, shuffle=True)
# Num batches
num_batches = len(data_loader)
## Discriminator
# Input
X = tf.placeholder(tf.float32, shape=(None, IMAGE_PIXELS))
# Layer 1 Variables
D_W1 = tf.Variable(xavier_init([2352, 1024]))
D_B1 = tf.Variable(xavier_init([1024]))
# Layer 2 Variables
D_W2 = tf.Variable(xavier_init([1024, 512]))
D_B2 = tf.Variable(xavier_init([512]))
# Layer 3 Variables
D_W3 = tf.Variable(xavier_init([512, 256]))
D_B3 = tf.Variable(xavier_init([256]))
# Out Layer Variables
D_W4 = tf.Variable(xavier_init([256, 1]))
D_B4 = tf.Variable(xavier_init([1]))
# Store Variables in list
D_var_list = [D_W1, D_B1, D_W2, D_B2, D_W3, D_B3, D_W4, D_B4]
## setting up a Generator network
# Input
Z = tf.placeholder(tf.float32, shape=(None, NOISE_SIZE))
# Layer 1 Variables
G_W1 = tf.Variable(xavier_init([10, 256]))
G_B1 = tf.Variable(xavier_init([256]))
# Layer 2 Variables
G_W2 = tf.Variable(xavier_init([256, 512]))
G_B2 = tf.Variable(xavier_init([512]))
# Layer 3 Variables
G_W3 = tf.Variable(xavier_init([512, 1024]))
G_B3 = tf.Variable(xavier_init([1024]))
# Out Layer Variables
G_W4 = tf.Variable(xavier_init([1024, 2352]))
G_B4 = tf.Variable(xavier_init([2352]))
# Store Variables in list
G_var_list = [G_W1, G_B1, G_W2, G_B2, G_W3, G_B3, G_W4, G_B4]
def discriminator(x):
l1 = tf.nn.dropout(tf.nn.leaky_relu(tf.matmul(x, D_W1) + D_B1, .2), .3)
l2 = tf.nn.dropout(tf.nn.leaky_relu(tf.matmul(l1, D_W2) + D_B2, .2), .3)
l3 = tf.nn.dropout(tf.nn.leaky_relu(tf.matmul(l2, D_W3) + D_B3, .2), .3)
out = tf.matmul(l3, D_W4) + D_B4
return out
def generator(z):
l1 = tf.nn.leaky_relu(tf.matmul(z, G_W1) + G_B1, .2)
l2 = tf.nn.leaky_relu(tf.matmul(l1, G_W2) + G_B2, .2)
l3 = tf.nn.leaky_relu(tf.matmul(l2, G_W3) + G_B3, .2)
out = tf.nn.tanh(tf.matmul(l3, G_W4) + G_B4)
return out
G_sample = generator(Z)
D_real = discriminator(X)
D_fake = discriminator(G_sample)
# Losses
D_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_real, labels=tf.ones_like(D_real)))
D_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_fake, labels=tf.zeros_like(D_fake)))
D_loss = D_loss_real + D_loss_fake
G_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_fake, labels=tf.ones_like(D_fake)))
# Optimizers
D_opt = tf.train.AdamOptimizer(2e-4).minimize(D_loss, var_list=D_var_list)
G_opt = tf.train.AdamOptimizer(2e-4).minimize(G_loss, var_list=G_var_list)
# Testing configuration
num_test_samples = 100
test_noise = noise(num_test_samples, NOISE_SIZE)
# Variable initialization
num_epochs = 200
# Start interactive session
session = tf.InteractiveSession()
# Init Variables
tf.global_variables_initializer().run()
# Init Logger
# Needed to make sure the logging output is visible.
# See https://github.com/tensorflow/tensorflow/issues/3047
tf.logging.set_verbosity(tf.logging.INFO)
X_batch = None
Z_batch = None
#Training
# Iterate through epochs
for epoch in range(num_epochs):
for n_batch, (batch, label) in enumerate(data_loader):
#print(epoch)
# 1. Train Discriminator
print(label)
#print(type(batch))
#print(batch.permute(0, 2, 3, 1).shape)
X_batch = images_to_vectors(batch.permute(0, 2, 3, 1).numpy())
feed_dict = {X: X_batch, Z: noise(BATCH_SIZE, NOISE_SIZE)}
_, d_error, d_pred_real, d_pred_fake = session.run(
[D_opt, D_loss, D_real, D_fake], feed_dict=feed_dict
)
# 2. Train Generator
feed_dict = {Z: noise(BATCH_SIZE, NOISE_SIZE)}
_, g_error = session.run(
[G_opt, G_loss], feed_dict=feed_dict
)
if n_batch % 5 == 0:
display.clear_output(True)
# Generate images from test noise
test_images = session.run(
G_sample, feed_dict={Z: test_noise}
)
test_images = vectors_to_images(test_images)
# Log Images
for i in range(len(test_images)):
imgplot = plt.imshow(test_images[i])
# Log Status
#tf.logging.info(
# epoch, num_epochs, n_batch, num_batches,
# d_error, g_error, d_pred_real, d_pred_fake
#)
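As a rough sketch of one way to split a mini-batch into negatives and positives with boolean masking on the labels (this assumes ImageFolder mapped Negatives to index 0 and Positives to index 1, since it orders classes alphabetically; check data.class_to_idx to be sure):
for n_batch, (batch, label) in enumerate(data_loader):
    neg_images = batch[label == 0]     # tensor of shape [num_neg_in_batch, C, H, W]
    pos_images = batch[label == 1]     # tensor of shape [num_pos_in_batch, C, H, W]
    neg_vecs = images_to_vectors(neg_images.permute(0, 2, 3, 1).numpy())
    pos_vecs = images_to_vectors(pos_images.permute(0, 2, 3, 1).numpy())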
I am new to machine learning. I am studying the Iris dataset and used sepal length, sepal width, and petal length to predict petal width with a neural network: 3 input nodes feeding weights A1 with bias b1, 10 hidden nodes feeding weights A2 with bias b2, and 1 output node.
Further, the variables x_val_train, x_val_test, y_val_train, and y_val_test are used for training and testing.
The main function is as below.
x_val = np.array([x[0:3] for x in iris.data])
y_val = np.array([x[3] for x in iris.data])
hidden_layer_size = 10
#Generate a 1D array of random numbers range round(len(x_val)*0.8
train_indices = np.random.choice(len(x_val), round(len(x_val)*0.8), replace = False)
#Create a set which does not contain the numbers in train_indices and turn it into array
test_indices = np.array(list(set(range(len(x_val))) - set(train_indices)))
#print("Train Indexes\n",train_indices,test_indices)
x_val_train = x_val[train_indices]
x_val_test = x_val[test_indices]
y_val_train = y_val[train_indices]
y_val_test = y_val[test_indices]
x_data = tf.placeholder(shape=[None, 3], dtype = tf.float32)
y_target = tf.placeholder(shape = [None, 1], dtype = tf.float32) #Figure out usage of None
#Create Layers for NN
A1 = tf.Variable(tf.random_normal(shape = [3,hidden_layer_size])) #Input -> Hidden
b1 = tf.Variable(tf.random_normal(shape = [hidden_layer_size])) #bias in Input for hidden
A2 = tf.Variable(tf.random_normal(shape = [hidden_layer_size,1])) #Hidden -> Output
b2 = tf.Variable(tf.random_normal(shape=[1])) #Hidden Layer Bias
#Generation of Model
hidden_output = tf.nn.relu(tf.add(tf.matmul(x_data,A1),b1))
final_output = tf.nn.relu(tf.add(tf.matmul(hidden_output,A2),b2))
cost = tf.reduce_mean(tf.square(y_target - final_output))
learning_rate = 0.01
model = tf.train.AdamOptimizer(learning_rate).minimize(cost)
init = tf.global_variables_initializer()
sess.run(init)
#Training Loop
loss_vec = []
test_loss = []
epoch = 500
for i in range(epoch):
#generates len(x_val_train) random numbers
rand_index = np.random.choice(len(x_val_train), size = batch_size)
#Get len(x_val_train) data with its 3 input notes or
rand_x = x_val_train[rand_index]
#print(rand_index,rand_x)
rand_y = np.transpose([y_val_train[rand_index]])
sess.run(model, feed_dict = {x_data: rand_x, y_target: rand_y})
temp_loss = sess.run(cost, feed_dict = {x_data: rand_x, y_target : rand_y})
loss_vec.append(np.sqrt(temp_loss))
test_temp_loss = sess.run(cost, feed_dict = {x_data : x_val_test, y_target : np.transpose([y_val_test])})
test_loss.append(np.sqrt(test_temp_loss))
if (i+1)%50!=0:
print('Generation: ' + str(i+1) + '.loss = ' + str(temp_loss))
predict = tf.argmax(tf.add(tf.matmul(hidden_output,A2),b2), 1)
test = np.matrix('2 3 4')
pred = predict.eval(session = sess, feed_dict = {x_data : test})
print("pred: ", pred)
plt.plot(loss_vec, 'k-', label='Train Loss')
plt.plot(test_loss, 'r--', label='Test Loss')
plt.show()
Also, in this code:
hidden_output = tf.nn.relu(tf.add(tf.matmul(x_data,A1),b1))
I have successfully trained my model after normalizing my data, but I need to predict the output from user-supplied input data.
Here,
test = np.matrix('2 3 4')
pred = predict.eval(session = sess, feed_dict = {x_data : test})
print("pred: ", pred)
I have written this code to predict the result, but pred always returns 0. I also tried it on more than 100 samples, and it still returns 0. Can you please tell me where I am going wrong?
Summary
Let's take a look at
predict = tf.argmax(tf.add(tf.matmul(hidden_output,A2),b2), 1)
This is (almost) equal to
predict = tf.argmax(final_output)
The argmax is the main issue. If final_output were a one-hot encoding then argmax would make sense, but final_output is just a single scalar per example, so tf.argmax along axis 1 always returns index 0.
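A tiny illustration of why that happens:
import tensorflow as tf
out = tf.constant([[1.45], [1.93], [0.37]])   # shape [3, 1]: one scalar prediction per example
with tf.Session() as s:
    print(s.run(tf.argmax(out, 1)))           # [0 0 0] -- there is only one column to pick from
    print(s.run(out[:, 0]))                   # [1.45 1.93 0.37], the regression values themselves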
Full Working Code
Here is the full working code (it downloads the Iris CSV files if they aren't already present locally):
import numpy as np
import tensorflow as tf
import os
import urllib
# Data sets
IRIS_TRAINING = "iris_training.csv"
IRIS_TRAINING_URL = "http://download.tensorflow.org/data/iris_training.csv"
IRIS_TEST = "iris_test.csv"
IRIS_TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"
# If the training and test sets aren't stored locally, download them.
if not os.path.exists(IRIS_TRAINING):
raw = urllib.urlopen(IRIS_TRAINING_URL).read()
with open(IRIS_TRAINING, "w") as f:
f.write(raw)
if not os.path.exists(IRIS_TEST):
raw = urllib.urlopen(IRIS_TEST_URL).read()
with open(IRIS_TEST, "w") as f:
f.write(raw)
training_set = tf.contrib.learn.datasets.base.load_csv_with_header( filename=IRIS_TRAINING, target_dtype=np.int, features_dtype=np.float32)
test_set = tf.contrib.learn.datasets.base.load_csv_with_header( filename=IRIS_TEST, target_dtype=np.int, features_dtype=np.float32)
x_val_train = training_set.data[:,:3]
x_val_test = test_set.data[:,:3]
y_val_train = training_set.data[:,3].reshape([-1,1])
y_val_test = test_set.data[:,3].reshape([-1,1])
x_data = tf.placeholder(shape=[None, 3], dtype = tf.float32)
y_target = tf.placeholder(shape = [None, 1], dtype = tf.float32) #Figure out usage of None
#Create Layers for NN
hidden_layer_size = 20
A1 = tf.Variable(tf.random_normal(shape = [3,hidden_layer_size])) #Input -> Hidden
b1 = tf.Variable(tf.random_normal(shape = [hidden_layer_size])) #bias in Input for hidden
A2 = tf.Variable(tf.random_normal(shape = [hidden_layer_size,1])) #Hidden -> Output
b2 = tf.Variable(tf.random_normal(shape = [1])) #Hidden Layer Bias
#Generation of model
hidden_output = tf.nn.relu(tf.add(tf.matmul(x_data,A1),b1))
final_output = tf.add(tf.matmul(hidden_output,A2),b2)
loss = tf.reduce_mean(tf.square(y_target - final_output))
learning_rate = 0.01
train = tf.train.AdamOptimizer(learning_rate).minimize(loss)
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
#Training Loop
loss_vec = []
test_loss = []
epoch = 2000
batch_size = 100
def oneTrainingSession(epoch,loss_vec,test_loss,batch_size) :
rand_index = np.random.choice(len(x_val_train), size = batch_size)
rand_x = x_val_train #[rand_index,:]
rand_y = y_val_train #[rand_index,:]
temp_loss,_ = sess.run([loss,train], feed_dict = {x_data: rand_x, y_target : rand_y})
loss_vec.append(np.sqrt(temp_loss))
test_temp_loss = sess.run(loss, feed_dict = {x_data : x_val_test, y_target : y_val_test})
test_loss.append(np.sqrt(test_temp_loss))
if (i+1)%500 == 0:
print('Generation: ' + str(i+1) + '.loss = ' + str(temp_loss))
for i in range(epoch):
oneTrainingSession(epoch,loss_vec,test_loss,batch_size)
test = x_val_test[:3,:]
print "The test values are"
print test
print ""
pred = sess.run(final_output, feed_dict = {x_data : test})
print("pred: ", pred)
Output
Generation: 500.loss = 0.12768
Generation: 1000.loss = 0.0389756
Generation: 1500.loss = 0.0370268
Generation: 2000.loss = 0.0361797
The test values are
[[ 5.9000001 3. 4.19999981]
[ 6.9000001 3.0999999 5.4000001 ]
[ 5.0999999 3.29999995 1.70000005]]
('pred: ', array([[ 1.45187187],
[ 1.92516518],
[ 0.36887735]], dtype=float32))