softmax_cross_entropy_with_logits nan - python

I have extracted CNN features from a pretrain vgg19 with size 4096. Then I am using a shallower architecture to train a classifier with softmax and center losses. Unfortunately, the softmax loss function returns nan. There is detailed discussion available here, however I am not able to resolve the problem with clip because labels and logits are in two different data format (int64, float32). Furthermore, I also changed the learning rate but still got the same error.
Can some please let me know, how to resolve this situation.
from __future__ import division
from __future__ import print_function
import csv
import numpy as np
import tensorflow as tf
from retrieval_model import setup_train_model
FLAGS = None
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
def get_name(read_file):
feat_lst = []
identifier_lst = []
with open(read_file, 'r') as csvfile:
read_file = csv.reader(csvfile, delimiter=',')
for row in read_file:
feat = row[:-1]
s_feat = [float(i) for i in feat]
identifier = row[-1]
feat_lst.append(s_feat)
identifier_lst.append(identifier)
return feat_lst, identifier_lst
def get_batch(batch_index, batch_size, labels, f_lst):
start_ind = batch_index * batch_size
end_ind = start_ind + batch_size
return f_lst[start_ind:end_ind], labels[start_ind:end_ind]
def creat_dict(orig_labels):
dict = {}
count = 0
for x in orig_labels:
n_label = dict.get(x, None)
if n_label is None:
dict[x] = count
count += 1
return dict
def main(_):
save_dir = 'model/one-branch-ckpt'
train_file = 'gtrain.csv'
img_feat, img_labels = get_name(train_file)
map_dict = creat_dict(img_labels)
img_labels = [map_dict.get(x) for x in img_labels]
im_feat_dim = 4096
batch_size = 50
max_num_epoch = 10
steps_per_epoch = len(img_feat) // batch_size
num_steps = steps_per_epoch * max_num_epoch
# Setup placeholders for input variables.
im_feat_plh = tf.placeholder(tf.float32, shape=[batch_size, im_feat_dim])
label_plh = tf.placeholder(tf.int64, shape=(batch_size), name='labels')
train_phase_plh = tf.placeholder(tf.bool)
# Setup training operation.
t_l = setup_train_model(im_feat_plh, train_phase_plh, label_plh, classes)
# Setup optimizer.
global_step = tf.Variable(0, trainable=False)
init_learning_rate = 0.0001
learning_rate = tf.train.exponential_decay(init_learning_rate, global_step,
steps_per_epoch, 0.794, staircase=True)
optim = tf.train.AdamOptimizer(init_learning_rate)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
train_step = optim.minimize(t_l, global_step=global_step)
# Setup model saver.
saver = tf.train.Saver(save_relative_paths=True,max_to_keep=1)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(num_steps):
im_feats, labels = get_batch(
i % steps_per_epoch, batch_size, img_labels, img_feat)
feed_dict = {
im_feat_plh: im_feats,
label_plh: labels,
train_phase_plh: True,
}
[_, loss_val] = sess.run([train_step, t_l], feed_dict=feed_dict)
if i % 100 == 0:
print('Epoch: %d Step: %d Loss: %f' % (i // steps_per_epoch, i, loss_val))
if i % steps_per_epoch == 0 and i > 0:
print('Saving checkpoint at step %d' % i)
saver.save(sess, save_dir, global_step=global_step)
if __name__ == '__main__':
np.random.seed(0)
tf.set_random_seed(0)
tf.app.run(main=main)
**************************retrieval_model********************************
def setup_train_model(im_feats, train_phase, im_labels, nrof_classes):
alfa = 0.9
# nrof_classes = 28783
i_embed = embedding_model(im_feats, train_phase, im_labels)
c_l = embedding_loss(i_embed, im_labels, alfa, nrof_classes)
loss = softmax_loss(i_embed, im_labels)
total_loss = loss + c_l
return total_loss
def add_fc(inputs, outdim, train_phase, scope_in):
fc = fully_connected(inputs, outdim, activation_fn=None, scope=scope_in + '/fc')
fc_bnorm = tf.layers.batch_normalization(fc, momentum=0.1, epsilon=1e-5,
training=train_phase, name=scope_in + '/bnorm')
fc_relu = tf.nn.relu(fc_bnorm, name=scope_in + '/relu')
fc_out = tf.layers.dropout(fc_relu, seed=0, training=train_phase, name=scope_in + '/dropout')
return fc_out
def embedding_loss(features, label, alfa, nrof_classes):
nrof_features = features.get_shape()[1]
centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32,
initializer=tf.constant_initializer(0), trainable=False)
label = tf.reshape(label, [-1])
centers_batch = tf.gather(centers, label)
diff = (1 - alfa) * (centers_batch - features)
#centers = tf.scatter_sub(centers, label, diff)
center_loss = tf.reduce_mean(tf.square(features - centers_batch))
#softmax_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=features))
#total_loss = softmax_loss + center_loss
return center_loss
def embedding_model(im_feats, train_phase, im_labels,
fc_dim=2048, embed_dim=512):
# Image branch.
im_fc1 = add_fc(im_feats, fc_dim, train_phase, 'im_embed_1')
im_fc2 = fully_connected(im_fc1, embed_dim, activation_fn=None,
scope='im_embed_2')
return tf.nn.l2_normalize(im_fc2, 1, epsilon=1e-10)
def softmax_loss(feat, im_labels):
label = tf.reshape(im_labels, [-1])
softmax = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=feat))
return softmax

Related

Using a target size (torch.Size([400])) that is different to the input size (torch.Size([400, 1]))

I'm currently switching from tensorflow to pytorch and facing the warning UserWarning: Using a target size (torch.Size([400])) that is different to the input size (torch.Size([400, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size
I came across that unsqueeze(1) on my target could help to resolve my problem, however, I do so obtain problems in regard of the multitarget which results from the shape my loss function (crossentropy) expects.
Here is a minimal example to my code:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn.functional as F
X1 = torch.randn(400, 1, 9999)
X2 = torch.randn((400,1, 9999))
aux1 = torch.randn(400,1)
aux2 = torch.randn(400,1)
aux3 = torch.randn(400,1)
y1 = torch.rand(400,)
y2 = torch.rand(400,)
y3 = torch.rand(400,)
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn.functional as F
# In[18]:
class MultiTaskDataset:
def __init__(self,
amplitude,
phase,
weight,
temperature,
humidity,
shelf_life_clf,
shelf_life_pred,
thickness_pred
):
self.amplitude = amplitude
self.phase = phase
self.weight = weight
self.temperature = temperature
self.humidity = humidity
self.shelf_life_clf = shelf_life_clf
self.shelf_life_pred = shelf_life_pred
self.thickness_pred = thickness_pred
def __len__(self):
return self.amplitude.shape[0]
def __getitem__(self, idx):
#inputs
amplitude = self.amplitude[idx]
phase = self.phase[idx]
weight = self.weight[idx]
temperature = self.temperature[idx]
humidity = self.humidity[idx]
#outputs
shelf_life_clf = self.shelf_life_clf[idx]
shelf_life_reg = self.shelf_life_pred[idx]
thickness_pred = self.thickness_pred[idx]
return ([torch.tensor(amplitude, dtype=torch.float32),
torch.tensor(phase, dtype=torch.float32),
torch.tensor(weight, dtype=torch.float32),
torch.tensor(temperature, dtype=torch.float32),
torch.tensor(humidity, dtype=torch.float32)],
[torch.tensor(shelf_life_clf, dtype=torch.long),
torch.tensor(shelf_life_reg, dtype=torch.float32),
torch.tensor(thickness_pred, dtype=torch.float32)])
# In[19]:
# train loader
dataset = MultiTaskDataset(X1, X2, aux1, aux2, aux3,
y1,y2,y3)
train_loader = DataLoader(dataset, batch_size=512, shuffle=True, num_workers=0)
# test loader
# In[20]:
class MyModel(nn.Module):
def __init__(self):
super(MyModel, self).__init__()
self.features_amp = nn.Sequential(
nn.LazyConv1d(1, 3, 1),
)
self.features_phase = nn.Sequential(
nn.LazyConv1d(1, 3, 1),
)
self.backbone1 = nn.Sequential(
nn.LazyConv1d(64,3,1),
nn.LazyConv1d(64,3,1),
nn.AvgPool1d(3),
nn.Dropout(0.25),
)
self.backbone2 = nn.Sequential(
nn.Conv1d(64, 32,3,1),
nn.Conv1d(32, 32,3,1),
nn.AvgPool1d(3),
nn.Dropout(0.25),
)
self.backbone3 = nn.Sequential(
nn.Conv1d(32, 16,3,1),
nn.Conv1d(16, 16,3,1),
nn.AvgPool1d(3),
nn.Dropout(0.25),
)
self.classifier = nn.LazyLinear(2)
self.shelf_life_reg = nn.LazyLinear(1)
self.thickness_reg = nn.LazyLinear(1)
def forward(self, x1, x2, aux1, aux2, aux3):
x1 = self.features_amp(x1)
x2 = self.features_phase(x2)
x1 = x1.view(x1.size(0),-1)
x2 = x2.view(x2.size(0),-1)
x = torch.cat((x1, x2), dim=-1)
print(x.size())
x = x.unsqueeze(1)
print(x.size())
x = self.backbone1(x)
print(x.size())
x = torch.flatten(x, start_dim=1, end_dim=-1)
x = torch.cat([x, aux1, aux2, aux3], dim=-1)
shelf_life_clf = self.classifier(x)
shelf_life_reg = self.shelf_life_reg(x)
thickness_reg = self.thickness_reg(x)
return (shelf_life_clf,
shelf_life_reg,
thickness_reg)
model = MyModel()
optimizer = optim.Adam(model.parameters(), lr=0.003)
criterion1 = nn.CrossEntropyLoss()
criterion2 = nn.MSELoss()
criterion3 = nn.MSELoss()
# In[21]:
def train(epoch):
model.train()
#exp_lr_scheduler.step()
arr_loss = []
#first_batch = next(iter(train_loader))
for batch_idx, (data, target) in enumerate(train_loader):
#amp, phase = data
clf, reg1, reg2 = target
#print(amp.shape, phase.shape)
#print(target[2].shape)
if torch.cuda.is_available():
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
data = [data[i].cuda() for i in range(len(data))]
target = [target[i].cuda() for i in range(len(target))]
model.to(device)
optimizer.zero_grad()
output1, output2, output3 = model(*data)
#losses
loss = criterion1(output1, target[0].long())
loss1 = criterion2(output2, target[1].float())
loss2 = criterion3(output3, target[2].float())
loss = loss + loss1 + loss2
#metrices
loss.backward()
optimizer.step()
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, (batch_idx + 1) * len(data), len(train_loader.dataset),
100. * (batch_idx + 1) / len(train_loader), loss.data))
arr_loss.append(loss.data)
return arr_loss
def averaged_accuracy(outputs, targets):
assert len(outputs) != len(targets), "number of outputs should equal the number of targets"
accuracy = []
for i in range(len(outputs)):
_, predicted = torch.max(output1.data, 1)
total += target[0].size(0)
correct += (predicted == target[0]).sum()
acc = correct / total *100
accuracy.append(acc)
return torch.mean(accuracy)
# In[22]:
optimizer = optim.Adam(model.parameters(), lr=0.00003)
criterion1 = nn.CrossEntropyLoss()
criterion2 = nn.MSELoss()
criterion3 = nn.MSELoss()
n_epochs = 10
for epoch in range(n_epochs):
train(epoch)
Can anybody provide guidance to resolve this problem?

The training set and the verification set are mixed on the image classification task

I am doing image classification, but during training, the input training set and verification set appear mixed, and I see it through the tensorboard, I don't know where the problem occurred.
https://github.com/a87871660/Picture_classification
This is my code, I don't know if there is a problem on the thread.
import os
import numpy as np
from PIL import Image
import tensorflow as tf
import matplotlib.pyplot as plt
from numpy import *
C = []
label_C = []
R = []
label_R = []
H = []
label_H = []
L = []
label_L = []
Y = []
label_Y = []
def get_file (file_dir):
for file_1 in os.listdir(file_dir + '\\C'):
C.append(file_dir + '\\C' + '\\' + file_1)
label_C.append(0)
for file_2 in os.listdir(file_dir + '\\R'):
R.append(file_dir + '\\R' + '\\' + file_2)
label_R.append(1)
for file_3 in os.listdir(file_dir + '\\H'):
H.append(file_dir + '\\H' + '\\' + file_3)
label_H.append(2)
for file_4 in os.listdir(file_dir + '\\L'):
L.append(file_dir + '\\L' + '\\' + file_4)
label_L.append(3)
for file_5 in os.listdir(file_dir + '\\Y'):
Y.append(file_dir + '\\Y' + '\\' + file_5)
label_Y.append(4)
print('There are %d C\n There are %d R\n There are %d H\n There are %d L\n There are %d Y' %(len(C), len(R), len(H), len(L), len(Y)))
image_list = np.hstack((C, R, H, L, Y))
label_list = np.hstack((label_C, label_R, label_H, label_L, label_Y))
temp = np.array([image_list, label_list])
# translate [file_dir, label], [file_dir, label]...
temp = temp.transpose()
np.random.shuffle(temp)
image_list = list(temp[:, 0])
label_list = list(temp[:, 1])
label_list = [int(float(i)) for i in label_list]
"""
n_sample = len(label_list)
n_val = int(math.ceil(n_sample * ratio))
n_train = n_sample - n_val
tra_iamges = image_list[0:n_train]
tra_labels = label_list[0:n_train]
tra_labels = [int(float(i)) for i in tra_labels]
val_images = image_list[n_train:-1]
val_labels = label_list[n_train:-1]
val_labels = [int(float(i)) for i in val_labels]
"""
return image_list, label_list
def get_batch(image, label, image_h, image_w, target_height, target_width, batch_size, capacity):
image = tf.cast(image, tf.string)
label = tf.cast(label, tf.int32)
input_queue = tf.train.slice_input_producer([image, label], num_epochs=None, shuffle=False)
label = input_queue[1]
# Tensor("input_producer/GatherV2_1:0", shape=(), dtype=int32)
image_contants = tf.read_file(input_queue[0])
# Tensor("ReadFile:0", shape=(), dtype=string)
image = tf.image.decode_jpeg(image_contants, channels=3)
image = tf.image.convert_image_dtype(image, dtype=tf.float32)#!!
image = tf.image.resize_images(image, [image_h, image_w])
image = tf.image.resize_image_with_crop_or_pad(image, target_height, target_width)
#image = tf.image.per_image_standardization(image)
image_batch, label_batch = tf.train.batch([image, label], batch_size=batch_size, num_threads=16, capacity=capacity)
#image_batch, label_batch = tf.train.shuffle_batch([image, label], batch_size=batch_size, capacity=capacity, min_after_dequeue=capacity - 1)
label_batch = tf.reshape(label_batch, [batch_size])
image_batch = tf.cast(image_batch, tf.float32)#??
return image_batch, label_batch
def valuation_batch(val_iamge, val_label, image_h, image_w, target_height, target_width, batch_size, capacity):
val_iamge = tf.cast(val_iamge, tf.string)
val_label = tf.cast(val_label, tf.int32)
input_queue = tf.train.slice_input_producer([val_iamge, val_label], num_epochs=None, shuffle=False)
val_label = input_queue[1]
image_contents = tf.read_file(input_queue[0])
val_iamge = tf.image.decode_jpeg(image_contents, channels=3)
val_iamge = tf.image.resize_images(val_iamge, [image_h, image_w])
val_iamge = tf.image.resize_image_with_crop_or_pad(val_iamge, target_height, target_width)#!!
image_batch, label_batch = tf.train.batch([val_iamge, val_label], batch_size=batch_size, num_threads=16, capacity=capacity)
val_label_batch = tf.reshape(label_batch, [batch_size])
val_image_batch = tf.cast(image_batch, tf.float32)
return val_image_batch, val_label_batch
The code for preloading data is above, and the code for training is below.
import os
import numpy as np
import tensorflow as tf
from pre import get_file, get_batch, valuation_batch
from nw import deep_CNN, losses, trainning, evaluation
n_class = 5
image_h = 260
image_w = 220
target_height = 300
target_width = 260
batch_size = 16
capacity = 256
capacity1 = 256
max_step = 200000
learning_rate = 0.001
train_dir = 'E:\\DeepLearn\\picture_train'
valuation_dir = 'E:\\DeepLearn\\picture_valuation'
logs_train_dir = 'E:\\DeepLearn\\Classify_picture\\self\\model\\train\\'
logs_valuation_dir = 'E:\\DeepLearn\\Classify_picture\\self\\model\\valuation\\'
train, train_label = get_file(train_dir)
valuation, valuation_label = get_file(valuation_dir)
train_batch, train_label_batch = get_batch(train, train_label, image_h, image_w, target_height, target_width, batch_size, capacity)
val_image_batch, val_label_batch = valuation_batch(valuation, valuation_label, target_height, target_width, batch_size, capacity1)
is_training = tf.placeholder(tf.bool, shape=(), name='is_training')
image_batch = tf.cond(is_training, lambda: train_batch, lambda: val_image_batch)
label_batch = tf.cond(is_training, lambda: train_label_batch, lambda: val_label_batch)
train_logits = deep_CNN(image_batch, batch_size, n_class)
train_loss = losses(train_logits, label_batch)
train_op = trainning(train_loss, learning_rate)
train_acc = evaluation(train_logits, label_batch)
summary_op = tf.summary.merge_all()
sess = tf.Session()
train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
valuation_writer = tf.summary.FileWriter(logs_valuation_dir)
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess, coord)
try:
for step in np.arange(max_step):
if coord.should_stop():
break
_, summary_training, tra_loss, tra_acc= sess.run([train_op, summary_op, train_loss, train_acc], feed_dict={is_training: True})
summary_valuation, val_loss, val_acc = sess.run([summary_op, train_loss, train_acc], feed_dict={is_training: False})
if step % 100 == 0:
print('Step %d, train loss = %.3f, valuation loss = %.3f, train accuracy = %.2f%%, valuation accuracy = %.2f%%, learning rate is %f' % (step, tra_loss, val_loss, tra_acc * 100.0, val_acc * 100.0, _[1]))
checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
saver.save(sess, checkpoint_path, global_step=step)
train_writer.add_summary(summary_training, step)
valuation_writer.add_summary(summary_valuation, step)
except tf.errors.OutOfRangeError:
print('Done training -- epoch limit reached')
finally:
coord.request_stop()
coord.join(threads)
sess.close()

Applying Normalization to Inputs in Tensorflow

I have created a custom class to be an ML model, and it is working fine, but I would like to normalize the inputs as they have a wide range of values (e.g. 0, 20000, 500, 10, 8). Currently, as a way of normalizing the inputs, I'm applying lambda x: np.log(x + 1) to each input (the +1 is so it doesn't error out when 0 is passed in). Would a normalization layer be better than my current approach? If so, how would I go about implementing it? My code for the model is below:
class FollowModel:
def __init__(self, input_shape, output_shape, hidden_layers, input_labels, learning_rate=0.001):
tf.reset_default_graph()
assert len(input_labels) == input_shape[1], 'Incorrect number of input labels!'
# Placeholders for input and output data
self.input_labels = input_labels
self.input_shape = input_shape
self.output_shape = output_shape
self.X = tf.placeholder(shape=input_shape, dtype=tf.float64, name='X')
self.y = tf.placeholder(shape=output_shape, dtype=tf.float64, name='y')
self.hidden_layers = hidden_layers
self.learning_rate = learning_rate
# Variables for two group of weights between the three layers of the network
self.W1 = tf.Variable(np.random.rand(input_shape[1], hidden_layers), dtype=tf.float64)
self.W2 = tf.Variable(np.random.rand(hidden_layers, output_shape[1]), dtype=tf.float64)
# Create the neural net graph
self.A1 = tf.sigmoid(tf.matmul(self.X, self.W1))
self.y_est = tf.sigmoid(tf.matmul(self.A1, self.W2))
# Define a loss function
self.deltas = tf.square(self.y_est - self.y) # want this to be 0
self.loss = tf.reduce_sum(self.deltas)
# Define a train operation to minimize the loss
self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.loss)
#initialize
self.model_init = tf.global_variables_initializer()
self.trained = False
def train(self, Xtrain, ytrain, Xtest, ytest, training_steps, batch_size, print_progress=True):
#intiialize session
self.trained = True
self.training_steps = training_steps
self.batch_size = batch_size
self.sess = tf.Session()
self.sess.run(self.model_init)
self.losses = []
self.accs = []
self.testing_accuracies = []
for i in range(training_steps*batch_size):
self.sess.run(self.optimizer, feed_dict={self.X: Xtrain, self.y: ytrain})
local_loss = self.sess.run(self.loss, feed_dict={self.X: Xtrain.values, self.y: ytrain.values})
self.losses.append(local_loss)
self.weights1 = self.sess.run(self.W1)
self.weights2 = self.sess.run(self.W2)
y_est_np = self.sess.run(self.y_est, feed_dict={self.X: Xtrain.values, self.y: ytrain.values})
correct = [estimate.argmax(axis=0) == target.argmax(axis=0)
for estimate, target in zip(y_est_np, ytrain.values)]
acc = 100 * sum(correct) / len(correct)
self.accs.append(acc)
if i % batch_size == 0:
batch_num = i / batch_size
if batch_num % 5 == 0:
self.testing_accuracies.append(self.test_accuracy(Xtest, ytest, False, True))
temp_table = pd.concat([Xtrain, ytrain], axis=1).sample(frac=1)
column_names = list(temp_table.columns.values)
X_columns, y_columns = column_names[0:len(column_names) - 2], column_names[len(column_names) - 2:]
Xtrain = temp_table[X_columns]
ytrain = temp_table[y_columns]
if print_progress: print('Step: %d, Accuracy: %.2f, Loss: %.2f' % (int(i/batch_size), acc, local_loss))
if print_progress: print("Training complete!\nloss: {}, hidden nodes: {}, steps: {}, epoch size: {}, total steps: {}".format(int(self.losses[-1]*100)/100, self.hidden_layers, training_steps, batch_size, training_steps*batch_size))
self.follow_accuracy = acc
return acc
def test_accuracy(self, Xtest, ytest, print_progress=True, return_accuracy=False):
if self.trained:
X = tf.placeholder(shape=Xtest.shape, dtype=tf.float64, name='X')
y = tf.placeholder(shape=ytest.shape, dtype=tf.float64, name='y')
W1 = tf.Variable(self.weights1)
W2 = tf.Variable(self.weights2)
A1 = tf.sigmoid(tf.matmul(X, W1))
y_est = tf.sigmoid(tf.matmul(A1, W2))
# Calculate the predicted outputs
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
y_est_np = sess.run(y_est, feed_dict={X: Xtest, y: ytest})
correctly_followed = 0
incorrectly_followed = 0
missed_follows = 0
correctly_skipped = 0
for estimate, actual in zip(y_est_np, ytest.values):
est = estimate.argmax(axis=0)
# print(estimate)
actual = actual.argmax(axis=0)
if est == 1 and actual == 0: incorrectly_followed += 1
elif est == 1 and actual == 1: correctly_followed += 1
elif est == 0 and actual == 1: missed_follows += 1
else: correctly_skipped += 1
# correct = [estimate.argmax(axis=0) == target.argmax(axis=0) for estimate, target in zip(y_est_np, ytest.values)]
total_followed = incorrectly_followed + correctly_followed
total_correct = correctly_followed + correctly_skipped
total_incorrect = incorrectly_followed + missed_follows
try: total_accuracy = int(total_correct * 10000 / (total_correct + total_incorrect)) / 100
except: total_accuracy = 0
total_skipped = correctly_skipped + missed_follows
try: follow_accuracy = int(correctly_followed * 10000 / total_followed) / 100
except: follow_accuracy = 0
try: skip_accuracy = int(correctly_skipped * 10000 / total_skipped) / 100
except: skip_accuracy = 0
if print_progress: print('Correctly followed {} / {} ({}%), correctly skipped {} / {} ({}%)'.format(
correctly_followed, total_followed, follow_accuracy, correctly_skipped, total_skipped, skip_accuracy))
self.follow_accuracy = follow_accuracy
if return_accuracy:
return total_accuracy
else:
print('The model is not trained!')
def make_prediction_on_normal_data(self, input_list):
assert len(input_list) == len(self.input_labels), 'Incorrect number of inputs (had {} should have {})'.format(len(input_list), len(self.input_labels))
# from ProcessData import normalize_list
# normalize_list(input_list)
input_array = np.array([input_list])
X = tf.placeholder(shape=(1, len(input_list)), dtype=tf.float64, name='X')
y = tf.placeholder(shape=(1, 2), dtype=tf.float64, name='y')
W1 = tf.Variable(self.weights1)
W2 = tf.Variable(self.weights2)
A1 = tf.sigmoid(tf.matmul(X, W1))
y_est = tf.sigmoid(tf.matmul(A1, W2))
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
y_est_np = sess.run(y_est, feed_dict={X: input_array, y: self.create_blank_outputs()})
predicted_value = y_est_np[0].argmax(axis=0)
return predicted_value
def make_prediction_on_abnormal_data(self, input_list):
from ProcessData import normalize_list
normalize_list(input_list)
return self.make_prediction_on_normal_data(input_list)
def create_blank_outputs(self):
blank_outputs = np.zeros(shape=(1,2), dtype=np.int)
for i in range(len(blank_outputs[0])):
blank_outputs[0][i] = float(blank_outputs[0][i])
return blank_outputs
I don't see see why you want to create a layer that does that. The common practice of preprocessing your inputs is as you are currently doing.
Using the log operator is quite common for skewed data, but there are other preprocessing solutions such as sklearn's MinMaxScaler and StandardScaler
https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html
https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html
Those are just examples of two other ways to scale your data.
There is such a thing called BatchNorm but it is not recommended as the first layer of the network as distribution of the data is fixed and doesn’t vary during training.

TensorFlow tf.train.Saver() not working on tf.contrib.layers.fully_connected()

So I wrote this generalised TensorFlow code and want to save and restore models. But apparently the error is that there is no variables to save. I did everything as given in this official example. Ignore the __init__ method except the last line, since it only takes relevant parameters to train the model with, also there is no Syntax Errors. The error it produces is given below the code.
class Neural_Network(object):
def __init__(self, numberOfLayers, nodes, activations, learningRate,
optimiser = 'GradientDescent', regularizer = None,
dropout = 0.5, initializer = tf.contrib.layers.xavier_initializer()):
self.numberOfLayers = numberOfLayers
self.nodes = nodes
self.activations = activations
self.learningRate = learningRate
self.regularizer = regularizer
self.dropout = dropout
self.initializer = initializer
if(optimiser == 'GradientDescent'):
self.optimiser = tf.train.GradientDescentOptimizer(self.learningRate)
elif(optimiser == 'AdamOptimiser'):
self.optimiser = tf.train.AdamOptimizer(self.learningRate)
self.saver = tf.train.Saver()
def create_Neural_Net(self, numberOfFeatures):
self.numberOfFeatures = numberOfFeatures
self.X = tf.placeholder(dtype = tf.float32, shape = (None, self.numberOfFeatures), name = 'Input_Dataset')
#self.output = None
for i in range(0, self.numberOfLayers):
if(i == 0):
layer = tf.contrib.layers.fully_connected(self.X, self.nodes[i],
activation_fn = self.activations[i],
weights_initializer = self.initializer,
biases_initializer = self.initializer)
elif(i == self.numberOfLayers-1):
self.output = tf.contrib.layers.fully_connected(layer, self.nodes[i],
activation_fn = self.activations[i],
weights_initializer = self.initializer,
biases_initializer = self.initializer)
else:
layer = tf.contrib.layers.fully_connected(layer, self.nodes[i],
activation_fn = self.activations[i],
weights_initializer = self.initializer,
biases_initializer = self.initializer)
def train_Neural_Net(self, dataset, labels, epochs):
entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits = self.output, labels = labels, name = 'cross_entropy')
loss = tf.reduce_mean(entropy, name = 'loss')
hypothesis = tf.nn.softmax(self.output)
correct_preds = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
train_op = self.optimiser.minimize(loss)
self.loss=[]
self.accuracy = []
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(0, epochs):
_, l, acc = sess.run([train_op, loss, accuracy], feed_dict = {self.X:dataset})
print('Loss in epoch ' + str(i) + ' is: ' + str(l))
self.loss.append(l)
self.accuracy.append(acc)
self.saver.save(sess, './try.ckpt')
return self.loss, self.accuracy
And ran this code as:
nn = Neural_Network(2, [20,3], [tf.nn.relu, tf.nn.relu], 0.001, optimiser = 'AdamOptimiser')
nn.create_Neural_Net(4)
nn.train_Neural_Net(dataset, labels, 1000)
The error it gives is:
ValueError: No variables to save
So what is wrong in this code? And how can I fix it?

TensorFlow: How come my accuracy converges to 0?

I am trying to train a neural net on a binary dataset I have. The accuracy starts at 0.064, goes to 0.08 by the 10th epoch, and then converges to 0 by the 60th epoch. Can anyone please offer some advice as to what I am doing wrong in my implementation?
import tensorflow as tf
import numpy as np
import math
'''
input -> weight -> hidden layer 1 (activation funtion) -> weights -> hidden layer 2 -> ... -> weights -> output layer
^feed-forward
compare output to intended output = cost function (cross entropy)
optimization function (optimizer) -> minimize cost (AdamOptimizer, AdaGrad, and others)
^backpropogation
feed-forward + backpropogation = epoch (1 cycle)
'''
def nnModel(data, keep_prob):
hidden1 = {'weights':tf.Variable(tf.random_normal([len(trainX[0]),hl1Nodes])),
'biases':tf.Variable(tf.random_normal([hl1Nodes]))}
hidden2 = {'weights':tf.Variable(tf.random_normal([hl1Nodes,hl2Nodes])),
'biases':tf.Variable(tf.random_normal([hl2Nodes]))}
hidden3 = {'weights':tf.Variable(tf.random_normal([hl2Nodes,hl3Nodes])),
'biases':tf.Variable(tf.random_normal([hl3Nodes]))}
hidden4 = {'weights':tf.Variable(tf.random_normal([hl3Nodes,hl4Nodes])),
'biases':tf.Variable(tf.random_normal([hl4Nodes]))}
output = {'weights':tf.Variable(tf.random_normal([hl4Nodes,numClasses])),
'biases':tf.Variable(tf.random_normal([numClasses]))}
#(inputData * weight) + bias
l1 = tf.add(tf.matmul(data,hidden1['weights']),hidden1['biases'])
l1 = tf.nn.relu(l1)
l2 = tf.add(tf.matmul(l1,hidden2['weights']),hidden2['biases'])
l2 = tf.nn.relu(l2)
l3 = tf.add(tf.matmul(l2,hidden3['weights']),hidden3['biases'])
l3 = tf.nn.relu(l3)
l4 = tf.add(tf.matmul(l3,hidden4['weights']),hidden4['biases'])
l4 = tf.nn.relu(l4)
drop_out = tf.nn.dropout(l4, keep_prob)
theOutput = tf.matmul(drop_out, output['weights']) + output['biases']
return theOutput
def shuffle(a, b):
assert len(a) == len(b)
p = np.random.permutation(len(a))
return a[p], b[p]
def nnTrain(inputData, keep_prob, x1, y1, x2, y2):
prediction = nnModel(inputData, keep_prob)
print("8787****l",prediction)
print(y1)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction,labels=y))
optimizer = tf.train.AdagradOptimizer(.0008).minimize(cost)
numEpochs = 200
numRecords = x1.shape[0]
great20 = False
indexMult = math.floor(numRecords/10)
interval = math.floor(numEpochs/10)
trainToTestIndices = []
for i in range(10):
trainToTestIndices.append(i*indexMult)
trainToTestIndices.append(numRecords-1)
print(trainToTestIndices)
testX = x2 #[trainToTestIndices[0]:trainToTestIndices[1]][:]
testY = y2 #[trainToTestIndices[0]:trainToTestIndices[1]][:]
trainX = x1 #[trainToTestIndices[1]:][:]
trainY = y1 #[trainToTestIndices[1]:][:]
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
cntCrosses = 0
for epoch in range(numEpochs):
print("8787****l",prediction)
# k = 0
# while k < epoch:
#
# if epoch > 0 and epoch % 20 == 0:
# cntCrosses += 1
#
# testX = np.zeros((trainToTestIndices[cntCrosses+1]-trainToTestIndices[cntCrosses], x1.shape[1]))
# testY = np.zeros((trainToTestIndices[cntCrosses+1]-trainToTestIndices[cntCrosses], y1.shape[1]))
# trainX = np.zeros(((trainToTestIndices[cntCrosses]-trainToTestIndices[cntCrosses-1]+trainToTestIndices[-1]-trainToTestIndices[cntCrosses+1]+1), x1.shape[1]))
# trainY = np.zeros(((trainToTestIndices[cntCrosses]-trainToTestIndices[cntCrosses-1]+trainToTestIndices[-1]-trainToTestIndices[cntCrosses+1]+1), y1.shape[1]))
#
# testX[:(trainToTestIndices[cntCrosses+1]-trainToTestIndices[cntCrosses])][:] = x1[trainToTestIndices[cntCrosses]:trainToTestIndices[cntCrosses+1]][:]
# testY[:(trainToTestIndices[cntCrosses+1]-trainToTestIndices[cntCrosses])][:] = y1[trainToTestIndices[cntCrosses]:trainToTestIndices[cntCrosses+1]][:]
# trainX[:(trainToTestIndices[cntCrosses]-trainToTestIndices[cntCrosses-1])][:] = x1[trainToTestIndices[cntCrosses-1]:trainToTestIndices[cntCrosses]][:]
# trainY[:(trainToTestIndices[cntCrosses]-trainToTestIndices[cntCrosses-1])][:] = y1[trainToTestIndices[cntCrosses-1]:trainToTestIndices[cntCrosses]][:]
# trainX[(trainToTestIndices[cntCrosses]-trainToTestIndices[cntCrosses-1]):][:] = x1[trainToTestIndices[cntCrosses+1]:][:]
# trainY[(trainToTestIndices[cntCrosses]-trainToTestIndices[cntCrosses-1]):][:] = y1[trainToTestIndices[cntCrosses+1]:][:]
#
# great20 = True
epochCost = 0
# trainX2, trainY2 = shuffle(trainX, trainY)
# testX2, testY2 = shuffle(testX, testY)
i=0
while i < trainX.shape[0]:
start = i
end = i+numBatches
# if not great20:
batchX=np.array(trainX[start:end])
batchY=np.array(trainY[start:end])
# if great20:
# batchX=np.array(trainX2[start:end])
# batchY=np.array(trainY2[start:end])
_, c = sess.run([optimizer, cost], feed_dict = {x: batchX, y: batchY, keep_prob:0.50})
epochCost += c
i += numBatches
print('Epoch', epoch, 'completed out of', numEpochs, 'loss', epochCost)
correct = tf.equal(tf.argmax(prediction,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
# if great20:
print('Accuracy', accuracy.eval({x:testX, y:testY, keep_prob : 1.0}))
# correct = tf.equal(tf.argmax(prediction,1), tf.argmax(y,1))
# accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
# print('Accuracy', accuracy.eval({x:testX, y:testY}))
if __name__ == '__main__':
keep_prob = tf.placeholder(tf.float32)
trainX_ = np.loadtxt("newBTRAIN.csv", delimiter=',')
global trainX
global trainY
trainX = trainX_[125:,:]
trainY_ = np.loadtxt("yT.csv", delimiter=',')
trainY = trainY_[125:,:]
testX = trainX_[:125, :]
testY = trainY_[:125, :]
hl1Nodes = 500
hl2Nodes = 500
hl3Nodes = 500
hl4Nodes = 500
numClasses = trainY.shape[1]
print(numClasses)
numBatches = 100
x = tf.placeholder('float', [None, len(trainX[0])])
y = tf.placeholder('float')
nnTrain(x, keep_prob, trainX, trainY, testX, testY)

Categories

Resources