Related
I have a GAN that aims to reproduce the paper CycleGAN. However, even though everything is implemented correctly (as for some basic dataset it works), with the Facade Dataset the discriminator predicts the following:
print(f"Segm: {tf.reduce_mean(discriminator_segm(segmented_test[0:10])).numpy()}, {tf.reduce_mean(discriminator_segm(generator_real_to_segm(real_test[0:10]))).numpy()}")
print(f"Real: {tf.reduce_mean(discriminator_real(real_test[0:10])).numpy()}, {tf.reduce_mean(discriminator_real(generator_segm_to_real(segmented_test[0:10]))).numpy()}")
# Segm: 0.9463781714439392, 0.9564124941825867
# Real: 0.9635561108589172, 0.8240727782249451
In other words, it considers the generated output as good as the ground truth
This makes no sense, as the best guess, if the generated images were identical to the ground truth (which are not, are very far from being similar), should be to predict 0.5
At this point, I tried to train the discriminator many more times than the generator, with no difference (still get the accuracy near 90%)
If you need, even though it's a bit long, this is the training loop:
def train(EPOCHS = 400, batch_size = 4):
real_train_batched = real_train.reshape((-1, batch_size, *real_train.shape[1:]))
segmented_train_batched = segmented_train.reshape((-1, batch_size, *segmented_train.shape[1:]))
for e in range(EPOCHS):
print(f"Epoch: {e+1}/{EPOCHS}")
gen_perm = lambda : np.random.permutation(range(len(real_train_batched)))
perm = gen_perm()
real_train_batched_gen = real_train_batched[perm]
segmented_train_batched_gen = segmented_train_batched[perm]
real_train_batched_disc = real_train_batched[perm]
segmented_train_batched_disc = segmented_train_batched[perm]
pack = zip(real_train_batched_gen, segmented_train_batched_gen, real_train_batched_disc, segmented_train_batched_disc)
for i, (b_real_gen, b_segm_gen, b_real_disc, b_segm_disc) in enumerate(pack):
print(".", end="")
let_the_magic_happen(b_segm_gen, b_real_gen, b_segm_disc, b_real_disc, alpha=tf.constant(0.00000005))
#tf.function
def let_the_magic_happen_discriminator(batch_segmented_gen, batch_real_gen, batch_segmented_disc, batch_real_disc):
fake_real_images = generator_segm_to_real(batch_segmented_gen, training = False)
fake_segmented_images = generator_real_to_segm(batch_real_gen, training = False)
with tf.GradientTape(persistent=True) as disc_tape:
real_real_pred = discriminator_real(batch_real_disc, training=True)
real_fake_pred = discriminator_real(fake_real_images, training=True)
segm_real_pred = discriminator_segm(batch_segmented_disc, training=True)
segm_fake_pred = discriminator_segm(fake_segmented_images, training=True)
segm_disc_loss = discriminator_loss(segm_real_pred, segm_fake_pred)
real_disc_loss = discriminator_loss(real_real_pred, real_fake_pred)
segm_disc_grad = disc_tape.gradient(segm_disc_loss, discriminator_segm.trainable_weights)
real_disc_grad = disc_tape.gradient(real_disc_loss, discriminator_real.trainable_weights)
segm_disc_optimizer.apply_gradients(zip(segm_disc_grad , discriminator_segm.trainable_weights))
real_disc_optimizer.apply_gradients(zip(real_disc_grad , discriminator_real.trainable_weights))
#tf.function
def let_the_magic_happen_generator(batch_segmented_gen, batch_real_gen, alpha):
with tf.GradientTape(persistent=True) as gen_tape:
fake_real_images = generator_segm_to_real(batch_segmented_gen, training = True)
fake_segmented_images = generator_real_to_segm(batch_real_gen, training = True)
fake_real_images_pred = discriminator_real(fake_real_images, training=False)
fake_segmented_images_pred = discriminator_segm(fake_segmented_images, training=False)
batch_real_gen_reconstruction = generator_segm_to_real(fake_segmented_images, training=True)
batch_segmented_gen_reconstruction = generator_real_to_segm(fake_real_images, training=True)
reconstruction_loss = alpha * (
tf.keras.losses.MeanAbsoluteError()(batch_segmented_gen, batch_segmented_gen_reconstruction) +
tf.keras.losses.MeanAbsoluteError()(batch_real_gen, batch_real_gen_reconstruction)
)
# tf.print(reconstruction_loss)
# tf.print(generator_loss(fake_real_images_pred))
# tf.print(generator_loss(fake_segmented_images_pred))
segm_to_real_gen_loss = generator_loss(fake_real_images_pred) + reconstruction_loss
real_to_segm_gen_loss = generator_loss(fake_segmented_images_pred) + reconstruction_loss
real_to_segm_gen_grad = gen_tape.gradient(real_to_segm_gen_loss, generator_real_to_segm.trainable_weights)
segm_to_real_gen_grad = gen_tape.gradient(segm_to_real_gen_loss, generator_segm_to_real.trainable_weights)
real_to_segm_gen_optimizer.apply_gradients(zip(real_to_segm_gen_grad , generator_real_to_segm.trainable_weights))
segm_to_real_gen_optimizer.apply_gradients(zip(segm_to_real_gen_grad , generator_segm_to_real.trainable_weights))
def let_the_magic_happen(batch_segmented_gen, batch_real_gen, batch_segmented_disc, batch_real_disc, alpha):
let_the_magic_happen_discriminator(batch_segmented_gen, batch_real_gen, batch_segmented_disc, batch_real_disc)
if tf.random.uniform([1]) < 0.2 :
let_the_magic_happen_generator(batch_segmented_gen, batch_real_gen, alpha = alpha)
let_the_magic_happen_discriminator(batch_segmented_gen, batch_real_gen, batch_segmented_disc, batch_real_disc)
#tf.function
def generator_loss(fake_pred) :
return tf.keras.losses.BinaryCrossentropy()(tf.ones_like(fake_pred), fake_pred)
#tf.function
def discriminator_loss(real_pred, fake_pred):
return tf.keras.losses.BinaryCrossentropy()(
tf.concat((tf.ones_like(real_pred) - 5e-2, tf.zeros_like(fake_pred)),axis=0),
tf.concat((real_pred, fake_pred),axis=0),
)
The generators are 2 U-Net where the discriminator are 2 LeNet (also tried with PatchGAN, same thing happens)
I've also already tried with MAE instead of BCE
I have already trained a model with this structure using first order optimization. The code is below:
def build_model():
input_layer1 = Input((dataX.shape[1],))
input_layer2 = Input((dataT1.shape[1],))
input_layer3 = Input((dataT2.shape[1],))
input_layer4 = Input((dataT3.shape[1],))
input_layer5 = Input((dataT4.shape[1],))
#continuous layers
hidden_1 = K.layers.Dense(64, activation='tanh')(input_layer1)
hidden_2 = K.layers.Dense(64, activation='tanh')(hidden_1)
hidden_3 = K.layers.Dense(64, activation='tanh')(hidden_2)
hidden_4 = K.layers.Dense(64, activation='tanh')(hidden_3)
#categorical layers and merging
hidden_5 = K.layers.Dense(64, activation='tanh')(input_layer2)
hidden_6 = K.layers.Dense(64, activation='tanh')(input_layer3)
hidden_7 = K.layers.Dense(64, activation='tanh')(input_layer4)
hidden_8 = K.layers.Dense(64, activation='tanh')(input_layer5)
merged1 = merge.concatenate([hidden_5, hidden_6])
merged2 = merge.concatenate([hidden_7, hidden_8])
merged3 = merge.concatenate([merged1, merged2])
hidden_9 = K.layers.Dense(64, activation='tanh')(merged3)
hidden_10 = K.layers.Dense(32, activation='tanh')(hidden_9)
hidden_11 = K.layers.Dense(16, activation='tanh')(hidden_10)
hidden_12 = K.layers.Dense(8, activation='tanh')(hidden_11)
hidden_13 = K.layers.Dense(4, activation='tanh')(hidden_12)
hidden_14 = K.layers.Dense(2, activation='tanh',name='2D')(hidden_13)
#completely merged kayers
merged_layers = merge.concatenate([hidden_4, hidden_14])
merged=K.layers.Dense(64, activation='tanh')(merged_layers)
merged1=K.layers.Dense(64, activation='tanh')(merged)
merged2=K.layers.Dense(64, activation='tanh')(merged1)
merged3=K.layers.Dense(64, activation='tanh')(merged2)
output=K.layers.Dense(1, activation='tanh')(merged3)
model = Model(inputs=[input_layer1,input_layer2,input_layer3,input_layer4,input_layer5], outputs=output)
model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
return model
The optimizers offered by TensorFlow are all first order. I want to use second-order optimization and was wondering how I would do this? All the examples I have looked at describe a way for sequential models with second order optimization such as L-BFGS but not functional models.
I have a CNN model which I run on the dataset which is linked here for viewing : data
I have tried using sensitivity and specificity provided by Keras and also tried the one using scikit learn. I want help to understand if something is wrong with my code? I understand the model performance depends on a lot of things but I want to know if there is something wrong with the code that is giving me sensitivity different for Keras and different for scikit learn. Also, there are misclassifications. how can I improve my model results?
My code looks like below:
import numpy as np
import pandas as pd
import os
import tensorflow as tf
#import keras as k
from IPython.display import display
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import plot_model
#from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import TimeDistributed
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn import metrics
def confusion_metrics (conf_matrix):
# save confusion matrix and slice into four pieces
TP = conf_matrix[1][1]
TN = conf_matrix[0][0]
FP = conf_matrix[0][1]
FN = conf_matrix[1][0]
print('True Positives:', TP)
print('True Negatives:', TN)
print('False Positives:', FP)
print('False Negatives:', FN)
# calculate accuracy
conf_accuracy = (float (TP+TN) / float(TP + TN + FP + FN))
# calculate mis-classification
conf_misclassification = 1- conf_accuracy
# calculate the sensitivity
conf_sensitivity = (TP / float(TP + FN))
# calculate the specificity
conf_specificity = (TN / float(TN + FP))
# calculate precision
conf_precision = (TN / float(TN + FP))
# calculate f_1 score
conf_f1 = 2 * ((conf_precision * conf_sensitivity) / (conf_precision + conf_sensitivity))
print('-'*50)
print(f'Accuracy: {round(conf_accuracy,2)}')
print(f'Mis-Classification: {round(conf_misclassification,2)}')
print(f'Sensitivity: {round(conf_sensitivity,2)}')
print(f'Specificity: {round(conf_specificity,2)}')
print(f'Precision: {round(conf_precision,2)}')
print(f'f_1 Score: {round(conf_f1,2)}')
def og_build_model_less_layer(n_rows,n_cols):
ecg_input = Input(shape=(n_cols,n_rows), name='ecg_signal')
print('model_input shape:' , ecg_input.shape)
c1 = Conv1D(80, 2,name = 'conv_1',kernel_initializer="glorot_uniform")(ecg_input)
b1 = BatchNormalization(name = 'BN_1')(c1) #a1 = Activation('relu')(b1)
d1 = Dropout(0.4,name = 'drop_1')(b1)
c2 = Conv1D(80,2,name = 'conv_2',kernel_initializer="glorot_uniform")(d1)
b2 = BatchNormalization(name = 'BN_2')(c2)
d2 = Dropout(0.6,name = 'drop_2')(b2)
c3 = Conv1D(80, 2,name = 'conv_3',kernel_initializer="glorot_uniform")(d2)
b3 = BatchNormalization(name = 'BN_3')(c3)
d3 = Dropout(0.4,name = 'drop_3')(b3)
c4 = Conv1D(80, 2,name = 'conv_4',kernel_initializer="glorot_uniform")(d3)
b4 = BatchNormalization(name = 'BN_4')(c4)
d4 = Dropout(0.6,name = 'drop_4')(b4)
c5 = Conv1D(80, 2,name = 'conv_5',kernel_initializer="glorot_uniform")(d4)
b5 = BatchNormalization(name = 'BN_5')(c5)
d5 = Dropout(0.5,name = 'drop_5')(b5)
fl = Flatten(name='fl')(d5)
den1 = Dense(256,name='den1')(fl)
den = Dense(30,name='den2')(den1)
drp = Dropout(0.5)(den)
output = Dense(1, activation='sigmoid')(drp)
opt = Adam(learning_rate=1e-4)
sens = tf.keras.metrics.SensitivityAtSpecificity(0.15)
spec = tf.keras.metrics.SpecificityAtSensitivity(0.15)
model = Model(inputs=ecg_input, outputs=output, name='model')
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy',sens,spec])
print(model.summary)
return model
train_df = pd.read_pickle('data/train_ecg_gl.pkl')
train_df = train_df.dropna()
train_df = train_df.sort_values(by='Time', ascending=True)#, na_position='first')
test_df = pd.read_pickle('data/test_ecg_gl.pkl')
test_df = test_df.dropna()
test_df = test_df.sort_values(by='Time', ascending=True)
df = pd.concat([train_df, test_df], ignore_index=True)
df = df.sort_values(by='Time')
data = df.iloc[:,1:161].values
data=data[...,None]
labels = df['hypo_label'].values
train_data = train_df.iloc[:,1:161].values
train_data=train_data[...,None]
train_labels = train_df['hypo_label'].values
test_data = test_df.iloc[:,1:161].values
test_data=test_data[...,None]
test_labels = test_df['hypo_label'].values
xtrain,ytrain = train_data,train_labels
xtest,ytest = test_data,test_labels
n_cols = data.shape[2]
n_rows = data.shape[1]
batch_size,lr , verbose , epochs , val_split = 45 ,0.01, 1, 40, 0.1
early_stopping_callback = EarlyStopping(monitor = 'loss', patience = 10, mode = 'min', restore_best_weights = True)
cb_lr_reducer = ReduceLROnPlateau(monitor='loss', factor= 0.1, patience=10, min_lr= 1e-5)
model = og_build_model_less_layer(n_cols,n_rows)
model.fit(x = xtrain, y = ytrain, epochs=epochs,verbose=verbose,batch_size=batch_size,validation_split=val_split, shuffle=False,callbacks=[cb_lr_reducer, early_stopping_callback])
_, taccuracy,tsensitivity,tspecificity = model.evaluate(xtest, ytest, batch_size=batch_size, verbose=verbose)
print('Model Test 0.7*0.3 Accuracy:' , taccuracy)
print('Model Test 0.7*0.3 sensitivity:' , tsensitivity)
print('Model Test 0.7*0.3 specificity:' , specificity)
y_pred = model.predict(xtest)
y_pred = y_pred.flatten()
print(y_pred)
#print(p_pred.round(2))
# extract the predicted class labels
y_pred = np.where(y_pred < 0.5, 0, 1)
# Creating the confusion matrix
cm = metrics.confusion_matrix(ytest, y_pred)
# Assigning columns names
cm_df = pd.DataFrame(cm, columns = ['Predicted Negative', 'Predicted Positive'],index = ['Actual Negative', 'Actual Positive'])
# Showing the confusion matrix
print(cm_df)
confusion_metrics(cm)
After I run this code for the data linked here, it gives me following output:
Model Test 0.7*0.3 Accuracy: 0.654349148273468
Model Test 0.7*0.3 sensitivity: 0.9166133999824524
Model Test 0.7*0.3 specificity: 0.9982390403747559
##################################################
##################################################
##################################################
[0.00757153 0.00837034 0.02366774 ... 0.5926605 0.59990513 0.56060743]
Predicted Negative Predicted Positive
Actual Negative 29073 2160
Actual Positive 14531 1107
True Positives: 1107
True Negatives: 29073
False Positives: 2160
False Negatives: 14531
--------------------------------------------------
Accuracy: 0.64
Mis-Classification: 0.36
Sensitivity: 0.07
Specificity: 0.93
Precision: 0.93
f_1 Score: 0.13
The performance metrics improved when I used my custom validation set which is a stratified split of 80-20 from training.
I am trying to get the gradient with respect to a specific images (sitting in adv_loader). These images are loaded in adv_loader. I tried just taking the code that was calculating the gradient in the backprop step but Yolo3 doesn’t seem to have a grad attribute. Any ideas on how to get that?
import mxnet as mx
import gluoncv as gcv
import numpy as np
import matplotlib.pyplot as plt
from mxnet import gluon
from gluoncv.loss import YOLOV3Loss
import time
ctx = [mx.gpu(i) for i in range(mx.context.num_gpus())] if mx.context.num_gpus()>0 else [mx.cpu()]
#Datasets
train = gcv.data.RecordFileDetection('./data/train.rec',coord_normalized=False)
val = gcv.data.RecordFileDetection('./data/val.rec',coord_normalized=False)
classes = ['Passenger Vehicle','Small Car','Bus']
net = gcv.model_zoo.yolo3_mobilenet0_25_custom(pretrained_base=False,classes=classes,ctx=ctx)
net.collect_params().initialize(force_reinit=True,ctx=ctx)
import multiprocessing as mp
### The following looks nasty, and I apologize for the difficulty but basically what we are doing is
# transforming our data so it is in the correct format for yolo3
batch_size = 32 #This can be changed, but it determines how often we update our model.
num_workers = mp.cpu_count()//2
### We will import the following to reduce what i need to type (I am not sure about you but I like being lazy)
from mxnet import autograd
### 416 is our width/height we want the network to train on
sizes = 328
train_transform = gcv.data.transforms.presets.yolo.YOLO3DefaultTrainTransform(sizes,sizes, net)
# return stacked images, center_targets, scale_targets, gradient weights, objectness_targets, class_targets
# additionally, return padded ground truth bboxes, so there are 7 components returned by dataloader
batchify_fn = gcv.data.batchify.Tuple(*([gcv.data.batchify.Stack() for _ in range(6)] + [gcv.data.batchify.Pad(axis=0, pad_val=-1) for _ in range(1)]))
train_loader = mx.gluon.data.DataLoader(train.transform(train_transform), batch_size, shuffle=True,
batchify_fn=batchify_fn, last_batch='rollover',num_workers=num_workers,prefetch=num_workers + num_workers//2)
val_batchify_fn = gcv.data.batchify.Tuple(gcv.data.batchify.Stack(), gcv.data.batchify.Pad(pad_val=-1))
val_transform = gcv.data.transforms.presets.yolo.YOLO3DefaultValTransform(sizes,sizes)
val_loader = mx.gluon.data.DataLoader(
val.transform(val_transform),
batch_size, False, batchify_fn=val_batchify_fn, last_batch='keep',num_workers=num_workers,prefetch=num_workers + num_workers//2)
adv_loader = mx.gluon.data.DataLoader(train.transform(train_transform), batch_size, shuffle=True,
batchify_fn=batchify_fn, last_batch='rollover',num_workers=num_workers,prefetch=num_workers + num_workers//2) #this is a placeholder for data that will generate advarsaial examples
#How we will validate our model
def validate(net, val_data, ctx, eval_metric):
"""Test on validation dataset."""
eval_metric.reset()
# set nms threshold and topk constraint
net.set_nms(nms_thresh=0.45, nms_topk=455)
mx.nd.waitall()
net.hybridize()
for batch in val_data:
data = mx.gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
label = mx.gluon.utils.split_and_load(batch[1], ctx_list=ctx,batch_axis=0, even_split=False)
det_bboxes = []
det_ids = []
det_scores = []
gt_bboxes = []
gt_ids = []
gt_difficults = []
for x, y in zip(data, label):
# get prediction results
ids, scores, bboxes = net(x)
det_ids.append(ids)
det_scores.append(scores)
# clip to image size
det_bboxes.append(bboxes.clip(0, batch[0].shape[2]))
# split ground truths
gt_ids.append(y.slice_axis(axis=-1, begin=4, end=5))
gt_bboxes.append(y.slice_axis(axis=-1, begin=0, end=4))
gt_difficults.append(y.slice_axis(axis=-1, begin=5, end=6) if y.shape[-1] > 5 else None)
# update metric
eval_metric.update(det_bboxes, det_ids, det_scores, gt_bboxes, gt_ids, gt_difficults)
return eval_metric.get()
eval_metric = gcv.utils.metrics.voc_detection.VOCMApMetric(class_names=classes)
nepochs=11
net.initialize(force_reinit=True)
net.collect_params().reset_ctx(ctx)
#Grab a trainer or optimizer to perform the optimization
trainer = mx.gluon.Trainer(net.collect_params(),'adam',
{'learning_rate':0.001},
kvstore='device')
for i in range(nepochs):
now = time.time()
mx.nd.waitall()
net.hybridize(static_alloc=True,static_shape=True)
for ixl,batch in enumerate(train_loader):
data = mx.gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
# objectness, center_targets, scale_targets, weights, class_targets
fixed_targets = [mx.gluon.utils.split_and_load(batch[it], ctx_list=ctx, batch_axis=0) for it in range(1, 6)]
gt_boxes = mx.gluon.utils.split_and_load(batch[6], ctx_list=ctx, batch_axis=0)
sum_losses = []
with autograd.record():
for ix, x in enumerate(data):
obj_loss, center_loss, scale_loss, cls_loss = net(x, gt_boxes[ix], *[ft[ix] for ft in fixed_targets])
sum_losses.append(obj_loss + center_loss + scale_loss + cls_loss)
autograd.backward(sum_losses)
trainer.step(batch_size)
for ixl,batch in enumerate(adv_loader):
data = mx.gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
# objectness, center_targets, scale_targets, weights, class_targets
fixed_targets = [mx.gluon.utils.split_and_load(batch[it], ctx_list=ctx, batch_axis=0) for it in range(1, 6)]
gt_boxes = mx.gluon.utils.split_and_load(batch[6], ctx_list=ctx, batch_axis=0)
sum_losses = []
with autograd.record():
for ix, x in enumerate(data):
obj_loss, center_loss, scale_loss, cls_loss = net(x, gt_boxes[ix], *[ft[ix] for ft in fixed_targets])
sum_losses.append(obj_loss + center_loss + scale_loss + cls_loss)
autograd.backward(sum_losses,retain_graph=True)
print(net.grad.asnumpy())
I am getting told here that AttributeError: 'YOLOV3' object has no attribute 'grad'. I am using this documentation https://mxnet.apache.org/versions/1.6/api/python/docs/tutorials/packages/autograd/index.html
Any idea how to get an image and pass it through to the gradient of the yolo loss function?
I have been using TensorFlow for a reasonable length of time now. and believed I had a thorough understanding of how a TensorFlow graph works and executes within a session. However, I have written all of my TensorFlow models in a script-like fashion as such:
import tensorflow as tf
import DataWorker
import Constants
x = tf.placeholder(tf.float32, [None, Constants.sequenceLength, DataWorker.numFeatures])
y = tf.placeholder(tf.float32, [None, 1])
xTensors = tf.unstack(x, axis=1) # [seqLength tensors of shape (batchSize, numFeatures)]
W = tf.Variable(tf.random_normal([Constants.numHidden, 1])) # Weighted matrix
b = tf.Variable(tf.random_normal([1])) # Bias
cell = tf.contrib.rnn.BasicLSTMCell(Constants.numHidden, forget_bias=Constants.forgetBias)
outputs, finalState = tf.nn.static_rnn(cell, xTensors, dtype=tf.float32)
# predictions = [tf.add(tf.matmul(output, W), b) for output in outputs] # List of predictions after each time step
prediction = tf.add(tf.matmul(outputs[-1], W), b) # Prediction after final time step
prediction = tf.tanh(prediction) # Activation
mse = tf.losses.mean_squared_error(predictions=prediction, labels=y) # Mean loss over entire batch
accuracy = tf.reduce_mean(1 - (tf.abs(y - prediction) / DataWorker.labelRange)) # Accuracy over entire batch
optimiser = tf.train.AdamOptimizer(Constants.learningRate).minimize(mse) # Backpropagation
with tf.Session() as session:
session.run(tf.global_variables_initializer())
# #############################################
# TRAINING
# #############################################
for epoch in range(Constants.numEpochs):
print("***** EPOCH:", epoch + 1, "*****\n")
IDPointer, TSPointer = 0, 0 # Pointers to current ID and timestamp
epochComplete = False
batchNum = 0
while not epochComplete:
batchNum += 1
batchX, batchY, IDPointer, TSPointer, epochComplete = DataWorker.generateBatch(IDPointer, TSPointer, isTraining=True)
dict = {x: batchX, y: batchY}
session.run(optimiser, dict)
if batchNum % 1000 == 0 or epochComplete:
batchLoss = session.run(mse, dict)
batchAccuracy = session.run(accuracy, dict)
print("Iteration:", batchNum)
print(batchLoss)
print(str("%.2f" % (batchAccuracy * 100) + "%\n"))
# #############################################
# TESTING
# #############################################
testX, testY, _, _, _ = DataWorker.generateBatch(0, 0, isTraining=False)
testAccuracy = session.run(accuracy, {x: testX, y: testY})
print("Testing Accuracy:", str("%.2f" % (testAccuracy * 100) + "%"))
But now, for practicality and readability, I want to implement my model as a class, but have encountered many problems with initializing my variables, etc.
This is the closest I have got to implementing the above example using my own LSTM class
Model.py
import tensorflow as tf
import Constants
import DataWorker # Remove this dependency
class LSTM():
"""docstring."""
def __init__(self,
inputDimensionList,
outputDimensionList,
numLayers=Constants.numLayers,
numHidden=Constants.numHidden,
learningRate=Constants.learningRate,
forgetBias=Constants.forgetBias
):
"""docstring."""
self.batchInputs = tf.placeholder(tf.float32, [None] + inputDimensionList)
self.batchLabels = tf.placeholder(tf.float32, [None] + outputDimensionList)
self.weightedMatrix = tf.Variable(tf.random_normal([numHidden] + outputDimensionList))
self.biasMatrix = tf.Variable(tf.random_normal(outputDimensionList))
self.cell = tf.contrib.rnn.BasicLSTMCell(numHidden, forget_bias=forgetBias)
self.numLayers = numLayers
self.numHidden = numHidden
self.learningRate = learningRate
self.forgetBias = forgetBias
self.batchDict = {}
self.batchInputTensors = None
self.batchOutputs = None # All needed as instance variables?
self.batchFinalStates = None
self.batchPredictions = None
self.batchLoss = None
self.batchAccuracy = None
self.initialised = False
self.session = tf.Session()
# Take in activation, loss and optimiser FUNCTIONS as args
def execute(self, command):
"""docstring."""
return self.session.run(command, self.batchDict)
def setBatchDict(self, inputs, labels):
"""docstring."""
self.batchDict = {self.batchInputs: inputs, self.batchLabels: labels}
self.batchInputTensors = tf.unstack(self.batchInputs, axis=1)
def processBatch(self):
"""docstring."""
self.batchOutputs, self.batchFinalState = tf.nn.static_rnn(self.cell, self.batchInputTensors, dtype=tf.float32)
pred = tf.tanh(tf.add(tf.matmul(self.batchOutputs[-1], self.weightedMatrix), self.biasMatrix))
mse = tf.losses.mean_squared_error(predictions=pred, labels=self.batchLabels)
optimiser = tf.train.AdamOptimizer(self.learningRate).minimize(mse)
if not self.initialised:
self.session.run(tf.global_variables_initializer())
self.initialised = True
with tf.variable_scope("model") as scope:
if self.initialised:
scope.reuse_variables()
self.execute(optimiser)
self.batchPredictions = self.execute(pred)
self.batchLoss = self.execute(tf.losses.mean_squared_error(predictions=self.batchPredictions, labels=self.batchLabels))
self.batchAccuracy = self.execute(tf.reduce_mean(1 - (tf.abs(self.batchLabels - self.batchPredictions) / DataWorker.labelRange)))
return self.batchPredictions, self.batchLabels, self.batchLoss, self.batchAccuracy
def kill(self):
"""docstring."""
self.session.close()
This class is quite messy, especially processBatch() as I have just been trying to get it to work before refining it.
I then run my model here:
Main.py
import DataWorker
import Constants
from Model import LSTM
inputDim = [Constants.sequenceLength, DataWorker.numFeatures]
outputDim = [1]
lstm = LSTM(inputDimensionList=inputDim, outputDimensionList=outputDim)
# #############################################
# TRAINING
# #############################################
for epoch in range(Constants.numEpochs):
print("***** EPOCH:", epoch + 1, "*****\n")
IDPointer, TSPointer = 0, 0 # Pointers to current ID and timestamp
epochComplete = False
batchNum = 0
while not epochComplete:
batchNum += 1
batchX, batchY, IDPointer, TSPointer, epochComplete = DataWorker.generateBatch(IDPointer, TSPointer, isTraining=True)
lstm.setBatchDict(batchX, batchY)
batchPredictions, batchLabels, batchLoss, batchAccuracy = lstm.runBatch()
if batchNum % 1000 == 0 or epochComplete:
print("Iteration:", batchNum)
print("Pred:", batchPredictions[-1], "\tLabel:", batchLabels[-1])
print("Loss:", batchLoss)
print("Accuracy:", str("%.2f" % (batchAccuracy * 100) + "%\n"))
# #############################################
# TESTING
# #############################################
testX, testY, _, _, _ = DataWorker.generateBatch(0, 0, isTraining=False)
lstm.setBatchDict(testX, testY)
_, _, _, testAccuracy = lstm.runBatch()
print("Testing Accuracy:", str("%.2f" % (testAccuracy * 100) + "%"))
lstm.kill()
A single passthrough of the graph is executed fine, when all the variables are initialized, but it is on the second iteration where I get the error
ValueError: Variable rnn/basic_lstm_cell/kernel/Adam/ already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:
optimiser = tf.train.AdamOptimizer(self.learningRate).minimize(mse)
I Googled this problem and learned that using scope.reuse_variables() should stop it trying to initialize the AdamOptimizer a second time, but cleary this isn't working how I have implemented it. How can I fix this issue?
As a side note, is my method of creating the TensorFlow session as an instance variable within my LSTM class acceptable, or should I create the session in Main and then pass it into the LSTM instance?
In general I wrap anything that creates variables under the hood with tf.make_template when doing object oriented model building.
However, you should avoid adding ops to the graph in a training loop, which looks like it's happening here. They will build up and cause problems, and likely give you incorrect results. Instead, define the graph (with inputs from tf.data, placeholders, or queues) and only loop over a session.run call. Even better, structure your code as an Estimator and this will be enforced.