Keras Stateful LSTM get low accuracy when testing on training set - python

Generally, I use the stateful LSTM to make predictions. When I train the LSTM, the output accuracy is quite high. However, when I test the LSTM model on the training set, the accuracy is low! That really confused me, I thought they should be the same. Here are my codes and the outputs. Is there anyone knows why such things happen? Thank you!
model = Sequential()
adam = keras.optimizers.Adam(lr=0.0001)
model.add(LSTM(512, batch_input_shape=(12, 1, 120), return_sequences=False, stateful=True))
model.add(Dense(8, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])
print 'Train...'
for epoch in range(30):
mean_tr_acc = []
mean_tr_loss = []
current_data, current_label, origin_label, is_shuffled = train_iter.next()
for i in range(current_data.shape[1]):
if i%1000==0:
print "current iter at {} with {} iteration".format(i, epoch)
data_slice = current_data[:,i,:]
# Data slice dim: [batch size = 12, time_step=1, feature_dim=120]
data_slice = np.expand_dims(data_slice, axis=1)
label_slice = current_label[:,i,:]
one_hot_labels = keras.utils.to_categorical(label_slice, num_classes=8)
last_element = one_hot_labels[:,-1,:]
tr_loss, tr_acc = model.train_on_batch(np.array(data_slice), np.array(last_element))
mean_tr_acc.append(tr_acc)
mean_tr_loss.append(tr_loss)
model.reset_states()
print 'accuracy training = {}'.format(np.mean(mean_tr_acc))
print 'loss training = {}'.format(np.mean(mean_tr_loss))
print '___________________________________'
# At here, just evaluate the model on the training dataset
mean_te_acc = []
mean_te_loss = []
for i in range(current_data.shape[1]):
if i%1000==0:
print "current val iter at {} with {} iteration".format(i, epoch)
data_slice = current_data[:,i,:]
data_slice = np.expand_dims(data_slice, axis=1)
label_slice = current_label[:,i,:]
one_hot_labels = keras.utils.to_categorical(label_slice, num_classes=8)
last_element = one_hot_labels[:,-1,:]
te_loss, te_acc = model.test_on_batch(np.array(data_slice), np.array(last_element))
mean_te_acc.append(te_acc)
mean_te_loss.append(te_loss)
model.reset_states()
Here is the program output:
current iter at 0 with 13 iteration
current iter at 1000 with 13 iteration
accuracy training = 0.991784930229
loss training = 0.0320105217397
___________________________________
Batch shuffled
current val iter at 0 with 13 iteration
current val iter at 1000 with 13 iteration
accuracy testing = 0.927557885647
loss testing = 0.230829760432
___________________________________

Ok, so here is the problem: it seems that in my code (stateful LSTM), the training error does not really imply the real training error. In other words, more iterations are needed before the model can work well on the validation set (before the model is really trained). Generally, this is a silly mistake:P

Related

Accuracy does not increase Pytorch

I'm currently working on a project using Pytorch. I want to evaluate the accuracy of a neural network but it seems it does not increase when the test is running. The output I get is:
As you can see, I print the accuracy of every epoch always getting the same number.
Here you are the code of my classifier:
class Classifier(torch.nn.Module):
def __init__(self):
super().__init__()
self.layer1 = torch.nn.Linear(in_features=6, out_features=2, bias=True)
self.layer2 = torch.nn.Linear(in_features=2, out_features=1, bias=True)
self.activation = torch.sigmoid
def forward(self, x):
x=self.activation(self.layer1(x))
x=self.activation(self.layer2(x))
return x
model=Classifier()
def setParameters(m):
if type(m) == torch.nn.Linear:
torch.nn.init.uniform_(m.weight.data, -0.3, 0.3)
torch.nn.init.constant_(m.bias.data, 1)
model.apply(setParameters)
model.layer1.bias.requires_grad = False
model.layer2.bias.requires_grad = False
The code I use to train the network is the following:
from google.colab import drive
import torch
import random
drive.mount('/content/drive')
%cd drive/MyDrive/deeplearning/ass1/data
numbers = []
results = []
with open('data.txt') as f:
lines = f.readlines()
random.shuffle(lines)
for line in lines:
digitsOfNumber = [int(x) for x in str(line[0:6])]
resultInteger = int(line[7:8])
numbers.append(digitsOfNumber)
results.append(resultInteger)
numbersTensor = torch.Tensor(numbers)
resultsTensor = torch.tensor(results)
dataset = torch.utils.data.TensorDataset(numbersTensor, resultsTensor)
trainsetSize = int((80/100) * len(dataset))
trainset, testset = torch.utils.data.random_split(dataset, [trainsetSize, len(dataset) - trainsetSize])
print(len(trainset), len(testset))
testloader = torch.utils.data.DataLoader(testset, batch_size=len(testset), shuffle=False)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=len(trainset), shuffle=False)
def get_accuracy(model, dataloader):
model.eval()
with torch.no_grad():
correct=0
for x, y in iter(dataloader):
out=model(x)
correct+=(torch.argmax(out, axis=1)==y).sum()
return correct/len(dataloader.dataset)
epochs=1425
losses=[]
for epoch in range(epochs):
print("Test accuracy: ", get_accuracy(model, testloader).item())
model.train()
print("Epoch: ", epoch)
for x, y in iter(trainloader):
out=model(x)
l=loss(out, y)
optimizer.zero_grad()
l.backward()
optimizer.step()
losses.append(l.item())
print("Final accuracy: ", get_accuracy(model, testloader))
for name, param in model.named_parameters():
print(name, param)
The last part is the one I use to print out the accuracy and to train the network accordingly. How can I fix my issue?
Thank you in advance for your time and patience.
The last layer of your model produces a tensor of shape (batch size, 1), since you have set out_features = 1. I assume your dataset has more than 1 class?
When you are calculating your accuracy, torch.argmax(out, axis=1) will always give the same class index, being 0 in this case. This explains why your accuracy is constant.
I advise looking into your dataset and finding out how many classes you have, and modify your model based on that. If you have 10 classes, the last layer should have 10 output features based on how the rest of your code is set up.

Keras model prediction always returns nearly the same number

I have trained a keras model on a large dataset of hourly stock closing prices (with a lookback period of 30 hours and normalized in that period to [0,1]). The train_y or expected prediction is 0 or 1 if the closing price of the next hour is lower or higher than the last hour of the lookback period (1 -> buy, 0 -> sell).
This is the code for training the dataset:
df = pd.read_csv(dataset_loc)
close_data = df['close'].to_list()
train_x = []
train_y = []
for i in range(lookback, train_range):
add_x = close_data[i-lookback:i]
add_y = 0
if close_data[i] > close_data[i-1]:
add_y = 1
add_x = (add_x - np.min(add_x))/np.ptp(add_x)
train_x.append(add_x)
train_y.append(add_y)
train_x = np.reshape(train_x, (len(train_x),lookback))
train_y = np.reshape(train_y, (len(train_y),1))
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(train_x.shape[1],1)))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))
model.compile(optimizer='adam',loss='mean_squared_error')
model.fit(train_x, train_y, epochs=1, batch_size=1)
model.save(output_loc)
But when I try to actually predict the market price data it always returns some value around 0.55xxx over a large dataset. The evaluation returned 0.266 so I don't think the problem is that the model is to indecisive. Also the dataset was very large (50000).
This is the code for predicting:
input_add = np.array(close)
input_add = (input_add - np.min(input_add))/np.ptp(input_add)
input_add = np.reshape(input_add, (1,len(input_add)))
prediction = model.predict(input_add, verbose=0)
print(prediction) # -> always returns 0.55-ish
if float(prediction) > 0.5:
// buy
else:
// sell
I am fairly new to keras and nns in general, but I suspect that it might have to do something with the epoch size, batch size or the shape of the arrays, but I can't figure out the exact problem.

Selecting Best Performing Model over Various iterations

I want to create a for loop in order to run my model various times and keep the best performing model for each run. This because I've noticed that each time I train my model it might perform better on one run and much worse on another. Thus I want to store possibly each model in a list or just select the best.
I have the current process but I'm not sure if this is the most adequate manner and also I'm not actually sure on how to select the best performing model through all these iterations. Here I am doing it only for 10 iterations, but I want to know if there is a better way of doing this.
My Code Implementation
def build_model(input1, input2):
"""
Creates the a multi-channel ANN, capable of accepting multiple inputs.
:param: none
:return: the model of the ANN with a single output given
"""
input1 = np.expand_dims(input1,1)
# Define Inputs for ANN
input1 = Input(shape = (input1.shape[1], ), name = "input1")
input2 = Input(shape = (input2.shape[1],), name = "input2")
# First Branch of ANN (Weight)
x = Dense(units = 1, activation = "relu")(input1)
x = BatchNormalization()(x)
# Second Branch of ANN (Word Embeddings)
y = Dense(units = 36, activation = "relu")(input2)
y = BatchNormalization()(y)
# Merge the input models into a single large vector
combined = Concatenate()([x, y])
#Apply Final Output Layer
outputs = Dense(1, name = "output")(combined)
# Create an Interpretation Model (Accepts the inputs from previous branches and has single output)
model = Model(inputs = [input1, input2], outputs = outputs)
# Compile the Model
model.compile(loss='mse', optimizer = Adam(lr = 0.01), metrics = ['mse'])
# Summarize the Model Summary
model.summary()
return model
test_outcomes = [] # list of model scores
r2_outcomes = [] #list of r2 scores
stored_models = [] #list of stored_models
for i in range(10):
model = build_model(x_train['input1'], x_train['input2'])
print("Model Training")
model.fit([x_train['input1'], x_train['input2']], y_train,
batch_size = 25, epochs = 60, verbose = 0 #, validation_split = 0.2
,validation_data = ([x_valid['input1'],x_valid['input2']], y_valid))
#Determine Model Predictions
print("Model Predictions")
y_pred = model.predict([x_valid['input1'], x_valid['input2']])
y_pred = y_pred.flatten()
#Evaluate the Model
print("Model Evaluations")
score = model.evaluate([x_valid['input1'], x_valid['input2']], y_valid, verbose=1)
test_loss = round(score[0], 3)
print ('Test loss:', test_loss)
test_outcomes.append(test_loss)
#Calculate R_Squared
r_squared = r2_score(y_valid, y_pred)
print(r_squared)
r2_outcomes.append(r_squared)
#Store Final Model
print("Model Stored")
stored_models.append(model) #list of stored_models
mean_test= np.mean(test_outcomes)
r2_means = np.mean(r2_outcomes)
Output Example
You should use Callbacks
you can stop training using callback
Here an example of how you can create a custom callback in order stop training when certain accuracy threshold
#example
acc_threshold =0.95
class myCallback(tf.keras.callbacks.Callback):
def on_epoch_end(self, epoch, logs={}):
if(logs.get('acc') > acc_threshold):
print("\nReached %2.2f%% accuracy, so stopping training!!" %(acc_threshold))
self.model.stop_training = True
my_callback = myCallback()
model.fit([x_train['input1'], x_train['input2']], y_train,
batch_size = 25, epochs = 60, verbose = 0 #, validation_split = 0.2
,validation_data = ([x_valid['input1'],x_valid['input2']], y_valid),
callbacks=my_callback )
You can also use EarlyStopping to monitor metrics (Like stopping when loss isnt improving)

How to replace loss function during training tensorflow.keras

I want to replace the loss function related to my neural network during training, this is the network:
model = tensorflow.keras.models.Sequential()
model.add(tensorflow.keras.layers.Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=input_shape))
model.add(tensorflow.keras.layers.Conv2D(64, (3, 3), activation="relu"))
model.add(tensorflow.keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(tensorflow.keras.layers.Dropout(0.25))
model.add(tensorflow.keras.layers.Flatten())
model.add(tensorflow.keras.layers.Dense(128, activation="relu"))
model.add(tensorflow.keras.layers.Dropout(0.5))
model.add(tensorflow.keras.layers.Dense(output_classes, activation="softmax"))
model.compile(loss=tensorflow.keras.losses.categorical_crossentropy, optimizer=tensorflow.keras.optimizers.Adam(0.001), metrics=['accuracy'])
history = model.fit(x_train, y_train, batch_size=128, epochs=5, validation_data=(x_test, y_test))
so now I want to change tensorflow.keras.losses.categorical_crossentropy with another, so I made this:
model.compile(loss=tensorflow.keras.losses.mse, optimizer=tensorflow.keras.optimizers.Adam(0.001), metrics=['accuracy'])
history = model.fit(x_improve, y_improve, epochs=1, validation_data=(x_test, y_test)) #FIXME bug during training
but I have this error:
ValueError: No gradients provided for any variable: ['conv2d/kernel:0', 'conv2d/bias:0', 'conv2d_1/kernel:0', 'conv2d_1/bias:0', 'dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0'].
Why? How can I fix it? There is another way to change loss function?
Thanks
I'm currently working on google colab with Tensorflow and Keras and i was not able to recompile a model mantaining the weights, every time i recompile a model like this:
with strategy.scope():
model = hd_unet_model(INPUT_SIZE)
model.compile(optimizer=Adam(lr=0.01),
loss=tf.keras.losses.MeanSquaredError() ,
metrics=[tf.keras.metrics.MeanSquaredError()])
the weights gets resetted.
so i found an other solution, all you need to do is:
Get the model with the weights you want ( load it or something else )
gets the weights of the model like this:
weights = model.get_weights()
recompile the model ( to change the loss function )
set again the weights of the recompiled model like this:
model.set_weights(weights)
launch the training
i tested this method and it seems to work.
so to change the loss mid-Training you can:
Compile with the first loss.
Train of the first loss.
Save the weights.
Recompile with the second loss.
Load the weights.
Train on the second loss.
So, a straightforward answer I would give is: switch to pytorch if you want to play this kind of games. Since in pytorch you define your training and evaluation functions, it takes just an if statement to switch from a loss function to another one.
Also, I see in your code that you want to switch from cross_entropy to mean_square_error, the former is suitable for classification the latter for regression, so this is not really something you can do, in the code that follows I switched from mean squared error to mean squared logarithmic error, which are both loss suitable for regression.
Despite other answers offers solutions to your question (see change-loss-function-dynamically-during-training) it is not clear wether you can trust or not the results. Some people found that even with a customised function sometimes Keras keep training with the first loss.
Solution:
My solution is based on train_on_batch, which allows us to train a model in a for loop and therefore stop training it whenever we prefer to recompile the model with a new loss function. Please note that recompiling the model does not reset the weights (see:Does recompiling a model re-initialize the weights?).
The dataset can be found here Boston housing dataset
# Regression Example With Boston Dataset: Standardized and Larger
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from keras.losses import mean_squared_error, mean_squared_logarithmic_error
from matplotlib import pyplot
import matplotlib.pyplot as plt
# load dataset
dataframe = read_csv("housing.csv", delim_whitespace=True, header=None)
dataset = dataframe.values
# split into input (X) and output (Y) variables
X = dataset[:,0:13]
y = dataset[:,13]
trainX, testX, trainy, testy = train_test_split(X, y, test_size=0.33, random_state=42)
# create model
model = Sequential()
model.add(Dense(13, input_dim=13, kernel_initializer='normal', activation='relu'))
model.add(Dense(6, kernel_initializer='normal', activation='relu'))
model.add(Dense(1, kernel_initializer='normal'))
batch_size = 25
# have to define manually a dict to store all epochs scores
history = {}
history['history'] = {}
history['history']['loss'] = []
history['history']['mean_squared_error'] = []
history['history']['mean_squared_logarithmic_error'] = []
history['history']['val_loss'] = []
history['history']['val_mean_squared_error'] = []
history['history']['val_mean_squared_logarithmic_error'] = []
# first compiling with mse
model.compile(loss='mean_squared_error', optimizer='adam', metrics=[mean_squared_error, mean_squared_logarithmic_error])
# define number of iterations in training and test
train_iter = round(trainX.shape[0]/batch_size)
test_iter = round(testX.shape[0]/batch_size)
for epoch in range(2):
# train iterations
loss, mse, msle = 0, 0, 0
for i in range(train_iter):
start = i*batch_size
end = i*batch_size + batch_size
batchX = trainX[start:end,]
batchy = trainy[start:end,]
loss_, mse_, msle_ = model.train_on_batch(batchX,batchy)
loss += loss_
mse += mse_
msle += msle_
history['history']['loss'].append(loss/train_iter)
history['history']['mean_squared_error'].append(mse/train_iter)
history['history']['mean_squared_logarithmic_error'].append(msle/train_iter)
# test iterations
val_loss, val_mse, val_msle = 0, 0, 0
for i in range(test_iter):
start = i*batch_size
end = i*batch_size + batch_size
batchX = testX[start:end,]
batchy = testy[start:end,]
val_loss_, val_mse_, val_msle_ = model.test_on_batch(batchX,batchy)
val_loss += val_loss_
val_mse += val_mse_
val_msle += msle_
history['history']['val_loss'].append(val_loss/test_iter)
history['history']['val_mean_squared_error'].append(val_mse/test_iter)
history['history']['val_mean_squared_logarithmic_error'].append(val_msle/test_iter)
# recompiling the model with new loss
model.compile(loss='mean_squared_logarithmic_error', optimizer='adam', metrics=[mean_squared_error, mean_squared_logarithmic_error])
for epoch in range(2):
# train iterations
loss, mse, msle = 0, 0, 0
for i in range(train_iter):
start = i*batch_size
end = i*batch_size + batch_size
batchX = trainX[start:end,]
batchy = trainy[start:end,]
loss_, mse_, msle_ = model.train_on_batch(batchX,batchy)
loss += loss_
mse += mse_
msle += msle_
history['history']['loss'].append(loss/train_iter)
history['history']['mean_squared_error'].append(mse/train_iter)
history['history']['mean_squared_logarithmic_error'].append(msle/train_iter)
# test iterations
val_loss, val_mse, val_msle = 0, 0, 0
for i in range(test_iter):
start = i*batch_size
end = i*batch_size + batch_size
batchX = testX[start:end,]
batchy = testy[start:end,]
val_loss_, val_mse_, val_msle_ = model.test_on_batch(batchX,batchy)
val_loss += val_loss_
val_mse += val_mse_
val_msle += msle_
history['history']['val_loss'].append(val_loss/test_iter)
history['history']['val_mean_squared_error'].append(val_mse/test_iter)
history['history']['val_mean_squared_logarithmic_error'].append(val_msle/test_iter)
# Some plots to check what is going on
# loss function
pyplot.subplot(311)
pyplot.title('Loss')
pyplot.plot(history['history']['loss'], label='train')
pyplot.plot(history['history']['val_loss'], label='test')
pyplot.legend()
# Only mean squared error
pyplot.subplot(312)
pyplot.title('Mean Squared Error')
pyplot.plot(history['history']['mean_squared_error'], label='train')
pyplot.plot(history['history']['val_mean_squared_error'], label='test')
pyplot.legend()
# Only mean squared logarithmic error
pyplot.subplot(313)
pyplot.title('Mean Squared Logarithmic Error')
pyplot.plot(history['history']['mean_squared_logarithmic_error'], label='train')
pyplot.plot(history['history']['val_mean_squared_logarithmic_error'], label='test')
pyplot.legend()
plt.tight_layout()
pyplot.show()
The resulting plot confirm that the loss function is changing after the second epoch:
The drop in the loss function is due to the fact that the model is switching from normal mean squared error to the logarithmic one, which has much lower values. Printing the scores also prove that the used loss truly changed:
print(history['history']['loss'])
[599.5209197998047, 570.4041115897043, 3.8622902120862688, 2.1578191178185597]
print(history['history']['mean_squared_error'])
[599.5209197998047, 570.4041115897043, 510.29034205845426, 425.32058388846264]
print(history['history']['mean_squared_logarithmic_error'])
[8.624503476279122, 6.346359729766846, 3.8622902120862688, 2.1578191178185597]
In the first two epochs the values of loss are equal to ones of mean_square_error and during the third and fourth epochs the values becomes equal to the ones of mean_square_logarithmic_error, which is the new loss that was set. So it seems that using train_on_batch allows to change loss function, nevertheless I want to stress out again that this is basically what one should do on pytoch to achieve the same results, with the difference that the behaviour of pytorch (in this scenario and in my opinion) is more reliable.

Neural network has <0.001 validation and testing loss but 0% accuracy when doing a prediction

I've been training an MLP to predict the time remaining on an assembly sequence. The Training loss, Validation loss and MSE are all less 0.001, however, when I try to do a prediction with one of the datasets I trained the network with the it can't correctly identify any of the outputs from the set of inputs. What am I doing wrong that is producing this error?
I am also struggling to understand how, when the model is deployed, how do I perform the scaling of the result for one prediction? scaler.inverse_transform won't work because the data for that scaler used during training has been lost as the prediction would be done in a separate script to the training using the model the training produced. Is this information saved in the model builder?
I have tried to change the batch size during training, rounding the time column of the dataset to the nearest second (previously was 0.1 seconds), trained over 50, 100 and 200 epochs and I always end up with no correct predictions. I am also training an LSTM to see which is more accurate but that is also having the same issue. The dataset is split 70-30 training-testing and then training is then split 75-25 into training and validation.
Data scaling and model training code:
def scale_data(training_data, training_data_labels, testing_data, testing_data_labels):
# Create X and Y scalers between 0 and 1
x_scaler = MinMaxScaler(feature_range=(0, 1))
y_scaler = MinMaxScaler(feature_range=(0, 1))
# Scale training data
x_scaled_training = x_scaler.fit_transform(training_data)
y_scaled_training = y_scaler.fit_transform(training_data_labels)
# Scale testing data
x_scaled_testing = x_scaler.transform(testing_data)
y_scaled_testing = y_scaler.transform(testing_data_labels)
return x_scaled_training, y_scaled_training, x_scaled_testing, y_scaled_testing
def train_model(training_data, training_labels, testing_data, testing_labels, number_of_epochs, number_of_columns):
model_hidden_neuron_number_list = []
model_repeat_list = []
model_error_rate_list = []
for hidden_layer_1_units in range(int(np.floor(number_of_columns / 2)), int(np.ceil(number_of_columns * 2))):
print("Training starting, number of hidden units = %d" % hidden_layer_1_units)
for repeat in range(1, 6):
print("Repeat %d" % repeat)
model = k.Sequential()
model.add(Dense(hidden_layer_1_units, input_dim=number_of_columns,
activation='relu', name='hidden_layer_1'))
model.add(Dense(1, activation='linear', name='output_layer'))
model.compile(loss='mean_squared_error', optimizer='adam')
# Train Model
model.fit(
training_data,
training_labels,
epochs=number_of_epochs,
shuffle=True,
verbose=2,
callbacks=[logger],
batch_size=1024,
validation_split=0.25
)
# Test Model
test_error_rate = model.evaluate(testing_data, testing_labels, verbose=0)
print("Error on testing data is %.3f" % test_error_rate)
model_hidden_neuron_number_list.append(hidden_layer_1_units)
model_repeat_list.append(repeat)
model_error_rate_list.append(test_error_rate)
# Save Model
model_builder = tf.saved_model.builder.SavedModelBuilder("MLP/models/{hidden_layer_1_units}/{repeat}".format(hidden_layer_1_units=hidden_layer_1_units, repeat=repeat))
inputs = {
'input': tf.saved_model.build_tensor_info(model.input)
}
outputs = { 'time_remaining':tf.saved_model.utils.build_tensor_info(model.output)
}
signature_def = tf.saved_model.signature_def_utils.build_signature_def(
inputs=inputs,
outputs=outputs, method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
)
model_builder.add_meta_graph_and_variables(
K.get_session(),
tags=[tf.saved_model.tag_constants.SERVING],
signature_def_map={tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature_def
}
)
model_builder.save()
And then to do a prediction:
file_name = top_level_file_path + "./MLP/models/19/1/"
testing_dataset = pd.read_csv(file_path + os.listdir(file_path)[0])
number_of_rows = len(testing_dataset.index)
number_of_columns = len(testing_dataset.columns)
newcol = [number_of_rows]
max_time = testing_dataset['Time'].max()
for j in range(0, number_of_rows - 1):
newcol.append(max_time - testing_dataset.iloc[j].iloc[number_of_columns - 1])
x_scaler = MinMaxScaler(feature_range=(0, 1))
y_scaler = MinMaxScaler(feature_range=(0, 1))
# Scale training data
data_scaled = x_scaler.fit_transform(testing_dataset)
labels = pd.read_csv("Labels.csv")
labels_scaled = y_scaler.fit_transform(labels)
signature_key = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
input_key = 'input'
output_key = 'time_remaining'
with tf.Session(graph=tf.Graph()) as sess:
saved_model = tf.saved_model.loader.load(sess, [tf.saved_model.tag_constants.SERVING], file_name)
signature = saved_model.signature_def
x_tensor_name = signature[signature_key].inputs[input_key].name
y_tensor_name = signature[signature_key].outputs[output_key].name
x = sess.graph.get_tensor_by_name(x_tensor_name)
y = sess.graph.get_tensor_by_name(y_tensor_name)
#np.expand_dims(data_scaled[600], axis=0)
predictions = sess.run(y, {x: data_scaled})
predictions = y_scaler.inverse_transform(predictions)
#print(np.round(predictions, 2))
correct_result = 0
for i in range(0, number_of_rows):
correct_result = 0
print(np.round(predictions[i]), " ", np.round(newcol[i]))
if np.round(predictions[i]) == np.round(newcol[i]):
correct_result += 1
print((correct_result/number_of_rows)*100)
The output of the first row should 96.0 but it produces 110.0, the last should be 0.1 but is -40.0 when no negatives appear in the dataset.
You can't compute accuracy when you do regression. Compute the mean squared error on the test set as well.
Second, when it comes to the scalers, you always do scaler.fit_transform on the training date so the scaler will compute the parameters (in this case min and max if you use min-max scaler) on the training data. Then, when performing inference on the test set, you should only do scaler.transform prior to feeding the data to the model.

Categories

Resources