How to convert this PyTorch loss function to TensorFlow?

This code, from a paper I read, had a loss function written in PyTorch. I tried to convert it as best I could, but I am getting all zeros as model predictions, so I would like to ask the following:
Are the methods I used the correct equivalents in TensorFlow?
Why is the model predicting only zeros?
Here is the function:
# PyTorch
class AdjMSELoss1(nn.Module):
    def __init__(self):
        super(AdjMSELoss1, self).__init__()

    def forward(self, outputs, labels):
        outputs = torch.squeeze(outputs)
        alpha = 2
        loss = (outputs - labels) ** 2
        adj = torch.mul(outputs, labels)
        adj[adj > 0] = 1 / alpha
        adj[adj < 0] = alpha
        loss = loss * adj
        return torch.mean(loss)
# TensorFlow
def custom_loss_function(outputs, labels):
    outputs = tf.squeeze(outputs)
    alpha = 2.0
    loss = (outputs - labels) ** 2.0
    adj = tf.math.multiply(outputs, labels)
    adj = tf.where(tf.greater(adj, 0.0), tf.constant(1 / alpha), adj)
    adj = tf.where(tf.less(adj, 0.0), tf.constant(alpha), adj)
    loss = loss * adj
    return tf.reduce_mean(loss)
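A quick way to sanity-check the conversion (a minimal sketch, assuming both frameworks are installed and the two functions above are defined) is to feed identical values through both implementations and compare the results:

import numpy as np
import torch
import tensorflow as tf

preds = np.array([0.5, -0.2, 0.1], dtype=np.float32)
targets = np.array([0.4, 0.3, -0.1], dtype=np.float32)

pt_loss = AdjMSELoss1()(torch.tensor(preds), torch.tensor(targets))
tf_loss = custom_loss_function(tf.constant(preds), tf.constant(targets))
print(pt_loss.item(), tf_loss.numpy())  # the two values should agree closely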
The function compiles correctly and is used in both the loss and metrics parameters. It outputs results in the metric logs that appear to be correct (similar to val_loss), but after training the model is just predicting all zeros.
model.compile(
    loss=custom_loss_function,
    optimizer=optimization,
    metrics=[custom_loss_function]
)
MODEL
# Simplified for readability
model = Sequential()
model.add(LSTM(32, input_shape=(SEQ_LEN, feature_number), return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(96, return_sequences=False))
model.add(Dropout(0.3))
model.add(Dense(1))
return model
Inputs/features are the price pct_change for the previous SEQ_LEN days (given SEQ_LEN days, the model tries to predict the next day's target).
Outputs/targets are the next day's price pct_change * 100 (e.g. 5 for 5%), one value per row.
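For reference, a minimal sketch of how such features/targets could be built with pandas (the DataFrame and its price column are assumptions, not from the original code):

import pandas as pd

df = pd.DataFrame({'price': [100.0, 102.0, 101.0, 105.0]})    # hypothetical prices
df['feature'] = df['price'].pct_change()                      # daily pct change input
df['target'] = df['price'].pct_change().shift(-1) * 100       # next day's pct change * 100 (e.g. 5 for 5%)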
Note: the model predicts normally when RMSE() is set as the loss function; as mentioned, when using the custom_loss_function above it just predicts zeros.

Try this custom_loss:
def custom_loss(y_pred, y_true):
    alpha = 2.0
    loss = (y_pred - y_true) ** 2.0
    adj = tf.math.multiply(y_pred, y_true)
    adj = tf.where(tf.greater(adj, 0.0), tf.constant(1 / alpha), adj)
    adj = tf.where(tf.less(adj, 0.0), tf.constant(alpha), adj)
    loss = loss * adj
    return tf.reduce_mean(loss)
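Note that tf.where here relies on broadcasting the scalar tf.constant values against adj; if your TensorFlow version complains about mismatched shapes, an equivalent form (a sketch) builds full-shape tensors explicitly:

import tensorflow as tf

def custom_loss_broadcast_safe(y_pred, y_true):
    alpha = 2.0
    loss = (y_pred - y_true) ** 2.0
    adj = tf.math.multiply(y_pred, y_true)
    adj = tf.where(tf.greater(adj, 0.0), tf.ones_like(adj) / alpha, adj)
    adj = tf.where(tf.less(adj, 0.0), tf.ones_like(adj) * alpha, adj)
    return tf.reduce_mean(loss * adj)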
I checked it with the code below and it works correctly (the code builds a model that learns to predict the sum of two variables using the custom_loss):
from keras.models import Sequential
from keras.layers import Dense
import tensorflow as tf
import numpy as np

x = np.random.rand(1000, 2)
y = x.sum(axis=1)
y = y.reshape(-1, 1)

def custom_loss(y_pred, y_true):
    alpha = 2.0
    loss = (y_pred - y_true) ** 2.0
    adj = tf.math.multiply(y_pred, y_true)
    adj = tf.where(tf.greater(adj, 0.0), tf.constant(1 / alpha), adj)
    adj = tf.where(tf.less(adj, 0.0), tf.constant(alpha), adj)
    loss = loss * adj
    return tf.reduce_mean(loss)

model = Sequential()
model.add(Dense(128, activation='relu', input_dim=2))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1))
model.compile(optimizer='adam', loss=custom_loss)
model.fit(x, y, epochs=200, batch_size=16)

for _ in range(10):
    rnd_num = np.random.randint(50, size=2)[None, :]
    pred_add = model.predict(rnd_num)
    print(f'predict sum of {rnd_num[0]} -> {pred_add}')
Output:
Epoch 1/200
63/63 [==============================] - 1s 2ms/step - loss: 0.2903
Epoch 2/200
63/63 [==============================] - 0s 2ms/step - loss: 0.0084
Epoch 3/200
63/63 [==============================] - 0s 2ms/step - loss: 0.0016
...
Epoch 198/200
63/63 [==============================] - 0s 2ms/step - loss: 3.3231e-07
Epoch 199/200
63/63 [==============================] - 0s 2ms/step - loss: 5.1004e-07
Epoch 200/200
63/63 [==============================] - 0s 2ms/step - loss: 9.8688e-08
predict sum of [43 44] -> [[82.81973]]
predict sum of [39 13] -> [[48.97299]]
predict sum of [36 46] -> [[78.05187]]
predict sum of [46 7] -> [[49.445843]]
predict sum of [35 11] -> [[43.311478]]
predict sum of [33 1] -> [[31.695848]]
predict sum of [6 8] -> [[13.433815]]
predict sum of [14 38] -> [[49.54941]]
predict sum of [ 1 40] -> [[39.709686]]
predict sum of [10 2] -> [[11.325197]]

Related

Keras Neural Network With Custom Input

What I'm trying to accomplish is to define a set relationship between some custom input data and output data, and then have a neural network figure out this relationship/rule to predict future output given the input. I've set up some test code here where a random list of inputs is generated; if an input is more than 0.5 the output is 1, otherwise the output is 0.
from tensorflow import keras
import numpy as np

# generate data
data_input_generate = np.random.random((6400, 1))
data_output_generate = np.random.randint(2, size=(6400, 1))
data_input = np.vstack([data_input_generate, data_input_generate])
data_output = np.vstack([data_output_generate, data_output_generate])
for i in range(len(data_input)):
    if data_input[i] >= 0.5:
        data_output[i] = [1]
    else:
        data_output[i] = [0]

# setup neural network
Inputs = keras.layers.Input(shape=(1, ))
hidden1 = keras.layers.Dense(units=100, activation="sigmoid")(Inputs)
hidden2 = keras.layers.Dense(units=100, activation='softmax')(hidden1)
predictions = keras.layers.Dense(units=1, activation='relu')(hidden2)

# initialize model
model = keras.Model([Inputs], outputs=predictions)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# fit
model.fit(data_input, data_output, batch_size=10, epochs=5)

# predict
predictions = model.predict(data_input_generate)

# print predictions
for i in range(10):
    print(f"Value: {data_input_generate[i]}, Result: {data_output_generate[i]}, Prediction: {predictions[i]}")
The problem is, after fitting the model, the accuracy stays at 50%. Is this a problem with my layer activation function or the way I set up the model? My goal is to correctly predict the output with fairly high accuracy. Thanks in advance!
Try using a sigmoid activation function on your output layer. Here is a working example:
from tensorflow import keras
import numpy as np

# generate data
data_input_generate = np.random.random((6400, 1))
data_output_generate = np.random.randint(2, size=(6400, 1))
data_input = np.vstack([data_input_generate, data_input_generate])
data_output = np.vstack([data_output_generate, data_output_generate])
for i in range(len(data_input)):
    if data_input[i] >= 0.5:
        data_output[i] = [1]
    else:
        data_output[i] = [0]

# setup neural network
Inputs = keras.layers.Input(shape=(1, ))
hidden1 = keras.layers.Dense(units=64, activation="relu")(Inputs)
hidden2 = keras.layers.Dense(units=32, activation='relu')(hidden1)
dropout = keras.layers.Dropout(0.8)(hidden2)
predictions = keras.layers.Dense(units=1, activation='sigmoid')(dropout)

# initialize model
model = keras.Model([Inputs], outputs=predictions)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# fit
model.fit(data_input, data_output, batch_size=10, epochs=5)

# predict
predictions = model.predict(data_input_generate)

# print predictions
for i in range(10):
    print(f"Value: {data_input_generate[i]}, Result: {data_output_generate[i]}, Prediction: {predictions[i]}")
Epoch 1/5
1280/1280 [==============================] - 4s 3ms/step - loss: 0.3632 - accuracy: 0.8327
Epoch 2/5
1280/1280 [==============================] - 3s 3ms/step - loss: 0.1870 - accuracy: 0.9427
Epoch 3/5
1280/1280 [==============================] - 3s 3ms/step - loss: 0.1528 - accuracy: 0.9475
Epoch 4/5
1280/1280 [==============================] - 3s 2ms/step - loss: 0.1461 - accuracy: 0.9482
Epoch 5/5
1280/1280 [==============================] - 2s 2ms/step - loss: 0.1384 - accuracy: 0.9493
Value: [0.79415764], Result: [0], Prediction: [0.9997529]
Value: [0.38311113], Result: [1], Prediction: [1.7478478e-05]
Value: [0.05360975], Result: [0], Prediction: [2.3240638e-07]
Value: [0.78635261], Result: [1], Prediction: [0.99970365]
Value: [0.74414175], Result: [1], Prediction: [0.99921006]
Value: [0.47845171], Result: [1], Prediction: [0.07256863]
Value: [0.53008247], Result: [0], Prediction: [0.886382]
Value: [0.40377478], Result: [1], Prediction: [9.9769844e-05]
Value: [0.18209166], Result: [1], Prediction: [5.199377e-07]
Value: [0.00937745], Result: [1], Prediction: [1.7613968e-07]
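Since the output layer is now a sigmoid, the predictions are probabilities. To recover hard 0/1 labels you can threshold them at 0.5 (a usage sketch on the predictions array from above):

hard_labels = (predictions >= 0.5).astype(int)
print(hard_labels[:10])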

Why am I getting a negative loss and negative validation loss in my model?

I am training a variational autoencoder on the USPS dataset of shape (7291, 16, 16). Below is my code snippet. I also tried the same code snippet on the MNIST dataset of shape (60000, 28, 28) and everything seems to work fine. Both are grayscale images. I cannot figure out why I am getting negative values for the training loss and validation loss on the USPS dataset. The code execution is quite straightforward; the only change from the MNIST model is mnist.load_data() to usps.load_data().
I have also tried reducing the number of layers in both the encoder and decoder networks, but the result for the USPS model stays the same. I cannot figure out what exactly I am getting wrong; please, I need your assistance to understand the reason for the negative values.
!pip install extra_keras_datasets
#######################################
from extra_keras_datasets import usps
import keras
from keras.layers import Conv2D, Conv2DTranspose, Input, Flatten, Dense, Lambda, Reshape
#from keras.layers import BatchNormalization
from keras.models import Model
from keras.datasets import mnist
import tensorflow.compat.v1.keras.backend as K
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
import numpy as np
import matplotlib.pyplot as plt

# Load MNIST
# (x_train, y_train), (x_test, y_test) = mnist.load_data()
(x_train, y_train), (x_test, y_test) = usps.load_data()

# Normalize and reshape ============
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train = x_train / 255
x_test = x_test / 255

# Reshape
img_width = x_train.shape[1]
img_height = x_train.shape[2]
num_channels = 1  # grayscale, so 1 channel
x_train = x_train.reshape(x_train.shape[0], img_height, img_width, num_channels)
x_test = x_test.reshape(x_test.shape[0], img_height, img_width, num_channels)
input_shape = (img_height, img_width, num_channels)

# ========================
# BUILD THE MODEL
# =================
# Encoder
# Let us define 4 Conv2D layers, flatten and then dense
# =================
latent_dim = 2  # Number of latent dim parameters

# Create the model
input_img = Input(shape=input_shape, name='encoder_input')
x = Conv2D(32, 3, padding='same', activation='relu')(input_img)
x = Conv2D(64, 3, padding='same', activation='relu', strides=(2, 2))(x)
x = Conv2D(64, 3, padding='same', activation='relu')(x)
x = Conv2D(64, 3, padding='same', activation='relu')(x)
conv_shape = K.int_shape(x)  # Shape of conv to be provided to decoder
print(conv_shape)

# Flatten
x = Flatten()(x)
x = Dense(32, activation='relu')(x)

# Two outputs, for latent mean and log variance (std. dev.)
# Use these to sample random variables in latent space to which inputs are mapped.
z_mu = Dense(latent_dim, name='latent_mu')(x)        # Mean values of encoded input
z_sigma = Dense(latent_dim, name='latent_sigma')(x)  # Log variance of encoded input

# REPARAMETERIZATION TRICK
# Define a sampling function to sample from the distribution.
# Reparameterize the sample based on the process defined by Gunderson and Huang
# into the shape of: mu + sigma squared x eps.
# This is to allow gradient descent to estimate gradients accurately.
def sample_z(args):
    z_mu, z_sigma = args
    eps = K.random_normal(shape=(K.shape(z_mu)[0], K.int_shape(z_mu)[1]))
    return z_mu + K.exp(z_sigma / 2) * eps

# Sample a vector from the latent distribution.
# z is the Lambda custom layer we are adding for gradient descent calculations
# using mu and variance (sigma).
z = Lambda(sample_z, output_shape=(latent_dim, ), name='z')([z_mu, z_sigma])
# z (Lambda layer) will be the last layer in the encoder.

# Define and summarize the encoder model.
encoder = Model(input_img, [z_mu, z_sigma, z], name='encoder')
print(encoder.summary())
Decoder
# decoder_input is missing from the snippet as posted; the decoder needs its own
# Input layer taking the latent vector (inferred from context):
decoder_input = Input(shape=(latent_dim, ), name='decoder_input')
x = Dense(conv_shape[1] * conv_shape[2] * conv_shape[3], activation='relu')(decoder_input)
# Reshape to the shape of the last conv. layer in the encoder, so we can
# upscale (Conv2DTranspose) back to the original shape.
x = Reshape((conv_shape[1], conv_shape[2], conv_shape[3]))(x)
# Use Conv2DTranspose to reverse the conv layers defined in the encoder.
x = Conv2DTranspose(32, 3, padding='same', activation='relu', strides=(2, 2))(x)
# Can add more Conv2DTranspose layers, if desired.
# Using sigmoid activation
x = Conv2DTranspose(num_channels, 3, padding='same', activation='sigmoid', name='decoder_output')(x)

# Define and summarize the decoder model
decoder = Model(decoder_input, x, name='decoder')
# apply the decoder to the latent sample
z_decoded = decoder(z)
decoder.summary()
Custom loss and model fitting
# The VAE is trained using two loss functions: reconstruction loss and KL divergence.
# Let us add a class to define a custom layer with loss
class CustomLayer(keras.layers.Layer):

    def vae_loss(self, x, z_decoded):
        x = K.flatten(x)
        z_decoded = K.flatten(z_decoded)
        # Reconstruction loss (as we used sigmoid activation we can use binary crossentropy)
        recon_loss = keras.metrics.binary_crossentropy(x, z_decoded)
        # KL divergence
        kl_loss = -5e-4 * K.mean(1 + z_sigma - K.square(z_mu) - K.exp(z_sigma), axis=-1)
        return K.mean(recon_loss + kl_loss)

    # add custom loss to the class
    def call(self, inputs):
        x = inputs[0]
        z_decoded = inputs[1]
        loss = self.vae_loss(x, z_decoded)
        self.add_loss(loss, inputs=inputs)
        return x

# Apply the custom loss to the input images and the decoded latent distribution sample
y = CustomLayer()([input_img, z_decoded])
# y is basically the original image after encoding the input image to mu, sigma, z
# and decoding sampled z values. This will be used as the output for the VAE.

# =================
# VAE
# =================
vae = Model(input_img, y, name='vae')

# Compile VAE
vae.compile(optimizer='adam', loss=None)
vae.summary()

# Train autoencoder
vae.fit(x_train, None, epochs=10, batch_size=32, validation_split=0.2)
Here is my training history:
Epoch 1/10
5832/5832 [==============================] - 5s 928us/sample - loss: 0.0345 - val_loss: -0.0278
Epoch 2/10
5832/5832 [==============================] - 4s 740us/sample - loss: -0.0301 - val_loss: -0.0292
Epoch 3/10
5832/5832 [==============================] - 4s 746us/sample - loss: -0.0307 - val_loss: -0.0293
Epoch 4/10
5832/5832 [==============================] - 4s 751us/sample - loss: -0.0307 - val_loss: -0.0294
Epoch 5/10
5832/5832 [==============================] - 4s 753us/sample - loss: -0.0307 - val_loss: -0.0294
Epoch 6/10
5832/5832 [==============================] - 4s 746us/sample - loss: -0.0307 - val_loss: -0.0294
Epoch 7/10
5832/5832 [==============================] - 4s 750us/sample - loss: -0.0307 - val_loss: -0.0294
Epoch 8/10
5832/5832 [==============================] - 4s 742us/sample - loss: -0.0307 - val_loss: -0.0294
Epoch 9/10
5832/5832 [==============================] - 4s 751us/sample - loss: -0.0307 - val_loss: -0.0294
Epoch 10/10
5832/5832 [==============================] - 4s 748us/sample - loss: -0.0307 - val_loss: -0.0294
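One thing worth checking (an assumption, since it depends on the value range usps.load_data() returns): binary cross-entropy is only guaranteed to be non-negative when the target values lie in [0, 1]. If the USPS pixels are not in that range after the division by 255 (for example, if they were already scaled to [-1, 1] by the loader), the reconstruction loss can go negative. A quick sketch (run in a fresh eager session) demonstrates this:

import tensorflow as tf

# targets inside [0, 1]: loss is non-negative
print(tf.keras.losses.binary_crossentropy([0.0, 1.0], [0.1, 0.9]).numpy())
# targets outside [0, 1]: loss can be negative
print(tf.keras.losses.binary_crossentropy([-0.5, 1.5], [0.1, 0.9]).numpy())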

Siamese network on MNIST dataset is not getting trained

I am training a Siamese network with contrastive loss on two classes of the MNIST dataset to identify whether two images are similar or not. Although the loss decreases in the beginning, it freezes later with accuracy around 0.5.
The model is trained on pairs of images and a label (0.0 for different, 1.0 for identical). I used only two classes for simplicity (zeros and ones) and prepared the dataset so that it contains every pair of images. I've checked that the dataset is consistent (image pairs from the dataset). I've also experimented with data normalization, different batch sizes, learning rates, initializations and regularization constants, with no luck.
This is the model:
class Encoder(Model):
    """
    A network that finds a 50-dimensional representation of the input images
    so that the distances between them minimize the contrastive loss
    """
    def __init__(self):
        super(Encoder, self).__init__(name='encoder')
        self.cv = Conv2D(32, (3, 3), activation='relu', padding='Same',
                         input_shape=(28, 28, 1),
                         kernel_regularizer=tf.keras.regularizers.l2(0.01))
        self.pool = MaxPooling2D((2, 2))
        self.flatten = Flatten()
        self.dense = Dense(50, activation=None,
                           kernel_regularizer=tf.keras.regularizers.l2(0.01))

    def call(self, inputs, training=None, mask=None):
        """ Forward pass for one image """
        x = self.cv(inputs)
        x = self.pool(x)
        x = self.flatten(x)
        x = self.dense(x)
        return x

    @staticmethod
    def distance(difference):
        """ The D function from the paper which is used in the loss """
        distance = tf.sqrt(tf.reduce_sum(tf.pow(difference, 2), 0))
        return distance
The loss and accuracy:
def simnet_loss(target, x1, x2):
    difference = x1 - x2
    distance_vector = tf.map_fn(lambda x: Encoder.distance(x), difference)
    loss = tf.map_fn(lambda distance: target * tf.square(distance) +
                     (1.0 - target) * tf.square(tf.maximum(0.0, 1.0 - distance)),
                     distance_vector)
    average_loss = tf.reduce_mean(loss)
    return average_loss

def accuracy(y_true, y_pred):
    distance_vector = tf.map_fn(lambda x: Encoder.distance(x), y_pred)
    accuracy = tf.keras.metrics.binary_accuracy(y_true, distance_vector)
    return accuracy
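For reference, simnet_loss above can also be written without tf.map_fn by reducing over the feature axis directly (an equivalent sketch, assuming x1 and x2 have shape (batch, 50)):

import tensorflow as tf

def simnet_loss_vectorized(target, x1, x2):
    distance = tf.norm(x1 - x2, axis=1)  # per-pair Euclidean distance
    loss = target * tf.square(distance) + \
           (1.0 - target) * tf.square(tf.maximum(0.0, 1.0 - distance))
    return tf.reduce_mean(loss)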
Training:
def train_step(images, labels):
    with tf.GradientTape() as tape:
        x1, x2 = images[:, 0, :, :, :], images[:, 1, :, :, :]
        x1 = model(x1)
        x2 = model(x2)
        loss = simnet_loss(labels, x1, x2)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss

model = Encoder()
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

for epoch in range(n_epoch):
    epoch_loss = 0
    n_batches = int(x_train.shape[0] / batch_size)
    for indices in np.array_split(np.arange(x_train.shape[0]), indices_or_sections=n_batches):
        x = np.take(x_train, indices, axis=0)
        y = np.take(y_train, indices, axis=0)
        epoch_loss += train_step(x, y)
    epoch_loss = epoch_loss / n_batches
    accuracy = test_step(x_train, y_train)
    val_accuracy = test_step(x_test, y_test)
    tf.print("epoch:", epoch, "loss:", epoch_loss, "accuracy:", accuracy,
             "val_accuracy:", val_accuracy, output_stream=sys.stdout)
The code above produces:
epoch: 0 loss: 0.755419433 accuracy: 0.318898171 val_accuracy: 0.310316473
epoch: 1 loss: 0.270610392 accuracy: 0.369466901 val_accuracy: 0.360871345
epoch: 2 loss: 0.262594223 accuracy: 0.430587918 val_accuracy: 0.418002456
epoch: 3 loss: 0.258690506 accuracy: 0.428258181 val_accuracy: 0.427044809
epoch: 4 loss: 0.25654456 accuracy: 0.43497327 val_accuracy: 0.44800657
epoch: 5 loss: 0.255373538 accuracy: 0.444840342 val_accuracy: 0.454993844
epoch: 6 loss: 0.254594624 accuracy: 0.453885168 val_accuracy: 0.454171807

Why is the accuracy lower than 0.01, but the prediction very good (99.99%)?

I built my first own neural network with TensorFlow 2 in Python.
My idea was to build a neural network which is able to learn how to translate binary numbers (8-bit) into decimal numbers.
After a few tries: yes, it works very precisely!
But what I don't understand: the accuracy is very low.
The second thing is: the model has to train on over 200,000 values, for only 256 possible answers. Where is the failure in my code/model?
# dataset
def dataset(length, num):
    global testdata, solution
    testdata = np.random.randint(2, size=(num, length))
    solution = testdata.copy()  # note: immediately overwritten below
    solution = np.zeros((num, 1))
    for i in range(num):
        for n in range(length):
            x = testdata[i, length - n - 1] * (2 ** n)
            solution[i] += x

length = 8
num = 220000
dataset(length, num)

# Model
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(1, activation='relu')
])
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['accuracy'])

# Training and evaluation
model.fit(testdata, solution, epochs=4)
model.evaluate(t_testdata, t_solution, verbose=2)  # t_testdata / t_solution prepared elsewhere
model.summary()
loss: 6.6441e-05 - accuracy: 0.0077
Shouldn't it be like 0.77 or higher?
You should not use accuracy as a metric for a regression problem: since you are trying to output a single continuous value, even a tiny deviation in precision will be counted as a miss. Consider the example below.
Suppose you are trying to predict the value 15 and the model returns 14.99; the resulting accuracy will still be zero.
m = tf.keras.metrics.Accuracy()
_ = m.update_state([[15]], [[14.99]])
m.result().numpy()
Result:
0.0
You can consider the below list of metrics for regression.
Regression metrics
MeanSquaredError class
RootMeanSquaredError class
MeanAbsoluteError class
MeanAbsolutePercentageError class
MeanSquaredLogarithmicError class
CosineSimilarity class
LogCoshError class
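For example, RootMeanSquaredError can be passed straight to compile (a usage sketch):

model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=[tf.keras.metrics.RootMeanSquaredError()])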
I tried the same problem with one of the metrics listed above, and below is the result.
def bin2int(bin_list):
    # e.g. bin_list = [1, 1, 0, 1, 1, 0, 1, 1]
    int_val = ""
    for k in bin_list:
        int_val += str(int(k))
    # int_val = "11011011"
    return int(int_val, 2)

def dataset(num):
    # num - no. of samples
    bin_len = 8
    X = np.zeros((num, bin_len))
    Y = np.zeros((num))
    for i in range(num):
        X[i] = np.around(np.random.rand(bin_len)).astype(int)
        Y[i] = bin2int(X[i])
    return X, Y

no_of_samples = 220000
trainX, trainY = dataset(no_of_samples)
testX, testY = dataset(5)

model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(8, activation='relu'),
    tf.keras.layers.Dense(1, activation='relu')
])
model.compile(optimizer='adam',
              loss='mean_absolute_error',
              metrics=['mse'])
model.fit(trainX, trainY, validation_data=(testX, testY), epochs=4)
model.summary()
Output:
Epoch 1/4
6875/6875 [==============================] - 15s 2ms/step - loss: 27.6938 - mse: 2819.9429 - val_loss: 0.0066 - val_mse: 5.2560e-05
Epoch 2/4
6875/6875 [==============================] - 15s 2ms/step - loss: 0.0580 - mse: 0.1919 - val_loss: 0.0066 - val_mse: 6.0013e-05
Epoch 3/4
6875/6875 [==============================] - 16s 2ms/step - loss: 0.0376 - mse: 0.0868 - val_loss: 0.0106 - val_mse: 1.2932e-04
Epoch 4/4
6875/6875 [==============================] - 15s 2ms/step - loss: 0.0317 - mse: 0.0466 - val_loss: 0.0177 - val_mse: 3.2429e-04
Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
dense_24 (Dense)             multiple                  72
_________________________________________________________________
dense_25 (Dense)             multiple                  9
_________________________________________________________________
round_4 (Round)              multiple                  0
=================================================================
Total params: 81
Trainable params: 81
Non-trainable params: 0
Predict:
model.predict([[0., 0., 0., 0., 0., 1., 1., 0.]])
array([[5.993815]], dtype=float32)
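Note that the model summary above lists a round_4 (Round) layer that does not appear in the code shown; presumably the output was rounded to the nearest integer. The rounding can also be done after prediction (a sketch):

pred = model.predict(np.array([[0., 0., 0., 0., 0., 1., 1., 0.]]))
print(int(np.round(pred[0, 0])))  # 5.993815 rounds to 6 (binary 00000110 = 6)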

Keras LSTM + TensorFlow and a number sequence (improve loss)

First of all, I'm running with the following setup:
Running on windows 10
Python 3.6.2
TensorFlow 1.8.0
Keras 2.1.6
I'm trying to predict, or at least guesstimate, the following number sequence:
https://codepen.io/anon/pen/RJRPPx (limited to 20,000 for testing); the full sequence contains about one million records.
And here is the code (run.py)
import lstm
import time
import matplotlib.pyplot as plt

def plot_results(predicted_data, true_data):
    fig = plt.figure(facecolor='white')
    ax = fig.add_subplot(111)
    ax.plot(true_data, label='True Data')
    plt.plot(predicted_data, label='Prediction')
    plt.legend()
    plt.show()

def plot_results_multiple(predicted_data, true_data, prediction_len):
    fig = plt.figure(facecolor='white')
    ax = fig.add_subplot(111)
    ax.plot(true_data, label='True Data')
    # Pad the list of predictions to shift it in the graph to its correct start
    for i, data in enumerate(predicted_data):
        padding = [None for p in range(i * prediction_len)]
        plt.plot(padding + data, label='Prediction')
    plt.legend()
    plt.show()

# Main Run Thread
if __name__ == '__main__':
    global_start_time = time.time()
    epochs = 10
    seq_len = 50

    print('> Loading data... ')
    X_train, y_train, X_test, y_test = lstm.load_data('dice_amplified/primeros_20_mil.csv', seq_len, True)
    print('> Data Loaded. Compiling...')

    model = lstm.build_model([1, 50, 100, 1])
    model.fit(
        X_train,
        y_train,
        batch_size=512,
        nb_epoch=epochs,
        validation_split=0.05)

    predictions = lstm.predict_sequences_multiple(model, X_test, seq_len, 50)
    # predicted = lstm.predict_sequence_full(model, X_test, seq_len)
    # predicted = lstm.predict_point_by_point(model, X_test)

    print('Training duration (s) : ', time.time() - global_start_time)
    plot_results_multiple(predictions, y_test, 50)
I have tried to:
increase and decrease epochs.
increase and decrease batch size.
amplify the data.
The following plot (image not included here) represents a run with:
epochs = 10
batch_size = 512
validation_split = 0.05
Also, as far as I understand, the loss should be decreasing with more epochs, which doesn't seem to be happening!
Using TensorFlow backend.
> Loading data...
> Data Loaded. Compiling...
> Compilation Time : 0.03000473976135254
Train on 17056 samples, validate on 898 samples
Epoch 1/10
17056/17056 [==============================] - 31s 2ms/step - loss: 29927.0164 - val_loss: 289.8873
Epoch 2/10
17056/17056 [==============================] - 29s 2ms/step - loss: 29920.3513 - val_loss: 290.1069
Epoch 3/10
17056/17056 [==============================] - 29s 2ms/step - loss: 29920.4602 - val_loss: 292.7868
Epoch 4/10
17056/17056 [==============================] - 27s 2ms/step - loss: 29915.0955 - val_loss: 286.7317
Epoch 5/10
17056/17056 [==============================] - 26s 2ms/step - loss: 29913.6961 - val_loss: 298.7889
Epoch 6/10
17056/17056 [==============================] - 26s 2ms/step - loss: 29920.2068 - val_loss: 287.5138
Epoch 7/10
17056/17056 [==============================] - 28s 2ms/step - loss: 29914.0650 - val_loss: 295.2230
Epoch 8/10
17056/17056 [==============================] - 25s 1ms/step - loss: 29912.8860 - val_loss: 295.0592
Epoch 9/10
17056/17056 [==============================] - 28s 2ms/step - loss: 29907.4067 - val_loss: 286.9338
Epoch 10/10
17056/17056 [==============================] - 46s 3ms/step - loss: 29914.6869 - val_loss: 289.3236
Any recommendations? How could I improve it? Thanks!
Lstm.py contents:
import os
import time
import warnings
import numpy as np
from numpy import newaxis
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # Hide messy TensorFlow warnings
warnings.filterwarnings("ignore")  # Hide messy Numpy warnings

def load_data(filename, seq_len, normalise_window):
    f = open(filename, 'rb').read()
    data = f.decode().split('\n')
    sequence_length = seq_len + 1
    result = []
    for index in range(len(data) - sequence_length):
        result.append(data[index: index + sequence_length])
    if normalise_window:
        result = normalise_windows(result)
    result = np.array(result)
    row = round(0.9 * result.shape[0])
    train = result[:int(row), :]
    np.random.shuffle(train)
    x_train = train[:, :-1]
    y_train = train[:, -1]
    x_test = result[int(row):, :-1]
    y_test = result[int(row):, -1]
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
    return [x_train, y_train, x_test, y_test]

def normalise_windows(window_data):
    normalised_data = []
    for window in window_data:
        normalised_window = [((float(p) / float(window[0])) - 1) for p in window]
        normalised_data.append(normalised_window)
    return normalised_data

def build_model(layers):
    model = Sequential()
    model.add(LSTM(
        input_shape=(layers[1], layers[0]),
        output_dim=layers[1],
        return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(
        layers[2],
        return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(
        output_dim=layers[3]))
    model.add(Activation("linear"))
    start = time.time()
    model.compile(loss="mse", optimizer="rmsprop")
    print("> Compilation Time : ", time.time() - start)
    return model

def predict_point_by_point(model, data):
    # Predict each timestep given the last sequence of true data,
    # in effect only predicting 1 step ahead each time
    predicted = model.predict(data)
    predicted = np.reshape(predicted, (predicted.size,))
    return predicted

def predict_sequence_full(model, data, window_size):
    # Shift the window by 1 new prediction each time, re-run predictions on the new window
    curr_frame = data[0]
    predicted = []
    for i in range(len(data)):
        predicted.append(model.predict(curr_frame[newaxis, :, :])[0, 0])
        curr_frame = curr_frame[1:]
        curr_frame = np.insert(curr_frame, [window_size - 1], predicted[-1], axis=0)
    return predicted

def predict_sequences_multiple(model, data, window_size, prediction_len):
    # Predict a sequence of 50 steps before shifting the prediction run forward by 50 steps
    prediction_seqs = []
    for i in range(int(len(data) / prediction_len)):
        curr_frame = data[i * prediction_len]
        predicted = []
        for j in range(prediction_len):
            predicted.append(model.predict(curr_frame[newaxis, :, :])[0, 0])
            curr_frame = curr_frame[1:]
            curr_frame = np.insert(curr_frame, [window_size - 1], predicted[-1], axis=0)
        prediction_seqs.append(predicted)
    return prediction_seqs
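Since normalise_windows scales every window relative to its first value, predictions come out in that normalized space; mapping a prediction back to the raw scale needs the window's first raw value (a sketch of the inverse transform, not part of the original lstm.py):

def denormalise_prediction(pred, window_first_value):
    # inverse of p_norm = (p / window[0]) - 1
    return (pred + 1.0) * window_first_value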
Addendum:
At nuric's suggestion I modified the model as follows:
def build_model(layers):
    model = Sequential()
    model.add(LSTM(input_shape=(layers[1], layers[0]), output_dim=layers[1], return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(layers[2], return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(output_dim=layers[3]))
    model.add(Activation("linear"))
    model.add(Dense(64, input_dim=50, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1))
    start = time.time()
    model.compile(loss="mse", optimizer="rmsprop")
    print("> Compilation Time : ", time.time() - start)
    return model
Still a bit lost on this one...
Even though you normalise the input, you don't normalise the output. The LSTM by default has a tanh output, which means you will have a limited feature space, i.e. the dense layer won't be able to regress to large numbers.
You have a fixed-length numerical input of shape (50,); pass it directly to Dense layers with relu activation, which will perform better on regression tasks. Something simple like:
model = Sequential()
model.add(Dense(64, input_dim=50, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1))
For regression it is also preferable to use l2 regularizers instead of Dropout, because you are not really doing feature extraction for classification etc.
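A sketch of the suggested l2-regularized version (the regularization strength of 1e-4 is an assumption):

from keras.models import Sequential
from keras.layers import Dense
from keras import regularizers

model = Sequential()
model.add(Dense(64, input_dim=50, activation='relu',
                kernel_regularizer=regularizers.l2(1e-4)))
model.add(Dense(32, activation='relu',
                kernel_regularizer=regularizers.l2(1e-4)))
model.add(Dense(1))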
