Generating Vanishing and Exploding gradients problem in RNN using Keras - python

I understand the Vanishing and exploding gradients problem in Vanilla RNNs and why this happens. However, I would like to create this problem purposefully to understand in a better way. I have taken a below code from https://www.datatechnotes.com/2018/12/rnn-example-with-keras-simplernn-in.html.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN
# convert into dataset matrix
def convertToMatrix(data, step):
X, Y =[], []
for i in range(len(data)-step):
d=i+step
X.append(data[i:d,])
Y.append(data[d,])
return np.array(X), np.array(Y)
step = 4
N = 1000
Tp = 800
t=np.arange(0,N)
x=np.sin(0.02*t)+2*np.random.rand(N)
df = pd.DataFrame(x)
df.head()
plt.plot(df)
plt.show()
values=df.values
train,test = values[0:Tp,:], values[Tp:N,:]
# add step elements into train and test
test = np.append(test,np.repeat(test[-1,],step))
train = np.append(train,np.repeat(train[-1,],step))
trainX,trainY =convertToMatrix(train,step)
testX,testY =convertToMatrix(test,step)
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
model = Sequential()
model.add(SimpleRNN(units=32, input_shape=(1,step), activation="relu"))
model.add(Dense(8, activation="relu"))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='rmsprop')
model.summary()
model.fit(trainX,trainY, epochs=100, batch_size=16, verbose=2)
trainPredict = model.predict(trainX)
testPredict= model.predict(testX)
predicted=np.concatenate((trainPredict,testPredict),axis=0)
trainScore = model.evaluate(trainX, trainY, verbose=0)
print(trainScore)
How should I modify this code to create this problem? Thank you.

Vanishing gradient is the problem when we use the sigmoid activation function. If you change relu to sigmoid, you may encounter vanishing gradient problem.
model = Sequential()
model.add(SimpleRNN(units=32, input_shape=(1,step), activation="sigmoid"))
model.add(Dense(8, activation="sigmoid"))

Related

Setting output variable in deep learning

I have this code:
from numpy import loadtxt
from keras.models import Sequential
from keras.layers import Dense
dataset = loadtxt('pima-indians-diabetes.csv', delimiter=',')
X = dataset[:,0:8]
y = dataset[:,8]
model = Sequential()
model.add(Dense(12, input_dim=8, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X, y, epochs=150, batch_size=10)
_, accuracy = model.evaluate(X, y)
print('Accuracy: %.2f' % (accuracy*100))
I need to change the output column so it predicts/learns from a score(for instance 1 to a million) instead of 0 or 1(sigmoid).
As for your case you need to use relu as your activation function in the last layer (output layer) instead of sigmoid
The range of relu is [0,inf).Then in that case you need to use 'MSE' as your loss metric.
Conceptually, the problem which you are trying to solve is a regression type of problem.

LSTM Neural Network for temperature time series predictions

I'm learning to work with neural networks applied to time-series so I tuned and LSTM example that I found to make predictions of daily temperature data. However, I found that the results are extremely poor as is shown in the image. (I only predict the last 92 days in order to save time for now).
This is the code I implemented. The data are 3 column dataframe (minimum, maximum and mean daily temperatures), but I only employ one of the columns at one time.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tools.eval_measures import rmse
from sklearn.preprocessing import MinMaxScaler
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
import warnings
warnings.filterwarnings("ignore")
input_file2 = "TemperaturasCampillos.txt"
seriesT = pd.read_csv(input_file2,sep = "\t", decimal = ".", names = ["Minimas","Maximas","Medias"])
seriesT[seriesT==-999]=np.nan
date1 = '2010-01-01'
date2 = '2010-09-01'
date3 = '2020-05-17'
date4 = '2020-12-31'
mydates = pd.date_range(date2, date3).tolist()
seriesT['Fecha'] = mydates
seriesT.set_index('Fecha',inplace=True) # Para que los índices sean fechas y así se ponen en el eje x de forma predeterminada
seriesT.index = seriesT.index.to_pydatetime()
df = seriesT.drop(seriesT.columns[[1, 2]], axis=1) # df.columns is zero-based pd.Index
n_input = 92
train, test = df[:-n_input], df[-n_input:]
scaler = MinMaxScaler()
scaler.fit(train)
train = scaler.transform(train)
test = scaler.transform(test)
#n_input = 365
n_features = 1
generator = TimeseriesGenerator(train, train, length=n_input, batch_size=1)
model = Sequential()
model.add(LSTM(200, activation='relu', input_shape=(n_input, n_features)))
model.add(Dropout(0.15))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
model.fit_generator(generator,epochs=150)
#create an empty list for each of our 12 predictions
#create the batch that our model will predict off of
#save the prediction to our list
#add the prediction to the end of the batch to be used in the next prediction
pred_list = []
batch = train[-n_input:].reshape((1, n_input, n_features))
for i in range(n_input):
pred_list.append(model.predict(batch)[0])
batch = np.append(batch[:,1:,:],[[pred_list[i]]],axis=1)
df_predict = pd.DataFrame(scaler.inverse_transform(pred_list),
index=df[-n_input:].index, columns=['Prediction'])
df_test = pd.concat([df,df_predict], axis=1)
plt.figure(figsize=(20, 5))
plt.plot(df_test.index, df_test['Minimas'])
plt.plot(df_test.index, df_test['Prediction'], color='r')
plt.legend(loc='best', fontsize='xx-large')
plt.xticks(fontsize=18)
plt.yticks(fontsize=16)
plt.show()
As you can see if you click in the image link, I get a predict too smoothed, good to see the seasonality but is not what I am looking forward.
In addition, I tried to add more layers to the neural network shown, so the network looks something like:
#n_input = 365
n_features = 1
generator = TimeseriesGenerator(train, train, length=n_input, batch_size=1)
model = Sequential()
model.add(LSTM(200, activation='relu', input_shape=(n_input, n_features)))
model.add(LSTM(128, activation='relu'))
model.add(LSTM(256, activation='relu'))
model.add(LSTM(128, activation='relu'))
model.add(LSTM(64, activation='relu'))
model.add(LSTM(n_features, activation='relu'))
model.add(Dropout(0.15))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
model.fit_generator(generator,epochs=100)
but I get this error:
ValueError: Input 0 is incompatible with layer lstm_86: expected ndim=3, found ndim=2
Of course, as the model has a bad performance I cannot assure that out-of-sample predictions would be accurate.
Why I cannot implement more layers to the network? How could I improve the performance?
You are missing one argument: return_sequences.
When you have more than one LSTM layer, you should set it to TRUE. Because, otherwise, that layer will only output the last hidden state. Add it to each LSTM layer.
model.add(LSTM(128, activation='relu', return_sequences=True))
About poor performance: my guess it is because of you have low amount of data for this application (the data seems pretty noise), add layers won't help too much.

ValueError : Input 0 of layer lstm is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: [None, 18]

I'm new with Keras and I'm trying to build a model for personal use/future learning. I've just started with python and I came up with this code (with help of videos and tutorials). I have a data of 16324 instances, each instance consists of 18 features and 1 dependent variable.
import pandas as pd
import os
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
EPOCHS = 10
BATCH_SIZE = 64
NAME = f"-TEST-{int(time.time())}"
df = pd.read_csv("EntryData.csv", names=['1SH5', '1SHA', '1SA5', '1SAA', '1WH5', '1WHA', '2SA5', '2SAA', '2SH5', '2SHA', '2WA5', '2WAA', '3R1', '3R2', '3R3', '3R4', '3R5', '3R6', 'Target'])
df_val = 14554
validation_df = df[df.index > df_val]
df = df[df.index <= df_val]
train_x = df.drop(columns=['Target'])
train_y = df[['Target']]
validation_x = validation_df.drop(columns=['Target'])
validation_y = validation_df[['Target']]
model = Sequential()
model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())
model.add(LSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(2, activation='softmax'))
opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)
model.compile(loss='sparse_categorical_crossentropy',
optimizer=opt,
metrics=['accuracy'])
tensorboard = TensorBoard(log_dir=f'logs/{NAME}')
filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"
checkpoint = ModelCheckpoint("models/{}.model".format(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')) # saves only the best ones
history = model.fit(
train_x, train_y,
batch_size=BATCH_SIZE,
epochs=EPOCHS,
validation_data=(validation_x, validation_y),
callbacks=[tensorboard, checkpoint],)
score = model.evaluate(validation_x, validation_y, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
model.save("models/{}".format(NAME))
In line
model.add(LSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
is throwing an error:
ValueError: Input 0 of layer lstm is incompatible with the layer:
expected ndim=3, found ndim=2. Full shape received: [None, 18]
I was searching for solution on this site and on google for few hours now and I was not able to find proper answer for this or I was not able to implement the solution for similar problem.
Thank you for any tips.
An LSTM network expects three dimensional input of this format:
(n_samples, time_steps, features)
There are two main ways this can be a problem.
Your input is 2D
You have stacked (multiple) LSTM layers
1. Your input is 2D
You need to turn your input to 3D.
x = x.reshape(len(x), 1, x.shape[1])
# or
x = np.expand_dims(x, 1)
Then, specify the right input shape in the first layer:
LSTM(64, input_shape=(x.shape[1:]))
2. You have stacked LSTM layers
By default, LSTM layers will not return sequences, i.e., they will return 2D output. This means that the second LSTM layer will not have the 3D input it needs. To address this, you need to set the return_sequences=True:
tf.keras.layers.LSTM(8, return_sequences=True),
tf.keras.layers.LSTM(8)
Here's how to reproduce and solve the 2D input problem:
import tensorflow as tf
import numpy as np
x = np.random.rand(100, 10)
# x = np.expand_dims(x, 1) # uncomment to solve the problem
y = np.random.randint(0, 2, 100)
model = tf.keras.Sequential([
tf.keras.layers.LSTM(8),
tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam',
loss='binary_crossentropy',
metrics=['accuracy'])
history = model.fit(x, y, validation_split=0.1)
Here's how to reproduce and solve the stacked LSTM layers problem:
import tensorflow as tf
import numpy as np
x = np.random.rand(100, 1, 10)
y = np.random.randint(0, 2, 100)
model = tf.keras.Sequential([
tf.keras.layers.LSTM(8), # use return_sequences=True to solve the problem
tf.keras.layers.LSTM(8),
tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam',
loss='binary_crossentropy',
metrics=['accuracy'])
history = model.fit(x, y, validation_split=0.1)

TensorFlow - Stop training when losses reach a defined value

I used the first example here as an example of network.
How to stop the training when the loss reach a fixed value ?
So, for example, I would like to fix a maximum of 3000 epochs and the training will stop when the loss will be under 0.2.
I read this topic but it is not the solution I found.
I would want to stop the training when the loss reach a value, not when there is no improvement like with this function proposed in the precedent topic.
Here is the code:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD
# Generate dummy data
import numpy as np
x_train = np.random.random((1000, 20))
y_train = keras.utils.to_categorical(np.random.randint(10, size=(1000, 1)), num_classes=10)
x_test = np.random.random((100, 20))
y_test = keras.utils.to_categorical(np.random.randint(10, size=(100, 1)), num_classes=10)
model = Sequential()
# Dense(64) is a fully-connected layer with 64 hidden units.
# in the first layer, you must specify the expected input data shape:
# here, 20-dimensional vectors.
model.add(Dense(64, activation='relu', input_dim=20))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
optimizer=sgd,
metrics=['accuracy'])
model.fit(x_train, y_train,
epochs=3000,
batch_size=128)
score = model.evaluate(x_test, y_test, batch_size=128)
You can use some method like this if you would switch to TensorFlow 2.0:
class haltCallback(tf.keras.callbacks.Callback):
def on_epoch_end(self, epoch, logs={}):
if(logs.get('loss') <= 0.05):
print("\n\n\nReached 0.05 loss value so cancelling training!\n\n\n")
self.model.stop_training = True
You just need to create a callback like that and then add that callback to your model.fit so it becomes something like this:
model.fit(x_train, y_train,
epochs=3000,
batch_size=128,
callbacks=['trainingStopCallback'])
This way, the fitting should stop whenever it reaches down below 0.05 (or whatever value you put on while defining it).
Also, since it's been a long time you asked this question but it still has no actual answer for using it with TensorFlow 2.0, I updated your code snippet to TensorFlow 2.0 so everyone can now easily find and use it with their new projects.
import tensorflow as tf
# Generate dummy data
import numpy as np
x_train = np.random.random((1000, 20))
y_train = tf.keras.utils.to_categorical(
np.random.randint(10, size=(1000, 1)), num_classes=10)
x_test = np.random.random((100, 20))
y_test = tf.keras.utils.to_categorical(
np.random.randint(10, size=(100, 1)), num_classes=10)
model = tf.keras.models.Sequential()
# Dense(64) is a fully-connected layer with 64 hidden units.
# in the first layer, you must specify the expected input data shape:
# here, 20-dimensional vectors.
model.add(tf.keras.layers.Dense(64, activation='relu', input_dim=20))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(10, activation='softmax'))
class haltCallback(tf.keras.callbacks.Callback):
def on_epoch_end(self, epoch, logs={}):
if(logs.get('loss') <= 0.05):
print("\n\n\nReached 0.05 loss value so cancelling training!\n\n\n")
self.model.stop_training = True
trainingStopCallback = haltCallback()
sgd = tf.keras.optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
optimizer=sgd,
metrics=['accuracy', 'loss'])
model.fit(x_train, y_train,
epochs=3000,
batch_size=128,
callbacks=['trainingStopCallback'])
score = model.evaluate(x_test, y_test, batch_size=128)
Documentation here : EarlyStopping
keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=0, verbose=0, mode='auto', baseline=None)
I solved it by doing this:
history = model.fit(training1, training2, epochs=100, verbose=True)
loss=history.history["loss"]
for x in loss:
if x <= 1:
print("Reached 1 loss value, cancelling training")
model.stop_training = True
break

Regression result in keras using python

This is a regression problem. Below is my code
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.cross_validation import cross_val_score, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
os.chdir(r'C:\Users\Swapnil\Desktop\RP TD\first\Changes')
## Load the dataset
dataset1 = pd.read_csv("Main Lane Plaza 1.csv")
X_train = dataset1.iloc[:,0:11].values
Y_train = dataset1.iloc[:,11].values
dataset2 = pd.read_csv("Main Lane Plaza 1_070416010117.csv")
X_test = dataset2.iloc[:,0:11].values
Y_test = dataset2.iloc[:,11].values
##Define base model
def base_model():
model = Sequential()
model.add(Dense(11, input_dim=11, kernel_initializer='normal',
activation='sigmoid'))
model.add(Dense(7, kernel_initializer='normal', activation='sigmoid'))
model.add(Dense(1, kernel_initializer='normal'))
model.compile(loss='mean_squared_error', optimizer = 'adam')
return model
seed = 7
np.random.seed(seed)
clf = KerasRegressor(build_fn=base_model, nb_epoch=100,
batch_size=5,verbose=0)
clf.fit(X_train, Y_train)
res = clf.predict(X_train)
##Result
clf.score(X_test, Y_test)
Not sure if the score should be negative??
Kindly advise if i am doing something wrong.
Thanks in advance.
I am not able to figure it out can this be problem due to feature scaling as I did feature scaling using R and saved the csv files to use in python.
When you get a negative score for regression problem, it usually means that your the model you choose can't fit your data well.
You have layer 1 activation as sigmoid, layer 2 also as sigmoid and then final layer as 1 output.
change the activations to relu, as sigmoid would be squashing the values between 0 to 1. Making the numbers really small, causing the vanishing gradient problem over the 2 hidden layer.
def base_model():
model = Sequential()
model.add(Dense(11, input_dim=11, kernel_initializer='normal', activation='relu'))
model.add(Dense(7, kernel_initializer='normal', activation='relu'))
model.add(Dense(1, kernel_initializer='normal'))
model.compile(loss='mean_squared_error', optimizer='adam')
return model

Categories

Resources