DNN model suddenly not converging (having low loss and not changing much) - python

This is my code below.
I worked with the exact same model and it converged well when I didn't scale the data.
The loss started somewhere around 1.0 and went down to 0.45.
However, after rewriting my code, the model's loss is a bit weird: it starts around 0.4 and does not converge. I'm wondering if I introduced any errors while rewriting the code. Can someone please help me find where I went wrong?
# imports implied by the snippet (df and seed are assumed to be defined earlier)
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.initializers import HeUniform, GlorotUniform
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard

label = np.array(df['label'])
features = np.array(df.iloc[:, 0:8])

# 70/15/15 train/validation/test split
X_train, X_rem, y_train, y_rem = train_test_split(features, label, test_size=0.3, random_state=seed)
X_val, X_test, y_val, y_test = train_test_split(X_rem, y_rem, test_size=0.5, random_state=seed)

# scaler is fitted on the training set only, then applied to all three splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

def DNN():
    input_layer = Input(shape=(8,), name='input_layer')
    Dense_1 = Dense(64, activation='relu', kernel_initializer=HeUniform(seed=1))(input_layer)
    Dense_2 = Dense(32, activation='relu', kernel_initializer=HeUniform(seed=1))(Dense_1)
    output_layer = Dense(1, activation='sigmoid', kernel_initializer=GlorotUniform(seed=1))(Dense_2)
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['acc', 'Recall', 'AUC'])
    return model

model = DNN()

my_callbacks = [
    EarlyStopping(patience=10),
    ReduceLROnPlateau(monitor="val_loss", factor=0.3, patience=5, cooldown=3, mode="min", min_lr=0.0001, verbose=1),
    ModelCheckpoint(monitor="val_loss", save_best_only=True, filepath='./checkpoint'),
    TensorBoard(log_dir='./logs')
]

history = model.fit(X_train, y_train, batch_size=64, epochs=150, validation_data=(X_val, y_val), callbacks=my_callbacks)
This outputs:

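One sanity check worth adding (it is not part of the original code) is to compare the starting loss with the no-skill baseline implied by the class balance: binary cross-entropy for a constant predictor equals the entropy of the labels, so if the scaled run starts near that value, the 0.4 figure may simply reflect class imbalance rather than learning.
import numpy as np

# Hypothetical sanity check: the best loss a constant predictor can reach is
# the entropy of the label distribution, -(p*log(p) + (1-p)*log(1-p)).
p = y_train.mean()  # fraction of positive labels in the training split
baseline_bce = -(p * np.log(p) + (1 - p) * np.log(1 - p))
print(f"positive rate: {p:.3f}, no-skill binary cross-entropy: {baseline_bce:.3f}")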
Related

K-fold cross validation for Keras Neural Network

Hi, I have already tuned my hyperparameters and would like to perform k-fold cross-validation on my model. I have been looking around at different methods, but none of them seem to work for me. The code is below:
tf.get_logger().setLevel(logging.ERROR)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Set random seeds for repeatable results
RANDOM_SEED = 3
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)
classes_values = [ "nearmiss", "normal" ]
classes = len(classes_values)
Y = tf.keras.utils.to_categorical(Y - 1, classes)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
input_length = X_train[0].shape[0]
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
validation_dataset = tf.data.Dataset.from_tensor_slices((X_test, Y_test))
def get_reshape_function(reshape_to):
    def reshape(image, label):
        return tf.reshape(image, reshape_to), label
    return reshape

callbacks = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, mode="auto")

model = Sequential()
model.add(Dense(200, activation='tanh', activity_regularizer=tf.keras.regularizers.l1(0.0001)))
model.add(Dropout(0.3))
model.add(Dense(44, activation='tanh', activity_regularizer=tf.keras.regularizers.l1(0.0001)))
model.add(Dropout(0.3))
model.add(Dense(68, activation='tanh', activity_regularizer=tf.keras.regularizers.l1(0.0001)))
model.add(Dropout(0.3))
model.add(Dense(44, activation='tanh', activity_regularizer=tf.keras.regularizers.l1(0.0001)))
model.add(Dropout(0.3))
model.add(Dense(classes, activation='softmax', name='y_pred'))
# this controls the learning rate
opt = Adam(learning_rate=0.0002, beta_1=0.9, beta_2=0.999)
# this controls the batch size, or you can manipulate the tf.data.Dataset objects yourself
BATCH_SIZE = 32
train_dataset = train_dataset.batch(BATCH_SIZE, drop_remainder=False)
validation_dataset = validation_dataset.batch(BATCH_SIZE, drop_remainder=False)
# train the neural network
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
history=model.fit(train_dataset, epochs=50, validation_data=validation_dataset, verbose=2, callbacks=callbacks)
model.test_on_batch(X_test, Y_test)
model.metrics_names
# Use this flag to disable per-channel quantization for a model.
# This can reduce RAM usage for convolutional models, but may have
# an impact on accuracy.
disable_per_channel_quantization = False
I would appreciate it if someone could guide me on this, as I am very new to TensorFlow and neural networks.
I haven't tested it, but this should roughly be what you want. You use the sklearn KFold method to split the dataset into different folds, and then you simply fit the model on the current fold.
tf.get_logger().setLevel(logging.ERROR)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Set random seeds for repeatable results
RANDOM_SEED = 3
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)
classes_values = [ "nearmiss", "normal" ]
classes = len(classes_values)
Y = tf.keras.utils.to_categorical(Y - 1, classes)
def get_reshape_function(reshape_to):
    def reshape(image, label):
        return tf.reshape(image, reshape_to), label
    return reshape
callbacks = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3,mode="auto")
def create_model():
    model = Sequential()
    model.add(Dense(200, activation='tanh', activity_regularizer=tf.keras.regularizers.l1(0.0001)))
    model.add(Dropout(0.3))
    model.add(Dense(44, activation='tanh', activity_regularizer=tf.keras.regularizers.l1(0.0001)))
    model.add(Dropout(0.3))
    model.add(Dense(68, activation='tanh', activity_regularizer=tf.keras.regularizers.l1(0.0001)))
    model.add(Dropout(0.3))
    model.add(Dense(44, activation='tanh', activity_regularizer=tf.keras.regularizers.l1(0.0001)))
    model.add(Dropout(0.3))
    model.add(Dense(classes, activation='softmax', name='y_pred'))
    return model
# this controls the learning rate
opt = Adam(learning_rate=0.0002, beta_1=0.9, beta_2=0.999)
# this controls the batch size, or you can manipulate the tf.data.Dataset objects yourself
BATCH_SIZE = 32
kf = KFold(n_splits=5)
kf.get_n_splits(X)
# Loop over the dataset to create separate folds
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = Y[train_index], Y[test_index]
    input_length = X_train[0].shape[0]

    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    validation_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
    train_dataset = train_dataset.batch(BATCH_SIZE, drop_remainder=False)
    validation_dataset = validation_dataset.batch(BATCH_SIZE, drop_remainder=False)

    # Create a new model instance
    model = create_model()
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

    # train the model on the current fold
    history = model.fit(train_dataset, epochs=50, validation_data=validation_dataset, verbose=2, callbacks=callbacks)
    model.test_on_batch(X_test, y_test)

Error when checking target: expected dense_192 to have 3 dimensions, but got array with shape (37118, 1)

Dear all: I'm very new to deep learning. I was trying to add a for loop to test all the possible combinations to get the best result. Currently what I have is the following.
def coeff_determination(y_true, y_pred):
    SS_res = K.sum(K.square(y_true - y_pred))
    SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
    return 1 - SS_res / (SS_tot + K.epsilon())
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
x_train = x_train.to_numpy()
x_test = x_test.to_numpy()
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)
(37118, 105)
(37118,)
(15908, 105)
(15908,)
timesteps = 3
features = 35 #this is the number of features
x_train = x_train.reshape((x_train.shape[0], timesteps, features))
x_test = x_test.reshape((x_test.shape[0], timesteps, features))
dense_layers=[0, 1, 2]
layer_sizes=[32, 64, 128]
LSTM_layers=[1,2,3]
for dense_layer in dense_layers:
    for layer_size in layer_sizes:
        for LSTM_layer in LSTM_layers:
            NAME = "{}-lstm-{}-nodes-{}-dense-{}".format(LSTM_layer, layer_size, dense_layer, int(time.time()))
            tensorboard = TensorBoard(log_dir=f"LSTM_logs\\{NAME}")
            print(NAME)

            model = Sequential()
            model.add(LSTM(layer_size, input_shape=(x_train.shape[1], x_train.shape[2]), return_sequences=True))
            for i in range(LSTM_layer - 1):
                model.add(LSTM(layer_size, input_shape=(x_train.shape[1], x_train.shape[2]), return_sequences=True))
            for i in range(dense_layer):
                model.add(Dense(layer_size))
            model.add(Dense(1))

            model.compile(loss='mae', optimizer='adam', metrics=[coeff_determination])
            epochs = 10
            result = model.fit(x_train, y_train, epochs=epochs, batch_size=72, validation_data=(x_test, y_test), verbose=2, shuffle=False)
However, I got a traceback that says the following:
ValueError: Error when checking target: expected dense_192 to have 3 dimensions, but got array with shape (37118, 1)
and the error occurs in the following line.
---> 19 result = model.fit(x_train, y_train, epochs=epochs, batch_size=72, validation_data=(x_test, y_test), verbose=2, shuffle=False)
Could anyone please give me a hint on how to solve this problem? Thanks a lot for your time and support.
Sincerely
Wilson
Use return_sequences=False for your last LSTM layer so it only returns a vector with the last hidden state.
Sincerely,
Alexander
more details: How to use return_sequences option and TimeDistributed layer in Keras?
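A minimal sketch of how that change could look inside the loop above (untested; it reuses layer_size, LSTM_layer, dense_layer, x_train and coeff_determination from the question):
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

model = Sequential()
for i in range(LSTM_layer):
    last = (i == LSTM_layer - 1)
    # input_shape only matters for the first layer; only the last LSTM drops
    # return_sequences, so it emits one hidden-state vector per sample
    kwargs = {"input_shape": (x_train.shape[1], x_train.shape[2])} if i == 0 else {}
    model.add(LSTM(layer_size, return_sequences=not last, **kwargs))
for i in range(dense_layer):
    model.add(Dense(layer_size))
model.add(Dense(1))  # now outputs shape (batch, 1), matching the (37118,) targets
model.compile(loss='mae', optimizer='adam', metrics=[coeff_determination])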

Incompatible shapes in output layer - Tensorflow

I am trying to build a bi-LSTM model in TensorFlow (environment: Google Colab). During training, the model has an issue: the last layer reports a shape incompatibility. I wonder if there is any way to reshape x_train and y_train to fix this problem.
Traceback
ValueError: Shapes (16, 11) and (16, 10) are incompatible
If I change the number of units in my output layer from 10 to 11, it does not give any error and the model can be trained. However, I want the output to be 10, not 11.
# current output layer (runs perfectly)
tf.keras.layers.Dense(11, activation='softmax')
# expected output layer (shape incompatibility)
tf.keras.layers.Dense(10, activation='softmax')
BiLSTM Model
def build_model(vocab_size, embedding_dim=64, input_length=30):
    print('\nbuilding the model...\n')
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(input_dim=(vocab_size + 1), output_dim=embedding_dim, input_length=input_length),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(rnn_units, return_sequences=True, dropout=0.2)),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(rnn_units, return_sequences=True, dropout=0.2)),
        tf.keras.layers.GlobalMaxPool1D(),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(64, activation='tanh'),
        # softmax output layer
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    # optimizer & loss
    opt = 'RMSprop'  # tf.optimizers.Adam(learning_rate=1e-4)
    loss = 'categorical_crossentropy'
    # Metrics
    metrics = ['accuracy', 'AUC', 'Precision', 'Recall']
    # compile model
    model.compile(optimizer=opt,
                  loss=loss,
                  metrics=metrics)
    model.summary()
    return model
The BATCH_SIZE is set to 16, and the shapes of x_train and y_train are:
x_train.shape
(800, 30)
y_train.shape
(800,)
Training
def train(model, x_train, y_train, x_validation, y_validation,
          epochs, batch_size=32, patience=5,
          verbose=2, monitor_es='accuracy', mode_es='auto', restore=True,
          monitor_mc='val_accuracy', mode_mc='max'):
    print('\ntraining...\n')
    # callbacks
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor=monitor_es,
        verbose=1, mode=mode_es, restore_best_weights=restore,
        min_delta=1e-3, patience=patience)
    model_checkpoint = tf.keras.callbacks.ModelCheckpoint('tfjsmode.h5', monitor=monitor_mc, mode=mode_mc,
        verbose=1, save_best_only=True)
    # Define TensorBoard as a Keras callback
    tensorboard = TensorBoard(
        log_dir='./logs',
        histogram_freq=1,
        write_images=True
    )
    keras_callbacks = [tensorboard, early_stopping, model_checkpoint]
    # train model
    history = model.fit(x_train, y_train,
                        batch_size=batch_size, epochs=epochs, verbose=verbose,
                        validation_data=(x_validation, y_validation),
                        callbacks=keras_callbacks)
    return history
Preprocessing
def preprocess(x, padding_shape=30):
    return np.array([ord(i.lower()) - ord('a') + 1 if not i.isdigit() and i != ' ' else 0 for i in list(x)]
                    + ([0] * (padding_shape - len(x))), dtype=int)

def prepare_dataset(labeldict: dict, test_size=.3, validation_size=.1):
    print('preparing the dataset...\n')
    from sklearn import preprocessing
    # load dataset
    # split dataset (as strings in pandas.core.series.Series objects)
    x, y = load_clean_dataset()
    x = np.array(list(map(preprocess, x)))
    y = np.array(list(map(lambda x: labeldict[x.replace(' ', '_')], y)))
    print(('y: {}').format(y))
    # create/split train, validation and test sets and shuffle the data
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test_size, shuffle=True)
    print(x.max(), x.min())
    x_train_val, x_validation, y_train_val, y_validation = train_test_split(x_train, y_train, test_size=test_size, shuffle=True)
    # pandas.core.series.Series to numpy array
    x_train, y_train = np.array(x_train), np.array(y_train)
    x_validation, y_validation = np.array(x_validation), np.array(y_validation)
    x_test, y_test = np.array(x_test), np.array(y_test)
    x_train_val, y_train_val = np.array(x_train_val), np.array(y_train_val)
    print(('\nx_train: \n{}\n\ny_train: \n{}').format(x_train_val, y_train_val))
    y_train = tf.keras.utils.to_categorical(y, num_classes=10)
    return (x_train, y_train), (x_validation, y_validation), (x_test, y_test), (x_train_val, y_train_val)
It seems you currently have labels as integers (i.e. not one-hot encoded vectors). For example, your y seems to be something like:
[0, 1, 8, 9, ....] # a vector of 800 elements
There are two ways to train a model on such data.
Alternative 1 (easiest I guess)
Use sparse_categorical_crossentropy as the loss function of the model
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=metrics)
Alternative 2
Convert your labels to one-hot encoded vectors using
y_onehot = tf.keras.utils.to_categorical(y, num_classes=10)
and then keep the loss of the model as categorical_crossentropy
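To make the difference concrete, here is a small self-contained sketch (toy data, not the model from the question) showing both alternatives side by side:
import numpy as np
import tensorflow as tf

# Toy data: 8 samples, 4 features, integer labels in [0, 9]
x = np.random.rand(8, 4).astype("float32")
y = np.array([0, 1, 8, 9, 3, 2, 7, 5])

def make_model(loss):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(16, activation="relu", input_shape=(4,)),
        tf.keras.layers.Dense(10, activation="softmax"),
    ])
    model.compile(optimizer="adam", loss=loss, metrics=["accuracy"])
    return model

# Alternative 1: integer labels + sparse_categorical_crossentropy
make_model("sparse_categorical_crossentropy").fit(x, y, epochs=1, verbose=0)

# Alternative 2: one-hot labels + categorical_crossentropy
y_onehot = tf.keras.utils.to_categorical(y, num_classes=10)
make_model("categorical_crossentropy").fit(x, y_onehot, epochs=1, verbose=0)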

Is it possible or impossible to forecast the next value of a time series using an LSTM in Keras?

I have 62796 training samples (x_train) and 15684 test samples (x_test), and I want to predict the values that come after them. I would like your advice on how to forecast those values using an LSTM in Keras. Here is my code:
...
look_back = 20
train_size = int(len(data) * 0.80)
test_size = len(data) - train_size
train = data[0:train_size]
test = data[train_size:len(data)]
x_train, y_train = create_dataset(train, look_back)
x_test, y_test = create_dataset(test, look_back)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
y_train=np.repeat(y_train.reshape(-1,1), 20, axis=1).reshape(-1,20,1)
y_test=np.repeat(y_test.reshape(-1,1), 20, axis=1).reshape(-1,20,1)
...
model = Sequential()
model.add(LSTM(512, return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(512, return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(1, return_sequences=True))
model.compile(loss='mean_squared_error', optimizer='rmsprop', metrics=['accuracy'])
model.summary()
model.fit(x_train, y_train, epochs=10, batch_size=64)
p = model.predict(x_test)
So, predictions = model.predict(x_train) has shape (62796, 20, 1).
Following the approach from "how to use the Keras model to forecast for future dates or events?", I tried this code:
future = []
currentStep = predictions[-20:, :, :]  # -20 is the last look_back window
for i in range(10):
    currentStep = model.predict(currentStep)
    future.append(currentStep)
The future values produced by this loop look very different from the first 4000 values of p = model.predict(x_test). I want to know how to predict the exact next value, but the difference between the two results is very large, and I don't know where the code went wrong. The full source is at https://gist.github.com/Lay4U/654f70bd1fb9c4f7d5bdb21ddcb588ab.
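For reference, a common pattern for this kind of iterative forecast (just a sketch, reusing data, model and look_back from the code above, and not tested on this dataset) is to keep a rolling window of the last look_back observed values, predict one step, append the prediction, and drop the oldest value:
import numpy as np

look_back = 20
window = np.asarray(data[-look_back:], dtype="float32").reshape(1, look_back, 1)

future = []
for _ in range(10):
    # the model above ends with LSTM(1, return_sequences=True), so its output
    # is (1, look_back, 1); the last timestep is the one-step-ahead forecast
    next_value = model.predict(window)[0, -1, 0]
    future.append(float(next_value))
    # slide the window: drop the oldest value, append the new prediction
    window = np.append(window[:, 1:, :], [[[next_value]]], axis=1)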
