Loss does not change during training of my model

I want to predict a time series using a CNN-LSTM model. This is my model:
def generate_model():
    model = keras.models.Sequential([
        Conv1D(64, 3, padding='causal', activation='relu', input_shape=(24, 20)),
        BatchNormalization(),
        Conv1D(64, 3, padding='causal', activation='relu'),
        BatchNormalization(),
        Conv1D(32, 3, padding='causal', activation='relu'),
        MaxPool1D(3),
        LSTM(100, dropout=0.2, return_sequences=True),
        LSTM(50, dropout=0.3),
        Dense(1, activation='relu')
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss='mean_squared_error',
                  metrics=[tf.keras.metrics.MeanAbsoluteError(),
                           tf.keras.metrics.RootMeanSquaredError(),
                           RSquare()])
    return model
Then I use this line of code to train my model:
history1 = model1.fit(X1_train, y1_train, epochs=200, batch_size=32, validation_data=(X1_test, y1_test), verbose=2, callbacks=callbacks)
But the loss and metric values stay exactly the same from epoch to epoch and never change.
These are my callbacks, just in case:
from keras.callbacks import LearningRateScheduler

def decay_schedule(epoch, lr):
    lr = lr - 0.0001
    return lr

lr_scheduler = LearningRateScheduler(decay_schedule)
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='max', min_delta=1e-3, patience=50)
callbacks = [lr_scheduler, callback]
Thank you in advance.
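A few things in this setup commonly produce a flat loss. A minimal sketch of the usual suspects and fixes, offered as assumptions to try rather than a verified diagnosis of this exact model:

import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping

# 1. A relu on a regression head can get stuck outputting exactly 0 and never
#    recover; a linear activation is the usual choice for a regression output.
output_layer = Dense(1, activation='linear')

# 2. Subtracting a constant every epoch drives the learning rate negative
#    after about 10 epochs with Adam's default of 0.001, at which point
#    updates move the wrong way; clamp the rate at a small floor instead.
def decay_schedule(epoch, lr):
    return max(lr - 1e-4, 1e-5)

lr_scheduler = LearningRateScheduler(decay_schedule)

# 3. val_loss improves by getting smaller, so EarlyStopping should use
#    mode='min' (or the default 'auto'); mode='max' tracks the wrong direction.
early_stopping = EarlyStopping(monitor='val_loss', mode='min',
                               min_delta=1e-3, patience=50)

callbacks = [lr_scheduler, early_stopping]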

Related

Keras, Google Colab freezes on the last step of the first epoch

The code:
from google.colab import drive
import tensorflow as tf
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Conv2D, MaxPool2D, Flatten
from tensorflow.python.keras.optimizer_v1 import Adam
import numpy as np
import cv2
import matplotlib.pyplot as plt
from tensorflow.python.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

device_list = tf.test.gpu_device_name()
if device_list != '/device:GPU:0':
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_list))

datagen_train = tf.keras.preprocessing.image.ImageDataGenerator()
datagen_val = tf.keras.preprocessing.image.ImageDataGenerator()
datagen_test = tf.keras.preprocessing.image.ImageDataGenerator()
size = 128
batch_size = 20
tf.compat.v1.disable_eager_execution()

train_set = datagen_train.flow_from_directory("drive/MyDrive/train",
                                              target_size=(size, size),
                                              color_mode="grayscale",
                                              batch_size=batch_size,
                                              class_mode='categorical',
                                              shuffle=True)
val_set = datagen_val.flow_from_directory("drive/MyDrive/valid",
                                          target_size=(size, size),
                                          color_mode="grayscale",
                                          batch_size=batch_size,
                                          class_mode='categorical',
                                          shuffle=True)
test_set = datagen_train.flow_from_directory("drive/MyDrive/test",
                                             target_size=(size, size),
                                             color_mode="grayscale",
                                             batch_size=batch_size,
                                             class_mode='categorical',
                                             shuffle=True)
imgs, labels = next(test_set)

model = Sequential([
    Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu', input_shape=(128,128,1)),
    MaxPool2D(pool_size=(2,2), strides=2),
    Conv2D(filters=128, kernel_size=(3,3), padding='same', activation='relu'),
    MaxPool2D(pool_size=(2,2), strides=2),
    Conv2D(filters=256, kernel_size=(3,3), padding='same', activation='relu'),
    MaxPool2D(pool_size=(2,2), strides=2),
    Conv2D(filters=512, kernel_size=(3,3), padding='same', activation='relu'),
    MaxPool2D(pool_size=(2,2), strides=2),
    Flatten(),
    Dense(units=256, activation='relu'),
    Dense(units=512, activation='relu'),
    Dense(units=2, activation='softmax')
])

checkpoint = ModelCheckpoint("./model.h5", monitor='val_acc', verbose=1, save_best_only=True, mode='max')
earlystopping = EarlyStopping(monitor='vall_loss', min_delta=0, patience=3, verbose=1, restore_best_weights=True)
reducelearningrate = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, verbose=1, min_delta=0.0001)
callbacks_list = [earlystopping, checkpoint, reducelearningrate]
ep = 30
opt = Adam(lr=0.0001)
model.summary()
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(x=train_set, epochs=ep, steps_per_epoch=601, validation_data=val_set, validation_steps=209, verbose=1)
model.save('Drowsines_Detector2.h5')
model.evaluate(x=imgs, y=labels, verbose=2)
On its first run in Google Colab, the program takes about 1 hour 30 minutes for the first epoch, then gets stuck at step 601/601. After cancelling and rerunning it, it completes the first epoch very quickly, in 15 or 16 seconds, but then sometimes gets stuck at step 600/601 and sometimes at step 601/601. It never continues to the second epoch. How can I fix this?
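A hang exactly at the last step of an epoch is usually the moment Keras starts the 209 validation steps, and reading thousands of small image files from a mounted Drive can be slow enough to look like a freeze. A hedged workaround sketch, assuming the directory layout from the question: copy the dataset onto the Colab VM's local disk once and point the generators there.

import shutil

# Reading many small files from a mounted Google Drive is slow and can stall
# at epoch boundaries (when validation starts); copying the data to the VM's
# local disk once usually removes the stall. The /content paths are examples.
for split in ("train", "valid", "test"):
    shutil.copytree("drive/MyDrive/" + split, "/content/" + split)

train_set = datagen_train.flow_from_directory("/content/train",
                                              target_size=(size, size),
                                              color_mode="grayscale",
                                              batch_size=batch_size,
                                              class_mode='categorical',
                                              shuffle=True)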

How to evaluate an output label in a trained TensorFlow model

I have the following TensorFlow model:
def build_model():
    model = keras.Sequential([
        Dense(20, activation=tf.nn.relu, input_shape=[len(all_data[0])]),
        Dense(20, activation=tf.nn.relu, input_shape=[20]),
        Dense(20, activation=tf.nn.relu, input_shape=[20]),
        Dense(1, activation=tf.nn.sigmoid)
    ])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.binary_crossentropy,
        metrics=[
            tf.keras.metrics.BinaryAccuracy(name='accuracy')
        ]
    )
    return model
And I have trained the model using the following:
model = build_model()
history = model.fit(all_data, all_labels, epochs=1000)
I would like to evaluate the output on the input [0, 33, 1, 0]. How can I do this?
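A minimal sketch, assuming the model above was trained on four-feature rows as that input implies: model.predict expects a batch, so the single sample needs an outer dimension, and the sigmoid output can be thresholded at 0.5 for a binary label.

import numpy as np

# predict() expects a 2-D batch, so wrap the single sample: shape (1, 4)
sample = np.array([[0, 33, 1, 0]], dtype=np.float32)
prob = model.predict(sample)       # sigmoid output in [0, 1], shape (1, 1)
label = int(prob[0][0] > 0.5)      # threshold at 0.5 for a 0/1 label
print(prob[0][0], label)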

Keep getting NaN values for scoring when tuning a KerasRegressor

I am trying to tune hyperparameters on the KerasRegressor. However, I only get NaN scores, as shown below. May I know what causes the issue? Everything works fine when I compile my model, but the scoring for the best parameters always shows NaNs. The metric I use is RMSE.
Code snippet below:
def create_model(optimizer, activation, lstm_unit_1, lstm_unit_2, lstm_unit_3, init='glorot_uniform'):
    model = Sequential()
    model.add(Conv1D(lstm_unit_1, kernel_size=1, activation=activation, input_shape=(trainX.shape[1], trainX.shape[2])))
    model.add(GRU(lstm_unit_2, activation=activation, return_sequences=True, input_shape=(trainX.shape[1], trainX.shape[2])))
    model.add(GRU(lstm_unit_3, activation=activation, return_sequences=True, input_shape=(trainX.shape[1], trainX.shape[2])))
    model.add(Dense(units=1))
    model.add(Flatten())
    model.compile(optimizer=optimizer, loss='mse', metrics=['mean_squared_error'])
    return model

model = tf.keras.wrappers.scikit_learn.KerasRegressor(build_fn=create_model,
                                                      epochs=150,
                                                      verbose=False)
batch_size = [16, 32, 64, 128]
lstm_unit_1 = [128, 256, 512]
lstm_unit_2 = lstm_unit_1.copy()
lstm_unit_3 = lstm_unit_1.copy()
optimizer = ['SGD', 'Adam', 'Adamax', 'RMSprop']
activation = ['relu', 'linear', 'sigmoid']
param_grid = dict(lstm_unit_1=lstm_unit_1,
                  lstm_unit_2=lstm_unit_2,
                  lstm_unit_3=lstm_unit_3,
                  optimizer=optimizer,
                  activation=activation,
                  batch_size=batch_size)
warnings.filterwarnings("ignore")
random = RandomizedSearchCV(estimator=model, param_distributions=param_grid, n_jobs=-1, scoring='neg_mean_squared_error')
random_result = random.fit(trainX, trainY)
print(random_result.best_score_)
print(random_result.best_params_)
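A NaN best score from a scikit-learn search usually means fitting or scoring raised an exception (or produced NaN losses) inside a worker, and the search recorded NaN for that candidate. A hedged debugging sketch, reusing the model and param_grid above: rerun with error_score='raise' and a single job so the underlying exception surfaces instead of being swallowed.

from sklearn.model_selection import RandomizedSearchCV

# error_score='raise' re-raises whatever exception produced the NaN score;
# n_jobs=1 keeps the traceback in the main process where it is readable.
search = RandomizedSearchCV(estimator=model,
                            param_distributions=param_grid,
                            n_jobs=1,
                            error_score='raise',
                            scoring='neg_mean_squared_error')
search_result = search.fit(trainX, trainY)
print(search_result.best_score_)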

How to load a model and resume training in TensorFlow

I want to load the trained model and resume training from the last checkpoint. Can anyone help me with that?
I am using TensorFlow 2.0. I have a low-spec PC, so I can't train my model in one go.
import tensorflow as tf
from tensorflow.keras import models, layers
import matplotlib.pyplot as plt
from tensorflow.python.keras.metrics import acc
import datetime
from tensorflow.keras.callbacks import TensorBoard

IMAGE_SIZE = 224
CHANNELS = 3

from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    horizontal_flip=True
)
train_generator = train_datagen.flow_from_directory(
    'data/train/',
    color_mode="rgb",
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=32,
    class_mode="sparse",
)
print(train_generator.class_indices)
class_names = list(train_generator.class_indices.keys())
print(class_names)

validation_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    horizontal_flip=True)
validation_generator = validation_datagen.flow_from_directory(
    'data/validation/',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=32,
    class_mode="sparse"
)
test_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    horizontal_flip=True)
test_generator = test_datagen.flow_from_directory(
    'data/test/',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=32,
    class_mode="sparse"
)

input_shape = (IMAGE_SIZE, IMAGE_SIZE, CHANNELS)
n_classes = 2
model = models.Sequential([
    layers.InputLayer(input_shape=input_shape),
    layers.Conv2D(32, kernel_size=(3,3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, kernel_size=(3,3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, kernel_size=(3,3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(n_classes, activation='softmax'),
])
model.summary()
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy']
)

import os
checkpoint_path = "teta/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path, save_weights_only=True, verbose=1)

history = model.fit(
    train_generator,
    steps_per_epoch=30,
    batch_size=32,
    validation_data=validation_generator,
    validation_steps=22,
    verbose=1,
    callbacks=[cp_callback],
    epochs=2,
)
I would recommend saving the whole model with model.save(*) and then loading it again with tf.keras.models.load_model(*); see the Keras save-and-load documentation for more information. In your case, since the checkpoint callback saved only the weights, you can just run:
model.load_weights('teta/cp.ckpt')
before calling model.fit(*) again.
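A minimal resume sketch along those lines, assuming the model, train_generator, and cp_callback defined in the question; the file name full_model.h5 is only an example:

import tensorflow as tf

# Option 1: rebuild the same architecture, then restore the checkpointed
# weights written by the ModelCheckpoint callback above.
model.load_weights("teta/cp.ckpt")

# Option 2: save the entire model (architecture + weights + optimizer state)
# at the end of one session...
model.save("full_model.h5")

# ...and restore everything in the next session; training then continues
# where it stopped simply by calling fit() again.
model = tf.keras.models.load_model("full_model.h5")
model.fit(train_generator, epochs=2, callbacks=[cp_callback])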

Keras: KeyError: 'val_acc'

I'm trying to do multiclass classification for 26 classes. It seems my model is not validating the data, even though I wrote a validation data generator separate from the training data generator. Here is the code:
datagen = ImageDataGenerator(
    rotation_range=0.2,
    width_shift_range=0.05,
    height_shift_range=0.05,
    shear_range=0.05,
    horizontal_flip=True,
    fill_mode='nearest',
)
batch_size = 8
train_generator = datagen.flow(
    train_images,
    train_labels,
    batch_size=batch_size,
    shuffle=True,
    subset='training',
    seed=42)
valid_generator = datagen.flow(
    val_images,
    val_labels,
    batch_size=batch_size,
    shuffle=True,
    subset='validation',
    seed=42)
Here's the model:
img_rows = 256
img_cols = 256

def get_net():
    inputs = Input((img_rows, img_cols, 1))
    print("inputs shape:", inputs.shape)
    # Convolution layers
    conv1 = Conv2D(24, 3, strides=(2, 2), activation='relu', padding='same', kernel_initializer='he_normal')(inputs)
    print("conv1 shape:", conv1.shape)
    conv2 = Conv2D(24, 3, strides=(2, 2), activation='relu', padding='same', kernel_initializer='he_normal')(conv1)
    print("conv2 shape:", conv2.shape)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv2)
    print("pool1 shape:", pool1.shape)
    drop1 = Dropout(0.25)(pool1)
    conv3 = Conv2D(36, 3, strides=(2, 2), activation='relu', padding='same', kernel_initializer='he_normal')(drop1)
    print("conv3 shape:", conv3.shape)
    conv4 = Conv2D(36, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv3)
    print("conv4 shape:", conv4.shape)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv4)
    print("pool2 shape:", pool2.shape)
    drop2 = Dropout(0.25)(pool2)
    conv5 = Conv2D(48, 3, activation='relu', padding='same', kernel_initializer='he_normal')(drop2)
    print("conv5 shape:", conv5.shape)
    conv6 = Conv2D(48, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv5)
    print("conv6 shape:", conv6.shape)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv6)
    print("pool3 shape:", pool3.shape)
    drop3 = Dropout(0.25)(pool3)
    # Flattening
    flat = Flatten()(drop3)
    # Fully connected layers
    dense1 = Dense(128, activation='relu', use_bias=True, kernel_initializer='he_normal')(flat)
    print("dense1 shape:", dense1.shape)
    drop4 = Dropout(0.5)(dense1)
    dense2 = Dense(128, activation='relu', use_bias=True, kernel_initializer='he_normal')(drop4)
    print("dense2 shape:", dense2.shape)
    drop5 = Dropout(0.5)(dense2)
    dense4 = Dense(26, activation='softmax', use_bias=True, kernel_initializer='he_normal')(drop5)
    print("dense4 shape:", dense4.shape)
    #drop7 = Dropout(0.25)(dense4)
    model = Model(input=inputs, output=dense4)
    optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=0.00000001, decay=0.0)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model
Here's the code for training:
def train():
    model = get_net()
    print("got model")
    model.summary()
    model_checkpoint = ModelCheckpoint('seqnet.hdf5', monitor='loss', verbose=1, save_best_only=True)
    print('Fitting model...')
    #model.fit_generator(train_generator, validation_data=validation_generator, steps_per_epoch=len(train_generator), epochs=2)
    history = model.fit_generator(
        train_generator,
        steps_per_epoch=len(train_generator) // batch_size,
        validation_data=valid_generator,
        validation_steps=len(valid_generator) // batch_size,
        epochs=50)
    # list all data in history
    print(history.history.keys())
    # summarize history for accuracy
    plt.plot(history.history['acc'])
    plt.plot(history.history['val_acc'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()
    # summarize history for loss
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()
    return model

model = train()
Training log for the last few epochs. Note that it prints only training loss and training accuracy; there is no information about validation:
Epoch 48/50
18/18 [==============================] - 12s 639ms/step - loss: 1.8327 - acc: 0.3125
Epoch 49/50
18/18 [==============================] - 11s 604ms/step - loss: 1.7274 - acc: 0.3840
Epoch 50/50
18/18 [==============================] - 11s 609ms/step - loss: 1.5989 - acc: 0.3542
dict_keys(['acc', 'loss'])
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-72-387ae82a9daf> in <module>()
39 return model
40
---> 41 model = train()
<ipython-input-72-387ae82a9daf> in train()
21 # summarize history for accuracy
22 plt.plot(history.history['acc'])
---> 23 plt.plot(history.history['val_acc'])
24 plt.title('model accuracy')
25 plt.ylabel('accuracy')
KeyError: 'val_acc'
Here's the incomplete curve (according to my code, two plots should have been produced: one of training vs. validation accuracy, the other of training vs. validation loss).
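The dict_keys(['acc', 'loss']) output says validation never ran at all, so no val_* keys were ever recorded. Two likely culprits, offered as hedged guesses: the subset= argument only takes effect when the ImageDataGenerator is constructed with validation_split=..., so with already-separate train/val arrays it can leave valid_generator empty; and len() of a Keras generator already counts batches per epoch, so dividing by batch_size again can round validation_steps down to 0. A minimal sketch of the corrected calls, reusing the arrays and model above:

# subset= dropped: it requires validation_split on the ImageDataGenerator,
# and val_images/val_labels are already a separate split.
valid_generator = datagen.flow(
    val_images,
    val_labels,
    batch_size=batch_size,
    shuffle=True,
    seed=42)

history = model.fit_generator(
    train_generator,
    # len(generator) is already the number of batches, so no further division
    # by batch_size; validation_steps of 0 silently skips validation.
    steps_per_epoch=len(train_generator),
    validation_data=valid_generator,
    validation_steps=len(valid_generator),
    epochs=50)

print(history.history.keys())  # should now include 'val_loss' and 'val_acc'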
