Loss does not change during training of my model

I want to predict a time series using a CNN-LSTM model. This is my model:
def generate_model():
    model = keras.models.Sequential([
        Conv1D(64, 3, padding='causal', activation='relu', input_shape=(24, 20)),
        BatchNormalization(),
        Conv1D(64, 3, padding='causal', activation='relu'),
        BatchNormalization(),
        Conv1D(32, 3, padding='causal', activation='relu'),
        MaxPool1D(3),
        LSTM(100, dropout=0.2, return_sequences=True),
        LSTM(50, dropout=0.3),
        Dense(1, activation='relu')
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss='mean_squared_error',
                  metrics=[tf.keras.metrics.MeanAbsoluteError(),
                           tf.keras.metrics.RootMeanSquaredError(),
                           RSquare()])
    return model
Then I use this line of code to train my model:
history1 = model1.fit(X1_train, y1_train, epochs=200, batch_size=32, validation_data=(X1_test, y1_test), verbose=2, callbacks=callbacks)
But the loss and metric values stay exactly the same from epoch to epoch and never change.
These are my callbacks, just in case:
from keras.callbacks import LearningRateScheduler

def decay_schedule(epoch, lr):
    lr = lr - 0.0001
    return lr

lr_scheduler = LearningRateScheduler(decay_schedule)
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='max', min_delta=1e-3, patience=50)
callbacks = [lr_scheduler, callback]
Thank you in advance.
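A few things in this setup commonly produce a flat loss. A minimal sketch of the usual suspects and fixes, offered as assumptions to try rather than a verified diagnosis of this exact model:

import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping

# 1. A relu on a regression head can get stuck outputting exactly 0 and never
#    recover; a linear activation is the usual choice for a regression output.
output_layer = Dense(1, activation='linear')

# 2. Subtracting a constant every epoch drives the learning rate negative
#    after about 10 epochs with Adam's default of 0.001, at which point
#    updates move the wrong way; clamp the rate at a small floor instead.
def decay_schedule(epoch, lr):
    return max(lr - 1e-4, 1e-5)

lr_scheduler = LearningRateScheduler(decay_schedule)

# 3. val_loss improves by getting smaller, so EarlyStopping should use
#    mode='min' (or the default 'auto'); mode='max' tracks the wrong direction.
early_stopping = EarlyStopping(monitor='val_loss', mode='min',
                               min_delta=1e-3, patience=50)

callbacks = [lr_scheduler, early_stopping]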

Related

Keras, Google Colab freezes on the last step of the first epoch

The code:
from google.colab import drive
import tensorflow as tf
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Conv2D, MaxPool2D, Flatten
from tensorflow.python.keras.optimizer_v1 import Adam
import numpy as np
import cv2
import matplotlib.pyplot as plt
from tensorflow.python.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

device_list = tf.test.gpu_device_name()
if device_list != '/device:GPU:0':
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_list))

datagen_train = tf.keras.preprocessing.image.ImageDataGenerator()
datagen_val = tf.keras.preprocessing.image.ImageDataGenerator()
datagen_test = tf.keras.preprocessing.image.ImageDataGenerator()
size = 128
batch_size = 20
tf.compat.v1.disable_eager_execution()

train_set = datagen_train.flow_from_directory("drive/MyDrive/train",
                                              target_size=(size, size),
                                              color_mode="grayscale",
                                              batch_size=batch_size,
                                              class_mode='categorical',
                                              shuffle=True)
val_set = datagen_val.flow_from_directory("drive/MyDrive/valid",
                                          target_size=(size, size),
                                          color_mode="grayscale",
                                          batch_size=batch_size,
                                          class_mode='categorical',
                                          shuffle=True)
test_set = datagen_train.flow_from_directory("drive/MyDrive/test",
                                             target_size=(size, size),
                                             color_mode="grayscale",
                                             batch_size=batch_size,
                                             class_mode='categorical',
                                             shuffle=True)
imgs, labels = next(test_set)

model = Sequential([
    Conv2D(filters=64, kernel_size=(3,3), padding='same', activation='relu', input_shape=(128,128,1)),
    MaxPool2D(pool_size=(2,2), strides=2),
    Conv2D(filters=128, kernel_size=(3,3), padding='same', activation='relu'),
    MaxPool2D(pool_size=(2,2), strides=2),
    Conv2D(filters=256, kernel_size=(3,3), padding='same', activation='relu'),
    MaxPool2D(pool_size=(2,2), strides=2),
    Conv2D(filters=512, kernel_size=(3,3), padding='same', activation='relu'),
    MaxPool2D(pool_size=(2,2), strides=2),
    Flatten(),
    Dense(units=256, activation='relu'),
    Dense(units=512, activation='relu'),
    Dense(units=2, activation='softmax')
])

checkpoint = ModelCheckpoint("./model.h5", monitor='val_acc', verbose=1, save_best_only=True, mode='max')
earlystopping = EarlyStopping(monitor='vall_loss', min_delta=0, patience=3, verbose=1, restore_best_weights=True)
reducelearningrate = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, verbose=1, min_delta=0.0001)
callbacks_list = [earlystopping, checkpoint, reducelearningrate]
ep = 30
opt = Adam(lr=0.0001)
model.summary()
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(x=train_set, epochs=ep, steps_per_epoch=601, validation_data=val_set, validation_steps=209, verbose=1)
model.save('Drowsines_Detector2.h5')
model.evaluate(x=imgs, y=labels, verbose=2)
On its first run in Google Colab, the program takes about 1 hour 30 minutes for the first epoch, then gets stuck at step 601/601. After cancelling and rerunning it, it completes the first epoch very quickly, in 15 or 16 seconds, but then sometimes gets stuck at step 600/601 and sometimes at step 601/601. It never continues to the second epoch. How can I fix this?
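A hang exactly at the last step of an epoch is usually the moment Keras starts the 209 validation steps, and reading thousands of small image files from a mounted Drive can be slow enough to look like a freeze. A hedged workaround sketch, assuming the directory layout from the question: copy the dataset onto the Colab VM's local disk once and point the generators there.

import shutil

# Reading many small files from a mounted Google Drive is slow and can stall
# at epoch boundaries (when validation starts); copying the data to the VM's
# local disk once usually removes the stall. The /content paths are examples.
for split in ("train", "valid", "test"):
    shutil.copytree("drive/MyDrive/" + split, "/content/" + split)

train_set = datagen_train.flow_from_directory("/content/train",
                                              target_size=(size, size),
                                              color_mode="grayscale",
                                              batch_size=batch_size,
                                              class_mode='categorical',
                                              shuffle=True)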

How to evaluate an output label in a trained TensorFlow model

I have the following TensorFlow model:
def build_model():
    model = keras.Sequential([
        Dense(20, activation=tf.nn.relu, input_shape=[len(all_data[0])]),
        Dense(20, activation=tf.nn.relu, input_shape=[20]),
        Dense(20, activation=tf.nn.relu, input_shape=[20]),
        Dense(1, activation=tf.nn.sigmoid)
    ])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=tf.keras.losses.binary_crossentropy,
        metrics=[
            tf.keras.metrics.BinaryAccuracy(name='accuracy')
        ]
    )
    return model
And I have trained the model using the following:
model = build_model()
history = model.fit(all_data, all_labels, epochs=1000)
I would like to evaluate the output on the input [0, 33, 1, 0]. How can I do this?
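A minimal sketch, assuming the model above was trained on four-feature rows as that input implies: model.predict expects a batch, so the single sample needs an outer dimension, and the sigmoid output can be thresholded at 0.5 for a binary label.

import numpy as np

# predict() expects a 2-D batch, so wrap the single sample: shape (1, 4)
sample = np.array([[0, 33, 1, 0]], dtype=np.float32)
prob = model.predict(sample)       # sigmoid output in [0, 1], shape (1, 1)
label = int(prob[0][0] > 0.5)      # threshold at 0.5 for a 0/1 label
print(prob[0][0], label)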

Keep getting NaN values for scoring when tuning a KerasRegressor

I am trying to tune hyperparameters on the KerasRegressor. However, I only get NaN scores, as shown below. May I know what causes the issue? Everything works fine when I compile my model, but the scoring for the best parameters always shows NaNs. The metric I use is RMSE.
Code snippet below:
def create_model(optimizer, activation, lstm_unit_1, lstm_unit_2, lstm_unit_3, init='glorot_uniform'):
    model = Sequential()
    model.add(Conv1D(lstm_unit_1, kernel_size=1, activation=activation, input_shape=(trainX.shape[1], trainX.shape[2])))
    model.add(GRU(lstm_unit_2, activation=activation, return_sequences=True, input_shape=(trainX.shape[1], trainX.shape[2])))
    model.add(GRU(lstm_unit_3, activation=activation, return_sequences=True, input_shape=(trainX.shape[1], trainX.shape[2])))
    model.add(Dense(units=1))
    model.add(Flatten())
    model.compile(optimizer=optimizer, loss='mse', metrics=['mean_squared_error'])
    return model

model = tf.keras.wrappers.scikit_learn.KerasRegressor(build_fn=create_model,
                                                      epochs=150,
                                                      verbose=False)
batch_size = [16, 32, 64, 128]
lstm_unit_1 = [128, 256, 512]
lstm_unit_2 = lstm_unit_1.copy()
lstm_unit_3 = lstm_unit_1.copy()
optimizer = ['SGD', 'Adam', 'Adamax', 'RMSprop']
activation = ['relu', 'linear', 'sigmoid']
param_grid = dict(lstm_unit_1=lstm_unit_1,
                  lstm_unit_2=lstm_unit_2,
                  lstm_unit_3=lstm_unit_3,
                  optimizer=optimizer,
                  activation=activation,
                  batch_size=batch_size)
warnings.filterwarnings("ignore")
random = RandomizedSearchCV(estimator=model, param_distributions=param_grid, n_jobs=-1, scoring='neg_mean_squared_error')
random_result = random.fit(trainX, trainY)
print(random_result.best_score_)
print(random_result.best_params_)
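A NaN best score from a scikit-learn search usually means fitting or scoring raised an exception (or produced NaN losses) inside a worker, and the search recorded NaN for that candidate. A hedged debugging sketch, reusing the model and param_grid above: rerun with error_score='raise' and a single job so the underlying exception surfaces instead of being swallowed.

from sklearn.model_selection import RandomizedSearchCV

# error_score='raise' re-raises whatever exception produced the NaN score;
# n_jobs=1 keeps the traceback in the main process where it is readable.
search = RandomizedSearchCV(estimator=model,
                            param_distributions=param_grid,
                            n_jobs=1,
                            error_score='raise',
                            scoring='neg_mean_squared_error')
search_result = search.fit(trainX, trainY)
print(search_result.best_score_)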

How to load a model and resume training in TensorFlow

I want to load the trained model and resume training from the last checkpoint. Can anyone help me with that?
I am using TensorFlow 2.0. I have a low-spec PC, so I can't train my model in one go.
import tensorflow as tf
from tensorflow.keras import models, layers
import matplotlib.pyplot as plt
from tensorflow.python.keras.metrics import acc
import datetime
from tensorflow.keras.callbacks import TensorBoard

IMAGE_SIZE = 224
CHANNELS = 3

from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    horizontal_flip=True
)
train_generator = train_datagen.flow_from_directory(
    'data/train/',
    color_mode="rgb",
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=32,
    class_mode="sparse",
)
print(train_generator.class_indices)
class_names = list(train_generator.class_indices.keys())
print(class_names)

validation_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    horizontal_flip=True)
validation_generator = validation_datagen.flow_from_directory(
    'data/validation/',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=32,
    class_mode="sparse"
)
test_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    horizontal_flip=True)
test_generator = test_datagen.flow_from_directory(
    'data/test/',
    target_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=32,
    class_mode="sparse"
)

input_shape = (IMAGE_SIZE, IMAGE_SIZE, CHANNELS)
n_classes = 2
model = models.Sequential([
    layers.InputLayer(input_shape=input_shape),
    layers.Conv2D(32, kernel_size=(3,3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, kernel_size=(3,3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, kernel_size=(3,3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(n_classes, activation='softmax'),
])
model.summary()
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy']
)

import os
checkpoint_path = "teta/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path, save_weights_only=True, verbose=1)

history = model.fit(
    train_generator,
    steps_per_epoch=30,
    batch_size=32,
    validation_data=validation_generator,
    validation_steps=22,
    verbose=1,
    callbacks=[cp_callback],
    epochs=2,
)
I would recommend saving the whole model with model.save(*) and then loading it again with tf.keras.models.load_model(*); see the Keras save-and-load documentation for more information. In your case, since the checkpoint callback saved only the weights, you can just run:
model.load_weights('teta/cp.ckpt')
before calling model.fit(*) again.
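A minimal resume sketch along those lines, assuming the model, train_generator, and cp_callback defined in the question; the file name full_model.h5 is only an example:

import tensorflow as tf

# Option 1: rebuild the same architecture, then restore the checkpointed
# weights written by the ModelCheckpoint callback above.
model.load_weights("teta/cp.ckpt")

# Option 2: save the entire model (architecture + weights + optimizer state)
# at the end of one session...
model.save("full_model.h5")

# ...and restore everything in the next session; training then continues
# where it stopped simply by calling fit() again.
model = tf.keras.models.load_model("full_model.h5")
model.fit(train_generator, epochs=2, callbacks=[cp_callback])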

Keras: KeyError: 'val_acc'

I'm trying to do multiclass classification for 26 classes. It seems my model is not validating the data, even though I wrote a validation data generator separate from the training data generator. Here is the code:
datagen = ImageDataGenerator(
    rotation_range=0.2,
    width_shift_range=0.05,
    height_shift_range=0.05,
    shear_range=0.05,
    horizontal_flip=True,
    fill_mode='nearest',
)
batch_size = 8
train_generator = datagen.flow(
    train_images,
    train_labels,
    batch_size=batch_size,
    shuffle=True,
    subset='training',
    seed=42)
valid_generator = datagen.flow(
    val_images,
    val_labels,
    batch_size=batch_size,
    shuffle=True,
    subset='validation',
    seed=42)
Here's the model:
img_rows = 256
img_cols = 256

def get_net():
    inputs = Input((img_rows, img_cols, 1))
    print("inputs shape:", inputs.shape)
    # Convolution layers
    conv1 = Conv2D(24, 3, strides=(2, 2), activation='relu', padding='same', kernel_initializer='he_normal')(inputs)
    print("conv1 shape:", conv1.shape)
    conv2 = Conv2D(24, 3, strides=(2, 2), activation='relu', padding='same', kernel_initializer='he_normal')(conv1)
    print("conv2 shape:", conv2.shape)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv2)
    print("pool1 shape:", pool1.shape)
    drop1 = Dropout(0.25)(pool1)
    conv3 = Conv2D(36, 3, strides=(2, 2), activation='relu', padding='same', kernel_initializer='he_normal')(drop1)
    print("conv3 shape:", conv3.shape)
    conv4 = Conv2D(36, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv3)
    print("conv4 shape:", conv4.shape)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv4)
    print("pool2 shape:", pool2.shape)
    drop2 = Dropout(0.25)(pool2)
    conv5 = Conv2D(48, 3, activation='relu', padding='same', kernel_initializer='he_normal')(drop2)
    print("conv5 shape:", conv5.shape)
    conv6 = Conv2D(48, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv5)
    print("conv6 shape:", conv6.shape)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv6)
    print("pool3 shape:", pool3.shape)
    drop3 = Dropout(0.25)(pool3)
    # Flattening
    flat = Flatten()(drop3)
    # Fully connected layers
    dense1 = Dense(128, activation='relu', use_bias=True, kernel_initializer='he_normal')(flat)
    print("dense1 shape:", dense1.shape)
    drop4 = Dropout(0.5)(dense1)
    dense2 = Dense(128, activation='relu', use_bias=True, kernel_initializer='he_normal')(drop4)
    print("dense2 shape:", dense2.shape)
    drop5 = Dropout(0.5)(dense2)
    dense4 = Dense(26, activation='softmax', use_bias=True, kernel_initializer='he_normal')(drop5)
    print("dense4 shape:", dense4.shape)
    #drop7 = Dropout(0.25)(dense4)
    model = Model(input=inputs, output=dense4)
    optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=0.00000001, decay=0.0)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model
Here's the code for training:
def train():
    model = get_net()
    print("got model")
    model.summary()
    model_checkpoint = ModelCheckpoint('seqnet.hdf5', monitor='loss', verbose=1, save_best_only=True)
    print('Fitting model...')
    #model.fit_generator(train_generator, validation_data=validation_generator, steps_per_epoch=len(train_generator), epochs=2)
    history = model.fit_generator(
        train_generator,
        steps_per_epoch=len(train_generator) // batch_size,
        validation_data=valid_generator,
        validation_steps=len(valid_generator) // batch_size,
        epochs=50)
    # list all data in history
    print(history.history.keys())
    # summarize history for accuracy
    plt.plot(history.history['acc'])
    plt.plot(history.history['val_acc'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()
    # summarize history for loss
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()
    return model

model = train()
Training log for the last few epochs. Note that it prints only training loss and training accuracy; there is no information about validation:
Epoch 48/50
18/18 [==============================] - 12s 639ms/step - loss: 1.8327 - acc: 0.3125
Epoch 49/50
18/18 [==============================] - 11s 604ms/step - loss: 1.7274 - acc: 0.3840
Epoch 50/50
18/18 [==============================] - 11s 609ms/step - loss: 1.5989 - acc: 0.3542
dict_keys(['acc', 'loss'])
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-72-387ae82a9daf> in <module>()
39 return model
40
---> 41 model = train()
<ipython-input-72-387ae82a9daf> in train()
21 # summarize history for accuracy
22 plt.plot(history.history['acc'])
---> 23 plt.plot(history.history['val_acc'])
24 plt.title('model accuracy')
25 plt.ylabel('accuracy')
KeyError: 'val_acc'
Here's the incomplete curve (according to my code, two plots should have been produced: one of training vs. validation accuracy, the other of training vs. validation loss).
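The dict_keys(['acc', 'loss']) output says validation never ran at all, so no val_* keys were ever recorded. Two likely culprits, offered as hedged guesses: the subset= argument only takes effect when the ImageDataGenerator is constructed with validation_split=..., so with already-separate train/val arrays it can leave valid_generator empty; and len() of a Keras generator already counts batches per epoch, so dividing by batch_size again can round validation_steps down to 0. A minimal sketch of the corrected calls, reusing the arrays and model above:

# subset= dropped: it requires validation_split on the ImageDataGenerator,
# and val_images/val_labels are already a separate split.
valid_generator = datagen.flow(
    val_images,
    val_labels,
    batch_size=batch_size,
    shuffle=True,
    seed=42)

history = model.fit_generator(
    train_generator,
    # len(generator) is already the number of batches, so no further division
    # by batch_size; validation_steps of 0 silently skips validation.
    steps_per_epoch=len(train_generator),
    validation_data=valid_generator,
    validation_steps=len(valid_generator),
    epochs=50)

print(history.history.keys())  # should now include 'val_loss' and 'val_acc'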
