Keras model.fit() showing loss as nan - python

I am trying to train my model for Instrument Detection. The output is displaying as loss: nan from the first epoch. I tried to change the loss function, activation function, and add some regularisation like Dropout, but it didn't affect the result.
Here is the code:
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, Dropout
from keras.optimizers import Adam
import pickle
import os
import numpy as np
from sklearn.model_selection import train_test_split
def one_hot_encoding(target, instruments=('vio', 'pia', 'tru', 'flu')):
    """Return a one-hot matrix for a sequence of instrument labels.

    Args:
        target: Sequence of labels, one per sample.
        instruments: Ordered class labels; the position of a label is the
            column that is set to 1. Defaults to the four instruments the
            script was written for.

    Returns:
        A float numpy array of shape ``(len(target), len(instruments))``
        with a single 1 in every row.

    Raises:
        ValueError: If a label in ``target`` is not in ``instruments``.
    """
    # Build the label -> column map once instead of scanning the label list
    # for every sample. ``setdefault`` keeps the first position of a repeated
    # label, which is what ``list.index`` would return.
    column_of = {}
    for column, label in enumerate(instruments):
        column_of.setdefault(label, column)

    enc_tar = np.zeros([len(target), len(instruments)])
    for row, label in enumerate(target):
        if label not in column_of:
            raise ValueError('{!r} is not a known instrument label'.format(label))
        enc_tar[row, column_of[label]] = 1
    return enc_tar
def create_model_cnn(inp_shape):
    """Build and compile the convolutional instrument classifier.

    Two 3x3 ReLU convolutions (25 filters, then 10) are flattened into a
    4-unit softmax layer. The model is compiled with Adam (0.001),
    categorical cross-entropy and an accuracy metric.

    Args:
        inp_shape: Shape of one input sample, given to the first layer as
            ``input_shape``.

    Returns:
        The compiled ``Sequential`` model.
    """
    stack = [
        Conv2D(25, kernel_size=3, activation='relu', input_shape=inp_shape),
        Conv2D(10, kernel_size=3, activation='relu'),
        Flatten(),
        Dense(4, activation='softmax'),
    ]
    cnn = Sequential()
    for layer in stack:
        cnn.add(layer)
    cnn.compile(optimizer=Adam(0.001),
                loss='categorical_crossentropy',
                metrics=['accuracy'])
    return cnn
def create_model_mlp(inp_shape):
    """Build and compile the fully connected instrument classifier.

    Two softsign hidden layers (22 and 10 units), each followed by 25%
    dropout, feed a 4-unit softmax layer. The model is compiled with Adam
    (0.0001), categorical cross-entropy and an accuracy metric.

    Args:
        inp_shape: Shape of one input sample, either a tuple such as
            ``(42,)`` or a bare feature count such as ``42``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # The argument used to be ignored in favour of a hard-coded (42,).
    # ``(42)`` is just the int 42, so a bare feature count is accepted too.
    if isinstance(inp_shape, int):
        inp_shape = (inp_shape,)
    classifier = Sequential()
    classifier.add(Dense(22, activation='softsign', input_shape=tuple(inp_shape)))
    classifier.add(Dropout(0.25))
    classifier.add(Dense(10, activation='softsign'))
    classifier.add(Dropout(0.25))
    classifier.add(Dense(4, activation='softmax'))
    adam = Adam(0.0001)
    classifier.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
    return classifier
def get_weights(classifier):
    """Return whatever ``classifier.get_weights()`` reports."""
    current_weights = classifier.get_weights()
    return current_weights
def set_weights(classifier, weights):
    """Load ``weights`` into ``classifier`` and return that same classifier."""
    classifier.set_weights(weights)
    # The model is updated in place; it is returned only for chaining.
    return classifier
def train_model(classifier, data, target, epoch = 40):
    """Fit ``classifier`` on ``data``/``target`` and return it.

    Training runs for ``epoch`` epochs with a batch size of 32, holds out
    40% of the samples for validation and prints per-epoch progress.
    """
    fit_options = {
        'epochs': epoch,
        'validation_split': 0.4,
        'batch_size': 32,
        'verbose': 1,
    }
    classifier.fit(data, target, **fit_options)
    return classifier
def predict(classifier, data):
    """Return the output of ``classifier.predict`` for ``data``."""
    predictions = classifier.predict(data)
    return predictions
if __name__ == '__main__':
    # Get the data and the target. The file is opened in a ``with`` block so
    # the handle is closed even if unpickling fails.
    # NOTE: only unpickle files you trust; pickle can execute arbitrary code.
    with open('../input/music-features/feat_targ.pickle', 'rb') as feat_file:
        data, target = pickle.load(feat_file)
    # NOTE(review): a loss of nan from the very first epoch usually points at
    # the inputs (NaN/inf values or unscaled features) - verify ``data``
    # before training.

    # if 'model.pickle' not in os.listdir():
    # Generate the classifiers
    cnn_classifier = create_model_cnn((6, 7, 1))
    # ``(42)`` is the int 42, not a tuple; pass a real one-element shape.
    mlp_classifier = create_model_mlp((42,))
    # else:
    #     # Load the existing model (from a pickle dump)
    #     classifier = pickle.load(open('model.pickle', 'rb'))

    tr_data, tst_data, tr_target, tst_target = train_test_split(data, target)
    # Flat (n, 42) arrays feed the MLP; the (n, 6, 7, 1) views feed the CNN.
    tr_data_lin = np.array(tr_data)
    tr_data = tr_data_lin.reshape((tr_data_lin.shape[0], 6, 7, 1))
    tst_data_lin = np.array(tst_data)
    tst_data = tst_data_lin.reshape((tst_data_lin.shape[0], 6, 7, 1))
    enc_target = one_hot_encoding(tr_target)
    # print(tr_data, enc_target)
    # train_model(cnn_classifier, tr_data, enc_target)
    train_model(mlp_classifier, tr_data_lin, enc_target)
    # pickle.dump([cnn_classifier, mlp_classifier], open('model.pickle', 'wb'))
The training data and the test data are from the pickle file where the shape is (15000, 42).

Related

Keep getting NaNs value for scoring when tuning on KerasRegressor

I am trying to tune hyperparameters for a KerasRegressor.
However, every score comes back as NaN, as shown below. What could be causing this?
Everything works fine when I compile my model, but the score reported for the best parameters is always NaN. The metric I use is RMSE.
Code snippet below:
def create_model(optimizer, activation, lstm_unit_1, lstm_unit_2, lstm_unit_3, init='glorot_uniform'):
    """Build and compile the Conv1D + stacked-GRU regressor.

    The input shape is read from the module-level ``trainX`` array
    (``trainX.shape[1:]``).

    Args:
        optimizer: Optimizer passed to ``compile``.
        activation: Activation used by the Conv1D layer and both GRU layers.
        lstm_unit_1: Number of Conv1D filters.
        lstm_unit_2: Units of the first GRU layer.
        lstm_unit_3: Units of the second GRU layer.
        init: Kernel initializer applied to every layer. It used to be
            accepted but ignored; the default equals the Keras default, so
            existing callers get the same model.

    Returns:
        The compiled ``Sequential`` model (MSE loss, MSE metric).
    """
    model = Sequential()
    # Only the first layer of a Sequential model needs ``input_shape``; the
    # copies on the GRU layers described the wrong tensor and were dropped.
    model.add(Conv1D(lstm_unit_1, kernel_size=1, activation=activation,
                     kernel_initializer=init,
                     input_shape=(trainX.shape[1], trainX.shape[2])))
    model.add(GRU(lstm_unit_2, activation=activation,
                  kernel_initializer=init, return_sequences=True))
    model.add(GRU(lstm_unit_3, activation=activation,
                  kernel_initializer=init, return_sequences=True))
    # One value per time step, flattened to a (batch, steps) output.
    model.add(Dense(units=1, kernel_initializer=init))
    model.add(Flatten())
    model.compile(optimizer=optimizer, loss='mse', metrics=['mean_squared_error'])
    return model
# Wrap the model factory so scikit-learn can clone and fit it.
model = tf.keras.wrappers.scikit_learn.KerasRegressor(build_fn=create_model,
                                                      epochs=150,
                                                      verbose=False)

# Candidate values sampled by the randomized search.
batch_size = [16, 32, 64, 128]
lstm_unit_1 = [128, 256, 512]
lstm_unit_2 = lstm_unit_1.copy()
lstm_unit_3 = lstm_unit_1.copy()
optimizer = ['SGD', 'Adam', 'Adamax', 'RMSprop']
activation = ['relu', 'linear', 'sigmoid']
param_grid = dict(lstm_unit_1=lstm_unit_1,
                  lstm_unit_2=lstm_unit_2,
                  lstm_unit_3=lstm_unit_3,
                  optimizer=optimizer,
                  activation=activation,
                  batch_size=batch_size)
warnings.filterwarnings("ignore")
# By default a candidate whose fit or scoring raises is scored as NaN and
# only a warning is emitted, which the filter above hides. 'raise' makes the
# search stop with the real exception instead of reporting NaN scores.
# NOTE: the name ``random`` shadows the standard-library module of that name.
random = RandomizedSearchCV(estimator=model,
                            param_distributions=param_grid,
                            n_jobs=-1,
                            scoring='neg_mean_squared_error',
                            error_score='raise')
random_result = random.fit(trainX, trainY)
print(random_result.best_score_)
print(random_result.best_params_)

How to set the data dimension for GridSearchCV

def rnn_model(self, activation="relu"):
    """Build, compile and summarise the sequence-to-sequence LSTM model.

    A 512-unit LSTM that returns the full sequence is followed by a
    50-unit Dense layer using ``activation``. The input is
    ``(batch, self.seq_len, 50)``. The model is compiled with Adam
    (learning rate 0.001) and mean squared error, and its summary is
    printed before it is returned.
    """
    in_out_neurons = 50
    n_hidden = 512
    recurrent = LSTM(n_hidden,
                     batch_input_shape=(None, self.seq_len, in_out_neurons),
                     return_sequences=True)
    projection = Dense(in_out_neurons, activation=activation)
    model = Sequential()
    model.add(recurrent)
    model.add(projection)
    model.compile(loss="mean_squared_error", optimizer=Adam(learning_rate=0.001))
    model.summary()
    return model
# Then try to fit the model.
# Dummy data: 319083 all-zero samples; inputs are (2, 50), targets are (1, 50).
final_x = np.zeros((319083, 2, 50))
final_y = np.zeros((319083, 1, 50))
# This works.
model = self.rnn_model()
model.fit(
final_x,final_y,
batch_size=400,
epochs=10,
validation_split=0.1
)
# However, when I tried to use a hyperparameter search, it raised the error `ValueError: Invalid shape for y: (319083, 1, 50)`
activation = ["relu","sigmoid"]
# The same build function is wrapped in a scikit-learn classifier wrapper here.
model = KerasClassifier(build_fn=self.rnn_model,verbose=0)
param_grid = dict(activation=activation)
grid = GridSearchCV(estimator=model,param_grid=param_grid)
grid_result= grid.fit(final_x,final_y)
How dimension changes when using GridSearchCV
You should be using a KerasRegressor, since your model is not a classifier in that sense:
import tensorflow as tf
import numpy as np
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasRegressor
def rnn_model(activation="relu"):
    """Build, compile and summarise the sequence-to-sequence LSTM regressor.

    A 512-unit LSTM that returns the full sequence is followed by a
    50-unit Dense layer using ``activation``. The input is
    ``(batch, 2, 50)``. The model is compiled with Adam (learning rate
    0.001) and mean squared error, and its summary is printed before it is
    returned.
    """
    in_out_neurons = 50
    n_hidden = 512
    recurrent = tf.keras.layers.LSTM(
        n_hidden,
        batch_input_shape=(None, 2, in_out_neurons),
        return_sequences=True,
    )
    projection = tf.keras.layers.Dense(in_out_neurons, activation=activation)
    model = tf.keras.Sequential()
    model.add(recurrent)
    model.add(projection)
    model.compile(loss="mean_squared_error",
                  optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))
    model.summary()
    return model
# Targets have the same (2, 50) per-sample shape as the model output.
final_x = np.zeros((319083, 2, 50))
final_y = np.zeros((319083, 2, 50))
activation = ["relu", "sigmoid"]
# The wrapper is handed the build function itself, so the model that used to
# be built by hand here was discarded as soon as ``model`` was reassigned;
# that wasted construction has been removed.
model = KerasRegressor(build_fn=rnn_model, verbose=0)
param_grid = dict(activation=activation)
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(final_x, final_y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))  # run with a way smaller dataset
Best: 0.000000 using {'activation': 'relu'}

How to solve error "ValueError: No gradients provided for any variable" when building CNN for autoencoder?

I'm trying to build a CNN model for Autoencoder. My code contains 3 functions:
Autoencoder contains the layers of my CNN.
make_autoencoder_model initializes the model.
fit_model_on_cifar10 to fit the model on cifar10.
I encounter the error:
ValueError: No gradients provided for any variable: ['conv2d_38/kernel:0', 'conv2d_38/bias:0', 'batch_normalization_34/gamma:0', 'batch_normalization_34/beta:0', 'conv2d_39/kernel:0', 'conv2d_39/bias:0', 'batch_normalization_35/gamma:0', 'batch_normalization_35/beta:0', 'dense_16/kernel:0', 'dense_16/bias:0', 'conv2d_40/kernel:0', 'conv2d_40/bias:0', 'batch_normalization_36/gamma:0', 'batch_normalization_36/beta:0', 'conv2d_41/kernel:0', 'conv2d_41/bias:0', 'batch_normalization_37/gamma:0', 'batch_normalization_37/beta:0', 'dense_17/kernel:0', 'dense_17/bias:0'].
Could you please elaborate on this error and how to fix it?
from keras.datasets import mnist, fashion_mnist, cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.layers import Conv2D, Dense, Dropout, MaxPooling2D, Flatten, Input, Reshape, BatchNormalization, UpSampling2D
import numpy as np
from keras.optimizers import SGD, Adam
##################################### Lenet-like
filters = (64, 32)
image_shape = (32, 32, 3)
class Autoencoder:
    """
    Convolutional autoencoder expressed as a callable.

    Calling an instance on a tensor creates fresh layers every time and
    returns the reconstructed tensor; the filter counts and the output shape
    come from the module-level ``filters`` and ``image_shape``.
    """

    def __init__(self):
        """
        Architecture settings.
        """
        # nothing to do in the init.

    def __call__(self, X):
        """
        Apply the encoder and decoder layers to ``X`` and return the result.
        """
        # encode: conv -> 2x2 max-pool -> batch norm, once per filter count
        encoded = X
        for n_filters in filters:
            encoded = Conv2D(filters=n_filters, kernel_size=(3, 3), activation='relu')(encoded)
            encoded = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='valid')(encoded)
            encoded = BatchNormalization(axis=-1)(encoded)

        # bottleneck: a dense code reshaped to an 8x8x3 map
        code = Flatten()(encoded)
        code = Dense(units=8 * 8 * 3, activation='relu')(code)
        decoded = Reshape(target_shape=(8, 8, 3))(code)

        # decode: conv -> 2x upsampling -> batch norm, filter counts reversed
        for n_filters in filters[::-1]:
            decoded = Conv2D(filters=n_filters, kernel_size=(3, 3), activation='relu')(decoded)
            decoded = UpSampling2D(size=(2, 2))(decoded)
            decoded = BatchNormalization(axis=-1)(decoded)

        # project back to one sigmoid value per pixel channel
        flat = Flatten()(decoded)
        pixels = Dense(units=np.prod(image_shape), activation='sigmoid')(flat)
        return Reshape(target_shape=image_shape)(pixels)
###################################
def make_autoencoder_model():
    """
    Wire the autoencoder onto an ``image_shape`` input and compile it.

    Returns the compiled keras ``Model`` (adam optimizer, mean squared error
    loss, accuracy metric).
    """
    inputs = Input(shape=image_shape)
    reconstruction = Autoencoder()(inputs)
    autoencoder = Model(inputs=inputs, outputs=reconstruction)
    autoencoder.compile(loss='mean_squared_error',
                        optimizer='adam',
                        metrics=['accuracy'])
    return autoencoder
######################################
def fit_model_on_cifar10(n_epochs = 3,
                         batch_size = 128,
                         visualization_size = 5,
                         verbose = 1):
    """
    Train the autoencoder on randomly flipped CIFAR-10 training images.

    Args:
        n_epochs: Number of training epochs.
        batch_size: Images per training batch.
        visualization_size: Currently unused; kept for interface compatibility.
        verbose: When greater than 0, the model summary is printed first.

    Returns:
        The keras ``History`` object from ``fit``, or ``None`` if training
        was interrupted from the keyboard.
    """
    # create your model and call it on your dataset
    model = make_autoencoder_model()
    # create a Keras ImageDataGenerator to handle your dataset
    datagen = ImageDataGenerator(horizontal_flip = True)
    if verbose > 0:
        # summary() prints the table itself and returns None.
        model.summary()
    (x_train, _), _ = cifar10.load_data()
    # Be sure that your training data is 'float32' and between 0 and 1 (pixel image value)
    x_train = x_train.astype('float32') / 255.0

    batches = datagen.flow(x_train, batch_size = batch_size)

    def reconstruction_pairs():
        # fit() needs (inputs, targets). A generator that yields inputs only
        # leaves the loss without a target, which is what produces
        # "No gradients provided for any variable". The augmented batch is
        # used as its own target so input and target stay aligned.
        for batch in batches:
            yield batch, batch

    history = None
    try:
        # The wrapped generator never ends, so the epoch length is given
        # explicitly as the number of batches in one pass over the data.
        history = model.fit(reconstruction_pairs(),
                            steps_per_epoch = len(batches),
                            epochs = n_epochs)
    except KeyboardInterrupt:
        print("Training interrupted!")
    return history
###################################
fit_model_on_cifar10()

trying to pickle ML model can't pickle _thread.RLock objects in google colab

I am training a CNN on the MNIST dataset in Google Colab and want to save the model using pickle, but when I try to save it I get the error: can't pickle _thread.RLock objects
My Code
import pickle
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import keras
from keras.datasets import mnist
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D , MaxPooling2D, Dense, Flatten,Dropout
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
# Split ratios and training settings.
testRatio = 0.2
valRatio = 0.2
# NOTE: MNIST images have one channel; only the first two entries of this
# tuple are read when the model is built.
imageDimensions = (28,28,3)
batchSizeVal = 50
EPOCHS = 2
stepsPerEpoch = 2000

(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size=valRatio)

# Add the channel axis. -1 lets NumPy infer the sample count, so the reshape
# no longer breaks when valRatio (and therefore the split sizes) changes.
X_train = X_train.reshape((-1, 28, 28, 1))
X_test = X_test.reshape((-1, 28, 28, 1))
X_validation = X_validation.reshape((-1, 28, 28, 1))

# Random shifts, zoom, shear and rotation applied to the training images.
dataGen = ImageDataGenerator(width_shift_range=0.1,
                             height_shift_range=0.1,
                             zoom_range=0.2,
                             shear_range=0.1,
                             rotation_range=10)
dataGen.fit(X_train)

# One-hot encode the ten digit classes.
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)
y_validation = to_categorical(y_validation, 10)
def myModel():
    """Build and compile the digit-classification CNN.

    Two 5x5 convolutions (60 filters) and two 3x3 convolutions (30 filters),
    each pair followed by 2x2 max pooling, feed a 500-unit dense layer and a
    10-way softmax; dropout of 0.5 is applied before and after the dense
    layer. The input height and width are read from the module-level
    ``imageDimensions``, with a single channel.

    Returns:
        The compiled ``Sequential`` model (Adam 0.001, categorical
        cross-entropy, accuracy metric).
    """
    n_filters = 60
    large_kernel = (5, 5)
    small_kernel = (3, 3)
    pool_size = (2, 2)
    n_hidden = 500
    model = Sequential()
    model.add(Conv2D(n_filters, large_kernel,
                     input_shape=(imageDimensions[0], imageDimensions[1], 1),
                     activation="relu"))
    model.add(Conv2D(n_filters, large_kernel, activation="relu"))
    model.add(MaxPooling2D(pool_size=pool_size))
    model.add(Conv2D(n_filters // 2, small_kernel, activation="relu"))
    model.add(Conv2D(n_filters // 2, small_kernel, activation="relu"))
    model.add(MaxPooling2D(pool_size=pool_size))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(n_hidden, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))
    # The learning rate is passed positionally: the ``lr`` keyword is
    # deprecated and rejected by newer Keras releases, while the first
    # positional argument is the learning rate in old and new versions.
    model.compile(Adam(0.001), loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
model = myModel()
# Train on augmented batches and validate on the untouched validation split.
history = model.fit(dataGen.flow(X_train, y_train,
                                 batch_size=batchSizeVal),
                    steps_per_epoch=stepsPerEpoch,
                    epochs=EPOCHS,
                    validation_data=(X_validation, y_validation),
                    shuffle=True)

# Loss curves.
plt.figure(1)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.legend(['training ', 'validation'])
plt.title("Loss")
plt.xlabel('epoch')

# Accuracy curves.
plt.figure(2)
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.legend(['training ', 'validation'])
plt.title("Accuracy")
plt.xlabel('epoch')
plt.show()

score = model.evaluate(X_test, y_test, verbose=0)
print("Test Score = ",score[0])
print("Test Accuracy = ",score[1])

# pickle.dump returns None, so its result must not be assigned back to
# ``model``; the ``with`` block closes the file even when the dump raises.
with open("model_trained.pickle", "wb") as pickle_out:
    pickle.dump(model, pickle_out)
What should I do to get this working?
I tried switching the runtime to CPU because I thought the GPU was causing this, but it still does not work.
Keras does not support pickle for serializing its objects (models). Basically, if an object has __getstate__ and __setstate__ methods, pickle uses them to serialize the object; the problem is that the Keras Model class does not implement them.
# Zach Moshe proposed a hotfix for this issue. For more details, please refer to his blog.
# Hotfix function
def make_keras_picklable():
    """Patch the Keras ``Model`` class so its instances can be pickled.

    ``__getstate__`` saves the model to a temporary HDF5 file and pickles
    the file's bytes; ``__setstate__`` writes those bytes back to a
    temporary file, loads the model from it and adopts its ``__dict__``.
    The patch is applied to the class, so it affects every model.
    """
    # Local imports keep this snippet usable on its own.
    import os
    import tempfile

    from tensorflow.keras.models import Model, load_model, save_model

    def __getstate__(self):
        # A private temporary directory is used instead of an open
        # NamedTemporaryFile: a file that is still open cannot be reopened
        # by name on Windows, which is what save_model would have to do.
        with tempfile.TemporaryDirectory() as tmp_dir:
            path = os.path.join(tmp_dir, 'model.hdf5')
            save_model(self, path, overwrite=True)
            with open(path, 'rb') as fd:
                model_str = fd.read()
        return {'model_str': model_str}

    def __setstate__(self, state):
        with tempfile.TemporaryDirectory() as tmp_dir:
            path = os.path.join(tmp_dir, 'model.hdf5')
            with open(path, 'wb') as fd:
                fd.write(state['model_str'])
            model = load_model(path)
        self.__dict__ = model.__dict__

    cls = Model
    cls.__getstate__ = __getstate__
    cls.__setstate__ = __setstate__


# Run the function
make_keras_picklable()
Please refer to the working code below.
import pickle
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import keras
from keras.datasets import mnist
from keras.utils import to_categorical
from tensorflow.keras.models import Sequential, load_model, save_model, Model
from keras.layers import Conv2D , MaxPooling2D, Dense, Flatten,Dropout
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import tempfile
# Split ratios and training settings.
testRatio = 0.2
valRatio = 0.2
# NOTE: MNIST images have one channel; only the first two entries of this
# tuple are read when the model is built.
imageDimensions = (28,28,3)
batchSizeVal = 50
EPOCHS = 2
stepsPerEpoch = 2000

(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size=valRatio)

# Add the channel axis. -1 lets NumPy infer the sample count, so the reshape
# no longer breaks when valRatio (and therefore the split sizes) changes.
X_train = X_train.reshape((-1, 28, 28, 1))
X_test = X_test.reshape((-1, 28, 28, 1))
X_validation = X_validation.reshape((-1, 28, 28, 1))
# Hotfix function
def make_keras_picklable():
    """Patch the Keras ``Model`` class so its instances can be pickled.

    ``__getstate__`` saves the model to a temporary HDF5 file and pickles
    the file's bytes; ``__setstate__`` writes those bytes back to a
    temporary file, loads the model from it and adopts its ``__dict__``.
    The patch is applied to the class, so it affects every model.
    """
    import os

    def __getstate__(self):
        # A private temporary directory is used instead of an open
        # NamedTemporaryFile: a file that is still open cannot be reopened
        # by name on Windows, which is what save_model would have to do.
        with tempfile.TemporaryDirectory() as tmp_dir:
            path = os.path.join(tmp_dir, 'model.hdf5')
            save_model(self, path, overwrite=True)
            with open(path, 'rb') as fd:
                model_str = fd.read()
        return {'model_str': model_str}

    def __setstate__(self, state):
        with tempfile.TemporaryDirectory() as tmp_dir:
            path = os.path.join(tmp_dir, 'model.hdf5')
            with open(path, 'wb') as fd:
                fd.write(state['model_str'])
            model = load_model(path)
        self.__dict__ = model.__dict__

    cls = Model
    cls.__getstate__ = __getstate__
    cls.__setstate__ = __setstate__


# Run the function
make_keras_picklable()
# Random shifts, zoom, shear and rotation applied to the training images.
dataGen = ImageDataGenerator(
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,
    shear_range=0.1,
    rotation_range=10,
)
dataGen.fit(X_train)

# One-hot encode the ten digit classes for all three splits.
y_train, y_test, y_validation = (
    to_categorical(labels, 10) for labels in (y_train, y_test, y_validation)
)
def myModel():
    """Build and compile the digit-classification CNN.

    Two 5x5 convolutions (60 filters) and two 3x3 convolutions (30 filters),
    each pair followed by 2x2 max pooling, feed a 500-unit dense layer and a
    10-way softmax; dropout of 0.5 is applied before and after the dense
    layer. The input height and width are read from the module-level
    ``imageDimensions``, with a single channel.

    Returns:
        The compiled ``Sequential`` model (Adam 0.001, categorical
        cross-entropy, accuracy metric).
    """
    n_filters = 60
    large_kernel = (5, 5)
    small_kernel = (3, 3)
    pool_size = (2, 2)
    n_hidden = 500
    model = Sequential()
    model.add(Conv2D(n_filters, large_kernel,
                     input_shape=(imageDimensions[0], imageDimensions[1], 1),
                     activation="relu"))
    model.add(Conv2D(n_filters, large_kernel, activation="relu"))
    model.add(MaxPooling2D(pool_size=pool_size))
    model.add(Conv2D(n_filters // 2, small_kernel, activation="relu"))
    model.add(Conv2D(n_filters // 2, small_kernel, activation="relu"))
    model.add(MaxPooling2D(pool_size=pool_size))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(n_hidden, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))
    # The learning rate is passed positionally: the ``lr`` keyword is
    # deprecated and rejected by newer Keras releases, while the first
    # positional argument is the learning rate in old and new versions.
    model.compile(Adam(0.001), loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
model = myModel()

# Train on augmented batches, one pass over the training set per epoch,
# and validate on the untouched validation split.
history = model.fit(
    dataGen.flow(X_train, y_train, batch_size=batchSizeVal),
    steps_per_epoch=X_train.shape[0] // batchSizeVal,
    epochs=EPOCHS,
    validation_data=(X_validation, y_validation),
    shuffle=True,
)

# evaluate() returns the loss first and the accuracy metric second.
score = model.evaluate(X_test, y_test, verbose=0)
print("Test Score = ",score[0])
print("Test Accuracy = ",score[1])

# Pickling relies on the __getstate__/__setstate__ hotfix applied earlier.
with open('model.pkl', 'wb') as f:
    pickle.dump(model, f)
Output:
Epoch 1/2
960/960 [==============================] - 338s 352ms/step - loss: 1.0066 - accuracy: 0.6827 - val_loss: 0.1417 - val_accuracy: 0.9536
Epoch 2/2
960/960 [==============================] - 338s 352ms/step - loss: 0.3542 - accuracy: 0.8905 - val_loss: 0.0935 - val_accuracy: 0.9719
Test Score = 0.07476004958152771
Test Accuracy = 0.9761999845504761

RNN: Get prediction from a text input after the model is trained

I am new to RNNs and I have been working on a small binary label classifier. I have been able to get a stable model with satisfactory results.
However, I am having a hard time using the model to classify new inputs and I was wondering if any of you could help me. Please see my code below for reference.
Thank you very much.
from tensorflow.keras import preprocessing
from sklearn.utils import shuffle
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras import models
from tensorflow.keras.layers import LSTM, Activation, Dense, Dropout, Input,
Embedding
from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.keras.preprocessing import sequence, text
from tensorflow.keras.callbacks import EarlyStopping
from matplotlib import pyplot
class tensor_rnn():
    """LSTM classifier that labels finance headlines as positive or negative.

    Construction reads the two headline CSV files, builds the train/test
    split and fits the tokenizer. ``model_train`` builds and compiles the
    network, ``model_test`` fits and evaluates it, and ``predict_texts``
    scores new raw headlines.
    """

    def __init__(self, hidden_layers=3):
        """Load the headline data and prepare padded training sequences.

        Args:
            hidden_layers: Stored as ``self.h_layers``; not used by the
                current architecture.
        """
        self.data_path = 'C:\\\\Users\\cmazz\\PycharmProjects\\InvestmentAnalysis_2.0\\Sentiment\\Finance_Articles\\'
        # self.corp_paths = corpora_paths
        self.h_layers = hidden_layers
        # Whitespace-separated word count of every headline, good then bad.
        self.num_words = []
        good = pd.read_csv(self.data_path + 'GoodO.csv')
        good['Polarity'] = 'pos'
        bad = pd.read_csv(self.data_path + 'BadO.csv')
        bad['Polarity'] = 'neg'
        for frame in (good, bad):
            self.num_words.extend(len(line.split()) for line in frame['Head'].tolist())
        self.features = pd.concat([good, bad]).reset_index(drop=True)
        self.features = shuffle(self.features)
        # Pad to the longest headline measured in words. The previous
        # ``len(max(headlines))`` took the character length of the
        # alphabetically last headline, which is unrelated to sequence length.
        self.max_len = max(self.num_words)
        # self.train, self.test = train_test_split(features, test_size=0.33, random_state=42)
        X = self.features['Head']
        Y = self.features['Polarity']
        le = LabelEncoder()
        Y = le.fit_transform(Y)
        Y = Y.reshape(-1, 1)
        self.X_train, self.X_test, self.Y_train, self.Y_test = train_test_split(X, Y, test_size=0.30)
        # NOTE(review): len(self.num_words) is the number of headlines, not a
        # vocabulary size. It is used consistently as the tokenizer cap and
        # the embedding input size, so it is left as is - confirm the intent.
        self.tok = preprocessing.text.Tokenizer(num_words=len(self.num_words))
        self.tok.fit_on_texts(self.X_train)
        self.sequences_matrix = self.encode_texts(self.X_train)

    def encode_texts(self, texts):
        """Turn raw strings into the padded integer matrix the model expects.

        Uses the tokenizer fitted on the training headlines and pads or
        truncates every sequence to ``self.max_len``.
        """
        sequences = self.tok.texts_to_sequences(texts)
        return sequence.pad_sequences(sequences, maxlen=self.max_len)

    def RNN(self):
        """Return the uncompiled embedding -> LSTM -> dense sigmoid model."""
        inputs = Input(name='inputs', shape=[self.max_len])
        layer = Embedding(len(self.num_words), 30, input_length=self.max_len)(inputs)
        # layer = LSTM(64, return_sequences=True)(layer)
        layer = LSTM(32)(layer)
        layer = Dense(256, name='FC1')(layer)
        layer = Activation('relu')(layer)
        layer = Dropout(0.5)(layer)
        layer = Dense(1, name='out_layer')(layer)
        layer = Activation('sigmoid')(layer)
        model = Model(inputs=inputs, outputs=layer)
        return model

    def model_train(self):
        """Build the network, print its summary and compile it.

        Despite the name, no fitting happens here; see ``model_test``.
        """
        self.model = self.RNN()
        self.model.summary()
        self.model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])  # RMSprop()

    def model_test(self):
        """Fit the compiled model, then evaluate it on the held-out split."""
        self.history = self.model.fit(self.sequences_matrix, self.Y_train, batch_size=100, epochs=3,
                                      validation_split=0.30,
                                      callbacks=[EarlyStopping(monitor='val_loss', min_delta=0.0001)])
        test_sequences_matrix = self.encode_texts(self.X_test)
        accr = self.model.evaluate(test_sequences_matrix, self.Y_test)
        print('Test set\n Loss: {:0.3f}\n Accuracy: {:0.3f}'.format(accr[0], accr[1]))

    def predict_texts(self, texts):
        """Return the model's sigmoid outputs for a list of raw strings.

        The strings are encoded exactly like the training data first; the
        model must already have been built with ``model_train``.
        """
        return self.model.predict(self.encode_texts(texts))
if __name__ == "__main__":
    a = tensor_rnn()
    a.model_train()
    a.model_test()
    a.model.save('C:\\\\Users\\cmazz\\PycharmProjects\\'
                 'InvestmentAnalysis_2.0\\RNN_Model.h5',
                 include_optimizer=True)
    b = models.load_model('C:\\\\Users\\cmazz\\PycharmProjects\\'
                          'InvestmentAnalysis_2.0\\RNN_Model.h5')
    stringy = ['Fund managers back away from Amazon as they cut FANG exposure']
    # The network was trained on padded integer sequences, so a new headline
    # must go through the same fitted tokenizer and be padded to the same
    # length. Passing the raw string array is what raises the
    # "expected inputs to have shape" ValueError.
    stringy_sequences = a.tok.texts_to_sequences(stringy)
    stringy_matrix = sequence.pad_sequences(stringy_sequences, maxlen=a.max_len)
    prediction = b.predict(stringy_matrix)
    print(prediction)
When I run my code I get the following error:
ValueError: Error when checking input: expected inputs to have shape
(39,) but got array with shape (1,)
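Based on the ValueError and the call prediction = b.predict(np.array(stringy)), I think you need to tokenize your input string before predicting: convert it with the same tokenizer that was fitted on the training data (texts_to_sequences), pad it to the training length (pad_sequences with maxlen=max_len), and pass the resulting array to predict.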

Categories

Resources