I use a recurrent network (specifically a GRU) to predict a time series with a length of 90 observations. The data is multivariate, and I follow this example:
Multivariate Time Series
Option 1:
I use Keras to develop the RNN:
# Hold-out split by position: the first 75% of the rows are used for training
# and the remaining rows for testing (no shuffling).
n_train_quarter = int(len(serie) * 0.75)
train = values[:n_train_quarter, :]
test = values[n_train_quarter:, :]
# Everything except the last index along axis 1 is the input; the last index
# is the target.
X_train, y_train = train[:, :-1], train[:, -1]
X_test, y_test = test[:, :-1], test[:, -1]
# NOTE(review): input_shape below reads X_train.shape[2], so X_train must be
# 3-D (samples, timesteps, features) by this point; that reshape is not shown
# in this snippet -- confirm.

# All parameters can be changed: kernel, activation, optimizer, ...
model = Sequential()
model.add(GRU(64, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))
model.add(Dropout(0.5))
# n is the number of additional GRU/Dropout pairs (chosen at random).
for i in range(n):
    model.add(GRU(64, kernel_initializer='uniform', return_sequences=True))
    model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(1))
# Regression head: softmax over a single unit always outputs 1.0, which makes
# the prediction constant; a linear output lets the MSE loss fit real values.
model.add(Activation('linear'))

# Compile and fit
model.compile(loss='mean_squared_error', optimizer='SGD')
early_stopping = EarlyStopping(monitor='val_loss', patience=50)
checkpointer = ModelCheckpoint(filepath=Checkpoint_mode, verbose=0, save_weights_only=False, save_best_only=True)
# shuffle=False keeps the rows in their original order during training;
# validation_split takes the last 25% of the training rows for validation.
model.fit(X_train, y_train,
          batch_size=256,
          epochs=64,
          validation_split=0.25,
          callbacks=[early_stopping, checkpointer],
          verbose=0,
          shuffle=False)
The result with the lowest error looks like the image (several experiments gave the same result).
Option 2:
# NOTE: train_test_split shuffles the rows by default, so this split does not
# preserve the temporal order of the series (unlike a positional split).
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=42)

# All parameters can be changed: kernel, activation, optimizer, ...
model = Sequential()
model.add(GRU(64, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))
model.add(Dropout(0.5))
# n is the number of additional GRU/Dropout pairs (chosen at random).
for i in range(n):
    model.add(GRU(64, kernel_initializer='uniform', return_sequences=True))
    model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(1))
# Regression head: softmax over a single unit always outputs 1.0, which makes
# the prediction constant; a linear output lets the MSE loss fit real values.
model.add(Activation('linear'))

# Compile and fit
model.compile(loss='mean_squared_error', optimizer='SGD')
early_stopping = EarlyStopping(monitor='val_loss', patience=50)
checkpointer = ModelCheckpoint(filepath=Checkpoint_mode, verbose=0, save_weights_only=False, save_best_only=True)
# shuffle=False only controls batch order inside fit(); the rows were already
# shuffled by train_test_split above.
model.fit(X_train, y_train,
          batch_size=256,
          epochs=64,
          validation_split=0.25,
          callbacks=[early_stopping, checkpointer],
          verbose=0,
          shuffle=False)
And the result with the lowest error prints as:
Can I use sklearn's "train_test_split", which selects the data randomly?
Why is the result better with sequential data than with randomly selected data, if a GRU is better with sequential data?
Related
I want to evaluate the following deep learning model.
# Train and evaluate the same architecture 10 times from scratch, keeping the
# text classification report of every run.
lst = []
for i in range(10):
    # A new model is built on every iteration so each run starts from fresh
    # randomly initialised weights.
    model = Sequential()
    model.add(Dense(64, kernel_initializer='he_normal', input_dim=X_train.shape[1], activation='relu'))
    model.add(Dropout(0.7))
    model.add(Dense(64, kernel_initializer='he_normal', activation='relu'))
    model.add(Dropout(0.7))
    model.add(Dense(1, activation='sigmoid'))
    model.summary()
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['Recall'])
    early_stopping = EarlyStopping(monitor='val_loss', patience=10)
    history = model.fit(X_train, y_train, epochs=150,
                        batch_size=128, validation_data=(X_val, y_val), callbacks=[early_stopping])
    # Generating predictions on the test set
    y_pred = model.predict(X_test)
    # Turn the sigmoid probabilities into hard 0/1 decisions.
    y_pred = (y_pred > 0.5)
    clsf = classification_report(y_test, y_pred)
    lst.append(clsf)
    print(clsf)
With this, I run the model 10 times and afterwards take the average of the recall metric.
I am wondering whether this is the right procedure or whether I should do it some other way.
Any suggestions? Thanks
Hi, I have already tuned my hyperparameters and would like to perform k-fold cross-validation for my model. I have been looking at different methods, but none of them seem to work for me. The code is below:
# Reduce TensorFlow log noise (Python logger and native-library log level).
tf.get_logger().setLevel(logging.ERROR)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Seed every random number generator in use with the same value so that
# results are repeatable.
RANDOM_SEED = 3
for _seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_fn(RANDOM_SEED)

# Class names; the number of classes is derived from this list.
classes_values = ["nearmiss", "normal"]
classes = len(classes_values)

# Shift the labels down by one before one-hot encoding them.
Y = tf.keras.utils.to_categorical(Y - 1, classes)

# Hold out 20% of the samples; the held-out part doubles as validation data.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=1,
)

input_length = X_train[0].shape[0]

train_dataset = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
validation_dataset = tf.data.Dataset.from_tensor_slices((X_test, Y_test))
def get_reshape_function(reshape_to):
    """Return a mapping function that reshapes the first element of a pair.

    Args:
        reshape_to: Target shape passed to ``tf.reshape``.

    Returns:
        A function ``reshape(image, label)`` that returns
        ``(tf.reshape(image, reshape_to), label)``; the label is passed
        through unchanged.
    """
    def reshape(image, label):
        return tf.reshape(image, reshape_to), label

    return reshape
# Stop training once val_loss has not improved for 3 epochs.
callbacks = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, mode="auto")

# Fully connected classifier: four tanh layers with L1 activity regularisation,
# each followed by dropout, and a softmax output with one unit per class.
model = Sequential()
model.add(Dense(200, activation='tanh',
                activity_regularizer=tf.keras.regularizers.l1(0.0001)))
model.add(Dropout(0.3))
model.add(Dense(44, activation='tanh',
                activity_regularizer=tf.keras.regularizers.l1(0.0001)))
model.add(Dropout(0.3))
model.add(Dense(68, activation='tanh',
                activity_regularizer=tf.keras.regularizers.l1(0.0001)))
model.add(Dropout(0.3))
model.add(Dense(44, activation='tanh',
                activity_regularizer=tf.keras.regularizers.l1(0.0001)))
model.add(Dropout(0.3))
model.add(Dense(classes, activation='softmax', name='y_pred'))

# this controls the learning rate
opt = Adam(learning_rate=0.0002, beta_1=0.9, beta_2=0.999)
# this controls the batch size, or you can manipulate the tf.data.Dataset objects yourself
BATCH_SIZE = 32
train_dataset = train_dataset.batch(BATCH_SIZE, drop_remainder=False)
validation_dataset = validation_dataset.batch(BATCH_SIZE, drop_remainder=False)

# train the neural network
# The labels are one-hot and the output is a softmax over `classes` units, so
# categorical cross-entropy is the matching loss. For exactly two classes it
# gives the same value as binary cross-entropy, and it stays correct if more
# classes are added to classes_values.
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
# fit() documents `callbacks` as a list of callback instances.
history = model.fit(train_dataset, epochs=50, validation_data=validation_dataset, verbose=2, callbacks=[callbacks])
# Evaluate on the whole held-out set as one batch; metrics_names labels the
# values it returns.
model.test_on_batch(X_test, Y_test)
model.metrics_names

# Use this flag to disable per-channel quantization for a model.
# This can reduce RAM usage for convolutional models, but may have
# an impact on accuracy.
disable_per_channel_quantization = False
I would appreciate it if someone could guide me on this, as I am very new to TensorFlow and neural networks.
I haven't tested it, but this should roughly be what you want. You use the sklearn KFold method to split the dataset into different folds, and then you simply fit the model on the current fold.
# Reduce TensorFlow log noise (Python logger and native-library log level).
tf.get_logger().setLevel(logging.ERROR)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Seed every random number generator in use with the same value so that
# results are repeatable.
RANDOM_SEED = 3
for _seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    _seed_fn(RANDOM_SEED)

# Class names; the number of classes is derived from this list.
classes_values = ["nearmiss", "normal"]
classes = len(classes_values)

# Shift the labels down by one before one-hot encoding them.
Y = tf.keras.utils.to_categorical(Y - 1, classes)
def get_reshape_function(reshape_to):
    """Return a mapping function that reshapes the first element of a pair.

    Args:
        reshape_to: Target shape passed to ``tf.reshape``.

    Returns:
        A function ``reshape(image, label)`` that returns
        ``(tf.reshape(image, reshape_to), label)``; the label is passed
        through unchanged.
    """
    def reshape(image, label):
        return tf.reshape(image, reshape_to), label

    return reshape
# Stop training once val_loss has not improved for 3 epochs.
callbacks = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, mode="auto")


def create_model():
    """Build a new, uncompiled classifier.

    The network stacks four tanh Dense layers (200, 44, 68 and 44 units), each
    with L1 activity regularisation and followed by 30% dropout, and ends in a
    softmax layer named ``y_pred`` with ``classes`` units.

    Returns:
        A freshly constructed ``Sequential`` model; every call creates new
        layers, so models from different calls share no weights.
    """
    model = Sequential()
    # Hidden layers differ only in width, so build them in a loop. A new
    # regularizer object is created for each layer, as in the unrolled form.
    for units in (200, 44, 68, 44):
        model.add(Dense(units, activation='tanh',
                        activity_regularizer=tf.keras.regularizers.l1(0.0001)))
        model.add(Dropout(0.3))
    model.add(Dense(classes, activation='softmax', name='y_pred'))
    return model
# this controls the batch size, or you can manipulate the tf.data.Dataset objects yourself
BATCH_SIZE = 32

kf = KFold(n_splits=5)

# One entry per fold: the values returned by test_on_batch (loss, then metrics).
fold_scores = []

# Loop over the dataset to create separate folds
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = Y[train_index], Y[test_index]

    input_length = X_train[0].shape[0]

    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    validation_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))

    train_dataset = train_dataset.batch(BATCH_SIZE, drop_remainder=False)
    validation_dataset = validation_dataset.batch(BATCH_SIZE, drop_remainder=False)

    # Create a new model instance
    model = create_model()

    # this controls the learning rate
    # A fresh optimizer is created for every fold: an optimizer keeps
    # per-variable state and an iteration counter, so sharing one instance
    # between independently created models carries state across folds (and
    # newer Keras versions refuse an optimizer built for other variables).
    opt = Adam(learning_rate=0.0002, beta_1=0.9, beta_2=0.999)

    # One-hot labels with a softmax output: categorical cross-entropy is the
    # matching loss (identical to binary cross-entropy for exactly 2 classes).
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

    # train the model on the current fold
    history = model.fit(train_dataset, epochs=50, validation_data=validation_dataset, verbose=2, callbacks=callbacks)

    # Score the held-out fold as a single batch and keep the result so the
    # folds can be aggregated afterwards.
    fold_scores.append(model.test_on_batch(X_test, y_test))

# Cross-validated estimate: mean of each reported value over the folds.
mean_scores = np.mean(fold_scores, axis=0)
print(mean_scores)
I'm trying to classify images as cats, dogs, or pandas. `data` contains all of the images (cats + dogs + pandas) and `labels` contains their labels. However, when I fit the model, val_loss and val_accuracy do not show up; the only metrics shown in each epoch are loss and accuracy. I have no clue why they are not showing up, but I have a feeling it's because I don't pass validation_data, so I passed X_test.all() as validation_data, but val_loss and val_accuracy still do not show up. What should I do?
# Scale the pixel values by 1/255 and turn the labels into an array.
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)

# Keep 20% of the samples aside as a test set.
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2)

# Four Conv2D + MaxPooling2D stages with 32, 64, 128 and 256 filters; only the
# first convolution declares the input shape.
conv_stack = [
    tf.keras.layers.Conv2D(32, (2,2), activation = 'relu', input_shape= (height, width, n_channels)),
    tf.keras.layers.MaxPooling2D(2,2),
]
for n_filters in (64, 128, 256):
    conv_stack.append(tf.keras.layers.Conv2D(n_filters, (2,2), activation= 'relu'))
    conv_stack.append(tf.keras.layers.MaxPooling2D(2,2))

# Dense head: one hidden layer, then a 3-way softmax.
classifier_head = [
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation= 'relu'),
    tf.keras.layers.Dense(3, activation= 'softmax'),
]

model = tf.keras.models.Sequential(conv_stack + classifier_head)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# One-hot encode the training labels for the 3-unit softmax output.
y_train = np_utils.to_categorical(y_train, 3)

# No validation data is passed to this call.
model.fit(X_train, y_train, batch_size=32, epochs=25, verbose=1)
You forgot to pass the validation data to model.fit:
# Passing validation_data makes fit() report val_* values for every epoch.
model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=25,
    verbose=1,
    validation_data=(X_test, y_test),
)
You forgot to convert the y_test variable to categorical (one-hot) form. Add this line before calling fit:
# One-hot encode the test labels into 3 classes.
y_test = np_utils.to_categorical(y_test, num_classes=3)
# Single 5x5 convolution over 5x5x1 inputs, flattened into a sigmoid output.
model = Sequential()
model.add(Conv2D(50, (5,5), activation='relu', input_shape =(5,5,1), kernel_initializer='he_normal'))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))
model.summary()

# compile the model
model.compile(loss='binary_crossentropy', optimizer= 'adam', metrics=['accuracy'])

# Weight file path; {epoch:02d} is filled in by ModelCheckpoint. Raw strings
# are used for every piece that contains a backslash so that no backslash is
# read as an escape sequence (the resulting path is unchanged).
weights_path = (
    r'C:\Users\globo\Desktop\Test_CNN\Results\Kernel5x5\Weights'
    + '\\' + test
    + r'\model_test{epoch:02d}.h5'
)
# save_freq='epoch' writes the weights once at the end of every epoch. An
# integer save_freq counts batches, so save_freq=1 would rewrite the file
# after every single batch.
model_checkpoint = ModelCheckpoint(weights_path, save_freq='epoch', save_weights_only=True)

# fit the model
history = model.fit(X_train, Y_train, epochs=10, batch_size=32, verbose=1, callbacks=[model_checkpoint], shuffle=True, validation_split=0.5)
I'm already extracting weights for each epoch with "ModelCheckpoint", but how can I extract flatten layer output for each epoch and save them?
Doing this with Sequential models is not feasible.
You should use the functional API:
# Build the layer graph once with the functional API so that two models can
# share it.
inputs = Input((5,5,1))
conv_features = Conv2D(50, (5,5), activation='relu', kernel_initializer='he_normal')(inputs)
flat_features = Flatten()(conv_features)
outputs = Dense(1, activation='sigmoid')(flat_features)

# Trainable model: same layers as the Sequential version.
main_model = Model(inputs, outputs)
# Probe model: ends at the Flatten layer. It is never trained on its own, so
# it does not need to be compiled; it only exposes that layer's output.
flatten_model = Model(inputs, flat_features)

main_model.compile(loss='binary_crossentropy', optimizer= 'adam', metrics=['accuracy'])
history = main_model.fit(
    X_train,
    Y_train,
    epochs=10,
    batch_size=32,
    verbose=1,
    callbacks=[model_checkpoint],
    shuffle=True,
    validation_split=0.5,
)
To see the flatten layer's output:
# Run the model that ends at the Flatten layer to get that layer's output for X.
flatten_model.predict(X)
I am trying to run an LSTM using Keras on my custom feature set. I have train and test features in separate files. Each CSV file contains 11 columns, with the last column as the class label. There are 40 classes in total in my dataset. The problem is that I am not able to figure out the correct input_shape for the first layer. I have searched Stack Overflow and GitHub but am still not able to solve this.
Below is my complete code.
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
# Seed NumPy's random number generator.
numpy.random.seed(7)

# Each file has 11 columns: columns 0-9 are features, column 10 is the label.
train_dataset = numpy.loadtxt("train.csv", delimiter=",")
X_train, y_train = train_dataset[:, :10], train_dataset[:, 10]

test_dataset = numpy.loadtxt("test.csv", delimiter=",")
X_test, y_test = test_dataset[:, :10], test_dataset[:, 10]

# Three stacked LSTM layers; the first two return full sequences so the next
# LSTM receives a sequence. input_shape is given the shape of the whole
# X_train array here.
model = Sequential([
    LSTM(32, return_sequences=True, input_shape=X_train.shape),
    LSTM(32, return_sequences=True),
    LSTM(32),
    Dense(1, activation='softmax'),
])
model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['accuracy'])

model.fit(X_train, y_train, batch_size=10, epochs=1)

# evaluate() returns the loss followed by the compiled metric.
score, acc = model.evaluate(X_test, y_test, batch_size=10)
print('Test score:', score)
print('Test accuracy:', acc * 100)
Whatever I change in the input_shape parameter, I get an error either in the first LSTM layer or in the fit method.
You don't have a time dimension in your input.
The input for an RNN should be (batch_size, time_step, features), while your input has dimensions (batch_size, features).
If you want to use your 10 columns one at a time, you should reshape the array with
# Append a trailing axis of size 1: (rows, columns) -> (rows, columns, 1).
numpy.reshape(train_dataset, (-1, train_dataset.shape[1], 1))
Try this code:
# Columns 0-9 are the features and column 10 is the class label.
NUM_FEATURES = 10
# The dataset is described as having 40 classes.
NUM_CLASSES = 40

train_dataset = numpy.loadtxt("train.csv", delimiter=",")
# Only the features get the extra axis: (samples, 10) -> (samples, 10, 1),
# i.e. 10 time steps with 1 feature each. The labels stay one value per row.
X_train = train_dataset[:, :NUM_FEATURES].reshape(-1, NUM_FEATURES, 1)
y_train = train_dataset[:, NUM_FEATURES]

test_dataset = numpy.loadtxt("test.csv", delimiter=",")
X_test = test_dataset[:, :NUM_FEATURES].reshape(-1, NUM_FEATURES, 1)
y_test = test_dataset[:, NUM_FEATURES]

model = Sequential()
# input_shape excludes the batch axis: (time_steps, features_per_step).
model.add(LSTM(32, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(32))
# One softmax unit per class. A softmax over a single unit always outputs 1.0,
# so Dense(1, softmax) with an MSE loss cannot separate the classes.
model.add(Dense(NUM_CLASSES, activation='softmax'))
# Sparse categorical cross-entropy takes integer class ids directly.
# NOTE(review): assumes the labels are integers in [0, NUM_CLASSES); if the
# file uses 1..40, subtract 1 from y_train / y_test first -- confirm.
model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])

model.fit(X_train, y_train, batch_size=10, epochs=1)

score, acc = model.evaluate(X_test, y_test, batch_size=10)
print('Test score:', score)
print('Test accuracy:', acc * 100)