The model is as below:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Conv1D, Dense

inputs_1 = keras.Input(shape=(10081, 1))
layer1 = Conv1D(64, 14)(inputs_1)
layer2 = layers.MaxPool1D(5)(layer1)
layer3 = Conv1D(64, 14)(layer2)
layer4 = layers.GlobalMaxPooling1D()(layer3)
inputs_2 = keras.Input(shape=(85,))
layer5 = layers.concatenate([layer4, inputs_2])
layer6 = Dense(128, activation='relu')(layer5)
layer7 = Dense(2, activation='softmax')(layer6)
model_2 = keras.models.Model(inputs=[inputs_1, inputs_2], outputs=[layer7])
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df.iloc[:, 0:10166], df[['Result_cat', 'Result_cat1']].values, test_size=0.2)
X_train = X_train.to_numpy()
X_train = X_train.reshape([X_train.shape[0], X_train.shape[1], 1])
X_train_1 = X_train[:,0:10081,:]
X_train_2 = X_train[:,10081:10166,:].reshape(736,85)
X_test = X_test.to_numpy()
X_test = X_test.reshape([X_test.shape[0], X_test.shape[1], 1])
X_test_1 = X_test[:,0:10081,:]
X_test_2 = X_test[:,10081:10166,:].reshape(185,85)
adam = keras.optimizers.Adam(learning_rate=0.0005)
model_2.compile(loss = 'categorical_crossentropy', optimizer = adam, metrics = ['acc'])
history = model_2.fit([X_train_1,X_train_2], y_train, epochs = 120, batch_size = 256, validation_split = 0.2, callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss', patience=20)])
Questions:
1) The data is 921 rows x 10166 columns. Each row is an observation (the first 10080 columns are a time series; the remaining columns are other statistical features). Given the model, is the input data split into inputs_1 and inputs_2 randomly?
2) I am thinking about doing k-fold cross-validation while still splitting the input data into inputs_1 and inputs_2. What is a good way to do this? Thanks
By splitting only the indexes, then slicing both inputs with the same index arrays:
from sklearn.model_selection import KFold
import numpy as np

num_folds = 5
kfold = KFold(n_splits=num_folds, shuffle=False)
ID_Inp = np.array(range(nSamples))  # nSamples = total number of rows, e.g. 921

# Input1, Input2 and Output are the full arrays (all samples); only the
# index array is split, and both inputs are sliced with the same indexes.
for IDs_Train, IDs_Test in kfold.split(ID_Inp):
    Fold_Train_Input1, Fold_Train_Input2 = Input1[IDs_Train], Input2[IDs_Train]
    Fold_Train_OutPut = Output[IDs_Train]
    Fold_Test_Input1, Fold_Test_Input2 = Input1[IDs_Test], Input2[IDs_Test]
    Fold_Test_OutPut = Output[IDs_Test]
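Inside the loop, each fold can then be trained and scored on its own slices. A minimal sketch, assuming a hypothetical build_model() helper that reconstructs the two-input model above so each fold starts from fresh weights:

for IDs_Train, IDs_Test in kfold.split(ID_Inp):
    model = build_model()  # hypothetical helper: rebuilds the Conv1D/Dense model above
    model.fit([Input1[IDs_Train], Input2[IDs_Train]], Output[IDs_Train],
              epochs=120, batch_size=256,
              callbacks=[keras.callbacks.EarlyStopping(monitor='loss', patience=20)])
    loss, acc = model.evaluate([Input1[IDs_Test], Input2[IDs_Test]], Output[IDs_Test])
    print('fold accuracy:', acc)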
####################
Related
I'm trying to figure out the difference between using a pretrained model from TensorFlow Hub versus using the very same architecture from tf.keras.applications. I've tried training two models with the same architecture, one from TF Hub and the other from tf.keras.applications, which should yield comparable results; however, the results are vastly different. Could you please explain the difference?
Here are examples of the two models.
import tensorflow as tf
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dense
from tensorflow.keras.optimizers import Adam

base_model = tf.keras.applications.EfficientNetB0(include_top=False)
base_model.trainable = False
inputs = Input(shape=(224, 224, 3), name='input_layer')
x = base_model(inputs, training=False)
x = GlobalAveragePooling2D(name='global_avg_pool_layer')(x)
outputs = Dense(len(class_names), activation='softmax', name='output_layer')(x)
model_1 = tf.keras.Model(inputs, outputs)
model_1.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])
history_1 = model_1.fit(train_data_all_10_percent,
                        epochs=10,
                        validation_data=test_data,
                        validation_steps=int(0.15 * len(test_data)))
And the second:
import tensorflow_hub as hub
from tensorflow.keras import Sequential

efficientnet_url = 'https://tfhub.dev/tensorflow/efficientnet/b0/feature-vector/1'

def create_model(model_url, num_classes=10):
    feature_extractor_layer = hub.KerasLayer(model_url, trainable=False, name='feature_extraction_layer', input_shape=IMG_SIZE + (3,))
    model = Sequential([
        feature_extractor_layer,
        Dense(num_classes, activation='softmax', name='output_layer')
    ])
    return model

efficientnet_model = create_model(efficientnet_url, num_classes=len(class_names))
efficientnet_model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])
efficientnet_history = efficientnet_model.fit(train_data_all_10_percent,
                                              epochs=10,
                                              validation_data=test_data,
                                              validation_steps=int(0.15 * len(test_data)))
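One difference worth ruling out (an assumption on my part, not something shown in the code): recent tf.keras.applications EfficientNet models include input rescaling inside the model and expect raw pixels in [0, 255], while the TF Hub b0 feature vector expects inputs scaled to [0, 1]. If train_data_all_10_percent yields raw pixels, a sketch of rescaling for the Hub model would be:

# Sketch, assuming the dataset yields raw [0, 255] pixels: rescale before
# the Hub feature extractor so both models see comparable inputs.
model = Sequential([
    tf.keras.layers.Rescaling(1. / 255, input_shape=IMG_SIZE + (3,)),
    hub.KerasLayer(efficientnet_url, trainable=False, name='feature_extraction_layer'),
    Dense(len(class_names), activation='softmax', name='output_layer')
])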
I am trying to create a movie recommendation system by training the neural collaborative filtering (NCF) network on the MovieLens dataset. My implementation of NCF is
from tensorflow.keras.layers import (Input, Embedding, Flatten, Dot, Concatenate,
                                     Dense, BatchNormalization, Dropout)
from tensorflow.keras.models import Model

def NCF(num_users, num_items, gmf_embedding_dim, mlp_embedding_dim):
    # Define input vectors for embedding
    u_input = Input(shape=[1,])
    i_input = Input(shape=[1,])
    # GMF embedding
    u_embedding_gmf = Embedding(input_dim=num_users, output_dim=gmf_embedding_dim)(u_input)
    u_vec_gmf = Flatten()(u_embedding_gmf)
    i_embedding_gmf = Embedding(input_dim=num_items, output_dim=gmf_embedding_dim)(i_input)
    i_vec_gmf = Flatten()(i_embedding_gmf)
    # MLP embedding
    u_embedding_mlp = Embedding(input_dim=num_users, output_dim=mlp_embedding_dim)(u_input)
    u_vec_mlp = Flatten()(u_embedding_mlp)
    i_embedding_mlp = Embedding(input_dim=num_items, output_dim=mlp_embedding_dim)(i_input)
    i_vec_mlp = Flatten()(i_embedding_mlp)
    # GMF path
    gmf_output = Dot(axes=1)([u_vec_gmf, i_vec_gmf])
    # MLP path
    mlp_input_concat = Concatenate()([u_vec_mlp, i_vec_mlp])
    mlp_dense_1 = Dense(units=128, activation="relu")(mlp_input_concat)
    mlp_bn_1 = BatchNormalization()(mlp_dense_1)
    mlp_drop_1 = Dropout(0.3)(mlp_bn_1)
    mlp_dense_2 = Dense(units=64, activation="relu")(mlp_drop_1)
    mlp_bn_2 = BatchNormalization()(mlp_dense_2)
    mlp_output = Dropout(0.3)(mlp_bn_2)
    # Concatenate GMF and MLP pathways
    paths_concat = Concatenate()([gmf_output, mlp_output])
    # Prediction
    output = Dense(units=1, activation="sigmoid")(paths_concat)
    # Create model
    return Model(inputs=[u_input, i_input], outputs=output)
I created a function to handle my training
from os.path import isfile, join
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger, EarlyStopping, ReduceLROnPlateau

def train(model, x_train, y_train, x_valid, y_valid, batch_size, epochs, save_name,
          checkpoint_path, history_path, lr=0.001, lr_decay=True):
    # Skip training if a history file for this run already exists
    if isfile(join(history_path, save_name)):
        return
    model.compile(loss=BinaryCrossentropy(), optimizer=Adam(learning_rate=lr),
                  metrics=["accuracy"])
    best_checkpoint = ModelCheckpoint(filepath=join(checkpoint_path, save_name),
                                      monitor="val_loss",
                                      save_best_only=True)
    history_csv = CSVLogger(join(history_path, save_name))
    early_stop = EarlyStopping(monitor="val_loss",
                               patience=30,
                               restore_best_weights=True)
    lr_decay_callback = ReduceLROnPlateau(monitor="val_loss",
                                          patience=10,
                                          factor=0.5,
                                          min_lr=0.000001)
    callback_list = [best_checkpoint, history_csv, early_stop]
    if lr_decay:
        callback_list.append(lr_decay_callback)
    model.fit(x=x_train, y=y_train, validation_data=(x_valid, y_valid),
              epochs=epochs, callbacks=callback_list, batch_size=batch_size)
I encoded the user_ID and movie_ID values for the embedding layers with:
from sklearn.preprocessing import LabelEncoder

enc = LabelEncoder()
train_set["user_ID"] = enc.fit_transform(train_set["user_ID"].values)
enc = LabelEncoder()
train_set["movie_ID"] = enc.fit_transform(train_set["movie_ID"].values)
enc = LabelEncoder()
valid_set["user_ID"] = enc.fit_transform(valid_set["user_ID"].values)
enc = LabelEncoder()
valid_set["movie_ID"] = enc.fit_transform(valid_set["movie_ID"].values)
enc = LabelEncoder()
test_set["user_ID"] = enc.fit_transform(test_set["user_ID"].values)
enc = LabelEncoder()
test_set["movie_ID"] = enc.fit_transform(test_set["movie_ID"].values)
Then I initiated the training with:
train(model=NCF(num_users=train_set["user_ID"].nunique() + 1,
                num_items=train_set["movie_ID"].nunique() + 1,
                gmf_embedding_dim=10, mlp_embedding_dim=10),
      x_train=[train_set["user_ID"], train_set["movie_ID"]],
      y_train=train_set["interaction"],
      x_valid=[valid_set["user_ID"], valid_set["movie_ID"]],
      y_valid=valid_set["interaction"],
      batch_size=train_set.shape[0] // 10, epochs=50, save_name="NCF_1",
      checkpoint_path="D:/Movie Recommendation System Project/model data/checkpoints",
      history_path="D:/Movie Recommendation System Project/model data/training history")
The training appeared to go fine until the very last batch of the first epoch, where I received the error:
InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: indices[10,0] = 101102 is not in [0, 101102)
[[node functional_9/embedding_16/embedding_lookup (defined at D:/Movie Recommendation System Project/architecture and training\training_and_evaluation.py:38) ]]
[[functional_9/embedding_18/embedding_lookup/_16]]
(1) Invalid argument: indices[10,0] = 101102 is not in [0, 101102)
[[node functional_9/embedding_16/embedding_lookup (defined at D:/Movie Recommendation System Project/architecture and training\training_and_evaluation.py:38) ]]
0 successful operations.
0 derived errors ignored. [Op:__inference_test_function_529078]
This is very similar to the error I received at the end of my previous attempt, where the value was 101101 instead of 101102. As a naive fix, I tried adding 1 to my values for num_users and num_items, but the values in the error message simply increased by 1 as well. I feel like I am missing something obvious or fundamental about embedding layers here. Could anyone help?
I believe this error occurred because the embedding layer encountered an index it did not expect. When you call the NCF function you pass the number of unique users in the training set only, but the validation data can contain IDs outside that range (the trace comes from the test function). Instead, calculate the number of unique users in the full data set and pass that to the NCF function.
For example:
total_num_users = train_set["user_ID"].nunique() + valid_set["user_ID"].nunique() + test_set["user_ID"].nunique()
train(model=NCF(num_users=total_num_users,
                num_items=train_set["movie_ID"].nunique() + 1,
                gmf_embedding_dim=10, mlp_embedding_dim=10),
      x_train=[train_set["user_ID"], train_set["movie_ID"]],
      y_train=train_set["interaction"],
      x_valid=[valid_set["user_ID"], valid_set["movie_ID"]],
      y_valid=valid_set["interaction"],
      batch_size=train_set.shape[0] // 10, epochs=50, save_name="NCF_1",
      checkpoint_path="D:/Movie Recommendation System Project/model data/checkpoints",
      history_path="D:/Movie Recommendation System Project/model data/training history")
Make sure to follow the same approach to other categorical variables that you are embedding.
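A variant of the same idea, as a sketch: instead of fitting a separate LabelEncoder per split as in the posted code, fit one encoder on the union of IDs so every split maps a given user_ID to the same integer inside the embedding range:

import pandas as pd
from sklearn.preprocessing import LabelEncoder

# One encoder fitted on all splits keeps the integer IDs consistent and
# bounded, so the embedding lookup can never see an out-of-range index.
user_enc = LabelEncoder()
user_enc.fit(pd.concat([train_set["user_ID"], valid_set["user_ID"], test_set["user_ID"]]))
train_set["user_ID"] = user_enc.transform(train_set["user_ID"])
valid_set["user_ID"] = user_enc.transform(valid_set["user_ID"])
test_set["user_ID"] = user_enc.transform(test_set["user_ID"])
num_users = len(user_enc.classes_)  # safe input_dim for the user embedding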
I'm using Python. How can GridSearchCV or RandomizedSearchCV be easily integrated into the network training?
I receive the following error code:
AttributeError: 'RandomizedSearchCV' object has no attribute 'fit_generator'
Does anyone have a simple idea to get around this error?
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

def train_test_splitting(df):
    x_train, x_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=123, shuffle=False)
    return [x_train, x_test, y_train, y_test]

def preprocessing_nn(df, time_period_window, num_oberservations_per_batch, number_training_features):
    win_length = time_period_window  # how many data points per window
    batch_size = num_oberservations_per_batch  # how many observations per iteration
    num_features = number_training_features
    nn_train_generator = TimeseriesGenerator(x_train, y_train, length=win_length, sampling_rate=1, batch_size=batch_size)
    nn_test_generator = TimeseriesGenerator(x_test, y_test, length=win_length, sampling_rate=1, batch_size=batch_size)
    return [nn_train_generator, nn_test_generator]
#Applying Functions:
x_train,x_test, y_train, y_test = train_test_splitting(df)
nn_train_generator,nn_test_generator = preprocessing_nn(df,time_period_window, num_oberservations_per_batch ,number_training_features)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense

def create_LSTM_network(optimizer='adam'):
    model = Sequential()
    model.add(LSTM(units=120, return_sequences=True, input_shape=(time_period_window, number_training_features)))
    model.add(Dropout(0.5))
    model.add(LSTM(units=120, return_sequences=False))
    model.add(Dense(1, activation="relu"))
    model.compile(loss='mse', optimizer=optimizer, metrics=['mae'])
    return model
# Scikit-learn wrapper to allow randomized CV over the Keras model
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
LSTM_neural_network = KerasClassifier(build_fn=create_LSTM_network, verbose=1)
# Create hyperparameter space
from scipy.stats import randint as sp_randInt
from sklearn.model_selection import RandomizedSearchCV
epochs = sp_randInt(10, 500)
batches = sp_randInt(10, 100)
optimizers = ['rmsprop','adam','sgd']
hyperparameters = dict(optimizer = optimizers, epochs=epochs, batch_size = batches)
#RandomSearchCV - Creation
grid = RandomizedSearchCV(estimator = LSTM_neural_network, param_distributions=hyperparameters,
cv= 2, n_iter = 5, n_jobs =-1)
grid_result = grid.fit(nn_train_generator, epochs = 50,
validation_data = nn_test_generator,
shuffle=False,
callbacks=[early_stopping])
print(); print(grid_result.best_params_)
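scikit-learn's search objects call the estimator's fit with array-like X and y, so neither RandomizedSearchCV nor the KerasClassifier wrapper can consume a TimeseriesGenerator directly. A sketch of one possible workaround (an assumption, not tested against this data): materialize the generator's windows into plain arrays first:

import numpy as np

# Unpack the generator's windows into arrays that RandomizedSearchCV can slice.
X_windows, y_windows = [], []
for i in range(len(nn_train_generator)):
    X_batch, y_batch = nn_train_generator[i]
    X_windows.append(X_batch)
    y_windows.append(y_batch)
X_windows = np.concatenate(X_windows)
y_windows = np.concatenate(y_windows)

# Note: shuffling windows across CV folds can leak information in a
# time series; sklearn's TimeSeriesSplit may be the safer cv choice.
grid_result = grid.fit(X_windows, y_windows)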
I want to split my dataset into test and validation datasets as my model might be suffering from overfitting. Here's my current architecture:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense
from tensorflow.keras.models import Model

input_sentences = []
output_sentences = []
output_sentences_inputs = []
count = 0
for line in open(r'/content/drive/My Drive/TEMPPP/123.txt', encoding="utf-8"):
    count += 1
    if count > NUM_SENTENCES:
        break
    if '\t' not in line:
        continue
    input_sentence, output = line.rstrip().split('\t')
    output_sentence = output + ' <eos>'
    output_sentence_input = '<sos> ' + output
    input_sentences.append(input_sentence)
    output_sentences.append(output_sentence)
    output_sentences_inputs.append(output_sentence_input)
input_tokenizer = Tokenizer(num_words=MAX_NUM_WORDS)
input_tokenizer.fit_on_texts(input_sentences)
input_integer_seq = input_tokenizer.texts_to_sequences(input_sentences)
word2idx_inputs = input_tokenizer.word_index
max_input_len = max(len(sen) for sen in input_integer_seq)
output_tokenizer = Tokenizer(num_words=MAX_NUM_WORDS, filters='')
output_tokenizer.fit_on_texts(output_sentences + output_sentences_inputs)
output_integer_seq = output_tokenizer.texts_to_sequences(output_sentences)
output_input_integer_seq = output_tokenizer.texts_to_sequences(output_sentences_inputs)
word2idx_outputs = output_tokenizer.word_index
num_words_output = len(word2idx_outputs) + 1
max_out_len = max(len(sen) for sen in output_integer_seq)
encoder_input_sequences = pad_sequences(input_integer_seq, maxlen=max_input_len)
decoder_input_sequences = pad_sequences(output_input_integer_seq, maxlen=max_out_len, padding='post')
import numpy as np
read_dictionary = np.load('/content/drive/My Drive/TEMPPP/hinvec.npy',allow_pickle='TRUE').item()
num_words = min(MAX_NUM_WORDS, len(word2idx_inputs) + 1)
embedding_matrix = np.zeros((num_words, EMBEDDING_SIZE))
for word, index in word2idx_inputs.items():
    embedding_vector = read_dictionary.get(word)
    if embedding_vector is not None:
        embedding_matrix[index] = embedding_vector
embedding_layer = Embedding(num_words, EMBEDDING_SIZE, weights=[embedding_matrix], input_length=max_input_len)
decoder_targets_one_hot = np.zeros((len(input_sentences), max_out_len, num_words_output), dtype='float32')
decoder_output_sequences = pad_sequences(output_integer_seq, maxlen=max_out_len, padding='post')
for i, d in enumerate(decoder_output_sequences):
    for t, word in enumerate(d):
        decoder_targets_one_hot[i, t, word] = 1
encoder_inputs_placeholder = Input(shape=(max_input_len,))
x = embedding_layer(encoder_inputs_placeholder)
encoder = LSTM(LSTM_NODES, return_state=True)
encoder_outputs, h, c = encoder(x)
encoder_states = [h, c]
decoder_inputs_placeholder = Input(shape=(max_out_len,))
decoder_embedding = Embedding(num_words_output, LSTM_NODES)
decoder_inputs_x = decoder_embedding(decoder_inputs_placeholder)
decoder_lstm = LSTM(LSTM_NODES, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs_x, initial_state=encoder_states)
decoder_dense = Dense(num_words_output, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)
import tensorflow as tf
starter_learning_rate = 0.1
end_learning_rate = 0.01
decay_steps = 2000
learning_rate_fn = tf.keras.optimizers.schedules.PolynomialDecay(
starter_learning_rate,
decay_steps,
end_learning_rate,
power=0.5)
opt = tf.keras.optimizers.Adam(learning_rate=learning_rate_fn, epsilon=1e-03, clipvalue=0.5)
model = Model([encoder_inputs_placeholder,
decoder_inputs_placeholder],
decoder_outputs)
model.compile(
optimizer=opt,
loss='categorical_crossentropy',
metrics=['accuracy']
)
history = model.fit(
[encoder_input_sequences, decoder_input_sequences],
decoder_targets_one_hot,
batch_size=BATCH_SIZE,
epochs=EPOCHS,
validation_split=0.1,
)
After reading the dataset, it is already stored in input_sentences and output_sentences, so I thought I could pass them directly to X and y like this:
from sklearn.model_selection import train_test_split
X=input_sentences
y=output_sentences
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
This way I get my Hindi sentences in X and English sentences in y. Now I'm really confused about how to implement this in my model.
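Since encoder_input_sequences, decoder_input_sequences and decoder_targets_one_hot are aligned row for row, one way to wire the split into the model (a sketch, assuming the arrays built above, not the only option) is to split shared row indices and slice all three arrays with them:

import numpy as np
from sklearn.model_selection import train_test_split

# Split indices once so encoder inputs, decoder inputs and targets stay aligned.
idx = np.arange(len(encoder_input_sequences))
idx_train, idx_test = train_test_split(idx, test_size=0.33, random_state=42)

history = model.fit(
    [encoder_input_sequences[idx_train], decoder_input_sequences[idx_train]],
    decoder_targets_one_hot[idx_train],
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=([encoder_input_sequences[idx_test], decoder_input_sequences[idx_test]],
                     decoder_targets_one_hot[idx_test]),
)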
Hello, I have a problem with GridSearchCV: it works perfectly on the MNIST dataset but not on my own data, and I don't know why.
# df = pd.read_csv('bank-full.csv',sep=';')
# print(df.head())
#
# print(df.shape)
#
# print(df.columns)
# print(df.info)
# df.columns = [col.replace('"', '') for col in df.columns]
#
#
# df.drop(columns=['day', 'poutcome'], axis =1 , inplace=True)
#
#
# print(df.head())
# print(df.shape)
#
# le = preprocessing.LabelEncoder()
# df.job = le.fit_transform(df.job)
# df.education = le.fit_transform(df.education)
# df.housing = le.fit_transform(df.housing)
# df.loan = le.fit_transform(df.loan)
# #df.poutcome = le.fit_transform(df.poutcome)
# df.month = le.fit_transform(df.month)
# df.contact = le.fit_transform(df.contact)
# df.marital = le.fit_transform(df.marital)
# df.default = le.fit_transform(df.default)
# df.y = le.fit_transform(df.y)
#
#
#
# print(df.head())
#
# X = df.iloc[:, 0:14]
# y = df.iloc[:, 14]
# X = np.array(X, dtype="float64")
# y = np.array(y,dtype="float64")
#
# scaler = Normalizer()
# X = scaler.fit_transform(X)
#
#
#
#
# x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1, random_state=0)
# model = LogisticRegression(penalty='l2', max_iter=1000)
# model.fit(x_train, y_train)
# prediction = model.predict(x_test)
# from sklearn.metrics import accuracy_score
# print("ACC: {} ".format(accuracy_score(y_test, prediction)))
#
#
# print(x_train.shape)
#
# nn = Sequential()
# nn.add(Dense(120,input_dim = 14, activation='relu'))
# nn.add(Dense(240,activation='relu'))
#
#
# nn.add(Dense(1))
# nn.add(Activation('sigmoid'))
#
# nn.compile(loss=keras.losses.binary_crossentropy,
# optimizer='sgd',
# metrics=['accuracy'])
#
# nn.fit(x_train, y_train,
# batch_size=10,
# epochs=10,
# verbose=1,
#
# validation_data=(x_test, y_test))
#
# loss_acc = nn.evaluate(x_test, y_test, verbose=0)
# print('Test loss:', loss_acc[0])
# print('Test accuracy:', loss_acc[1])
from time import time
import numpy as np
from sklearn import model_selection
from sklearn.preprocessing import Normalizer
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier

data = bm.load_data('bank-full.csv')
data = bm.preprocess_data(data)
X,y = bm.split_data(data)
scaler = Normalizer()
X = scaler.fit_transform(X)
x_train, x_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.1, random_state=0)
start = time()
model = KerasClassifier(build_fn=nnmodel.create_model())
optimizers = ['rmsprop', 'adam']
init = ['glorot_uniform', 'normal', 'uniform']
epochs = np.array([50, 100, 150])
batches = np.array([5, 10, 20])
param_grid = dict(optimizer=optimizers, nb_epoch=epochs, batch_size=batches, init=init)
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(x_train, y_train)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
for params, mean_score, scores in grid_result.grid_scores_:
print("%f (%f) with: %r" % (scores.mean(), scores.std(), params))
print("total time:", time() - start)
The commented section is just a simple Keras model that works perfectly, but below it, when I try GridSearchCV on this model, it gives me these errors:
https://pastebin.com/mhJLSXAS . For example, if I run this program https://www.kaggle.com/shujunge/gridsearchcv-with-keras it works perfectly, but on my data it does not. Does somebody know why?
Scikit-learn builds a new model each time: the grid search method has to construct a classifier with specific parameters itself, so you have to pass the function as an argument, not the result of calling it.
nnmodel.create_model is presumably your function that creates a new model based on parameters, so try changing:
build_fn=nnmodel.create_model()
To:
build_fn=nnmodel.create_model
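Because the param_grid above includes optimizer and init, create_model presumably needs matching keyword arguments for the wrapper to apply them. A sketch of what that might look like, reusing the layer sizes from the commented-out model (hypothetical body):

from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier

# Hedged sketch: the grid's optimizer/init values arrive here as kwargs,
# so the function can build a freshly parameterized model per candidate.
def create_model(optimizer='rmsprop', init='glorot_uniform'):
    nn = Sequential()
    nn.add(Dense(120, input_dim=14, kernel_initializer=init, activation='relu'))
    nn.add(Dense(240, kernel_initializer=init, activation='relu'))
    nn.add(Dense(1, kernel_initializer=init, activation='sigmoid'))
    nn.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return nn

model = KerasClassifier(build_fn=create_model)  # function reference, no parentheses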