I'm building a model out of multiple sequential models that I need to merge before training on the dataset. It seems keras.engine.topology.Merge is no longer supported in Keras 2.0. I tried keras.layers.Add and keras.layers.Concatenate, and they don't work either.
Here's my code:
# Placeholder model: nothing below adds layers to it or trains it.
model = Sequential()
# Branch 1: frozen pre-trained embeddings -> per-timestep Dense -> sum over axis 1.
model1 = Sequential()
model1.add(Embedding(len(word_index) + 1, 300, weights = [embedding_matrix], input_length = 40, trainable = False))
model1.add(TimeDistributed(Dense(300, activation = 'relu')))
model1.add(Lambda(lambda x: K.sum(x, axis = 1), output_shape = (300, )))
model2 = Sequential()
###Same as model1###
# Branch 3: frozen pre-trained embeddings -> 1D convolution -> global max pooling -> Dense.
# NOTE(review): nb_filter / filter_length / border_mode / subsample_length look like
# Keras 1 keyword names -- confirm against the installed Keras version.
model3 = Sequential()
model3.add(Embedding(len(word_index) + 1, 300, weights = [embedding_matrix], input_length = 40, trainable = False))
model3.add(Convolution1D(nb_filter = nb_filter, filter_length = filter_length, border_mode = 'valid', activation = 'relu', subsample_length = 1))
model3.add(GlobalMaxPooling1D())
model3.add(Dropout(0.2))
model3.add(Dense(300))
model3.add(Dropout(0.2))
model3.add(BatchNormalization())
model4 = Sequential()
###Same as model3###
# Branch 5: embeddings without pre-trained weights -> LSTM.
model5 = Sequential()
model5.add(Embedding(len(word_index) + 1, 300, input_length = 40, dropout = 0.2))
model5.add(LSTM(300, dropout_W = 0.2, dropout_U = 0.2))
model6 = Sequential()
###Same as model5###
# Join the six branches and stack the classification head on top.
merged_model = Sequential()
# This is the line that fails with "name 'Merge' is not defined".
merged_model.add(Merge([model1, model2, model3, model4, model5, model6], mode = 'concat'))
merged_model.add(BatchNormalization())
merged_model.add(Dense(300))
merged_model.add(PReLU())
merged_model.add(Dropout(0.2))
merged_model.add(Dense(1))
merged_model.add(BatchNormalization())
merged_model.add(Activation('sigmoid'))
merged_model.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])
# Keep only the weights with the best monitored 'val_acc'.
checkpoint = ModelCheckpoint('weights.h5', monitor = 'val_acc', save_best_only = True, verbose = 2)
# x1 and x2 are fed alternately, one array per branch, in branch order.
merged_model.fit([x1, x2, x1, x2, x1, x2], y = y, batch_size = 384, nb_epoch = 200, verbose = 1, validation_split = 0.1, shuffle = True, callbacks = [checkpoint])
Error:
name 'Merge' is not defined
Using keras.layers.Add and keras.layers.Concatenate says cannot do it with sequential models.
What's the workaround for it?
If I were you, I would use the Keras functional API in this case, at least for building the final model (i.e. merged_model). It gives you much more flexibility and lets you easily define complex models:
from keras.models import Model
from keras.layers import concatenate
# Functional-API replacement for the old Merge layer: join the outputs of the
# six branch models, then run the joined tensor through the classification head.
branches = [model1, model2, model3, model4, model5, model6]
merged_layers = concatenate([branch.output for branch in branches])

# Head layers are created and applied in the order listed.
x = merged_layers
for head_layer in (
    BatchNormalization(),
    Dense(300),
    PReLU(),
    Dropout(0.2),
    Dense(1),
    BatchNormalization(),
):
    x = head_layer(x)
out = Activation('sigmoid')(x)

# The merged model takes one input per branch, in branch order.
merged_model = Model([branch.input for branch in branches], [out])
merged_model.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])
You can also do the same thing for the other models you have defined. As I mentioned, the functional API gives you more control over the structure of the model, so it is the recommended approach when building complex models like this one.
Related
I am trying to tune hyperparameter on the KerasRegressor
However, I only get NaN results, as shown below. May I know what causes the issue?
Everything works fine when I compile my model, but the score for the best parameters always shows NaN. The metric I use is RMSE.
code snippet at below:
def create_model(optimizer,activation,lstm_unit_1,lstm_unit_2,lstm_unit_3, init='glorot_uniform'):
    """Build and compile the Conv1D + stacked-GRU regressor used by the search.

    The input shape is taken from the module-level ``trainX`` array
    (``trainX.shape[1]`` by ``trainX.shape[2]``).

    Args:
        optimizer: Optimizer passed straight to ``model.compile``.
        activation: Activation used by the Conv1D layer and both GRU layers.
        lstm_unit_1: Number of filters of the Conv1D layer.
        lstm_unit_2: Number of units of the first GRU layer.
        lstm_unit_3: Number of units of the second GRU layer.
        init: Kernel initializer applied to every weighted layer. It used to
            be accepted but silently ignored; the default matches the Keras
            default, so existing callers get the same model.

    Returns:
        The compiled ``Sequential`` model (loss ``'mse'``).
    """
    model = Sequential()
    # Only the first layer needs an input shape; Sequential infers the rest,
    # so the repeated input_shape arguments on the GRU layers were dropped.
    model.add(Conv1D(lstm_unit_1, kernel_size=1, activation=activation,
                     kernel_initializer=init,
                     input_shape=(trainX.shape[1], trainX.shape[2])))
    model.add(GRU(lstm_unit_2, activation=activation,
                  kernel_initializer=init, return_sequences=True))
    model.add(GRU(lstm_unit_3, activation=activation,
                  kernel_initializer=init, return_sequences=True))
    # One value per timestep, then flattened to a vector per sample.
    model.add(Dense(units=1, kernel_initializer=init))
    model.add(Flatten())
    model.compile(optimizer=optimizer, loss='mse', metrics=['mean_squared_error'])
    return model
# Wrap the builder so scikit-learn can drive it; every fit runs 150 silent epochs.
model = tf.keras.wrappers.scikit_learn.KerasRegressor(
    build_fn = create_model, epochs = 150, verbose=False)

# Candidate values; the three unit lists are independent copies of the same choices.
batch_size = [16,32,64,128]
lstm_unit_1 = [128,256,512]
lstm_unit_2 = list(lstm_unit_1)
lstm_unit_3 = list(lstm_unit_1)
optimizer = ['SGD','Adam','Adamax','RMSprop']
activation = ['relu','linear','sigmoid',]
param_grid = {
    'lstm_unit_1': lstm_unit_1,
    'lstm_unit_2': lstm_unit_2,
    'lstm_unit_3': lstm_unit_3,
    'optimizer': optimizer,
    'activation': activation,
    'batch_size': batch_size,
}

# NOTE(review): with every warning silenced, a failing fit would presumably
# surface only as a NaN score -- confirm by re-running with warnings enabled
# or with error_score='raise'.
warnings.filterwarnings("ignore")
random = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_jobs=-1,
    scoring='neg_mean_squared_error',
)
random_result = random.fit(trainX,trainY)
print(random_result.best_score_)
print(random_result.best_params_)
def rnn_model(self,activation="relu"):
    """Build, compile and summarise a one-layer LSTM sequence model.

    Args:
        activation: Activation of the final Dense layer.

    Returns:
        The compiled ``Sequential`` model; it takes sequences of
        ``self.seq_len`` steps with 50 features and emits 50 values per step.
    """
    feature_dim = 50
    hidden_units = 512

    net = Sequential()
    # return_sequences=True keeps one output vector per timestep.
    net.add(LSTM(hidden_units,
                 batch_input_shape=(None, self.seq_len, feature_dim),
                 return_sequences=True))
    net.add(Dense(feature_dim, activation=activation))
    net.compile(loss="mean_squared_error", optimizer=Adam(learning_rate=0.001))
    net.summary()
    return net
# Next, fit the model on all-zero placeholder arrays.
final_x = np.zeros((319083, 2, 50))
final_y = np.zeros((319083, 1, 50))
# Fitting the model directly works.
model = self.rnn_model()
model.fit(
final_x,final_y,
batch_size=400,
epochs=10,
validation_split=0.1
)
# However, when I tried to use hyperparameter search, this raises the error `ValueError: Invalid shape for y: (319083, 1, 50)`
activation = ["relu","sigmoid"]
# The builder is wrapped as a classifier here even though it is compiled with a mean-squared-error loss.
model = KerasClassifier(build_fn=self.rnn_model,verbose=0)
param_grid = dict(activation=activation)
grid = GridSearchCV(estimator=model,param_grid=param_grid)
grid_result= grid.fit(final_x,final_y)
How dimension changes when using GridSearchCV
You should be using a KerasRegressor, since your model is not a classifier in that sense:
import tensorflow as tf
import numpy as np
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasRegressor
def rnn_model(activation="relu"):
    """Build, compile and summarise a one-layer LSTM sequence regressor.

    Args:
        activation: Activation of the final Dense layer.

    Returns:
        The compiled ``tf.keras.Sequential`` model; it takes sequences of
        2 steps with 50 features and emits 50 values per step.
    """
    feature_dim = 50
    hidden_units = 512

    layers = tf.keras.layers
    net = tf.keras.Sequential()
    # return_sequences=True keeps one output vector per timestep.
    net.add(layers.LSTM(hidden_units,
                        batch_input_shape=(None, 2, feature_dim),
                        return_sequences=True))
    net.add(layers.Dense(feature_dim, activation=activation))
    net.compile(loss="mean_squared_error",
                optimizer=tf.keras.optimizers.Adam(learning_rate=0.001))
    net.summary()
    return net
# All-zero placeholder data; y now has the same (samples, 2, 50) layout as x.
final_x = np.zeros((319083, 2, 50))
final_y = np.zeros((319083, 2, 50))
activation = ["relu","sigmoid"]
# The wrapper builds a fresh model for every fit via build_fn, so the model is
# no longer built once up front only to be overwritten by this assignment.
model = KerasRegressor(build_fn=rnn_model,verbose=0)
param_grid = dict(activation=activation)
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result= grid.fit(final_x,final_y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_)) # run with a way smaller dataset
Best: 0.000000 using {'activation': 'relu'}
I'm trying to figure out the difference between using a pretrained model from TensorFlow Hub and using the very same architecture from tf.keras.applications. I've tried training two models with the same architecture - one from TF Hub and the other from tf.keras.applications - which should yield comparable results; however, the results are vastly different. Could you please explain the difference?
Here are examples of the two models.
# Frozen EfficientNetB0 backbone (no classification top) from tf.keras.applications.
# NOTE(review): the Keras EfficientNet documentation says this model expects
# raw pixel values in [0, 255] and rescales internally -- confirm the input
# pipeline matches that.
base_model = tf.keras.applications.EfficientNetB0(include_top = False)
base_model.trainable = False
inputs = Input(shape = (224,224,3), name = 'input_layer')
# training = False runs the backbone in inference mode.
x = base_model(inputs, training = False)
x = GlobalAveragePooling2D(name = 'global_avg_pool_layer')(x)
outputs = Dense(len(class_names), activation = 'softmax', name = 'output_layer')(x)
model_1 = tf.keras.Model(inputs, outputs)
model_1.compile(loss = 'categorical_crossentropy', optimizer = Adam(), metrics = ['accuracy'])
history_1 = model_1.fit(train_data_all_10_percent,
                        epochs = 10,
                        validation_data = test_data,
                        # validation_steps counts whole batches, so the 15% share
                        # is truncated to an integer instead of passed as a float.
                        validation_steps = int(0.15 * len(test_data)))
AND THE SECOND
efficientnet_url = 'https://tfhub.dev/tensorflow/efficientnet/b0/feature-vector/1'


def create_model(model_url, num_classes = 10):
    """Build an uncompiled classifier on top of a frozen TF Hub feature extractor.

    Args:
        model_url: TF Hub handle of the feature-vector model to load.
        num_classes: Number of units of the softmax output layer. It used to be
            ignored in favour of the global ``len(class_names)``.

    Returns:
        A ``Sequential`` model: the frozen hub layer followed by the softmax
        layer. The input shape is the module-level ``IMG_SIZE`` plus 3 channels.
    """
    # NOTE(review): TF Hub image feature-vector models conventionally expect
    # pixel values scaled to [0, 1] -- confirm on the model page.
    feature_extractor_layer = hub.KerasLayer(model_url, trainable = False, name = 'feature_extraction_layer', input_shape = IMG_SIZE + (3,))
    model = Sequential([
        feature_extractor_layer,
        Dense(num_classes, activation = 'softmax', name = 'output_layer')
    ])
    return model
# Build from the hub URL; the original passed the not-yet-defined
# efficientnet_model as the URL argument.
efficientnet_model = create_model(efficientnet_url, num_classes = len(class_names))
efficientnet_model.compile(loss = 'categorical_crossentropy', optimizer = Adam(), metrics = ['accuracy'])
efficientnet_history = efficientnet_model.fit(train_data_all_10_percent,
                                              epochs = 10,
                                              validation_data = test_data,
                                              # validation_steps counts whole batches, so the
                                              # 15% share is truncated to an integer.
                                              validation_steps = int(0.15 * len(test_data)))
In the following code, I save the label to tfrecord and read it again.
(In reality, I save both images and labels to tfrecord, here is a simple example for illustration purpose) .
I got an error ValueError: Shapes (None, 3, 2) and (None, 2) are incompatible, how should I fix this? I am using Tensorflow 2.3. The key part should be in the return statement of parse_examples.
import contextlib2
import numpy as np
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
def process_image():
    """Return a ``tf.train.Example`` whose only feature is "image/label" = [0, 1]."""
    label_feature = tf.train.Feature(int64_list=tf.train.Int64List(value=[0,1]))
    features = tf.train.Features(feature={"image/label": label_feature})
    return tf.train.Example(features=features)
# Write one serialized example to the training TFRecord file. Every writer is
# registered with the ExitStack, which closes them all when the block exits.
record_paths = [f"data_train.tfrecord"]
with contextlib2.ExitStack() as tf_record_close_stack:
    output_tfrecords = []
    for file_name in record_paths:
        writer = tf.io.TFRecordWriter(file_name)
        output_tfrecords.append(tf_record_close_stack.enter_context(writer))
    serialized = process_image().SerializeToString()
    output_tfrecords[0].write(serialized)
def parse_examples(examples):
    """Parse serialized examples into an (image, labels) pair.

    Args:
        examples: Serialized ``tf.train.Example`` data holding a two-element
            int64 "image/label" feature.

    Returns:
        A tuple ``(image, labels)``: ``image`` is a freshly drawn random
        32x32x3 array of 0/1 values, and ``labels`` is a Python list repeating
        the parsed label three times.
    """
    # NOTE(review): returning the labels as a list appears to be what yields
    # the (None, 3, 2) target shape in the reported error -- confirm.
    feature_spec = {
        "image/label": tf.io.FixedLenFeature(shape=[2], dtype=tf.int64),
    }
    parsed = tf.io.parse_example(examples, features=feature_spec)
    image = np.random.randint(2, size=3072).reshape(32, 32, 3)
    return (image, [parsed["image/label"], parsed["image/label"], parsed["image/label"]])
def process_dataset(dataset):
    """Parse every record with ``parse_examples`` and group the results into batches of one."""
    parsed = dataset.map(parse_examples, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    return parsed.batch(1)
# Input pipeline: read the TFRecord file, then parse and batch it.
train_data = tf.data.TFRecordDataset(filenames="data_train.tfrecord")
train_data = process_dataset(train_data)

# EfficientNetB7 backbone without its classification top; every layer is frozen.
base_model = tf.keras.applications.EfficientNetB7(
    input_shape=(32,32, 3),
    weights='imagenet',  # or weights='noisy-student'
    include_top=False,
)
for layer in base_model.layers:
    layer.trainable = False

# Shared trunk: pooled backbone features -> two Dense(256) + Dropout stages.
dropout_rate = 0.3
x = GlobalAveragePooling2D()(base_model.output)
for _ in range(2):
    x = Dense(256, activation='relu')(x)
    x = Dropout(dropout_rate)(x)

# One softmax head per task, each named after its task.
all_target = []
loss_list = []
test_metrics = {}
for name, node in [("task1", 2), ("task2", 2), ("task3", 2)]:
    y1 = x
    for width in (128, 64):
        y1 = Dense(width, activation='relu')(y1)
        y1 = Dropout(dropout_rate)(y1)
    y1 = Dense(node, activation='softmax', name=name)(y1)
    all_target.append(y1)
    loss_list.append('categorical_crossentropy')
    test_metrics[name] = "accuracy"

model = Model(inputs=base_model.input, outputs=all_target)
model.compile(loss=loss_list, optimizer='adam', metrics=test_metrics)
history = model.fit(train_data, epochs=1, verbose=1)
It turns out that simply changing the return statement of parse_examples to the following works:
return (res, {"task1":parsed_examples["image/label"],"task2":parsed_examples["image/label"],"task3":parsed_examples["image/label"]})
Where task1,task2,task3 are the names of the softmax layers given by me.
I am building a small network using some custom network boxes for each use case, It looks like this :
def top_block(dropout = None, training = None):
    """Create the two (1, 15) float32 inputs, optionally passed through dropout.

    Args:
        dropout: Dropout rate; when falsy, no Dropout layer is added.
        training: Forwarded as the ``training`` argument of each Dropout call.

    Returns:
        A two-element list: the raw ``Input`` tensors when ``dropout`` is
        falsy, otherwise the Dropout outputs (the ``Input`` tensors themselves
        are then not returned).
    """
    # scaled input
    inputs = [tf.keras.Input(shape=(1,15), dtype='float32') for _ in range(2)]
    if not dropout:
        return inputs
    return [
        tf.keras.layers.Dropout(rate = dropout)(tensor, training = training)
        for tensor in inputs
    ]
def bottom_layer(input_layers):
    """Build, compile and summarise the binary classifier 'model_1'.

    Args:
        input_layers: Sequence whose first two elements are used both as the
            model inputs and as the tensors reduced along axis 0.

    Returns:
        The compiled ``tf.keras.Model``.
    """
    averaged = tf.reduce_mean(input_layers,0)
    classifier = tf.keras.layers.Dense(
        1,
        kernel_initializer = keras.initializers.glorot_uniform(seed=200),
        activation = 'sigmoid',
    )
    cls_layer = classifier(averaged)

    model_inputs = [input_layers[0], input_layers[1]]
    net = tf.keras.Model(model_inputs, cls_layer , name = 'model_1')
    net.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics=['accuracy'])
    net.summary()
    return net
If I am trying to access this network without dropout, it's working fine :
# Build the two inputs without dropout, then wire them into the classifier.
top_ = top_block()
model = bottom_layer(top_ )
But if I build it with dropout, it raises an error:
# Same wiring, but top_block now returns the Dropout outputs instead of the Input tensors.
top_ = top_block(dropout = 0.2, training = True)
model = bottom_layer(top_ )
ValueError: Graph disconnected: cannot obtain value for tensor Tensor("input_72:0", shape=(None, 1, 15), dtype=float32) at layer "input_72". The following previous layers were accessed without issue: []
How to access the model with dropout layer?
How to disable training = False during evaluate? Do I need to load full model and old model weights?
Thank You!
I just realized that my model input was coming from an intermediate layer (the dropout layer); it should come directly from the Input layer:
def top_block():
    """Return a list of two fresh ``tf.keras.Input`` tensors of shape (1, 15), dtype float32."""
    # scaled input
    return [tf.keras.Input(shape=(1,15), dtype='float32') for _ in range(2)]
def apply_dropout(layers_data, dropout_val, training):
    """Apply a separate Dropout layer to each of the first two tensors.

    Args:
        layers_data: Sequence whose elements 0 and 1 are the tensors to process.
        dropout_val: Dropout rate used by both layers.
        training: Forwarded as the ``training`` argument of each Dropout call.

    Returns:
        A two-element list with the Dropout outputs, in input order.
    """
    def _drop(tensor):
        # A new Dropout layer is created for every tensor.
        return tf.keras.layers.Dropout(rate = dropout_val)(tensor, training = training)

    first = _drop(layers_data[0])
    second = _drop(layers_data[1])
    return [first, second]
def bottom_layer(input_layers, data):
    """Build, compile and summarise the binary classifier 'model_1'.

    Args:
        input_layers: Tensors used as the model inputs.
        data: Tensors reduced along axis 0 and fed to the sigmoid unit.

    Returns:
        The compiled ``tf.keras.Model``.
    """
    averaged = tf.reduce_mean(data, 0)
    classifier = tf.keras.layers.Dense(
        1,
        kernel_initializer = keras.initializers.glorot_uniform(seed=200),
        activation = 'sigmoid',
    )
    cls_layer = classifier(averaged)

    net = tf.keras.Model(input_layers, cls_layer , name = 'model_1')
    net.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics=['accuracy'])
    net.summary()
    return net
It's working now
# Create the Input tensors, derive the dropout tensors from them, then pass
# both on: the inputs define the model, the dropout tensors feed the classifier.
top_ = top_block()
dropout_ = apply_dropout(top_, 0.2, True)
model = bottom_layer(top_ , dropout_)