I have trained a model with frozen feature-extraction layers, initialised as follows:
model = models.densenet161(pretrained=True)
for param in model.parameters():
    param.requires_grad = False
num_ftrs = model.classifier.in_features
model.classifier = torch.nn.Linear(num_ftrs, 2)
However, at inference time I am unsure how to load the model correctly. In a separate script I do the following:
model = models.densenet161(pretrained=True)
for param in model.parameters():
    param.requires_grad = False
num_ftrs = model.classifier.in_features
model.classifier = torch.nn.Linear(num_ftrs, 2)
model.to(device)

# load the best model
bestmodel = get_best_model(best)
bestmodel = torch.load(bestmodel)
model.load_state_dict(bestmodel['classifier'])

# set model to evaluation mode
model.eval()
with torch.no_grad():
    ...  # inference code here
Does this look correct? Or should I set pretrained=False when constructing the model in my inference script?
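For reference, the loading code above assumes the training script saved a checkpoint dict with the full state_dict stored under a 'classifier' key, along these lines (the save step is not shown in the question, so this is an assumption; the file name is illustrative):

# Assumed save step (not shown above): the whole state_dict, including
# the new classifier head, stored under the key 'classifier'.
torch.save({'classifier': model.state_dict()}, 'best_model.pt')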
I'm trying to run multiple Keras models from functions, but I get an OOM error. My code looks like this:
def mainModel():
    trainX, testX, trainY, testY = train_test_split()
    evScoreVGG, imHolderVGG = runKerasVGGModel(trainX, testX, trainY, testY, mode)
    evScoreResNet, imHolderResnet = runResnetKerasModel(trainX, testX, trainY, testY, mode)
def createKerasVGG19Model():
    base_model = VGG19(weights='imagenet',
                       include_top=False,
                       input_shape=(width, height, 3))
    base_model.trainable = False
    model = Sequential([
        base_model,
        Flatten(),
        Dense(164, activation="softmax"),
    ])
    return model
# The ResNet model is the same, but with a ResNet50V2 base model
def runKerasVGGModel(trainX, testX, trainY, testY, mode):
    modelKeras = createKerasVGG19Model()
    modelKeras.compile( foo )
    modelKeras.fit( foo )
    evScore = modelKeras.evaluate(testX, testY)
    predictedValue = modelKeras.predict(testX, mode)
    del modelKeras
    K.clear_session()
    return evScore, predictedValue
The code for creating and running the ResNet model is identical to the VGG code; I have omitted it for readability.
I tried writing del(model) at the end of runKerasVGGModel().
I tried writing K.clear_session() at the end of runKerasVGGModel().
I tried calling K.clear_session() between the two run calls inside mainModel().
None of this stopped the OOM error (a sketch combining these cleanup steps follows below).
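For reference, a minimal sketch of how these cleanup steps are usually combined after each run (the compile/fit arguments are placeholders, and gc is the standard-library garbage-collection module):

import gc
from tensorflow.keras import backend as K

def run_model_safely(create_fn, trainX, trainY, testX, testY):
    model = create_fn()
    model.compile(optimizer='adam', loss='categorical_crossentropy')  # placeholder args
    model.fit(trainX, trainY, epochs=1)  # placeholder args
    evScore = model.evaluate(testX, testY)
    predictedValue = model.predict(testX)
    # release the model, then clear the TF graph and force Python to free memory
    del model
    K.clear_session()
    gc.collect()
    return evScore, predictedValue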
I am trying to ensemble 5 pre-trained transfer-learning DNN models (base models trained on ImageNet) using the code below:
def define_stacked_model(members):
    # update all layers in all models to not be trainable
    for i in range(len(members)):
        model = members[i]
        for layer in model.layers:
            # make not trainable
            layer.trainable = False
            # rename to avoid 'unique layer name' issue
            layer._name = 'ensemble_' + str(i+1) + '_' + layer.name
    # define multi-headed input
    ensemble_visible = [model.input for model in members]
    # concatenate merge output from each model
    ensemble_outputs = [model.output for model in members]
    merge = concatenate(ensemble_outputs)
    hidden = tf.keras.layers.Dense(10, activation='relu')(merge)
    output = tf.keras.layers.Dense(1)(hidden)
    model = tf.keras.Model(inputs=ensemble_visible, outputs=output)
    # plot graph of ensemble
    plot_model(model, show_shapes=True, to_file='model_graph.png')
    # compile
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                  metrics=["accuracy"])
    return model

# define ensemble model
stacked_model = define_stacked_model(members)
stacked_model.summary()
Each model accepts an input image of size 160x160. I read the images using:
IMG_WIDTH = 160
IMG_HEIGHT = 160

def create_dataset(img_folder):
    img_data_array = []
    class_name = []
    for dir1 in os.listdir(img_folder):
        for file in os.listdir(os.path.join(img_folder, dir1)):
            image_path = os.path.join(img_folder, dir1, file)
            image = cv2.imread(image_path, cv2.IMREAD_COLOR)
            image = cv2.resize(image, (IMG_HEIGHT, IMG_WIDTH), interpolation=cv2.INTER_AREA)
            image = np.array(image)
            image = image.astype('float32')
            image /= 255
            img_data_array.append(image)
            class_name.append(dir1)
    return img_data_array, class_name
# extract the image array and class name
img_data, class_name = create_dataset(TRAIN_DIR)
target_dict = {k: v for v, k in enumerate(np.unique(class_name))}
target_val = [target_dict[class_name[i]] for i in range(len(class_name))]
target_val = np.array(list(map(int, target_val))[0:300], np.float32)
Note: I used only 300 images because of limited memory.
After that I stacked the images using:
stacked_img_data = np.dstack([img_data[0:300],
img_data[0:300],
img_data[0:300],
img_data[0:300],
img_data[0:300]])
and finally trained the ensemble model using:
history = stacked_model.fit(x = stacked_img_data, y = target_val, epochs=10)
I faced this error:
ValueError: Layer model expects 5 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None, 160, 800, 3) dtype=float32>]
I think the above approach is close to correct, but I am not sure how to solve the issue.
The problem is that np.dstack stacks along the third axis, so the five copies of img_data were merged into a single array of shape (300, 160, 800, 3), while the model expects a list of five separate inputs. You can make this work simply by changing
history = stacked_model.fit(x = stacked_img_data, y = target_val, epochs=10)
to
history = stacked_model.fit(x=[img_data[0:300], img_data[0:300], img_data[0:300], img_data[0:300], img_data[0:300]], y=target_val, epochs=10)
That said, since all five models take the same image, you can also share a single input:
single_input = tf.keras.layers.Input(shape=(160, 160, 3))
ensemble_outputs = [model(single_input) for model in members]
# ... merge, hidden and output layers as before ...
model = tf.keras.Model(inputs=single_input, outputs=output)
and then pass the image batch only once.
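For completeness, a minimal sketch of the single-input variant (assuming the members, img_data and target_val from the question; the layer-renaming loop and the plot_model call are omitted):

single_input = tf.keras.layers.Input(shape=(160, 160, 3))
# every pre-trained member is applied to the same input tensor
ensemble_outputs = [model(single_input) for model in members]
merge = tf.keras.layers.concatenate(ensemble_outputs)
hidden = tf.keras.layers.Dense(10, activation='relu')(merge)
output = tf.keras.layers.Dense(1)(hidden)
ensemble = tf.keras.Model(inputs=single_input, outputs=output)
ensemble.compile(optimizer='adam',
                 loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                 metrics=['accuracy'])
# one image batch is now enough; no stacking needed
history = ensemble.fit(x=np.array(img_data[0:300]), y=target_val, epochs=10)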
I am trying to fine-tune a BERT model from TensorFlow Hub. I loaded the preprocessing layer and the encoder as follows:
bert_preprocess_model = hub.KerasLayer('https://tfhub.dev/tensorflow/bert_multi_cased_preprocess/3')
bert_model = hub.KerasLayer('https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1')
And this is my model definition:
def build_classifier_model():
    text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
    preprocessing_layer = hub.KerasLayer(bert_preprocess_model, name='preprocessing')
    encoder_inputs = preprocessing_layer(text_input)
    encoder = hub.KerasLayer(bert_model, trainable=True, name='BERT_encoder')
    outputs = encoder(encoder_inputs)
    net = outputs['pooled_output']
    net = tf.keras.layers.Dropout(0.1)(net)
    net = tf.keras.layers.Dense(3, activation='softmax', name='classifier')(net)
    return tf.keras.Model(text_input, net)
classifier_model = build_classifier_model()
But I get the following error: ERROR:absl:hub.KerasLayer is trainable but has zero trainable weights.
On the official website, the model is listed as fine-tunable.
I found the solution: simply add trainable=True when loading the encoder:
bert_model = hub.KerasLayer('https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1', trainable=True)
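A quick way to confirm the fix (an illustrative check, not from the original post):

classifier_model = build_classifier_model()
# with trainable=True on the encoder layer, the BERT weights are registered
# as trainable, so this count should be well above zero
print(len(classifier_model.trainable_weights))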
I have created an Estimator from a TF Slim ResNet V2 checkpoint and tested it to make predictions. What I did is essentially a normal Estimator combined with assign_from_checkpoint_fn:
def model_fn(features, labels, mode, params):
    ...
    slim.assign_from_checkpoint_fn(
        os.path.join(checkpoint_dir, 'resnet_v2_50.ckpt'),
        slim.get_model_variables('resnet_v2'))
    ...
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'class_ids': predicted_classes[:, tf.newaxis],
            'probabilities': tf.nn.softmax(logits),
            'logits': logits,
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
To export the estimator as a SavedModel, I made a serving_input_fn as follows:
def image_preprocess(image_buffer):
    image = tf.image.decode_jpeg(image_buffer, channels=3)
    image_preprocessing_fn = preprocessing_factory.get_preprocessing('inception', is_training=False)
    image = image_preprocessing_fn(image, FLAGS.image_size, FLAGS.image_size)
    return image

def serving_input_fn():
    input_ph = tf.placeholder(tf.string, shape=[None], name='image_binary')
    image_tensors = image_preprocess(input_ph)
    return tf.estimator.export.ServingInputReceiver(image_tensors, input_ph)
In the main function, I use export_saved_model to try to export the Estimator to the SavedModel format:
def main():
    ...
    classifier = tf.estimator.Estimator(model_fn=model_fn)
    classifier.export_saved_model(dir_path, serving_input_fn)
However, when I run the code, it fails with "Couldn't find trained model at /tmp/tmpn3spty2z". From what I understand, export_saved_model looks for a trained Estimator checkpoint to export. Is there a way to restore the pretrained checkpoint into an Estimator and export it to a SavedModel without any further training?
I have solved my problem. To export a TF Slim ResNet checkpoint to a SavedModel with TF 1.14, warm starting can be used together with export_savedmodel as follows:
config = tf.estimator.RunConfig(save_summary_steps=None, save_checkpoints_secs=None)
warm_start = tf.estimator.WarmStartSettings(checkpoint_dir, checkpoint_name)
classifier = tf.estimator.Estimator(model_fn=model_fn, warm_start_from=warm_start, config=config)
classifier.export_savedmodel(export_dir_base=FLAGS.output_dir, serving_input_receiver_fn=serving_input_fn)
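To sanity-check the export, the SavedModel can be loaded back with the TF 1.x predictor API (a hedged sketch: the input key 'input' is the default name a ServingInputReceiver gives a single receiver tensor, which can be confirmed with saved_model_cli show; the path and file name are illustrative):

from tensorflow.contrib import predictor

# export_dir is the timestamped subdirectory created by export_savedmodel
predict_fn = predictor.from_saved_model(export_dir)
with open('test.jpg', 'rb') as f:
    jpeg_bytes = f.read()
result = predict_fn({'input': [jpeg_bytes]})
# output keys mirror the predictions dict in model_fn
print(result['class_ids'], result['probabilities'])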
I am building a small network using custom building blocks for each use case. It looks like this:
def top_block(dropout=None, training=None):
    # scaled input
    input_1 = tf.keras.Input(shape=(1, 15), dtype='float32')
    input_2 = tf.keras.Input(shape=(1, 15), dtype='float32')
    if dropout:
        layer_one = tf.keras.layers.Dropout(rate=dropout)(input_1, training=training)
        layer_two = tf.keras.layers.Dropout(rate=dropout)(input_2, training=training)
        return [layer_one, layer_two]
    return [input_1, input_2]

def bottom_layer(input_layers):
    data = tf.reduce_mean(input_layers, 0)
    cls_layer = tf.keras.layers.Dense(1,
                                      kernel_initializer=keras.initializers.glorot_uniform(seed=200),
                                      activation='sigmoid')(data)
    model = tf.keras.Model([input_layers[0], input_layers[1]], cls_layer, name='model_1')
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.summary()
    return model
If I build the network without dropout, it works fine:
top_ = top_block()
model = bottom_layer(top_)
But if I build it with dropout, it raises an error:
top_ = top_block(dropout=0.2, training=True)
model = bottom_layer(top_)
ValueError: Graph disconnected: cannot obtain value for tensor Tensor("input_72:0", shape=(None, 1, 15), dtype=float32) at layer "input_72". The following previous layers were accessed without issue: []
How can I build the model with the dropout layers?
And how do I set training=False during evaluate()? Do I need to load the full model and the old model weights?
Thank you!
I just realized the model's inputs were coming from an intermediate layer (the dropout layer); they should come directly from the Input layers:
def top_block():
    # scaled input
    input_1 = tf.keras.Input(shape=(1, 15), dtype='float32')
    input_2 = tf.keras.Input(shape=(1, 15), dtype='float32')
    return [input_1, input_2]

def apply_dropout(layers_data, dropout_val, training):
    layer_one = tf.keras.layers.Dropout(rate=dropout_val)(layers_data[0], training=training)
    layer_two = tf.keras.layers.Dropout(rate=dropout_val)(layers_data[1], training=training)
    return [layer_one, layer_two]

def bottom_layer(input_layers, data):
    data = tf.reduce_mean(data, 0)
    cls_layer = tf.keras.layers.Dense(1,
                                      kernel_initializer=keras.initializers.glorot_uniform(seed=200),
                                      activation='sigmoid')(data)
    model = tf.keras.Model(input_layers, cls_layer, name='model_1')
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.summary()
    return model
It works now:
top_ = top_block()
dropout_ = apply_dropout(top_, 0.2, True)
model = bottom_layer(top_, dropout_)
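On the remaining question about evaluation: if the Dropout layers are built without forcing the flag (i.e. training=None), Keras applies dropout during fit() and disables it automatically in evaluate() and predict(), so no weight reloading is needed. A minimal sketch:

top_ = top_block()
# leave training unset so Keras toggles dropout by phase automatically
dropout_ = apply_dropout(top_, 0.2, None)
model = bottom_layer(top_, dropout_)
# dropout is active during model.fit(...) and off during model.evaluate(...)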