Correct way to initialise model at inference (PyTorch)

I have trained a model with frozen feature-extraction layers, which was initialised as follows:
model = models.densenet161(pretrained=True)
for param in model.parameters():
    param.requires_grad = False
num_ftrs = model.classifier.in_features
model.classifier = torch.nn.Linear(num_ftrs, 2)
However, at inference time I am unsure of how to load the model correctly. In a separate script I do the following:
model = models.densenet161(pretrained=True)
for param in model.parameters():
    param.requires_grad = False
num_ftrs = model.classifier.in_features
model.classifier = torch.nn.Linear(num_ftrs, 2)
model.to(device)
# load the best model
bestmodel = get_best_model(best)
bestmodel = torch.load(bestmodel)
model.load_state_dict(bestmodel['classifier'])
# set model to evaluation mode
model.eval()
with torch.no_grad():
    ...
Does this look correct? Or should I set pretrained=False when constructing the model in my inference script?
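For what it's worth, here is a minimal inference-loading sketch. It assumes the checkpoint was saved as torch.save({'classifier': model.classifier.state_dict()}, path), to match the bestmodel['classifier'] access above; get_best_model and the exact checkpoint layout come from the question and may differ in practice. Under that layout the frozen backbone weights are never saved, so pretrained=True is still needed at inference; pretrained=False is only safe if the full model.state_dict() was checkpointed.

import torch
from torchvision import models

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Rebuild the exact training-time architecture.
model = models.densenet161(pretrained=True)
num_ftrs = model.classifier.in_features
model.classifier = torch.nn.Linear(num_ftrs, 2)

# Hypothetical path; assumes the {'classifier': ...} checkpoint layout above.
checkpoint = torch.load('best_model.pt', map_location=device)
model.classifier.load_state_dict(checkpoint['classifier'])  # load into the sub-module

# Re-freezing requires_grad is unnecessary here: eval() plus no_grad() already
# disables dropout/batch-norm updates and gradient tracking.
model.to(device)
model.eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224, device=device))  # dummy input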

Related

Keras OOM on multiple models and del() doesn't work

I'm trying to run multiple Keras models from functions, but I get an OOM error. My code looks like this:
def mainModel():
    trainX, testX, trainY, testY = train_test_split()
    evScoreVGG, imHolderVGG = runKerasVGGModel(trainX, testX, trainY, testY, ...)
    evScoreResNet, imHolderResnet = runResnetKerasModel(trainX, testX, trainY, testY, ...)

def createKerasVGG19Model():
    base_model = VGG19(weights='imagenet',
                       include_top=False,
                       input_shape=(width, height, 3),
                       )
    base_model.trainable = False
    model = Sequential([
        base_model,
        Flatten(),
        Dense(164, activation="softmax"),
    ])
    return model

# The ResNet version is the same, with a ResNet50V2 base model
def runKerasVGGModel(trainX, testX, trainY, testY, mode):
    modelKeras = createKerasVGG19Model()
    modelKeras.compile(...)  # compile arguments elided
    modelKeras.fit(...)      # fit arguments elided
    evScore = modelKeras.evaluate(testX, testY)
    predictedValue = modelKeras.predict(testX, mode)
    del modelKeras
    K.clear_session()
    return evScore, predictedValue
The code for creating and running the ResNet is identical to the VGG model; I left it out for readability.
I tried adding del(model) at the end of runKerasVGGModel().
I tried adding K.clear_session() at the end of runKerasVGGModel().
I tried adding K.clear_session() between the two run calls inside mainModel().
None of these freed the memory; a common workaround is sketched below.
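One workaround that is often suggested for this situation (my sketch, not from the question): run each model in its own process. Even after del and K.clear_session(), TensorFlow does not reliably hand GPU memory back to the OS within a process, but all of it is released when a child process exits. The sketch assumes runKerasVGGModel and runResnetKerasModel are importable top-level functions taking the train/test splits (the question's unexplained mode argument is omitted here).

import multiprocessing as mp

def run_in_subprocess(fn, *args):
    # Run fn(*args) in a fresh child process; its GPU/host memory is
    # returned to the OS when the child exits.
    ctx = mp.get_context('spawn')  # 'spawn' avoids inheriting CUDA state
    with ctx.Pool(1) as pool:
        return pool.apply(fn, args)

def mainModel():
    trainX, testX, trainY, testY = train_test_split()
    evScoreVGG, imHolderVGG = run_in_subprocess(
        runKerasVGGModel, trainX, testX, trainY, testY)
    evScoreResNet, imHolderResnet = run_in_subprocess(
        runResnetKerasModel, trainX, testX, trainY, testY)

If you stay in a single process, calling K.clear_session() before building each model and following the del with gc.collect() sometimes helps, but process isolation is the more reliable fix.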

How to feed input images into an ensemble of DNN models?

I am trying to ensemble 5 pre-trained transfer-learning DNN models (base models trained on ImageNet) using the code below:
def define_stacked_model(members):
    # update all layers in all models to not be trainable
    for i in range(len(members)):
        model = members[i]
        for layer in model.layers:
            # make not trainable
            layer.trainable = False
            # rename to avoid 'unique layer name' issue
            layer._name = 'ensemble_' + str(i+1) + '_' + layer.name
    # define multi-headed input
    ensemble_visible = [model.input for model in members]
    # concatenate merge output from each model
    ensemble_outputs = [model.output for model in members]
    merge = concatenate(ensemble_outputs)
    hidden = tf.keras.layers.Dense(10, activation='relu')(merge)
    output = tf.keras.layers.Dense(1)(hidden)
    model = tf.keras.Model(inputs=ensemble_visible, outputs=output)
    # plot graph of ensemble
    plot_model(model, show_shapes=True, to_file='model_graph.png')
    # compile
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                  metrics=["accuracy"])
    return model

# define ensemble model
stacked_model = define_stacked_model(members)
stacked_model.summary()
Each model accepts input images of size 160×160. I read the images using:
IMG_WIDTH = 160
IMG_HEIGHT = 160

def create_dataset(img_folder):
    img_data_array = []
    class_name = []
    for dir1 in os.listdir(img_folder):
        for file in os.listdir(os.path.join(img_folder, dir1)):
            image_path = os.path.join(img_folder, dir1, file)
            image = cv2.imread(image_path, cv2.IMREAD_COLOR)
            image = cv2.resize(image, (IMG_HEIGHT, IMG_WIDTH), interpolation=cv2.INTER_AREA)
            image = np.array(image)
            image = image.astype('float32')
            image /= 255
            img_data_array.append(image)
            class_name.append(dir1)
    return img_data_array, class_name

# extract the image array and class name
img_data, class_name = create_dataset(TRAIN_DIR)
target_dict = {k: v for v, k in enumerate(np.unique(class_name))}
target_val = [target_dict[class_name[i]] for i in range(len(class_name))]
target_val = np.array(list(map(int, target_val))[0:300], np.float32)
Note: I used only 300 images because of limited memory.
After that I stacked the images using:
stacked_img_data = np.dstack([img_data[0:300],
                              img_data[0:300],
                              img_data[0:300],
                              img_data[0:300],
                              img_data[0:300]])
and finally trained the ensemble model using:
history = stacked_model.fit(x = stacked_img_data, y = target_val, epochs=10)
I faced this error:
ValueError: Layer model expects 5 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None, 160, 800, 3) dtype=float32>]
I think this approach is close to correct, but I am not sure how to resolve the error.
You can make this work simply by passing the five inputs as a list. np.dstack concatenates along the third axis, turning five (300, 160, 160, 3) batches into one (None, 160, 800, 3) tensor, which is exactly what the error reports: the model receives one wide input instead of five. Change
history = stacked_model.fit(x=stacked_img_data, y=target_val, epochs=10)
to
history = stacked_model.fit(x=[img_data[0:300], img_data[0:300], img_data[0:300], img_data[0:300], img_data[0:300]], y=target_val, epochs=10)
That being said, you can also give all the members one shared input:
input = tf.keras.layers.Input(shape=(160, 160, 3))
ensemble_outputs = [model(input) for model in members]
...
model = tf.keras.Model(inputs=input, outputs=output)
and pass each image only once.
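Filling in the elided middle, here is a self-contained sketch of the shared-input variant. The tiny make_member models are hypothetical stand-ins for the question's five pre-trained members, just to keep the example runnable:

import numpy as np
import tensorflow as tf

def make_member(i):
    # Stand-in for a frozen pre-trained base model.
    m = tf.keras.Sequential([
        tf.keras.layers.Conv2D(8, 3, activation='relu', input_shape=(160, 160, 3)),
        tf.keras.layers.GlobalAveragePooling2D(),
    ], name='member_' + str(i))
    m.trainable = False
    return m

members = [make_member(i) for i in range(5)]

# One shared input feeds every member, so fit() takes a single array.
inp = tf.keras.layers.Input(shape=(160, 160, 3))
merge = tf.keras.layers.concatenate([m(inp) for m in members])
hidden = tf.keras.layers.Dense(10, activation='relu')(merge)
output = tf.keras.layers.Dense(1)(hidden)
model = tf.keras.Model(inputs=inp, outputs=output)
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

# Dummy data only to show the call shape: one array, not a list of five.
x = np.random.rand(8, 160, 160, 3).astype('float32')
y = np.random.randint(0, 2, size=(8,)).astype('float32')
model.fit(x, y, epochs=1)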

BERT Encoder layer is non-trainable

I am trying to fine-tune a BERT model from TensorFlow Hub. I loaded the preprocessing layer and the encoder as follows:
bert_preprocess_model = hub.KerasLayer('https://tfhub.dev/tensorflow/bert_multi_cased_preprocess/3')
bert_model = hub.KerasLayer('https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1')
And this is my model definition:
def build_classifier_model():
    text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
    preprocessing_layer = hub.KerasLayer(bert_preprocess_model, name='preprocessing')
    encoder_inputs = preprocessing_layer(text_input)
    encoder = hub.KerasLayer(bert_model, trainable=True, name='BERT_encoder')
    outputs = encoder(encoder_inputs)
    net = outputs['pooled_output']
    net = tf.keras.layers.Dropout(0.1)(net)
    net = tf.keras.layers.Dense(3, activation='softmax', name='classifier')(net)
    return tf.keras.Model(text_input, net)

classifier_model = build_classifier_model()
But I get the following error: ERROR:absl:hub.KerasLayer is trainable but has zero trainable weights.
According to the official website, the model is fine-tunable.
I found the solution: pass trainable=True when the encoder is first loaded from its hub URL. Wrapping the already-loaded bert_model in a second hub.KerasLayer does not make the underlying BERT variables trainable; the flag has to be set on the layer that owns them:
bert_model = hub.KerasLayer('https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1', trainable=True)
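As a quick sanity check (my addition, using standard Keras attributes), the number of trainable weights should be non-zero once trainable=True is set at load time:

classifier_model = build_classifier_model()
print(len(classifier_model.trainable_weights))  # 0 before the fix, >0 after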

Create Estimator from checkpoint and save as SavedModel without further training

I have created an Estimator from a TF Slim ResNet V2 checkpoint and tested it to make predictions. What I did is essentially a normal Estimator model_fn combined with assign_from_checkpoint_fn:
def model_fn(features, labels, mode, params):
    ...
    slim.assign_from_checkpoint_fn(
        os.path.join(checkpoint_dir, 'resnet_v2_50.ckpt'),
        slim.get_model_variables('resnet_v2'))
    ...
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'class_ids': predicted_classes[:, tf.newaxis],
            'probabilities': tf.nn.softmax(logits),
            'logits': logits,
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
To export the estimator as a SavedModel, I made a serving_input_fn as follows:
def image_preprocess(image_buffer):
    image = tf.image.decode_jpeg(image_buffer, channels=3)
    image_preprocessing_fn = preprocessing_factory.get_preprocessing('inception', is_training=False)
    image = image_preprocessing_fn(image, FLAGS.image_size, FLAGS.image_size)
    return image

def serving_input_fn():
    input_ph = tf.placeholder(tf.string, shape=[None], name='image_binary')
    image_tensors = image_preprocess(input_ph)
    return tf.estimator.export.ServingInputReceiver(image_tensors, input_ph)
In the main function, I use export_saved_model to try to export the Estimator to the SavedModel format:
def main():
    ...
    classifier = tf.estimator.Estimator(model_fn=model_fn)
    classifier.export_saved_model(dir_path, serving_input_fn)
However, when I run the code, it fails with "Couldn't find trained model at /tmp/tmpn3spty2z". From what I understand, export_saved_model looks for a trained Estimator checkpoint to export. Is there a way to restore the pretrained checkpoint into an Estimator and export it as a SavedModel without any further training?
I have solved my problem. To export a TF Slim ResNet checkpoint to a SavedModel with TF 1.14, a warm start can be used together with export_savedmodel as follows:
config = tf.estimator.RunConfig(save_summary_steps=None, save_checkpoints_secs=None)
warm_start = tf.estimator.WarmStartSettings(checkpoint_dir, checkpoint_name)
classifier = tf.estimator.Estimator(model_fn=model_fn, warm_start_from=warm_start, config=config)
classifier.export_savedmodel(export_dir_base=FLAGS.output_dir, serving_input_receiver_fn=serving_input_fn)
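To sanity-check the export under TF 1.14, something like the following should work (tf.contrib.predictor ships with TF 1.x; the output directory and test image below are illustrative):

import glob
from tensorflow.contrib import predictor

# export_savedmodel writes into a timestamped subdirectory; pick the newest.
export_dir = sorted(glob.glob(FLAGS.output_dir + '/*'))[-1]
predict_fn = predictor.from_saved_model(export_dir)
print(predict_fn.feed_tensors)  # shows the expected input key(s)

with open('test.jpg', 'rb') as f:  # hypothetical test image
    result = predict_fn({'input': [f.read()]})  # key as reported by feed_tensors
print(result)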

Graph disconnected: cannot obtain value for tensor "x" at layer "x". The following previous layers were accessed without issue: []

I am building a small network from custom building blocks, one per use case. It looks like this:
def top_block(dropout=None, training=None):
    # scaled input
    input_1 = tf.keras.Input(shape=(1, 15), dtype='float32')
    input_2 = tf.keras.Input(shape=(1, 15), dtype='float32')
    if dropout:
        layer_one = tf.keras.layers.Dropout(rate=dropout)(input_1, training=training)
        layer_two = tf.keras.layers.Dropout(rate=dropout)(input_2, training=training)
        return [layer_one, layer_two]
    return [input_1, input_2]

def bottom_layer(input_layers):
    data = tf.reduce_mean(input_layers, 0)
    cls_layer = tf.keras.layers.Dense(
        1,
        kernel_initializer=keras.initializers.glorot_uniform(seed=200),
        activation='sigmoid')(data)
    model = tf.keras.Model([input_layers[0], input_layers[1]], cls_layer, name='model_1')
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.summary()
    return model
If I build this network without dropout, it works fine:
top_ = top_block()
model = bottom_layer(top_)
But if I build it with dropout, it raises an error:
top_ = top_block(dropout=0.2, training=True)
model = bottom_layer(top_)
ValueError: Graph disconnected: cannot obtain value for tensor Tensor("input_72:0", shape=(None, 1, 15), dtype=float32) at layer "input_72". The following previous layers were accessed without issue: []
How can I build the model with the dropout layer?
How can I disable dropout (training=False) during evaluate()? Do I need to load the full model and the old weights?
Thank You!
I just realized my model inputs were coming from an intermediate layer (the dropout layer); they should come directly from the Input layers:
def top_block():
    # scaled input
    input_1 = tf.keras.Input(shape=(1, 15), dtype='float32')
    input_2 = tf.keras.Input(shape=(1, 15), dtype='float32')
    return [input_1, input_2]

def apply_dropout(layers_data, dropout_val, training):
    layer_one = tf.keras.layers.Dropout(rate=dropout_val)(layers_data[0], training=training)
    layer_two = tf.keras.layers.Dropout(rate=dropout_val)(layers_data[1], training=training)
    return [layer_one, layer_two]

def bottom_layer(input_layers, data):
    data = tf.reduce_mean(data, 0)
    cls_layer = tf.keras.layers.Dense(
        1,
        kernel_initializer=keras.initializers.glorot_uniform(seed=200),
        activation='sigmoid')(data)
    model = tf.keras.Model(input_layers, cls_layer, name='model_1')
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.summary()
    return model
It works now:
top_ = top_block()
dropout_ = apply_dropout(top_, 0.2, True)
model = bottom_layer(top_, dropout_)
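Regarding the second question (turning dropout off for evaluation): since training=True is hard-coded into the dropout calls above, one option is to build a twin model with training=None, so Keras's learning phase disables dropout inside evaluate() and predict(), then copy the trained weights across. A sketch, assuming the functions above and the trained model from the previous snippet; x_val and y_val are hypothetical validation data:

top_eval = top_block()
dropout_eval = apply_dropout(top_eval, 0.2, training=None)
eval_model = bottom_layer(top_eval, dropout_eval)

eval_model.set_weights(model.get_weights())  # reuse the trained weights
# eval_model.evaluate(x_val, y_val)          # dropout is now off at eval time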
