I am using VGG16 to finetune it on my dataset.
Here's the model:
def finetune(self, aux_input):
    """Assemble a VGG16 base with a dense head that also reads ``aux_input``.

    The base is VGG16 with ImageNet weights and no top. The head flattens a
    (7, 7, 512) feature map, concatenates ``aux_input`` to it and classifies
    into ``self.num_classes`` classes with softmax. The first dense layer is
    L2-regularised with ``self.weight_decay``.

    Args:
        aux_input: Keras input tensor carrying the auxiliary feature
            (the error message quoted with this snippet reports it with
            shape (?, 1)).

    Returns:
        A ``Model`` whose inputs are ``[model.input, aux_input]``.
    """
    model = applications.VGG16(weights='imagenet', include_top=False)
    # return model
    drop_5 = Input(shape=(7, 7, 512))
    flatten = Flatten()(drop_5)
    # aux_input = Input(shape=(1,))
    # Append the auxiliary feature to the flattened convolutional features.
    concat = Concatenate(axis=1)([flatten, aux_input])
    fc1 = Dense(512, kernel_regularizer=regularizers.l2(self.weight_decay))(concat)
    fc1 = Activation('relu')(fc1)
    fc1 = BatchNormalization()(fc1)
    fc1_drop = Dropout(0.5)(fc1)
    fc2 = Dense(self.num_classes)(fc1_drop)
    top_model_out = Activation('softmax')(fc2)
    # NOTE: top_model_out depends on aux_input, but only drop_5 is declared as
    # an input here; this is the point where Keras raises "Graph disconnected".
    top_model = Model(inputs=drop_5, outputs=top_model_out)
    output = top_model(model.output)
    complete_model = Model(inputs=[model.input, aux_input], outputs=output)
    return complete_model
I have two inputs to the model. In the above function, I'm using Concatenate for the flattened array and my aux_input.
I'm not sure if this would work with imagenet weights.
When I run this, I get an error:
ValueError: Graph disconnected: cannot obtain value for tensor
Tensor("aux_input:0", shape=(?, 1), dtype=float32) at layer
"aux_input". The following previous layers were accessed without
issue: ['input_2', 'flatten_1']
Not sure where I am going wrong.
If it matters, this is fit function:
model.fit(x={'input_1': x_train, 'aux_input': y_aux_train}, y=y_train, batch_size=batch_size,
epochs=maxepoches, validation_data=([x_test, y_aux_test], y_test),
callbacks=[reduce_lr, tensorboard], verbose=2)
But, I get an error before this fit function when I call model.summary().
The problem is that you are using aux_input in your top_model but you don't specify it as an input in your definition of top_model. Try replacing your definition of top_model and output with the following:
top_model = Model(inputs=[drop_5, aux_input], outputs=top_model_out)
output = top_model([model.output, aux_input])
Related
I am working with a multitask problem and I want to define the appropriate train/test generators. So far I was working with a classification and a regression task separately so I would write eg for the classification task:
train_generator=img_gen.flow_from_dataframe(dataframe=train_dataset,x_col="file_loc",y_col="expr",target_size=(96, 96),batch_size=203,class_mode="raw")
test_generator=img_gen.flow_from_dataframe(dataframe=test_dataset_va,x_col="file_loc",y_col="expr",target_size=(96, 96),batch_size=93,shuffle=False,class_mode="raw")
and for the regression task:
train_generator=img_gen.flow_from_dataframe(dataframe=train_dataset,x_col="file_loc",y_col=["valence","arousal"],target_size=(96, 96),batch_size=203,class_mode="raw")
test_generator=img_gen.flow_from_dataframe(dataframe=test_dataset_va,x_col="file_loc",y_col=["valence","arousal"],target_size=(96, 96),batch_size=93,shuffle=False,class_mode="raw")
My data looks like below:
file_loc expr valence arousal
0 /content/train_set/images/0.jpg 1 0.785714 -0.055556
1 /content/train_set/images/100000.jpg 1 0.784476 -0.137627
I tried writing the train generator for the multitask like:
train_generator=img_gen.flow_from_dataframe(dataframe=train_dataset,x_col="file_loc",y_col=["expr","valence","arousal"],target_size=(96, 96),batch_size=203,class_mode="raw")
but it produces an error so I am sure it is not the right way. Any ideas?
# ImageNet-pretrained ResNet50 backbone without its classifier; pooling="avg"
# applies global average pooling to the last convolutional output.
resnet = tf.keras.applications.ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(96, 96, 3),
    pooling="avg",
)
# Leave every backbone layer trainable so the whole network is fine-tuned.
for layer in resnet.layers:
    layer.trainable = True

inputs = Input(shape=(96, 96, 3), name='main_input')
main_branch = resnet(inputs)
main_branch = Flatten()(main_branch)
# fully connected, few units
# Two heads share the backbone features: 8-way softmax and a 2-value regression.
expr_branch = Dense(8, activation='softmax', name='expr_output')(main_branch)
va_branch = Dense(2, name='va_output')(main_branch)
model = Model(inputs=inputs,
              outputs=[expr_branch, va_branch])
plot_model(model)
# Losses and metrics are keyed by the output layer names defined above.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss={'expr_output': 'sparse_categorical_crossentropy',
                    'va_output': 'mean_squared_error'},
              metrics={'expr_output': 'accuracy',
                       'va_output': tf.keras.metrics.MeanSquaredError()})
# Model.fit accepts generators directly; fit_generator is deprecated.
history = model.fit(
    train_generator,
    epochs=2,
    steps_per_epoch=STEP_SIZE_TRAIN_resnet,
    validation_data=test_generator,
    validation_steps=STEP_SIZE_TEST_resnet,
    max_queue_size=1,
    shuffle=True,
    verbose=1
)
When I put class_mode="raw" the error is:
raw classmode
and when I put class_mode="multi_output" it says:
multi_output classmode
I want to make a model like the below picture. (simplified)
So, practically, I want the weights with the same names to always have the same values during training. What I did was the code below:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

example_train_features = np.arange(12000).reshape(1000, 12)
example_lanbels = np.random.randint(2, size=1000)  #these data are just for illustration purposes
train_ds = tf.data.Dataset.from_tensor_slices((example_train_features, example_lanbels)).shuffle(buffer_size = 1000).batch(32)
# The same three layer objects are reused in every loop iteration below, so
# each group of features goes through identical (shared) weights.
dense1 = layers.Dense(1, activation="relu")  #input shape:4
dense2 = layers.Dense(2, activation="relu")  #input shape:1
dense3 = layers.Dense(1, activation="sigmoid")  #input shape:6
feature_input = keras.Input(shape=(12,), name="features")
nodes_list = []
for i in range(3):
    # NOTE: a single slice indexes the first axis of feature_input, which is
    # the batch axis of the (None, 12) tensor, not the feature axis.
    first_lvl_input = feature_input[i :: 4] ######## marked line
    out1 = dense1(first_lvl_input)
    out2 = dense2(out1)
    nodes_list.append(out2)
joined = layers.concatenate(nodes_list)
final_output = dense3(joined)
model = keras.Model(inputs = feature_input, outputs = final_output, name="extrema_model")
# compile_and_fit and val_ds are not defined in this excerpt.
compile_and_fit(model, train_ds, val_ds, patience=4)
model.compile(loss = tf.keras.losses.BinaryCrossentropy(),
              optimizer = tf.keras.optimizers.RMSprop(),
              metrics=keras.metrics.BinaryAccuracy())
history = model.fit(train_ds, epochs=10, validation_data=val_ds)
But when I try to run this code I get this error:
MklConcatOp : Dimensions of inputs should match: shape[0][0]= 71 vs. shape[18][0] = 70
[[node extrema_model/concatenate_2/concat (defined at <ipython-input-373-5efb41d312df>:398) ]] [Op:__inference_train_function_15338]
(please don't pay attention to numbers as they are from my real code) this is because it gets the whole data including the labels as an input, but shouldn't Keras only feed the features itself? Anyway, if I write the marked line as below:
first_lvl_input = feature_input[i :12: 4]
it doesn't give me the above error anymore. But, then I get another error which I know why happens but I don't know how to resolve it.
InvalidArgumentError: Incompatible shapes: [4,1] vs. [32,1]
[[node gradient_tape/binary_crossentropy/logistic_loss/mul/BroadcastGradientArgs
(defined at <ipython-input-1-b82546367b3c>:398) ]] [Op:__inference_train_function_6098]
This is because Keras is again feeding the whole batch array, whereas the Keras documentation says you shouldn't specify the batch dimension because Keras infers it itself, so I expected Keras to feed the data one sample at a time for my code to work. I would appreciate any ideas on how to resolve this, or on how to write code that does what I want. Thanks.
You can wrap the dense layers in a TimeDistributed wrapper and reshape your data to three dimensions, (1000, 3, 4) = (batch, sequence, feature). The 3 time steps replace the iterations of your for loop, so the four features of every step are multiplied by the same weights each time.
# Arrange the data as (samples, steps, features per step): 3 steps of 4 features.
example_train_features = np.arange(12000).reshape(1000, 3, 4)
example_lanbels = np.random.randint(2, size=1000)  # illustrative random labels
labelled_samples = tf.data.Dataset.from_tensor_slices(
    (example_train_features, example_lanbels)
)
train_ds = labelled_samples.shuffle(buffer_size=1000).batch(32)

# TimeDistributed applies the wrapped Dense layer to each of the 3 steps.
dense1 = layers.TimeDistributed(layers.Dense(1, activation="relu"))  # per-step input: 4 values
dense2 = layers.TimeDistributed(layers.Dense(2, activation="relu"))  # per-step input: 1 value
dense3 = layers.Dense(1, activation="sigmoid")  # input: 3 steps * 2 values = 6

feature_input = keras.Input(shape=(3, 4), name="features")
per_step_out = dense2(dense1(feature_input))
# Merge the per-step outputs into one vector for the final classifier.
flat = layers.Flatten()(per_step_out)
final_output = dense3(flat)
model = keras.Model(
    inputs=feature_input,
    outputs=final_output,
    name="extrema_model",
)
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.RMSprop(),
    metrics=keras.metrics.BinaryAccuracy(),
)
history = model.fit(train_ds, epochs=10)
I have created a model that takes an input of shape (None, 512). Below is the summary of my model
shape of training feature
train_ids.shape
(10, 512)
shape of the training response variable
indus_cat_train.shape
(10, 49)
My model runs perfectly if I use
history = model.fit(
train_ids, indus_cat_train, epochs=2, validation_data=(
valid_ids, indus_cat_valid))
However, my actual dataset is very large, and feeding the complete dataset all at once consumes so much RAM that the whole process is shut down.
I want to feed all data in batches or one by one. In order to complete this task, I tried out tf.data.Dataset.from_tensor_slices function
# training data
tf_train_data = tf.data.Dataset.from_tensor_slices((train_ids, indus_cat_train))
# validation data
tf_valid_data = tf.data.Dataset.from_tensor_slices((valid_ids, indus_cat_valid))
The above code is running fine and upon inspection, it is giving the desired shape
# Print the shape of every (features, response) pair yielded by the dataset.
for elem in t:
    print(elem[0].shape)  # for features
    print(elem[1].shape)  # for response
print output
(512,) # for features
(49,) # for response variable
# terminating all other output to save space
However, on calling model.fit on tf_train_data, the model gives me an error
bert_history = model.fit(
tf_train_data, epochs=2, validation_data=tf_valid_data)
WARNING:tensorflow:Model was constructed with shape (None, 512) for input Tensor("input_ids_1:0", shape=(None, 512), dtype=int32), but it was called on an input with incompatible shape (512, 1).
Sharing model code for further understanding as asked by Prateek
# training data
tf_train_data = tf.data.Dataset.from_tensor_slices((train_ids, indus_cat_train))
# validation data
tf_valid_data = tf.data.Dataset.from_tensor_slices((valid_ids, indus_cat_valid))
# model downloaded from bert
bert_model_name = "uncased_L-12_H-768_A-12"
bert_ckpt_dir = "bert_model"
bert_ckpt_file = os.path.join(bert_ckpt_dir, "bert_model.ckpt")
bert_config_file = os.path.join(bert_ckpt_dir, "bert_config.json")
# creating tokenizer
tokenizer = FullTokenizer(vocab_file=os.path.join(bert_ckpt_dir, "vocab.txt"))
# create function for model
def create_model(max_seq_len, bert_ckpt_file, n_classes):
    """Build a BERT-based classifier and load the stock checkpoint weights.

    The BERT configuration is read from the module-level ``bert_config_file``.
    The first-token output of BERT is passed through dropout, a 765-unit tanh
    layer, dropout again, and a softmax layer with ``n_classes`` units.

    Args:
        max_seq_len: Length of the int32 ``input_ids`` sequences.
        bert_ckpt_file: Checkpoint passed to ``load_stock_weights``.
        n_classes: Number of units in the softmax output layer.

    Returns:
        The assembled ``keras.Model`` mapping ``input_ids`` to class
        probabilities.
    """
    with tf.io.gfile.GFile(bert_config_file, "r") as reader:
        # get bert configurations
        bert_configurations = StockBertConfig.from_json_string(reader.read())
        bert_params = map_stock_config_to_params(bert_configurations)
        # NOTE(review): the original assigned a dead local named
        # `bert_params_adapter_size`; presumably the attribute was intended.
        bert_params.adapter_size = None
        bert = BertModelLayer.from_params(bert_params, name="bert")
    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype="int32",
                                   name="input_ids")
    bert_output = bert(input_ids)
    print("bert shape", bert_output.shape)
    # Keep only the first position of the sequence output.
    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(bert_output)
    cls_out = keras.layers.Dropout(0.5)(cls_out)
    logits = keras.layers.Dense(units=765, activation="tanh")(cls_out)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(
        units=n_classes, activation="softmax")(logits)
    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))
    # Weights are loaded after build(), once the model's variables exist.
    load_stock_weights(bert, bert_ckpt_file)
    return model
n_cats = 49  #number of output categories
model = create_model(max_seq_len=512, bert_ckpt_file=bert_ckpt_file,
                     n_classes=n_cats)
model.summary()
# learning_rate is not defined in this excerpt.
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=1e-08)
loss = tf.keras.losses.CategoricalCrossentropy()
metric = tf.keras.metrics.CategoricalCrossentropy(name='categorical_crossentropy')
model.compile(optimizer=optimizer, loss=loss, metrics=[metric])
# NOTE: tf_train_data / tf_valid_data are still unbatched at this point.
bert_history = model.fit(tf_train_data, epochs=2, validation_data=tf_valid_data)
I solved it using dataset.batch. The tf.data.Dataset was missing a batch size, so the supplied tensors were not batched, i.e. the model received inputs of shape (512, 1) instead of (batch, 512) and targets of shape (49, 1) instead of (batch, 49).
batch_size = 2
# Slice the arrays into per-sample pairs, then group them into batches.
train_pairs = tf.data.Dataset.from_tensor_slices((train_ids, indus_cat_train))
tf_train_data = train_pairs.batch(batch_size)
valid_pairs = tf.data.Dataset.from_tensor_slices((valid_ids, indus_cat_valid))
tf_valid_data = valid_pairs.batch(batch_size)
bert_history = model.fit(
    tf_train_data,
    epochs=2,
    validation_data=tf_valid_data,
)
I am trying to update my code to work with TF 2.0. As a start, I have used a pre-made Keras model:
def train_input_fn(batch_size=1):
    """An input function for training.

    Reads the "sample" and "label" columns of ``CSV_PATH_TRAIN`` with
    ``make_csv_dataset``, maps ``parse_features_vector`` over the result,
    then repeats and batches it.

    Args:
        batch_size: Batch size passed both to ``make_csv_dataset`` and to the
            final ``batch`` call.

    Returns:
        The resulting ``tf.data`` dataset.
    """
    print("train_input_fn: start function")
    train_dataset = tf.data.experimental.make_csv_dataset(CSV_PATH_TRAIN, batch_size=batch_size, label_name='label',
                                                          select_columns=["sample", "label"])
    print('train_input_fn: finished make_csv_dataset')
    train_dataset = train_dataset.map(parse_features_vector)
    print("train_input_fn: finished the map with pars_features_vector")
    # NOTE: make_csv_dataset was already given batch_size above, so this
    # batches a second time; the logged element shape
    # (None, 1, 160, 160, 3) shows the extra axis.
    train_dataset = train_dataset.repeat().batch(batch_size)
    print("train_input_fn: finished batch size. train_dataset is %s ", train_dataset)
    return train_dataset
IMG_SHAPE = (160, 160, 3)
# ImageNet-pretrained MobileNetV2 without its classifier, kept frozen.
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
                                               include_top=False,
                                               weights='imagenet')
base_model.trainable = False
# `model` is not defined in this excerpt.
# `learning_rate` replaces the deprecated `lr` keyword.
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001),
              loss='binary_crossentropy',
              metrics=['accuracy'])
estimator = tf.keras.estimator.model_to_estimator(keras_model=model, model_dir='./date')
# train_input_fn read a CSV of images, resize them and returns dataset batch
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=20)
# eval_input_fn read a CSV of images, resize them and returns dataset batch of one sample
eval_spec = tf.estimator.EvalSpec(eval_input_fn)
tf.estimator.train_and_evaluate(estimator, train_spec=train_spec, eval_spec=eval_spec)
LOGS are:
train_input_fn: finished batch size. train_dataset is %s <BatchDataset shapes: ({mobilenetv2_1.00_160_input: (None, 1, 160, 160, 3)}, (None, 1)), types: ({mobilenetv2_1.00_160_input: tf.float32}, tf.int32)>
ERROR:
ValueError: Input 0 of layer Conv1_pad is incompatible with the layer: expected ndim=4, found ndim=5. Full shape received: [None, 1, 160, 160, 3]
What is the right way to combine tf.keras with the Dataset API? Is this the issue, or something else?
Thanks,
eilalan
You don't need this line
train_dataset = train_dataset.repeat().batch(batch_size)
The function you're using to create the dataset, tf.data.experimental.make_csv_dataset, already batches it. You can still use repeat, though.
I am trying to mimic this keras blog about fine tuning image classifiers. I would like to use the Inceptionv3 found on a fchollet repo.
Inception is a Model (functional API), so I can't just do model.add(top_model) which is reserved for Sequential.
How can I combine two functional Models? Let's say I have
# First model: flatten the input and project it to 4 values.
inputs = Input(shape=input_shape)
x = Flatten()(inputs)
predictions = Dense(4, name='final1')(x)
# Keras 2 names these keyword arguments `inputs` / `outputs`.
model1 = Model(inputs=inputs, outputs=predictions)
for the first model and
# Second model: takes the 4 values produced by the first model's 'final1' layer.
inputs_2 = Input(shape=(4,))
y = Dense(5)(inputs_2)
y = Dense(2, name='final2')(y)
predictions_2 = Dense(29)(y)
# Keras 2 names these keyword arguments `inputs` / `outputs`.
model2 = Model(inputs=inputs_2, outputs=predictions_2)
for the second. I now want an end-to-end model that goes from inputs to predictions_2 and links predictions to inputs_2.
I tried using model1.get_layer('final1').output but I had a mismatch with types and I couldn't make it work.
I haven't tried this but according to the documentation functional models are callable, so you can do something like:
y = model2(model1(x))
where x is the data that goes to inputs and y is the result of predictions_2
I ran into this problem as well while fine tuning VGG16. Here's what worked for me and I imagine a similar approach can be taken for Inception V3. Tested on Keras 2.0.5 with Tensorflow 1.2 backend.
# NOTE: define the following variables
#       top_model_weights_path
#       num_classes
#       dense_layer_1 = 4096
#       dense_layer_2 = 4096
vgg16 = applications.VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3))
# Inspect the model
vgg16.summary()
# This shape has to match the last layer in VGG16 (without top)
dense_input = Input(shape=(7, 7, 512))
dense_output = Flatten(name='flatten')(dense_input)
dense_output = Dense(dense_layer_1, activation='relu', name='fc1')(dense_output)
dense_output = Dense(dense_layer_2, activation='relu', name='fc2')(dense_output)
dense_output = Dense(num_classes, activation='softmax', name='predictions')(dense_output)
top_model = Model(inputs=dense_input, outputs=dense_output, name='top_model')
# from: https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html
# note that it is necessary to start with a fully-trained
# classifier, including the top classifier,
# in order to successfully do fine-tuning
top_model.load_weights(top_model_weights_path)
block5_pool = vgg16.get_layer('block5_pool').output
# Now combine the two models
# Calling top_model on the VGG16 tensor chains the two graphs into one.
full_output = top_model(block5_pool)
full_model = Model(inputs=vgg16.input, outputs=full_output)
# set the first 15 layers (up to the last conv block)
# to non-trainable (weights will not be updated)
# WARNING: this may not be applicable for Inception V3
for layer in full_model.layers[:15]:
    layer.trainable = False
# Verify things look as expected
full_model.summary()
# compile the model with a SGD/momentum optimizer
# and a very slow learning rate.
full_model.compile(
    loss='binary_crossentropy',
    optimizer=optimizers.SGD(lr=5e-5, momentum=0.9),
    metrics=['accuracy'])
# Train the model...
# Train the model...
I think there are 2 options depending on what you need:
(a) predictions_1 and predictions_2 matter for you. In this case, you can train a network with 2 outputs. Here an example derived from your post:
input_shape = [3, 20]
inputs = Input(shape=input_shape)
x = Flatten()(inputs)
predictions_1 = Dense(4, name='predictions_1')(x)
# here the predictions_1 just corresponds to your next layer's input
y = Dense(5)(predictions_1)
y = Dense(2)(y)
predictions_2 = Dense(29, name='predictions_2')(y)
# you specify here that you have 2 outputs
# (Keras 2 names these keyword arguments `inputs` / `outputs`)
model = Model(inputs=inputs, outputs=[predictions_1, predictions_2])
For the .fit and .predict, you can find a lot of details in https://keras.io/getting-started/functional-api-guide/, section: Multi-input and multi-output models.
(b) you are only interested in predictions_2. In this case, you can just do:
input_shape = [3, 20]
inputs = Input(shape=input_shape)
x = Flatten()(inputs)
predictions_1 = Dense(4, name='predictions_1')(x)
# here the predictions_1 just corresponds to your next layer's input
y = Dense(5)(predictions_1)
y = Dense(2)(y)
predictions_2 = Dense(29, name='predictions_2')(y)
# you specify here that your only output is predictions_2
# (Keras 2 names these keyword arguments `inputs` / `outputs`)
model = Model(inputs=inputs, outputs=predictions_2)
Now as regards inception_v3. You can define by yourself the architecture and modify the deep layers inside according to your needs (giving to these layers specific names in order to avoid keras naming them automatically).
After that, compile your model and loads weights (as in https://keras.io/models/about-keras-models/ see function load_weights(..., by_name=True))
# you can load weights for only the part that corresponds to the true
# inception_v3 architecture. The other part will be initialized
# randomly
model.load_weights("inception_v3.hdf5", by_name=True)
This should solve your problem. By the way, you can find extra information here: https://www.gradientzoo.com. The doc. explains several saving / loading / fine-tuning routines ;)
Update: if you do not want to redefine your model from scratch you can do the following:
input_shape = [3, 20]
# define model1 and model2 as you want
# (Keras 2 names the Model keyword arguments `inputs` / `outputs`)
inputs1 = Input(shape=input_shape)
x = Flatten()(inputs1)
predictions_1 = Dense(4, name='predictions_1')(x)
model1 = Model(inputs=inputs1, outputs=predictions_1)
inputs2 = Input(shape=(4,))
y = Dense(5)(inputs2)
y = Dense(2)(y)
predictions_2 = Dense(29, name='predictions_2')(y)
model2 = Model(inputs=inputs2, outputs=predictions_2)


# then define functions returning the image of an input through model1 or model2
def give_model1():
    """Return a function that applies model1 to its argument."""
    def f(x):
        return model1(x)
    return f


def give_model2():
    """Return a function that applies model2 to its argument."""
    def g(x):
        return model2(x)
    return g


# now you can create a global model as follows:
# model1 and model2 are called directly on tensors here; the give_model*
# helpers above are not used by this code.
inputs = Input(shape=input_shape)
x = model1(inputs)
predictions = model2(x)
model = Model(inputs=inputs, outputs=predictions)
Drawing from filitchp's answer above, assuming the output dimensions of model1 match the input dimensions of model2, this worked for me:
model12 = Model(inputs=inputs, outputs=model2(model1.output))