I am trying to use this notebook, in which we define a 3-head model based on DenseNet201. The AlexNet-based version works correctly, but the DenseNet201-based one throws an error. I am a PyTorch user and have not been able to figure out the cause of this error: ValueError: Missing data for input "input_5". You passed a data dictionary with keys ['img_input']. Expected the following keys: ['input_5'].
I know that somewhere in the following code snippet I should give the input the name 'img_input', but I cannot figure out where.
class base_model():
def __init__(self, side_dim, n_bb, n_classes, name_model):
self.side_dim = side_dim
self.name_model = name_model
# base model DenseNet
if name_model == 'DenseNet201':
self.base_model = keras.applications.DenseNet201(
include_top=False,
input_shape=(self.side_dim, self.side_dim, 3),
)
self.image_input = self.base_model.input
self.flatten = keras.layers.Flatten()(self.base_model.layers[-2].output)
self.BatcNorm = keras.layers.BatchNormalization()(self.flatten)
print('Base model: DenseNet121 (7.2M params x 201 layers')
# ----------------------------------------------------------------------
# Add head with three different outputs to last layer of the basic model
# ----------------------------------------------------------------------
# class output
self.class_categorical = keras.layers.Dense((n_bb * n_classes),
activation='softmax')(self.BatcNorm)
self.class_output = keras.layers.Reshape((n_bb, n_classes),
name='class_output')(self.class_categorical)
# confidence output
self.score_confidence = keras.layers.Dense((n_bb),
name='score_confidence',
activation='tanh')(self.BatcNorm)
# bounding boxes coordinate output
self.score_coords = keras.layers.Dense((n_bb * 4),
name='score_coords')(self.BatcNorm)
The error is thrown when I run the following:
# let's start our training
train_history = myModel.fit({'img_input': X_train},
{'class_output': class_target,
'score_confidence': target_confidence,
'score_coords': target_coords},
epochs=N_ep,
validation_data=({'img_input': X_val},
{'class_output': Val_class,
'score_confidence': Val_confidence,
'score_coords': Val_coords}),
batch_size=Batchs,
initial_epoch = init_ep,
verbose=1,
callbacks=[callbacks,
tensorboard_callback])
In the AlexNet-based network the input name is set directly, but I do not know how to do that for DenseNet201.
Can you please help me?
The issue is that your input node does not have the same name as the dictionary key holding your input.
You can create your input layer beforehand with the right name, and pass it to the DenseNet201 function as the input tensor.
self.image_input = keras.Input((self.side_dim, self.side_dim, 3), name="img_input")
self.base_model = keras.applications.DenseNet201(
include_top=False,
input_tensor=self.image_input,
)
Another option is to get the name of the input right in your dictionary by using the name of the input node:
myModel.fit({myModel.input.name: X_train},
{'class_output': class_target,
'score_confidence': target_confidence,
'score_coords': target_coords})
A final option is to skip using a dictionary altogether, given that you have a single input:
myModel.fit(X_train,
{'class_output': class_target,
'score_confidence': target_confidence,
'score_coords': target_coords})
Related
I want to add explanation to my model running in Vertex AI using the Vertex AI SDK. I get a silent error when running the batch prediction using ModelBatchPredictOp, where the ModelBatchPredictOp node runs infinitely. Here is my ModelBatchPredictOp definition:
ModelBatchPredictOp(
project=project_id,
job_display_name="tensorflow-ex-batch-prediction-job",
location=project_location,
model=champion_model.outputs["model"],
instances_format="csv",
predictions_format="jsonl",
gcs_source_uris=gcs_source_uris,
gcs_destination_output_uri_prefix=gcs_destination_output_uri_prefix,
machine_type=batch_prediction_machine_type,
starting_replica_count=batch_prediction_min_replicas,
max_replica_count=batch_prediction_max_replicas,
generate_explanation=True,
)
I have narrowed down the issue to the inputTensorName key in the INPUTMETADATA spec. The 'inputTensorName' key in the INPUTMETADATA spec takes a string for its value (INPUTMETADATA spec). In my case I have a TensorFlow model defined using the functional API, meaning it has multiple inputs, as shown below:
# numeric/categorical features in Chicago trips dataset to be preprocessed
NUM_COLS = ["dayofweek", "hourofday", "trip_distance", "trip_miles", "trip_seconds"]
ORD_COLS = ["company"]
OHE_COLS = ["payment_type"]
def build_and_compile_model(dataset: Dataset, model_params: dict) -> Model:
    """Create the feature-preprocessing network and compile it.

    Args:
        dataset (Dataset): training dataset; passed on to the
            `normalization` and `str_lookup` helpers
        model_params (dict): model parameters; this function reads the keys
            "hidden_units", "optimizer", "learning_rate", "loss_fn" and
            "metrics"

    Returns:
        model (Model): built and compiled model
    """
    # One scalar input (shape `()`) per column: float32 for the numeric
    # columns, string for the ordinal and one-hot columns. The groups are
    # visited in a fixed order so the inputs are created in the same order
    # as the column lists.
    all_ins = {}
    for columns, dtype in (
        (NUM_COLS, tf.float32),
        (ORD_COLS, tf.string),
        (OHE_COLS, tf.string),
    ):
        for col in columns:
            all_ins[col] = Input(shape=(), name=col, dtype=dtype)

    # Expand every input by 1 dimension. NOTE: this is useful when passing
    # in scalar inputs to a model in Vertex AI batch predictions or endpoints
    # e.g. `{"instances": {"input1": 1.0, "input2": "str"}}` instead of
    # `{"instances": {"input1": [1.0], "input2": ["str"]}`
    exp_ins = {}
    for col, scalar_in in all_ins.items():
        exp_ins[col] = tf.expand_dims(scalar_in, axis=-1)

    # Preprocess the expanded inputs, one encoder per column.
    features = []
    for col in NUM_COLS:
        features.append(normalization(col, dataset)(exp_ins[col]))

    ordinal = []
    for col in ORD_COLS:
        ordinal.append(str_lookup(col, dataset, "int")(exp_ins[col]))

    one_hot = []
    for col in OHE_COLS:
        one_hot.append(str_lookup(col, dataset, "one_hot")(exp_ins[col]))

    # The ordinal encodings are cast to float32 (like the other layers) and
    # take their place between the numeric and the one-hot features.
    for encoded in ordinal:
        features.append(tf.cast(encoded, tf.float32))
    features.extend(one_hot)

    # Concatenate the encoded inputs, then stack the hidden dense layers and
    # the single-unit linear output layer on top.
    x = Concatenate()(features)
    for n_units, act in model_params["hidden_units"]:
        x = Dense(n_units, activation=act)(x)
    x = Dense(1, name="output", activation="linear")(x)

    model = Model(inputs=all_ins, outputs=x, name="nn_model")
    model.summary()

    logging.info(f"Use optimizer {model_params['optimizer']}")
    optimizer = optimizers.get(model_params["optimizer"])
    optimizer.learning_rate = model_params["learning_rate"]

    model.compile(
        loss=model_params["loss_fn"],
        optimizer=optimizer,
        metrics=model_params["metrics"],
    )
    return model
As a consequence when getting the input layer, using;
serving_input = list(
loaded_model.signatures["serving_default"].structured_input_signature[1].keys()
)[0]
INPUT_METADATA = {
"input_tensor_name": serving_input,
"encoding": "BAG_OF_FEATURES",
"modality": "numeric",
"index_feature_mapping": cols,
}
I only get one Input layer corresponding to one of the input tensors (cols) in either NUM_COLS, ORD_COLS or OHE_COLS. This causes an infinite run when running the ModelBatchPredictOp in the prediction pipeline as only the name to one input tensor is passed as the value to the inputTensorName. Running
list(model.signatures["serving_default"].structured_input_signature[1].keys())
returns a list of all the input layers corresponding to the input tensor names (cols) defined in NUM_COLS, ORD_COLS and OHE_COLS.
How do I specify the value of inputTensorName in order to capture all the input layers? Or is there a work around to assign multiple input tensor names to inputTensorName?
I am new to TensorFlow. My task is predict some values (in this case, speed). If I use one value for the model input (l0), then everything is fine, I can train it and make predictions:
dataset, meta = arff.loadarff('data.arff')
# meta: 'XYZ'
# TIMESTAMP_ms's type is numeric
# SPEED_KMH's type is numeric
# POWER_W's type is numeric
# CURRENT_A's type is numeric
# VOLTAGE_V's type is numeric
# TORQUE_Nm's type is numeric
# CADENCE_RPM's type is numeric
speed = np.array(dataset[:]['SPEED_KMH'], dtype=float)
cadence = np.array(dataset[:]['CADENCE_RPM'], dtype=float)
power = np.array(dataset[:]['POWER_W'], dtype=float)
torque = np.array(dataset[:]['TORQUE_Nm'], dtype=float)
# Create model
l0 = tf.keras.layers.Dense(units=4, input_shape=[1]) #with one input all ok. BUT HOW TO USE n-Input?
l1 = tf.keras.layers.Dense(units=4)
l2 = tf.keras.layers.Dense(units=1)
model = tf.keras.Sequential([l0, l1, l2])
model.compile(loss='mean_squared_error', optimizer=tf.keras.optimizers.Adam(0.01))
model.fit(cadence, speed, epochs=500, verbose=True)
...
model.predict([<some_val>])
BUT, when I tried to add several values to the input layer to increase the accuracy of the model, I have a problem:
...
train_data = []
for i in range(len(dataset)):
train_data.append([cadence[i], power[i], torque[i]])
...
l0 = tf.keras.layers.Dense(units=4, input_shape=[3])
...
model.fit(train_data, speed, epochs=1, verbose=True)
ValueError: Failed to find data adapter that can handle input: ( containing values of types {'(
Please, help me transfer multiple values to the input layer l0 of the model?
One way of using multiple inputs for a model is to use Tensorflow's functional API. It allows you to set multiple inputs which you can concatenate together later on in your model.
input1 = tf.keras.layers.Input(shape=(1, ))
input2 = tf.keras.layers.Input(shape=(1,))
input3 = tf.keras.layers.Input(shape=(1,))
mergeLayer = tf.keras.layers.Concatenate(axis=1)([input1, input2, input3])
dense1 = tf.keras.layers.Dense(4)(mergeLayer)
dense2 = tf.keras.layers.Dense(4)(dense1)
output = tf.keras.layers.Dense(1)(dense2)
model = tf.keras.models.Model([input1, input2, input3], output)
Now you can pass your data as a list containing one array per input and call the fit() method on the new model, e.g. model.fit([cadence, power, torque], speed).
For some more information on the functional API, you can go to the docs.
The Keras Functional API
I want to run a seq2seq model using LSTM for a customer journey analysis. I am able to run the model, but I am unable to load the saved model in a different notebook.
Code for attention model is here:
# RNN "Cell" classes in Keras perform the actual data transformations at each timestep. Therefore, in order to add attention to LSTM, we need to make a custom subclass of LSTMCell.
class AttentionLSTMCell(LSTMCell):
def __init__(self, **kwargs):
# attentionMode switches what call() returns; it starts switched off.
self.attentionMode = False
super(AttentionLSTMCell, self).__init__(**kwargs)
# Build is called to initialize the variables that our cell will use. We will let other Keras
# classes (e.g. "Dense") actually initialize these variables.
#tf_utils.shape_type_conversion
# NOTE(review): the line above looks like a commented-out `@tf_utils.shape_type_conversion`
# decorator - confirm whether it is still needed.
def build(self, input_shape):
# Converts the input sequence into a sequence which can be matched up to the internal
# hidden state.
self.dense_constant = TimeDistributed(Dense(self.units, name="AttLstmInternal_DenseConstant"))
# Transforms the internal hidden state into something that can be used by the attention
# mechanism.
self.dense_state = Dense(self.units, name="AttLstmInternal_DenseState")
# Transforms the combined hidden state and converted input sequence into a vector of
# probabilities for attention.
self.dense_transform = Dense(1, name="AttLstmInternal_DenseTransform")
# We will augment the input into LSTMCell by concatenating the context vector. Modify
# input_shape to reflect this.
# input_shape[0] is unpacked as the regular input shape and input_shape[-1] as the shape
# of the constant (encoder sequence); only its last dimension is used below.
batch, input_dim = input_shape[0]
batch, timesteps, context_size = input_shape[-1]
lstm_input = (batch, input_dim + context_size)
# The LSTMCell superclass expects no constant input, so strip that out.
return super(AttentionLSTMCell, self).build(lstm_input)
# This must be called before call(). The "input sequence" is the output from the
# encoder. This function will do some pre-processing on that sequence which will
# then be used in subsequent calls.
def setInputSequence(self, input_seq):
self.input_seq = input_seq
self.input_seq_shaped = self.dense_constant(input_seq)
# Number of timesteps, read from the second-to-last dimension of the sequence.
self.timesteps = tf.shape(self.input_seq)[-2]
# This is a utility method to adjust the output of this cell. When attention mode is
# turned on, the cell outputs attention probability vectors across the input sequence.
def setAttentionMode(self, mode_on=False):
self.attentionMode = mode_on
# This method sets up the computational graph for the cell. It implements the actual logic
# that the model follows.
# Note: the `constants` argument is accepted but not read in this method; the encoder
# sequence is taken from the attributes stored by setInputSequence().
def call(self, inputs, states, constants):
# Separate the state list into the two discrete state vectors.
# ytm is the "memory state", stm is the "carry state".
ytm, stm = states
# We will use the "carry state" to guide the attention mechanism. Repeat it across all
# input timesteps to perform some calculations on it.
stm_repeated = K.repeat(self.dense_state(stm), self.timesteps)
# Now apply our "dense_transform" operation on the sum of our transformed "carry state"
# and all encoder states. This will squash the resultant sum down to a vector of size
# [batch,timesteps,1]
# Note: Most sources I encounter use tanh for the activation here. I have found with this dataset
# and this model, relu seems to perform better. It makes the attention mechanism far more crisp
# and produces better translation performance, especially with respect to proper sentence termination.
combined_stm_input = self.dense_transform(
keras.activations.relu(stm_repeated + self.input_seq_shaped))
# Performing a softmax over axis 1 turns the scores into a probability (attention weight)
# for each encoder output.
score_vector = keras.activations.softmax(combined_stm_input, 1)
# In this implementation, we grant "partial attention" to each encoder output based on
# its probability computed above. Other options would be to only give attention
# to the highest probability encoder output or some similar set.
context_vector = K.sum(score_vector * self.input_seq, 1)
# Finally, mutate the input vector. It will now contain the traditional inputs (like the seq2seq
# we trained above) in addition to the attention context vector we calculated earlier in this method.
inputs = K.concatenate([inputs, context_vector])
# Call into the super-class to invoke the LSTM math.
res = super(AttentionLSTMCell, self).call(inputs=inputs, states=states)
# This if statement switches the return value of this method if "attentionMode" is turned on:
# the reshaped attention scores replace the first element of the result, res[1] is kept.
if(self.attentionMode):
return (K.reshape(score_vector, (-1, self.timesteps)), res[1])
else:
return res
# Custom implementation of the Keras LSTM that adds an attention mechanism.
# This is implemented by taking an additional input (using the "constants" of the RNN class) into the LSTM: the encoder output vectors across the entire input sequence.
# NOTE(review): __init__ takes `units` as its first argument while the base RNN constructor is
# given the cell (see the super().__init__ call below), and no get_config()/from_config() is
# overridden here. Rebuilding this layer from a saved config presumably passes the restored cell
# where `units` is expected - TODO confirm and add the overrides if the model must be reloadable.
class LSTMWithAttention(RNN):
def __init__(self, units, **kwargs):
# The attention logic lives in the custom cell; this layer only wraps it.
cell = AttentionLSTMCell(units=units)
self.units = units
super(LSTMWithAttention, self).__init__(cell, **kwargs)
#tf_utils.shape_type_conversion
# NOTE(review): the line above looks like a commented-out `@tf_utils.shape_type_conversion`
# decorator - confirm whether it is still needed.
def build(self, input_shape):
# Remember the last two dimensions of the first entry of input_shape.
self.input_dim = input_shape[0][-1]
self.timesteps = input_shape[0][-2]
return super(LSTMWithAttention, self).build(input_shape)
# This call is invoked with the entire time sequence. The RNN sub-class is responsible
# for breaking this up into calls into the cell for each step.
# The "constants" variable is the key to our implementation. It was specifically added
# to Keras to accommodate the "attention" mechanism we are implementing.
def call(self, x, constants, **kwargs):
# Keep a reference to the input; when a list is passed, its first element is kept.
if isinstance(x, list):
self.x_initial = x[0]
else:
self.x_initial = x
# The only difference in the LSTM computational graph really comes from the custom
# LSTM Cell that we utilize.
# Clear the cell's dropout mask attributes before running the sequence.
self.cell._dropout_mask = None
self.cell._recurrent_dropout_mask = None
# Hand the first constant (the encoder output sequence) to the cell before it is stepped.
self.cell.setInputSequence(constants[0])
return super(LSTMWithAttention, self).call(inputs=x, constants=constants, **kwargs)
Code defining encoder and decoder model:
# Encoder Layers
encoder_inputs = Input(shape=(None,len_input), name="attenc_inputs")
encoder = LSTM(units=units, return_sequences=True, return_state=True)
encoder_outputs, state_h, state_c = encoder((encoder_inputs))
encoder_states = [state_h, state_c]
#define inference encoder
encoder_model = Model(encoder_inputs, encoder_states)
#encoder_model.save('atten_enc_model.h5')
# define training decoder
decoder_inputs = Input(shape=(None, n_output))
Attention_dec_lstm = LSTMWithAttention(units=units, return_sequences=True, return_state=True)
# Note that the only real difference here is that we are feeding attenc_outputs to the decoder now.
attdec_lstm_out, _, _ = Attention_dec_lstm(inputs=decoder_inputs,
constants=encoder_outputs,
initial_state=encoder_states)
decoder_dense1 = Dense(units, activation="relu")
decoder_dense2 = Dense(n_output, activation='softmax')
decoder_outputs = decoder_dense2(Dropout(rate=.10)(decoder_dense1(Dropout(rate=.10)(attdec_lstm_out))))
atten_model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
atten_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
atten_model.summary()
#Defining inference decoder
state_input_h = Input(shape=(units,), name="state_input_h")
state_input_c = Input(shape=(units,), name="state_input_c")
decoder_states_inputs = [state_input_h, state_input_c]
attenc_seq_out = Input(shape=encoder_outputs.get_shape()[1:], name="attenc_seq_out")
attenc_seq_out
inf_attdec_inputs = Input(shape=(None,n_output), name="inf_attdec_inputs")
attdec_res, attdec_h, attdec_c = Attention_dec_lstm(inputs=inf_attdec_inputs,
constants=attenc_seq_out,
initial_state=decoder_states_inputs)
decoder_states = [attdec_h, attdec_c]
decoder_model = Model(inputs=[inf_attdec_inputs, state_input_h, state_input_c, attenc_seq_out],
outputs=[attdec_res, attdec_h, attdec_c])
Code for model fit and save:
history = atten_model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
batch_size=batch_size_num,
epochs=epochs,
validation_split=0.2,verbose=1)
atten_model.save('atten_model_lstm.h5')
Code to load the encoder decoder model with custom Attention layer:
with open('atten_model_lstm.json') as mdl:
json_string = mdl.read()
model = model_from_json(json_string, custom_objects={'AttentionLSTMCell': AttentionLSTMCell, 'LSTMWithAttention': LSTMWithAttention})
This loading code gives the following error:
TypeError: int() argument must be a string, a bytes-like object or a number, not 'AttentionLSTMCell'
Here's a solution inspired by the link in my comment:
# serialize model architecture to JSON
atten_model_json = atten_model.to_json()
with open("atten_model.json", "w") as json_file:
    json_file.write(atten_model_json)
# serialize weights to HDF5
atten_model.save_weights("atten_model.h5")
print("Saved model to disk")

# Different part of your code or different file
# load json and create model; the context manager closes the file even if
# reading fails
with open("atten_model.json", "r") as json_file:
    loaded_model_json = json_file.read()
# The architecture contains custom layers, so their classes have to be
# defined/imported here and passed via custom_objects, otherwise Keras cannot
# resolve their names.
# NOTE(review): the custom classes must also be re-creatable from their saved
# config for this call to succeed - confirm against their constructors.
loaded_model = model_from_json(
    loaded_model_json,
    custom_objects={
        "AttentionLSTMCell": AttentionLSTMCell,
        "LSTMWithAttention": LSTMWithAttention,
    },
)
# load weights into new model
loaded_model.load_weights("atten_model.h5")
print("Loaded model from disk")
I'm looking for a way to create a Keras model with optional inputs. In raw TensorFlow, you can create placeholders with optional inputs as follows:
import numpy as np
import tensorflow as tf
def main():
required_input = tf.placeholder(
tf.float32,
shape=(None, 2),
name='required_input')
default_optional_input = tf.random_uniform(
shape=(tf.shape(required_input)[0], 3))
optional_input = tf.placeholder_with_default(
default_optional_input,
shape=(None, 3),
name='optional_input')
output = tf.concat((required_input, optional_input), axis=-1)
with tf.Session() as session:
with_optional_input_output_np = session.run(output, feed_dict={
required_input: np.random.uniform(size=(4, 2)),
optional_input: np.random.uniform(size=(4, 3)),
})
print(f"with optional input: {with_optional_input_output_np}")
without_optional_input_output_np = session.run(output, feed_dict={
required_input: np.random.uniform(size=(4, 2)),
})
print(f"without optional input: {without_optional_input_output_np}")
if __name__ == '__main__':
main()
In a similar fashion, I would want to be able to have optional inputs for my Keras model. It seems like the tensor argument in the keras.layers.Input.__init__ might be what I'm looking for, but at least it doesn't work as I was expecting (i.e. the same way as tf.placeholder_with_default shown above). Here's an example that breaks:
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
def create_model(output_size):
required_input = tf.keras.layers.Input(
shape=(13, ), dtype='float32', name='required_input')
batch_size = tf.shape(required_input)[0]
def sample_optional_input(inputs, batch_size=None):
base_distribution = tfp.distributions.MultivariateNormalDiag(
loc=tf.zeros(output_size),
scale_diag=tf.ones(output_size),
name='sample_optional_input')
return base_distribution.sample(batch_size)
default_optional_input = tf.keras.layers.Lambda(
sample_optional_input,
arguments={'batch_size': batch_size}
)(None)
optional_input = tf.keras.layers.Input(
shape=(output_size, ),
dtype='float32',
name='optional_input',
tensor=default_optional_input)
concat = tf.keras.layers.Concatenate(axis=-1)(
[required_input, optional_input])
dense = tf.keras.layers.Dense(
output_size, activation='relu')(concat)
model = tf.keras.Model(
inputs=[required_input, optional_input],
outputs=[dense])
return model
def main():
model = create_model(output_size=3)
required_input_np = np.random.normal(size=(4, 13))
outputs_np = model.predict({'required_input': required_input_np})
print(f"outputs_np: {outputs_np}")
required_input = tf.random_normal(shape=(4, 13))
outputs = model({'required_input': required_input})
print(f"outputs: {outputs}")
if __name__ == '__main__':
main()
The first call to the model.predict seems to give correct output, but for some reason, the direct call to model fails with the following error:
ValueError: Layer model expects 2 inputs, but it received 1 input tensors. Inputs received: []
Can the tensor argument in Input.__init__ be used to implement optional inputs for Keras model as in my example above? If yes, what should I change in my example to make it run correctly? If not, what is the expected way of creating optional inputs in Keras?
I really don't think it's possible without workarounds. Keras was not meant for that.
But, noticing that you are using two different session.run commands for each case, it seems that it should be easy to do it with two models. One model uses the optional input, the other doesn't. You choose which one to use the same way you choose which session.run() to call.
That said, you can use Input(tensor=...) or simply create the optional input inside a Lambda layer. Both things are fine. But don't use Input(shape=..., tensor=...), these are redundant arguments and sometimes Keras does not deal well with redundancies like this.
Ideally, keep all operations inside Lambda layers, even the tf.shape operation.
That said:
required_input = tf.keras.layers.Input(
shape=(13, ), dtype='float32', name='required_input')
#needs the input for the case you want to pass it:
optional_input_when_used = tf.keras.layers.Input(shape=(output_size,))
#operations should be inside Lambda layers
batch_size = Lambda(lambda x: tf.shape(x)[0])(required_input)
#updated for using the batch size coming from lambda
#you didn't use "inputs" anywhere in this function
def sample_optional_input(batch_size):
base_distribution = tfp.distributions.MultivariateNormalDiag(
loc=tf.zeros(output_size),
scale_diag=tf.ones(output_size),
name='sample_optional_input')
return base_distribution.sample(batch_size)
#updated for using the batch size as input
default_optional_input = tf.keras.layers.Lambda(sample_optional_input)(batch_size)
#let's skip the concat for now - notice I'm not "using" this layer yet
dense_layer = tf.keras.layers.Dense(output_size, activation='relu')
#you could create the rest of the model here if it's big, so you don't create it twice
#(check the final section of this answer)
Model using passed input:
concat_when_used = tf.keras.layers.Concatenate(axis=-1)(
[required_input, optional_input_when_used]
)
dense_when_used = dense_layer(concat_when_used)
#or final_part_of_the_model(concat_when_used)
model_when_used = Model([required_input, optional_input_when_used], dense_when_used)
Model not using the optional input:
concat_not_used = tf.keras.layers.Concatenate(axis=-1)(
[required_input, default_optional_input]
)
dense_not_used = dense_layer(concat_not_used)
#or final_part_of_the_model(concat_not_used)
model_not_used = Model(required_input, dense_not_used)
It's ok to create two models like this and choose one to use (both models share the final layers, so they will always be trained together)
Now, at the point where you would have chosen which session.run() to call, you instead choose which model to use:
model_when_used.predict([x1, x2])
model_when_used.fit([x1,x2], y)
model_not_used.predict(x)
model_not_used.fit(x, y)
How to create a shared final part?
If your final part is big, you will not want to call everything twice to create two models. In this case, create a final model first:
input_for_final = Input(shape_after_concat)
out = Dense(....)(input_for_final)
out = Dense(....)(out)
out = Dense(....)(out)
.......
final_part_of_the_model = Model(input_for_final, out)
Then use this final part in the code from the previous section:
dense_when_used = final_part_of_the_model(concat_when_used)
dense_not_used = final_part_of_the_model(concat_not_used)
Let's say I want to train a GRU, and because I need stateful=True the batch size has to be known beforehand.
Using the functional API I would have an Input as follows:
input_1 = Input(batch_shape=(batch_size, None, features))
But when I evaluate the model I don't want to pass my test data in batches (batch_size = 1; predictions for one observation) with fixed timesteps. My
solution at the moment is to load the saved model and rebuild it with:
input_1 = Input(shape=(None, num_input_dim))
To do that though I need a method that goes through every layer of the model and then
set the weights afterwards.
input_1 = Input(shape=(None, num_input_dim))
x1 = input_1
weights = []
for l in range(0, len(layers)):
if isinstance(layers[l], keras.layers.GRU):
x1 = GRU(layers[l].output_shape[-1], return_sequences=True)(x1)
weights.append(layers[l].get_weights())
elif isinstance(layers[l], keras.layers.Dense):
x1 = Dense(layers[l].output_shape[-1], activation='tanh')(x1)
weights.append(layers[l].get_weights())
else:
continue
(This is just an example, and I find this solution very inelegant.)
There must be a better way to redefine the input shape. Can somebody help me out here
please.
Since you're not using a stateful=True model for evaluation, you do need to redefine the model.
You can make a function to create the model taking the options as input:
def createModel(stateful, weights=None):
    """Build the model, fixing the batch size only when it is stateful.

    Args:
        stateful: when true, the input is created with the fixed
            ``batch_size`` and the recurrent layer with ``stateful=True``;
            otherwise the batch dimension is left as ``None``.
        weights: optional list of weights (for example the result of
            ``get_weights()`` on an already trained model) to load into the
            newly created model.

    Returns:
        The newly created model, with ``weights`` applied when given.
    """
    # input: only the stateful variant needs a batch size known beforehand
    batch = batch_size if stateful else None

    # You don't need fixed timesteps, even if the model is stateful.
    # The batch dimension must be `batch` (not `batch_size`), otherwise the
    # non-stateful model would still be tied to the training batch size.
    input_1 = Input(batch_shape=(batch, None, num_input_dim))

    # layer creation as you did with your first model
    # (`...` and `someInput` are placeholders for your own layers)
    ...
    out = LSTM(..., stateful=stateful)(someInput)
    ...

    model = Model(input_1, out)

    if weights is not None:
        model.set_weights(weights)

    return model
Work sequence:
#create the training model
trainModel = createModel(True,None)
#train
...
#create the other model
newModel = createModel(False,trainModel.get_weights())