This is apparently the code for seq2seq model with embedding that i wrote
encoder_inputs = Input(shape=(MAX_LEN, ), dtype='int32',)
encoder_embedding = embed_layer(encoder_inputs)
encoder_LSTM = LSTM(HIDDEN_DIM, return_state=True)
encoder_outputs, state_h, state_c = encoder_LSTM(encoder_embedding)
encoder_states = [state_h, state_c]
decoder_inputs = Input(shape=(MAX_LEN, ))
decoder_embedding = embed_layer(decoder_inputs)
decoder_LSTM = LSTM(HIDDEN_DIM, return_state=True, return_sequences=True)
decoder_outputs, _, _ = decoder_LSTM(
decoder_embedding, initial_state=encoder_states)
outputs = TimeDistributed(
Dense(VOCAB_SIZE, activation='softmax'))(decoder_outputs)
model = Model([encoder_inputs, decoder_inputs], outputs)
# defining inference model
encoder_model = Model(encoder_inputs, encoder_states)
decoder_state_input_h = Input(shape=(None,))
decoder_state_input_c = Input(shape=(None,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_outputs, state_h, state_c = decoder_LSTM(
decoder_embedding, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
outputs = TimeDistributed(
Dense(VOCAB_SIZE, activation='softmax'))(decoder_outputs)
decoder_model = Model(
[decoder_inputs] + decoder_states_inputs, [outputs] + decoder_states)
return model, encoder_model, decoder_model
we are using inference mode for predictions particularly encoder and decoder model, but i am not sure where the training is happening for the encoder and decoder?
Edit 1
Code is build upon: https://keras.io/examples/lstm_seq2seq/,
with added embedding layer and Time Distributed dense layer.
for more info on issue: github repo
Encoder and decoder are trained simultaneously, or more precisely the model that is composed of these two is trained which in turn trains both of them (this is not GAN where you need some fancy training cycle)
If you look closely in the provided link, there is a section where the model is trained.
# Run training
model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
metrics=['accuracy'])
model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
batch_size=batch_size,
epochs=epochs,
validation_split=0.2)
Edit: from comments
If you look more closely, the "new" model that you are defining after fit consists of layers that have already been trained in the previous step. i.e Model(encoder_inputs, encoder_states) both encoder_inputs and encoder_states were used during the initial training, you are just repackaging them.
Related
I have trained the following model:
K.clear_session()
latent_dim = 500
encoder_inputs = Input(shape=(INPUT_MAX_LENGTH-1,))
enc_emb = Embedding(vocab_size_source, latent_dim, trainable=True, mask_zero=True)(encoder_inputs)
encoder_lstm1 = LSTM(latent_dim,return_sequences=True,return_state=True)
encoder_output1, state_h1, state_c1 = encoder_lstm1(enc_emb)
encoder_lstm2 = LSTM(latent_dim,return_sequences=True,return_state=True)
encoder_output2, state_h2, state_c2 = encoder_lstm2(encoder_output1)
encoder_lstm3=LSTM(latent_dim, return_state=True, return_sequences=True)
encoder_outputs, state_h, state_c= encoder_lstm3(encoder_output2)
# We discard `encoder_outputs` and only keep the states.
encoder_states = [state_h, state_c]
# Set up the decoder, using `encoder_states` as initial state.
decoder_inputs = Input(shape=(INPUT_MAX_LENGTH-1,))
dec_emb_layer = Embedding(vocab_size_target, latent_dim, trainable=True, mask_zero=True)
dec_emb = dec_emb_layer(decoder_inputs)
# We set up our decoder to return full output sequences,
# and to return internal states as well. We don't use the
# return states in the training model, but we will use them in inference.
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(dec_emb, initial_state=[state_h, state_c])
attn_layer = BahdanauAttention(units=latent_dim)
attn_out, attn_states = attn_layer(encoder_outputs, decoder_outputs)
decoder_concat_input = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attn_out])
decoder_dense = TimeDistributed(Dense(vocab_size_target, activation='softmax'))
decoder_outputs = decoder_dense(decoder_concat_input)
# Define the model that will turn
# `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.summary()
And now I am trying to create an inference pipeline:
# Define sampling models
# Restore the model and construct the encoder and decoder.
#model = keras.models.load_model("s2s")
encoder_inputs = model.input[0] # input_1
encoder_outputs, state_h_enc, state_c_enc = model.layers[6].output # lstm_1
encoder_states = [state_h_enc, state_c_enc]
encoder_model = Model(encoder_inputs, encoder_states)
dec_inputs = model.input[1] # input_2
dec_emd_layer = model.layers[5]
dec_embedding = dec_emd_layer(dec_inputs)
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_lstm = model.layers[7]
decoder_outputs, state_h_dec, state_c_dec = decoder_lstm(
dec_embedding, initial_state=decoder_states_inputs
)
attn_layer = model.layers[8]
attn_out_inf, attn_states_inf = attn_layer(encoder_outputs, decoder_outputs)
concat_layer = model.layers[9]
decoder_concat_input = concat_layer([decoder_outputs, attn_out_inf])
decoder_states = [state_h_dec, state_c_dec]
decoder_dense = model.layers[10]
decoder_outputs = decoder_dense(decoder_concat_input)
decoder_model = Model(
[dec_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states
)
But I keep getting the following error:
ValueError: Graph disconnected: cannot obtain value for tensor KerasTensor(type_spec=TensorSpec(shape=(None, 99), dtype=tf.float32, name='input_1'), name='input_1', description="created by layer 'input_1'") at layer "embedding". The following previous layers were accessed without issue: []
I see no disconnect in the graph. I have also changed variable names to ensure there are no conflicts.
I need to adapt this model for two text columns input (instead one column)
tfhub_handle_encoder = \
"https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1"
tfhub_handle_preprocess = \
"https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
def build_classifier_model():
text_input = tf.keras.layers.Input(
shape=(), dtype=tf.string, name='text')
preprocessing_layer = hub.KerasLayer(
tfhub_handle_preprocess, name='preprocessing')
encoder_inputs = preprocessing_layer(text_input)
encoder = hub.KerasLayer(
tfhub_handle_encoder, trainable=True, name='BERT_encoder')
outputs = encoder(encoder_inputs)
net = outputs['pooled_output']
net = tf.keras.layers.Dropout(0.1)(net)
net = tf.keras.layers.Dense(
6, activation='softmax', name='classifier')(net)
model = tf.keras.Model(text_input, net)
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=False) # (from_logits=True)
metric = tf.metrics.CategoricalAccuracy('accuracy')
optimizer = Adam(
learning_rate=5e-05, epsilon=1e-08, decay=0.01, clipnorm=1.0)
model.compile(
optimizer=optimizer, loss=loss, metrics=metric)
model.summary()
return model
history = classifier_model.fit(
x=X_train['f'].values,
y=y_train_c,
validation_data=(X_valid['f'].values, y_valid_c),
epochs=15)
Seems like this is model from tutorial: https://www.tensorflow.org/text/tutorials/classify_text_with_bert
I have tried modify code for two input layer, but get error because after concatenate there is wrong tensor dimensions:
def build_classifier_model():
input1 = tf.keras.layers.Input(
shape=(), dtype=tf.string, name='text')
input2 = tf.keras.layers.Input(
shape=(), dtype=tf.string, name='text1')
text_input = tf.keras.layers.concatenate([input1, input2], axis=-1)
preprocessing_layer = hub.KerasLayer(
tfhub_handle_preprocess, name='preprocessing')
encoder_inputs = preprocessing_layer(text_input)
encoder = hub.KerasLayer(
tfhub_handle_encoder, trainable=True, name='BERT_encoder')
outputs = encoder(encoder_inputs)
net = outputs['pooled_output']
net = tf.keras.layers.Dropout(0.1)(net)
net = tf.keras.layers.Dense(
6, activation='softmax', name='classifier')(net)
model = tf.keras.Model([input1, input2], net)
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=False) # (from_logits=True)
metric = tf.metrics.CategoricalAccuracy('accuracy')
optimizer = Adam(
learning_rate=5e-05, epsilon=1e-08, decay=0.01, clipnorm=1.0)
model.compile(
optimizer=optimizer, loss=loss, metrics=metric)
model.summary()
return model
Error:
InvalidArgumentError: logits and labels must be broadcastable: logits_size=[64,6] labels_size=[32,6]
[[node categorical_crossentropy/softmax_cross_entropy_with_logits (defined at tmp/ipykernel_39/1837193519.py:5) ]] [Op:__inference_train_function_271676]
If use concatenate with another dimension then model doensn't compile
Weirdly enough, replacing your Concatenation layer with tf.strings.join inside your model seems to work:
def build_classifier_model():
input1 = tf.keras.layers.Input(
shape=(), dtype=tf.string, name='text')
input2 = tf.keras.layers.Input(
shape=(), dtype=tf.string, name='text1')
text_input = tf.strings.join([input1, input2])
preprocessing_layer = hub.KerasLayer(
tfhub_handle_preprocess, name='preprocessing')
encoder_inputs = preprocessing_layer(text_input)
encoder = hub.KerasLayer(
tfhub_handle_encoder, trainable=True, name='BERT_encoder')
outputs = encoder(encoder_inputs)
net = outputs['pooled_output']
net = tf.keras.layers.Dropout(0.1)(net)
output = tf.keras.layers.Dense(
6, activation='softmax', name='classifier')(net)
model = tf.keras.Model([input1, input2], output)
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=False) # (from_logits=True)
metric = tf.metrics.CategoricalAccuracy('accuracy')
optimizer = Adam(
learning_rate=5e-05, epsilon=1e-08, decay=0.01, clipnorm=1.0)
model.compile(
optimizer=optimizer, loss=loss, metrics=metric)
model.summary()
return model
Epoch 1/5
497/1094 [============>.................] - ETA: 2:14 - loss: 1.8664 - accuracy: 0.1641
You could also consider simply doing text_input = input1 + input2 , since the Concatenation layer seems to mess up the batch dimension. Or you could feed each input to your encoder and concatenate the results afterwards:
def build_classifier_model():
input1 = tf.keras.layers.Input(
shape=(), dtype=tf.string, name='text')
input2 = tf.keras.layers.Input(
shape=(), dtype=tf.string, name='text1')
preprocessing_layer = hub.KerasLayer(
tfhub_handle_preprocess, name='preprocessing')
encoder_input1 = preprocessing_layer(input1)
encoder_input2 = preprocessing_layer(input2)
encoder = hub.KerasLayer(
tfhub_handle_encoder, trainable=True, name='BERT_encoder')
output1 = encoder(encoder_input1)
output2 = encoder(encoder_input2)
net = tf.keras.layers.Concatenate(axis=-1)([output1['pooled_output'], output2['pooled_output']])
net = tf.keras.layers.Dropout(0.1)(net)
output = tf.keras.layers.Dense(
6, activation='softmax', name='classifier')(net)
model = tf.keras.Model([input1, input2], output)
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=False) # (from_logits=True)
metric = tf.metrics.CategoricalAccuracy('accuracy')
optimizer = Adam(
learning_rate=5e-05, epsilon=1e-08, decay=0.01, clipnorm=1.0)
model.compile(
optimizer=optimizer, loss=loss, metrics=metric)
model.summary()
return model
I have an encoder-decoder model whose structure is the same as the one at machinelearningmastery.com with num_encoder_tokens = 1949,
num_decoder_tokens = 1944, and latent_dim = 2048.
I would like to construct the encoder and decoder models by loading the already trained model and try decoding some samples, but I get the error "Graph disconnected: cannot obtain value for tensor Tensor("input_1_1:0", shape=(?,?, 1949), dtype=float32) at layer "input_1". The following previous layers were accessed without issue: [].
Part of my code is the following:
encoder_inputs = Input(shape=(None, num_encoder_tokens))
encoder = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
encoder_states = [state_h, state_c]
decoder_inputs = Input(shape=(None, num_decoder_tokens))
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
initial_state=encoder_states)
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
batch_size=batch_size,
epochs=epochs,
validation_split=0.2)
model.save('modelname.h5')
# ...from here different python file for inference...
encoder = LSTM(latent_dim, return_state=True)
model = load_model('modelname.h5')
encoder_model = Model(model.output, encoder(model.output)) # I get the error here
And what I would like to do here is:
encoder_inputs = Input(shape=(None, 1949))
encoder = LSTM(2048, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
encoder_states = [state_h, state_c]
encoder_model = Model(encoder_inputs, encoder_states)
I would highly appreciate it if anyone could help me.
Take a look at Robert Sim's answer to this post in stack overflow: Restore keras seq2seq model
And to this post in github: https://github.com/keras-team/keras/pull/9119.
He also provides an example in: https://github.com/simra/keras/blob/simra/s2srestore/examples/lstm_seq2seq_restore.py where you can see how the model is loaded. The following code has been taken from that example.
# Restore the model and construct the encoder and decoder.
model = load_model('s2s.h5')
encoder_inputs = model.input[0] # input_1
encoder_outputs, state_h_enc, state_c_enc = model.layers[2].output # lstm_1
encoder_states = [state_h_enc, state_c_enc]
encoder_model = Model(encoder_inputs, encoder_states)
decoder_inputs = model.input[1] # input_2
decoder_state_input_h = Input(shape=(latent_dim,), name='input_3')
decoder_state_input_c = Input(shape=(latent_dim,), name='input_4')
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_lstm = model.layers[3]
decoder_outputs, state_h_dec, state_c_dec = decoder_lstm(
decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h_dec, state_c_dec]
decoder_dense = model.layers[4]
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model(
[decoder_inputs] + decoder_states_inputs,
[decoder_outputs] + decoder_states)
I'm working with the keras seq2seq example here:
https://github.com/keras-team/keras/blob/master/examples/lstm_seq2seq.py
I would like to persist the vocabulary and decoder so that I can load it again later, and apply it to new sequences.
While the code calls model.save(), this is insufficient because I can see the decoding setup referencing a number of other variables which are deep pointers into the trained model:
encoder_model = Model(encoder_inputs, encoder_states)
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_outputs, state_h, state_c = decoder_lstm(
decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model(
[decoder_inputs] + decoder_states_inputs,
[decoder_outputs] + decoder_states)
I would like to translate this code to determine encoder_inputs, encoder_states, latent_dim, decoder_inputs from a model loaded from disk. It's ok to assume I know the model architecture in advance. Is there a straightforward way to do this?
Update:
I have made some progress using the decoder construction code and pulling out the layer inputs/outputs as needed.
encoder_inputs = model.input[0] #input_1
decoder_inputs = model.input[1] #input_2
encoder_outputs, state_h_enc, state_c_enc = model.layers[2].output # lstm_1
_, state_h_dec, state_c_dec = model.layers[3].output # lstm_2
decoder_outputs = model.layers[4].output # dense_1
encoder_states = [state_h_enc, state_c_enc]
encoder_model = Model(encoder_inputs, encoder_states)
latent_dim = 256 # TODO: infer this from the model. Should match lstm_1 outputs.
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_states = [state_h_dec, state_c_dec]
decoder_model = Model(
[decoder_inputs] + decoder_states_inputs,
[decoder_outputs] + decoder_states)
However, when I try to construct the decoder model, I encounter this error:
RuntimeError: Graph disconnected: cannot obtain value for tensor Tensor("input_1:0", shape=(?, ?, 96), dtype=float32) at layer "input_1". The following previous layers were accessed without issue: []
As a test I tried Model(decoder_inputs,decoder_outputs) with the same result. It's not clear to me what is disconnected from the graph, since these layers are loaded from the model.
Ok, I solved this problem and the decoder is producing reasonable results. In my code above I missed a couple details in the decoder step, specifically that it call()s the LSTM and Dense layers in order to wire them up. In addition, the new decoder inputs need unique names so they don't collide with input_1 and input_2 (this detail smells like a keras bug).
encoder_inputs = model.input[0] #input_1
encoder_outputs, state_h_enc, state_c_enc = model.layers[2].output # lstm_1
encoder_states = [state_h_enc, state_c_enc]
encoder_model = Model(encoder_inputs, encoder_states)
decoder_inputs = model.input[1] #input_2
decoder_state_input_h = Input(shape=(latent_dim,),name='input_3')
decoder_state_input_c = Input(shape=(latent_dim,),name='input_4')
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_lstm = model.layers[3]
decoder_outputs, state_h_dec, state_c_dec = decoder_lstm(
decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h_dec, state_c_dec]
decoder_dense = model.layers[4]
decoder_outputs=decoder_dense(decoder_outputs)
decoder_model = Model(
[decoder_inputs] + decoder_states_inputs,
[decoder_outputs] + decoder_states)
A big drawback with this code is the fact we know the full architecture in advance. I would like to eventually be able to load an architecture-agnostic decoder.
At a point in the code of the Keras seq2seq example you will have a finished encoder and decoder model. You can save the architecture and weights of these models to disk and load them later. The following works for me:
Save the models to disk:
with open('encoder_model.json', 'w', encoding='utf8') as f:
f.write(encoder_model.to_json())
encoder_model.save_weights('encoder_model_weights.h5')
with open('decoder_model.json', 'w', encoding='utf8') as f:
f.write(decoder_model.to_json())
decoder_model.save_weights('decoder_model_weights.h5')
Later load the encoder and decoder:
def load_model(model_filename, model_weights_filename):
with open(model_filename, 'r', encoding='utf8') as f:
model = model_from_json(f.read())
model.load_weights(model_weights_filename)
return model
encoder = load_model('encoder_model.json', 'encoder_model_weights.h5')
decoder = load_model('decoder_model.json', 'decoder_model_weights.h5')
During prediction you will also need a number of other data, like number of encoder/decoder tokens, dictionaries mapping char to index etc. You can just save these to file after training and load them later, just like with the models.
I have been working on speech emotion recognition deep neural network. I have used keras Bidirectional LSTM with CTC loss. i trained the model and saved it
model_json = model.to_json()
with open("ctc_model.json", "w") as json_file:
json_file.write(model_json)
model.save_weights("ctc_weights.h5")
The problem is i can not use this model to test on on unseen data because the model accepts 4 argument as input and calculates the ctc loss..just build the model and train. so how can i save a model in such away that in require only one input. not the labels, and length. Basically how can i save a model as this function test_func = K.function([net_input], [output])
def ctc_lambda_func(args):
y_pred, labels, input_length, label_length = args
shift = 2
y_pred = y_pred[:, shift:, :]
input_length -= shift
return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
def build_model(nb_feat, nb_class, optimizer='Adadelta'):
net_input = Input(name="the_input", shape=(200, nb_feat))
forward_lstm1 = LSTM(output_dim=64,
return_sequences=True,
activation="tanh"
)(net_input)
backward_lstm1 = LSTM(output_dim=64,
return_sequences=True,
activation="tanh",
go_backwards=True
)(net_input)
blstm_output1 = Merge(mode='concat')([forward_lstm1, backward_lstm1])
forward_lstm2 = LSTM(output_dim=64,
return_sequences=True,
activation="tanh"
)(blstm_output1)
backward_lstm2 = LSTM(output_dim=64,
return_sequences=True,
activation="tanh",
go_backwards=True
)(blstm_output1)
blstm_output2 = Merge(mode='concat')([forward_lstm2, backward_lstm2])
hidden = TimeDistributed(Dense(512, activation='tanh'))(blstm_output2)
output = TimeDistributed(Dense(nb_class + 1, activation='softmax')) (hidden)
labels = Input(name='the_labels', shape=[1], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name="ctc")([output, labels, input_length, label_length])
model = Model(input=[net_input, labels, input_length, label_length], output=[loss_out])
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=optimizer, metrics=[])
test_func = K.function([net_input], [output])
return model, test_func
model, test_func = build_model(nb_feat=nb_feat, nb_class=nb_class, optimizer=optimizer)
for epoch in range(number_epoches):
inputs_train = {'the_input': X_train[i:i+batch_size],
'the_labels': y_train[i:i+batch_size],
'input_length': np.sum(X_train_mask[i:i+batch_size], axis=1, dtype=np.int32),
'label_length': np.squeeze(y_train_mask[i:i+batch_size]),
}
outputs_train = {'ctc': np.zeros([inputs_train["the_labels"].shape[0]])}
ctcloss = model.train_on_batch(x=inputs_train, y=outputs_train)
total_ctcloss += ctcloss * inputs_train["the_input"].shape[0] * 1.
loss_train[epoch] = total_ctcloss / X_train.shape[0]
Here is the my model summary
Try the following solution:
import keras.backend as K
def get_prediction_function(model):
input_tensor = model.layers[0].input
output_tensor = model.layers[-5].output
net_function = K.function([input_tensor, K.learning_phase()], [output_tensor])
def _result_function(x):
return net_function([x, 0])[0]
return _result_function
Now your network function might be obtained by:
test_function = get_prediction_function(model)