I'm trying to implement a mixed model where part of it is a variational autoencoder and the other part takes the latent space and makes predictions on the properties of the input. I'd like to train these two models jointly. Here are my models:
# build encoder model
inputs = Input(shape=input_shape, name='encoder_input')
x = Dense(intermediate_dim1, activation='relu')(inputs)
x1 = Dense(intermediate_dim2, activation='relu')(x)
x2 = Dense(intermediate_dim3, activation='relu')(x1)
z_mean = Dense(latent_dim, name='z_mean')(x2)
z_log_var = Dense(latent_dim, name='z_log_var')(x2)
# use reparameterization trick to push the sampling out as input
# note that "output_shape" isn't necessary with the TensorFlow backend
z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])
# instantiate encoder model
encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
# build decoder model
latent_inputs = Input(shape=(latent_dim,), name='z_sampling1')
x1 = Dense(intermediate_dim3, activation='relu')(latent_inputs)
x2 = Dense(intermediate_dim2, activation='relu')(x1)
x3 = Dense(intermediate_dim1, activation='relu')(x2)
outputs = Dense(2*original_dim+1, activation='sigmoid')(x3)
# instantiate decoder model
decoder = Model(latent_inputs, outputs, name='decoder')
#build property predictor model
latent_inputs = Input(shape=(latent_dim,), name='z_sampling2')
x1 = Dense(64, activation='relu')(latent_inputs)
x2 = Dense(128, activation='relu')(x1)
outputs = Dense(property_dim, activation='sigmoid')(x2)
predModel = Model(latent_inputs, outputs, name='predictor')
This is the full model that has the inupts of the encoder and the output of only the predictor model.
#build full model
vaeOutputs = decoder(encoder(inputs)[2])
predOutputs = predModel(encoder(inputs)[0])
vaePred = Model(inputs, [vaeOutputs,predOutputs], name='vae_fullimage')
vaePred.summary()
Now I have trouble defining the loss function and training the model:
This is my attempt:
if __name__ == '__main__':
parser = argparse.ArgumentParser()
help_ = "Load h5 model trained weights"
parser.add_argument("-w", "--weights", help=help_)
help_ = "Use mse loss instead of binary cross entropy (default)"
parser.add_argument("-m",
"--mse",
help=help_, action='store_true')
#args = parser.parse_args()
args = parser.parse_known_args()[0]
models = (encoder, decoder)
def custom_loss(y_true, y_pred):
kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
kl_loss = K.sum(kl_loss, axis=-1)
kl_loss *= -0.5
reconstruction_loss = binary_crossentropy(y_true[0], y_pred[0])
reconstruction_loss*= original_dim
#y_pred = predOutputs
prediction_loss =K.square(y_pred[1] - y_true[1])
total_loss = K.mean(prediction_loss, axis= -1) + K.mean (reconstruction_loss) + K.mean(kl_loss)
return total_loss
optimizer = keras.optimizers.Adam(learning_rate=0.001)
vaePred.compile(optimizer, custom_loss)
vaePred.summary()
if args.weights:
vaePred.load_weights(args.weights)
else:
# train the autoencoder
history =vaePred.fit(x=x_train, y=[x_train,property_train],
epochs=epochs,
callbacks=callbacks,
batch_size=batch_size,
validation_data=(x_test, [x_test,property_test]))
It appears that you are training an autoencoder(AE) (a generative model that seeks to predict itself). The outputs for an AE should equal the inputs if it is perfect. Therefore you should change y_true to be inputs.
change:
prediction_loss = mse(y_true, predOutputs)
to be:
prediction_loss = mse(inputs, predOutputs)
note: I have not run or tested any of this code. It appears to be example code from Keras.
Related
I have a model in TensorFlow that I converted to Pytorch. I want to check If the two models are the same or if I'm mixing things up. Here is my code in tensofrlow:
model = Sequential()
model.add(
LSTM(5, input_shape=(4, 1000))
)
model.add(
Dense(1, activation='tanh')
)
model.compile(
loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']
)
model.fit(
X_train, y_train, epochs=100, batch_size=27
)
I built the equivalent model in Pytorch in this way:
class LSTM1(nn.Module):
def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length):
super(LSTM1, self).__init__()
self.num_classes = num_classes #number of classes
self.num_layers = num_layers #number of layers
self.input_size = input_size #input size
self.seq_length = seq_length #sequence length
self.hidden_size = hidden_size #hidden state
self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
num_layers=num_layers, batch_first=True) #lstm
self.fc = nn.Linear(self.hidden_size, num_classes) #fully connected last layer
self.sigmoid = nn.Sigmoid()
def forward(self,x):
h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)) #hidden state
c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)) #internal state
# Propagate input through LSTM
output, (hn, cn) = self.lstm(x, (h_0, c_0)) #lstm with input, hidden, and internal state
hn = hn.view(-1, self.hidden_size) #reshaping the data for Dense layer next
out = self.sigmoid(hn)
out = self.fc(out) #Final Output
out = self.sigmoid(out)
return out
num_epochs = 100 #100 epochs
learning_rate = 0.001 #0.001 lr
input_size = 1000 #number of features
num_layers = 5 #number of stacked lstm layers
hidden_size = 1
num_classes = 1 #number of output classes
X_train = np.concatenate((X_phage, X_bac))
y_train = np.concatenate((np.ones(len(X_phage)), np.zeros(len(X_bac))))
X_train_tensors_final = Variable(torch.Tensor(X_train))
y_train_tensors = Variable(torch.Tensor(y_train))
print(X_train_tensors_final.shape)
model = LSTM1(num_classes, input_size, hidden_size, num_layers, X_train_tensors_final.shape[1]) #our lstm class
print("Training Shape", X_train_tensors_final.shape, y_train_tensors.shape)
print(model)
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for epoch in range(num_epochs):
outputs = model.forward(X_train_tensors_final) #forward pass
optimizer.zero_grad() #caluclate the gradient, manually setting to 0
# obtain the loss function
outputs = outputs[-20:]
y_train_tensors = y_train_tensors.type(torch.LongTensor)
y_train_tensors = torch.reshape(y_train_tensors, (20, 1))
loss = criterion(outputs, y_train_tensors.float())
loss.backward() #calculates the loss of the loss function
optimizer.step() #improve from loss, i.e backprop
train_acc = torch.sum((outputs > 0.5).bool().float() == y_train_tensors)
final_train_acc = train_acc/20
print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))
print('Accuracy: ', final_train_acc)
My data is 20 instances, where each instance has 1000 features and 4 timesteps, therefore it is shaped like this (20,4,1000). However, When I run the two models, I got different results (different loss and accuracy). Therefore I'm afraid I have missed something and the models are not the same.
I would appreciate it if someone can validate that these models are the same and if they were then why I'm getting different results?
I have coded the following model:
import tensorflow as tf
mod_input = tf.keras.layers.Input(shape=(4,))
mod = tf.keras.layers.Dense(30, "selu")(mod_input)
mod = tf.keras.layers.Dense(30, "selu")(mod)
mod = tf.keras.layers.Dense(30, "selu")(mod)
pre_mu = tf.keras.layers.Dense(30, "selu")(mod)
mu = tf.keras.layers.Dense(1, "linear")(pre_mu)
model = tf.keras.Model(mod_input, mu, name="model")
class MyModel(tf.keras.Model):
def __init__(self, model, **kwargs):
super(MyModel, self).__init__(**kwargs)
self.model = model
def train_step(self, data):
x, y = data
with tf.GradientTape() as tape:
z_mean = self.model(x)
# MSE
total_loss = tf.math.reduce_mean(
tf.math.square(z_mean - y)
)
grads = tape.gradient(total_loss, self.trainable_weights)
self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
return {
"loss": total_loss,
}
Which on my experience, should minimize (it's an example) the MSE... however, it's not, infact, compiling it with:
mm = MyModel(model)
mm.compile(optimizer=tf.keras.optimizers.Adam())
history = mm.fit(x, y_noised, epochs=80, batch_size=1000)
I get the last loss to be 888.2213... however, with the following model, the last loss is 31.8990:
m = tf.keras.Sequential([
tf.keras.layers.Input(shape=(4,)),
tf.keras.layers.Dense(10, activation="selu"),
tf.keras.layers.Dense(10, activation="selu"),
tf.keras.layers.Dense(10, activation="selu"),
tf.keras.layers.Dense(1, activation="linear")
])
what am I missing?
BTW, If instead of compiling mm I compile mod, it works fine:
mm = MyModel(model)
mod.compile(optimizer=tf.keras.optimizers.Adam())
history = mod.fit(x, y_noised, epochs=80, batch_size=1000)
(ignoring mm)
Caught the problem...
In the train_step, y had shape (1000,) and z_mean (1000,1), so the - operator was returning a matrix with a pairwise distance (every possible pair), not a "couple" distance (first with first, second with second...)
I have a VAE architecture script as follows:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense, Conv2DTranspose, Lambda, Reshape, Layer
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
INPUT_DIM = (64,64,3)
CONV_FILTERS = [32,64,64, 128]
CONV_KERNEL_SIZES = [4,4,4,4]
CONV_STRIDES = [2,2,2,2]
CONV_ACTIVATIONS = ['relu','relu','relu','relu']
DENSE_SIZE = 1024
CONV_T_FILTERS = [64,64,32,3]
CONV_T_KERNEL_SIZES = [5,5,6,6]
CONV_T_STRIDES = [2,2,2,2]
CONV_T_ACTIVATIONS = ['relu','relu','relu','sigmoid']
Z_DIM = 32
BATCH_SIZE = 100
LEARNING_RATE = 0.0001
KL_TOLERANCE = 0.5
class Sampling(Layer):
def call(self, inputs):
mu, log_var = inputs
epsilon = K.random_normal(shape=K.shape(mu), mean=0., stddev=1.)
return mu + K.exp(log_var / 2) * epsilon
class VAEModel(Model):
def __init__(self, encoder, decoder, r_loss_factor, **kwargs):
super(VAEModel, self).__init__(**kwargs)
self.encoder = encoder
self.decoder = decoder
self.r_loss_factor = r_loss_factor
def train_step(self, data):
if isinstance(data, tuple):
data = data[0]
def compute_kernel(x, y):
x_size = tf.shape(x)[0]
y_size = tf.shape(y)[0]
dim = tf.shape(x)[1]
tiled_x = tf.tile(tf.reshape(x, tf.stack([x_size, 1, dim])), tf.stack([1, y_size, 1]))
tiled_y = tf.tile(tf.reshape(y, tf.stack([1, y_size, dim])), tf.stack([x_size, 1, 1]))
return tf.exp(-tf.reduce_mean(tf.square(tiled_x - tiled_y), axis=2) / tf.cast(dim, tf.float32))
def compute_mmd(x, y):
x_kernel = compute_kernel(x, x)
y_kernel = compute_kernel(y, y)
xy_kernel = compute_kernel(x, y)
return tf.reduce_mean(x_kernel) + tf.reduce_mean(y_kernel) - 2 * tf.reduce_mean(xy_kernel)
with tf.GradientTape() as tape:
z_mean, z_log_var, z = self.encoder(data)
reconstruction = self.decoder(z)
reconstruction_loss = tf.reduce_mean(
tf.square(data - reconstruction), axis = [1,2,3]
)
reconstruction_loss *= self.r_loss_factor
kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
kl_loss = tf.reduce_sum(kl_loss, axis = 1)
kl_loss *= -0.5
true_samples = tf.random.normal(tf.stack([BATCH_SIZE, Z_DIM]))
loss_mmd = compute_mmd(true_samples, z)
total_loss = reconstruction_loss + loss_mmd
grads = tape.gradient(total_loss, self.trainable_weights)
self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
return {
"loss": total_loss,
"reconstruction_loss": reconstruction_loss,
"kl_loss": kl_loss,
"mmd_loss": loss_mmd
}
def call(self,inputs):
latent = self.encoder(inputs)
return self.decoder(latent)
class VAE():
def __init__(self):
self.models = self._build()
self.full_model = self.models[0]
self.encoder = self.models[1]
self.decoder = self.models[2]
self.input_dim = INPUT_DIM
self.z_dim = Z_DIM
self.learning_rate = LEARNING_RATE
self.kl_tolerance = KL_TOLERANCE
def _build(self):
vae_x = Input(shape=INPUT_DIM, name='observation_input')
vae_c1 = Conv2D(filters = CONV_FILTERS[0], kernel_size = CONV_KERNEL_SIZES[0], strides = CONV_STRIDES[0], activation=CONV_ACTIVATIONS[0], name='conv_layer_1')(vae_x)
vae_c2 = Conv2D(filters = CONV_FILTERS[1], kernel_size = CONV_KERNEL_SIZES[1], strides = CONV_STRIDES[1], activation=CONV_ACTIVATIONS[0], name='conv_layer_2')(vae_c1)
vae_c3= Conv2D(filters = CONV_FILTERS[2], kernel_size = CONV_KERNEL_SIZES[2], strides = CONV_STRIDES[2], activation=CONV_ACTIVATIONS[0], name='conv_layer_3')(vae_c2)
vae_c4= Conv2D(filters = CONV_FILTERS[3], kernel_size = CONV_KERNEL_SIZES[3], strides = CONV_STRIDES[3], activation=CONV_ACTIVATIONS[0], name='conv_layer_4')(vae_c3)
vae_z_in = Flatten()(vae_c4)
vae_z_mean = Dense(Z_DIM, name='mu')(vae_z_in)
vae_z_log_var = Dense(Z_DIM, name='log_var')(vae_z_in)
vae_z = Sampling(name='z')([vae_z_mean, vae_z_log_var])
#### DECODER:
vae_z_input = Input(shape=(Z_DIM,), name='z_input')
vae_dense = Dense(1024, name='dense_layer')(vae_z_input)
vae_unflatten = Reshape((1,1,DENSE_SIZE), name='unflatten')(vae_dense)
vae_d1 = Conv2DTranspose(filters = CONV_T_FILTERS[0], kernel_size = CONV_T_KERNEL_SIZES[0] , strides = CONV_T_STRIDES[0], activation=CONV_T_ACTIVATIONS[0], name='deconv_layer_1')(vae_unflatten)
vae_d2 = Conv2DTranspose(filters = CONV_T_FILTERS[1], kernel_size = CONV_T_KERNEL_SIZES[1] , strides = CONV_T_STRIDES[1], activation=CONV_T_ACTIVATIONS[1], name='deconv_layer_2')(vae_d1)
vae_d3 = Conv2DTranspose(filters = CONV_T_FILTERS[2], kernel_size = CONV_T_KERNEL_SIZES[2] , strides = CONV_T_STRIDES[2], activation=CONV_T_ACTIVATIONS[2], name='deconv_layer_3')(vae_d2)
vae_d4 = Conv2DTranspose(filters = CONV_T_FILTERS[3], kernel_size = CONV_T_KERNEL_SIZES[3] , strides = CONV_T_STRIDES[3], activation=CONV_T_ACTIVATIONS[3], name='deconv_layer_4')(vae_d3)
#### MODELS
vae_encoder = Model(vae_x, [vae_z_mean, vae_z_log_var, vae_z], name = 'encoder')
vae_decoder = Model(vae_z_input, vae_d4, name = 'decoder')
vae_full = VAEModel(vae_encoder, vae_decoder, 10000)
opti = Adam(lr=LEARNING_RATE)
vae_full.compile(optimizer=opti)
return (vae_full,vae_encoder, vae_decoder)
def set_weights(self, filepath):
self.full_model.load_weights(filepath)
def train(self, data):
self.full_model.fit(data, data,
shuffle=True,
epochs=1,
batch_size=BATCH_SIZE)
def save_weights(self, filepath):
self.full_model.save_weights(filepath)
Problem:
vae = VAE()
vae.set_weights(filepath)
throws:
File
"/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py",
line 2200, in load_weights
'Unable to load weights saved in HDF5 format into a subclassed ' ValueError: Unable to load weights saved in HDF5 format into a
subclassed Model which has not created its variables yet. Call the
Model first, then load the weights.
I am not sure what this means since I am not that proficient in OOP. The surprising bit is that the above code was working until it stopped working. The model is training from scratch and it saves the weights in filepath. But when I am loading the same weights now it is throwing the above error!
If you set model.built = True prior to loading the model weights it works.
i was getting same same error while loading weights via
model.load_weights("Detection_model.h5")
ValueError: Unable to load weights saved in HDF5 format into a subclassed Model which has not created its variables yet. Call the Model first, then load the weights.
solved it by building model before loading weights
model.build(input_shape = <INPUT_SHAPE>)
model.load_weights("Detection_model.h5")
ps, tensorflow Version: 2.5.0
What version of TF are you running? For a while the default saving format was hdf5, but this format cannot support subclassed models as easily, so you get this error. It may be solvable by first training it on a single batch and then loading the weights (to determine how the parts are connected, which is not saved in hdf5).
In the future I would recommend making sure that all saves are done with the TF file format though, it will save you from extra work.
As alwaysmvp45 pointed out "hdf5 does not store how the layers are connected". To make these layers be connected, another way is that you call the model to predict a zeros array with input shape ((1,w,h,c)) before loading weights:
model(np.zeros((1,w,h,c)))
Not sure if this has changed in more recent versions (I'm on 2.4). but I had to go this route:
# Do all the build and training
# ...
# Save the weights
model.save('path/to/location.h5')
# delete any reference to the model
del model
# Now do the load for testing
from tensorflow import keras
model = keras.models.load_model('path/to/location.h5')
If I tried the other suggestions, I got warnings about the layers not being present and I had to build the same model that I did the training on. No big deal, stick it in in a function somewhere, but this works better for me.
I have a TF 1.0.1 code of seq2seq model. I am trying to rewrite it using Tensorflow Keras.
TF 1.0.1 code has following decoder architecure:
with tf.variable_scope("decoder_scope") as decoder_scope:
# output projection
# we need to specify output projection manually, because sampled softmax needs to have access to the the projection matrix
output_projection_w_t = tf.get_variable("output_projection_w", [vocabulary_size, state_size], dtype=DTYPE)
output_projection_w = tf.transpose(output_projection_w_t)
output_projection_b = tf.get_variable("output_projection_b", [vocabulary_size], dtype=DTYPE)
decoder_cell = tf.contrib.rnn.LSTMCell(num_units=state_size)
decoder_cell = DtypeDropoutWrapper(cell=decoder_cell, output_keep_prob=tf_keep_probabiltiy, dtype=DTYPE)
decoder_cell = contrib_rnn.MultiRNNCell(cells=[decoder_cell] * num_lstm_layers, state_is_tuple=True)
# define decoder train netowrk
decoder_outputs_tr, _ , _ = dynamic_rnn_decoder(
cell=decoder_cell,
decoder_fn= simple_decoder_fn_train(last_encoder_state, name=None),
inputs=decoder_inputs,
sequence_length=decoder_sequence_lengths,
parallel_iterations=None,
swap_memory=False,
time_major=False)
# define decoder inference network
decoder_scope.reuse_variables()
Here is how the sampled_softmax_loss is calculated:
decoder_forward_outputs = tf.reshape(decoder_outputs_tr,[-1, state_size])
decoder_target_labels = tf.reshape(decoder_labels ,[-1, 1]) #decoder_labels is target sequnce of decoder
sampled_softmax_losses = tf.nn.sampled_softmax_loss(
weights = output_projection_w_t,
biases = output_projection_b,
inputs = decoder_forward_outputs,
labels = decoder_target_labels ,
num_sampled = 500,
num_classes=vocabulary_size,
num_true = 1,
)
total_loss_op = tf.reduce_mean(sampled_softmax_losses)
And, this is my decoder in Keras:
decoder_inputs = tf.keras.Input(shape=(None,), name='decoder_input')
emb_layer = tf.keras.layers.Embedding(vocabulary_size, state_size)
x_d = emb_layer(decoder_inputs)
d_lstm_layer = tf.keras.layers.LSTM(embed_dim, return_sequences=True)
d_lstm_out = d_lstm_layer(x_d, initial_state=encoder_states)
This is my sampled_softmax_loss function I use for Keras model:
class SampledSoftmaxLoss(object):
def __init__(self, model):
self.model = model
output_layer = model.layers[-1]
self.input = output_layer.input
self.weights = output_layer.weights
def loss(self, y_true, y_pred, **kwargs):
loss = tf.nn.sampled_softmax_loss(
weights=self.weights[0],
biases=self.weights[1],
labels=tf.reshape(y_true ,[-1, 1]),
inputs=tf.reshape(d_lstm_out,[-1, state_size]),
num_sampled = 500,
num_classes = vocabulary_size
)
But, it does not work.
Can anyone help me to implement sampled_loss_funtion in Keras correctly.
I have been working on speech emotion recognition deep neural network. I have used keras Bidirectional LSTM with CTC loss. i trained the model and saved it
model_json = model.to_json()
with open("ctc_model.json", "w") as json_file:
json_file.write(model_json)
model.save_weights("ctc_weights.h5")
The problem is i can not use this model to test on on unseen data because the model accepts 4 argument as input and calculates the ctc loss..just build the model and train. so how can i save a model in such away that in require only one input. not the labels, and length. Basically how can i save a model as this function test_func = K.function([net_input], [output])
def ctc_lambda_func(args):
y_pred, labels, input_length, label_length = args
shift = 2
y_pred = y_pred[:, shift:, :]
input_length -= shift
return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
def build_model(nb_feat, nb_class, optimizer='Adadelta'):
net_input = Input(name="the_input", shape=(200, nb_feat))
forward_lstm1 = LSTM(output_dim=64,
return_sequences=True,
activation="tanh"
)(net_input)
backward_lstm1 = LSTM(output_dim=64,
return_sequences=True,
activation="tanh",
go_backwards=True
)(net_input)
blstm_output1 = Merge(mode='concat')([forward_lstm1, backward_lstm1])
forward_lstm2 = LSTM(output_dim=64,
return_sequences=True,
activation="tanh"
)(blstm_output1)
backward_lstm2 = LSTM(output_dim=64,
return_sequences=True,
activation="tanh",
go_backwards=True
)(blstm_output1)
blstm_output2 = Merge(mode='concat')([forward_lstm2, backward_lstm2])
hidden = TimeDistributed(Dense(512, activation='tanh'))(blstm_output2)
output = TimeDistributed(Dense(nb_class + 1, activation='softmax')) (hidden)
labels = Input(name='the_labels', shape=[1], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name="ctc")([output, labels, input_length, label_length])
model = Model(input=[net_input, labels, input_length, label_length], output=[loss_out])
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=optimizer, metrics=[])
test_func = K.function([net_input], [output])
return model, test_func
model, test_func = build_model(nb_feat=nb_feat, nb_class=nb_class, optimizer=optimizer)
for epoch in range(number_epoches):
inputs_train = {'the_input': X_train[i:i+batch_size],
'the_labels': y_train[i:i+batch_size],
'input_length': np.sum(X_train_mask[i:i+batch_size], axis=1, dtype=np.int32),
'label_length': np.squeeze(y_train_mask[i:i+batch_size]),
}
outputs_train = {'ctc': np.zeros([inputs_train["the_labels"].shape[0]])}
ctcloss = model.train_on_batch(x=inputs_train, y=outputs_train)
total_ctcloss += ctcloss * inputs_train["the_input"].shape[0] * 1.
loss_train[epoch] = total_ctcloss / X_train.shape[0]
Here is the my model summary
Try the following solution:
import keras.backend as K
def get_prediction_function(model):
input_tensor = model.layers[0].input
output_tensor = model.layers[-5].output
net_function = K.function([input_tensor, K.learning_phase()], [output_tensor])
def _result_function(x):
return net_function([x, 0])[0]
return _result_function
Now your network function might be obtained by:
test_function = get_prediction_function(model)