I have developed a Keras model and am trying to fit my data to it. Here is the code for the model and the model.fit call:
def train(run_name, start_epoch, stop_epoch, img_w):
    # Input Parameters
    img_h = 64
    words_per_epoch = 300
    val_split = 0.2
    val_words = int(words_per_epoch * (val_split))

    # Network parameters
    conv_filters = 16
    kernel_size = (3, 3)
    pool_size = 2
    time_dense_size = 32
    rnn_size = 256
    minibatch_size = 32

    if K.image_data_format() == 'channels_first':
        input_shape = (1, img_w, img_h)
    else:
        input_shape = (img_w, img_h, 1)

    act = 'relu'
    input_data = Input(name='the_input', shape=input_shape, dtype='float32')
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv1')(input_data)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
    inner = Conv2D(conv_filters, kernel_size, padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv2')(inner)
    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)

    conv_to_rnn_dims = (img_w // (pool_size ** 2), (img_h // (pool_size ** 2)) * conv_filters)
    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)

    # cuts down input size going into RNN:
    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)

    # Two layers of bidirectional GRUs
    # GRU seems to work as well, if not better than LSTM:
    gru_1 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru1')(inner)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru1_b')(inner)
    gru1_merged = add([gru_1, gru_1b])
    gru_2 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru2_b')(gru1_merged)

    # transforms RNN output to character activations:
    # print("Output Size", img_gen.get_output_size())
    inner = Dense(47, kernel_initializer='he_normal',
                  name='dense2')(concatenate([gru_2, gru_2b]))
    y_pred = Activation('softmax', name='softmax')(inner)
    # Model(inputs=input_data, outputs=y_pred).summary()

    labels = Input(name='the_labels', shape=[10], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # Keras doesn't currently support loss funcs with extra parameters,
    # so CTC loss is implemented in a lambda layer
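    # Note: ctc_lambda_func is not shown in the question. In the Keras
    # image_ocr example it is defined along these lines (an assumption here):
    #
    # def ctc_lambda_func(args):
    #     y_pred, labels, input_length, label_length = args
    #     # the first couple of RNN outputs tend to be garbage, so skip them
    #     y_pred = y_pred[:, 2:, :]
    #     return K.ctc_batch_cost(labels, y_pred, input_length, label_length)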
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])

    # clipnorm seems to speed up convergence
    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
    model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out)

    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
    test_func = K.function([input_data], [y_pred])
    # viz_cb = VizCallback(run_name, test_func, img_gen.next_val())

    (X_train, y_train, train_input_length, train_labels_length), (X_test, y_test, test_input_length, test_labels_length) = dataset_load('./OCR_BanglaData.pkl.gz')
    print(y_train[0])

    X_train = X_train.reshape(X_train.shape[0], 128, 64, 1)
    X_test = X_test.reshape(X_test.shape[0], 128, 64, 1)
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')

    model.fit((np.array(X_train), np.array(y_train), np.array(train_input_length), np.array(train_labels_length)),
              batch_size=32, epochs=120, verbose=1,
              validation_data=[np.array(X_test), np.array(y_test), np.array(test_input_length), np.array(test_labels_length)])
After this I get the following error:
TypeError: Error when checking model input: data should be a Numpy array, or list/dict of Numpy arrays.
I tried printing the data type of each array. The result is
<type 'numpy.ndarray'>
But I still get this error. Is there any specific reason for it?
I am using the TensorFlow backend for Keras.
Here you have 4 inputs:
model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out)
Each of the four inputs must be a numpy array, and the fit method wants them in a list (you're using a tuple):
model.fit([X_train, y_train,train_input_length, train_labels_length],...)
You're also missing the targets (outputs) in the fit method: something must be passed for y that matches what you defined as loss_out when creating the model, as in the sketch below.
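A minimal sketch of the corrected call (an assumption based on the Keras OCR example, where a dummy target array is passed because the real loss is computed inside the 'ctc' Lambda layer):

dummy_output = np.zeros(len(X_train))  # ignored by the dummy lambda loss

model.fit(
    [X_train, y_train, train_input_length, train_labels_length],  # a list, not a tuple
    dummy_output,
    batch_size=32,
    epochs=120,
    verbose=1,
    validation_data=(
        [X_test, y_test, test_input_length, test_labels_length],
        np.zeros(len(X_test)),
    ),
)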
Related
I created a CAPTCHA model based on the Keras code example, but when I load the saved model, an error pops up.
Here is the code I wrote in a Jupyter notebook.
STEP1) Model build
class CTCLayer(layers.Layer):
    def __init__(self, name=None):
        super().__init__(name=name)
        self.loss_fn = keras.backend.ctc_batch_cost

    def call(self, y_true, y_pred):
        # Compute the training-time loss value and add it
        # to the layer using `self.add_loss()`.
        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
        label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")

        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")

        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
        self.add_loss(loss)

        # At test time, just return the computed predictions
        return y_pred
def build_model():
    # Inputs to the model
    input_img = layers.Input(
        shape=(img_width, img_height, 1), name="image", dtype="float32"
    )
    labels = layers.Input(name="label", shape=(None,), dtype="float32")

    # First conv block
    x = layers.Conv2D(
        32,
        (3, 3),
        activation="relu",
        kernel_initializer="he_normal",
        padding="same",
        name="Conv1",
    )(input_img)
    x = layers.MaxPooling2D((2, 2), name="pool1")(x)

    # Second conv block
    x = layers.Conv2D(
        64,
        (3, 3),
        activation="relu",
        kernel_initializer="he_normal",
        padding="same",
        name="Conv2",
    )(x)
    x = layers.MaxPooling2D((2, 2), name="pool2")(x)

    # We have used two max pool with pool size and strides 2.
    # Hence, downsampled feature maps are 4x smaller. The number of
    # filters in the last layer is 64. Reshape accordingly before
    # passing the output to the RNN part of the model
    new_shape = ((img_width // 4), (img_height // 4) * 64)
    x = layers.Reshape(target_shape=new_shape, name="reshape")(x)
    x = layers.Dense(64, activation="relu", name="dense1")(x)
    x = layers.Dropout(0.2)(x)

    # RNNs
    x = layers.Bidirectional(layers.LSTM(128, return_sequences=True, dropout=0.25))(x)
    x = layers.Bidirectional(layers.LSTM(64, return_sequences=True, dropout=0.25))(x)

    # Output layer
    x = layers.Dense(
        len(char_to_num.get_vocabulary()) + 1, activation="softmax", name="dense2"
    )(x)

    # Add CTC layer for calculating CTC loss at each step
    output = CTCLayer(name="ctc_loss")(labels, x)

    # Define the model
    model = keras.models.Model(
        inputs=[input_img, labels], outputs=output, name="ocr_model_v1"
    )
    # Optimizer
    opt = keras.optimizers.Adam()
    # Compile the model and return
    model.compile(optimizer=opt)
    return model
# Get the model
model = build_model()
model.summary()
STEP2) Train the model
epochs = 100
early_stopping_patience = 10
# Add early stopping
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=early_stopping_patience, restore_best_weights=True
)
# Train the model
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=epochs,
    callbacks=[early_stopping],
)
STEP3) Check prediction
# Get the prediction model by extracting layers till the output layer
prediction_model = keras.models.Model(
    model.get_layer(name="image").input, model.get_layer(name="dense2").output
)
prediction_model.summary()
# A utility function to decode the output of the network
def decode_batch_predictions(pred):
    input_len = np.ones(pred.shape[0]) * pred.shape[1]
    # Use greedy search. For complex tasks, you can use beam search
    results = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0][
        :, :max_length
    ]
    # Iterate over the results and get back the text
    output_text = []
    for res in results:
        res = tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
        output_text.append(res)
    return output_text
# Let's check results on some validation samples
for batch in validation_dataset.take(1):
    batch_images = batch["image"]
    batch_labels = batch["label"]

    preds = prediction_model.predict(batch_images)
    pred_texts = decode_batch_predictions(preds)

    orig_texts = []
    for label in batch_labels:
        label = tf.strings.reduce_join(num_to_char(label)).numpy().decode("utf-8")
        orig_texts.append(label)

    _, ax = plt.subplots(4, 4, figsize=(15, 8))
    for i in range(len(pred_texts)):
        img = (batch_images[i, :, :, 0] * 255).numpy().astype(np.uint8)
        img = img.T
        title = f"Prediction: {pred_texts[i]}"
        ax[i // 4, i % 4].imshow(img, cmap="gray")
        ax[i // 4, i % 4].set_title(title)
        ax[i // 4, i % 4].axis("off")
plt.show()
STEP4) Save model
model.save("ocr_model.h5")
STEP5) Load model
model = load_model('./ocr_model.h5',custom_objects={'CTCLayer':CTCLayer})
I got the following error message:
TypeError: __init__() got an unexpected keyword argument 'trainable'
I also tried this code:
model = load_model('./ocr_model.h5')
I got the following error message.
ValueError: Unknown layer: CTCLayer. Please ensure this object is passed to the custom_objects argument. See https://www.tensorflow.org/guide/keras/save_and_serialize#registering_the_custom_object for details.
How can I use a stored model?
According to this thread: TypeError: __init__() got an unexpected keyword argument 'trainable',
you should update __init__ to accept **kwargs to solve your problem. (Strangely, I used the exact same model and configuration in TensorFlow 2.3.0 on Ubuntu 18.04 and cannot reproduce this issue.)
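A minimal sketch of the change (the rest of the layer stays the same):

class CTCLayer(layers.Layer):
    def __init__(self, name=None, **kwargs):
        # Forward extra arguments such as `trainable`, which Keras passes
        # back to the layer when reconstructing it from a saved model.
        super().__init__(name=name, **kwargs)
        self.loss_fn = keras.backend.ctc_batch_cost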
I have a siamese network and I want to perform a grid search on it using GridSearchCV.
So I create a model using the following function:
def createMod(learn_rate=0.01, optimizer='Adam'):
    # K.clear_session()
    # network definition
    base_network = create_base_network(input_shape)

    input_a = Input(shape=input_shape)
    input_b = Input(shape=input_shape)

    # because we re-use the same instance `base_network`,
    # the weights of the network will be shared across the two branches
    processed_a = base_network(input_a)
    processed_b = base_network(input_b)

    distance = Lambda(euclidean_distance,
                      output_shape=eucl_dist_output_shape)([processed_a, processed_b])
    prediction = Dense(1, activation='sigmoid')(distance)
    model = Model([input_a, input_b], prediction)

    if optimizer == 'SGD':
        opt = SGD(lr=learn_rate)
    elif optimizer == 'RMSprop':
        opt = RMSprop(lr=learn_rate)
    else:
        opt = Adam(lr=learn_rate)

    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[accuracy])
    return model
And then I create the KerasClassifier and GridSearch as follows:
model = KerasClassifier(build_fn=createMod, verbose=0)
param_grid = dict(epochs=epochs, batch_size=batch_size, learn_rate=learn_rate,optimizer=optimizer)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=5)
X = [tr_pairs[:, 0], tr_pairs[:, 1]]
grid_result = grid.fit(X, tr_y)
However, this throws the following ValueError:
ValueError: Found input variables with inconsistent numbers of samples: [2, 1054]
The shape of tr_pairs[:, 0] and tr_pairs[:, 1] is (1054, 6) each, and tr_y is (1054,).
The code for the base network is:
def create_base_network(input_shape):
    K.clear_session()
    encoder = build_encoder(latent_dim, n_in)
    decoder = build_decoder(latent_dim, n_in)
    item = Input(shape=(n_in, ))
    encoded_repr = encoder(item)
    reconstructed_item = decoder(encoded_repr)
    autoencoder = Model(item, reconstructed_item)
    return autoencoder
The code for the encoder and decoder is:
def build_encoder(latent_dim, input_dim):
    input_layer = Input(shape=(input_dim, ))
    h = Dense(32, activation='relu', activity_regularizer=regularizers.l1(10e-5))(input_layer)
    h = Dropout(0.1)(h)
    h = Dense(64, activation='relu')(h)
    h = Dropout(0.1)(h)
    # h = Dense(128, activation='relu')(h)
    # h = Dropout(0.1)(h)
    latent_repr = Dense(latent_dim, activation='relu')(h)
    return Model(input_layer, latent_repr)

def build_decoder(latent_dim, input_dim):
    model = Sequential()
    # model.add(Dense(128, input_dim=latent_dim, activation='relu'))
    # model.add(Dropout(0.1))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(input_dim, activation='relu'))
    z = Input(shape=(latent_dim,))
    item = model(z)
    return Model(z, item)
The code works when I use the normal Keras model's .fit function, but it doesn't work here. Is there a problem somewhere in my code, or is it just not possible to feed multiple inputs to GridSearchCV? And if that is the case, is there a way I can still perform the grid search?
This is a workaround to pass multiple inputs: I create a dummy model that receives a SINGLE input of shape (n_sample, 2, 6) and then split it into two parts using Lambda layers. You can modify this according to your siamese structure.
def createMod(optimizer='Adam'):
    combi_input = Input((2, 6))                        # (n_sample, 2, 6)
    input_a = Lambda(lambda x: x[:, 0])(combi_input)   # (n_sample, 6)
    input_b = Lambda(lambda x: x[:, 1])(combi_input)   # (n_sample, 6)

    c = Concatenate()([input_a, input_b])
    x = Dense(32)(c)
    prediction = Dense(1, activation='sigmoid')(x)

    model = Model(combi_input, prediction)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics='accuracy')
    return model
tr_pairs = np.random.uniform(0,1, (1054, 2, 6))
tr_y = np.random.randint(0,2, 1054)
model = tf.keras.wrappers.scikit_learn.KerasClassifier(build_fn=createMod, verbose=0)
batch_size = [10, 20]
epochs = [10, 5]
optimizer = ['adam','SGD']
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(tr_pairs, tr_y)
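To keep the actual siamese structure, the same trick applies: split the combined input and feed both slices through the shared base network. A sketch (createSiameseMod is a hypothetical name; create_base_network, euclidean_distance, eucl_dist_output_shape and input_shape are taken from the question):

def createSiameseMod(learn_rate=0.01):
    # single combined input of shape (n_sample, 2, 6)
    combi_input = Input((2, 6))
    input_a = Lambda(lambda x: x[:, 0])(combi_input)  # (n_sample, 6)
    input_b = Lambda(lambda x: x[:, 1])(combi_input)  # (n_sample, 6)

    # shared weights, exactly as in the original createMod
    base_network = create_base_network(input_shape)
    processed_a = base_network(input_a)
    processed_b = base_network(input_b)

    distance = Lambda(euclidean_distance,
                      output_shape=eucl_dist_output_shape)([processed_a, processed_b])
    prediction = Dense(1, activation='sigmoid')(distance)

    model = Model(combi_input, prediction)
    model.compile(loss='binary_crossentropy', optimizer=Adam(lr=learn_rate), metrics=['accuracy'])
    return model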
I am experimenting with multi-input to a double CNN.
However, I am getting the error Graph disconnected: cannot obtain value for tensor Tensor("embedding_1_input:0", shape=(?, 40), dtype=float32) at layer "embedding_1_input". The following previous layers were accessed without issue: []
I haven't found a fix for it. Also, given that there are layers with shape = (?, 50), why is there such a dimension "?"?
from keras.models import Model
from keras.layers import Input, Dot
from keras.layers.core import Reshape
from keras.backend import int_shape
max_words=50
thedata = [' '.join(list(x)) for x in self.input_data]
emb_size = 15
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(thedata)
self.dictionary = tokenizer.word_index
vocab_size = len(tokenizer.word_index) + 1
vocab_size = 5000
allWordIndices = []
for text in thedata:
    wordIndices = self.convert_text_to_index_array(text)
    allWordIndices.append(wordIndices)
allWordIndices = np.asarray(allWordIndices)
train_x = tokenizer.sequences_to_matrix(allWordIndices, mode='binary')
train_y = list(map(lambda x: self.c[x], self.output_data))
X_train, X_test, Y_train, Y_test = train_test_split(train_x, train_y, test_size = 0.1, shuffle=True)
vocab_size1 = 50000
input_size = 40
input_shape = (input_size, 1)
# first model
L_branch = Sequential()
L_branch.add(Embedding(vocab_size1, emb_size, input_length=max_words, trainable=True))
L_branch.add(Conv1D(50, activation='relu', kernel_size=10, input_shape=input_shape))
L_branch.add(MaxPooling1D(emb_size))
L_branch.add(Flatten())
L_branch.add(Dense(emb_size, activation='sigmoid'))
# second model
R_branch = Sequential()
R_branch.add(Embedding(vocab_size, output_dim=emb_size, input_length=max_words, trainable=True))
R_branch.add(Dense(emb_size, activation='sigmoid'))
input_in1 = Input(shape=(input_size, 1, 1))
input_in2 = Input(shape=(input_size, 1, 1))
merged = Dot(axes=(1, 2))([L_branch.outputs[0], R_branch.outputs[0]])
print(merged.shape)
#
merged = Reshape((-1, int_shape(merged)[1],))(merged)
latent = Conv1D(50, activation='relu', kernel_size=self.nb_classes, input_shape=(1, input_size, 1))(merged)
latent = MaxPooling1D(self.nb_classes)(latent)
out = Dense(self.nb_classes, activation='sigmoid')(latent)
final_model = Model([input_in2, input_in1], out)
final_model.compile(
    loss='mse',
    optimizer='adam',
    metrics=['accuracy'])
final_model.summary()
final_model.fit(
    [X_train, train_x],
    Y_train,
    batch_size=200,
    epochs=5,
    verbose=1
    # validation_split=0.1
)
I tried adding a Flatten layer after the second embedding, but I got an error, even with a Dropout. I also tried eliminating the max pooling from the L_branch model.
I also tried skipping the reshape and just expanding the input dims, since the second Conv1D raised an error saying the layer expects ndim=3 but got ndim=2:
latent = Conv1D(50, activation='relu', kernel_size=nb_classes, input_shape=(1, input_size, 1))(merged)
  File "/root/anaconda3/envs/oea/lib/python3.7/site-packages/keras/engine/base_layer.py", line 414, in __call__
    self.assert_input_compatibility(inputs)
  File "/root/anaconda3/envs/oea/lib/python3.7/site-packages/keras/engine/base_layer.py", line 311, in assert_input_compatibility
    str(K.ndim(x)))
ValueError: Input 0 is incompatible with layer conv1d_2: expected ndim=3, found ndim=2
I also don't understand which input is used by the first model and which by the second.
You have to define your new model this way: final_model = Model([L_branch.input, R_branch.input], out). In your code, input_in1 and input_in2 are never connected to the two branches, so the graph from those Input layers to out is disconnected. (As for the "?" dimension: it is the batch size, which is unknown until you actually feed data.)
I provide an example of a network with a structure similar to yours:
vocab_size1 = 5000
vocab_size2 = 50000
input_size1 = 40
input_size2 = 40
max_words=50
emb_size = 15
nb_classes = 10
# first model
L_branch = Sequential()
L_branch.add(Embedding(vocab_size1, emb_size, input_length=input_size1, trainable=True))
L_branch.add(Conv1D(50, activation='relu', kernel_size=10))
L_branch.add(MaxPooling1D())
L_branch.add(Flatten())
L_branch.add(Dense(emb_size, activation='relu'))
# second model
R_branch = Sequential()
R_branch.add(Embedding(vocab_size2, emb_size, input_length=input_size2, trainable=True))
R_branch.add(Flatten())
R_branch.add(Dense(emb_size, activation='relu'))
merged = Concatenate()([L_branch.output, R_branch.output])
latent = Dense(50, activation='relu')(merged)
out = Dense(nb_classes, activation='softmax')(latent)
final_model = Model([L_branch.input, R_branch.input], out)
final_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'])
final_model.summary()
X1 = np.random.randint(0,vocab_size1, (100,input_size1))
X2 = np.random.randint(0,vocab_size2, (100,input_size2))
y = np.random.randint(0,nb_classes, 100)
final_model.fit([X1,X2], y, epochs=10)
I'm pretty new to Keras and LSTMs. I've been trying to train my model of sequences to predict the future price of a stock with the code below, but a TypeError kept popping up.
I have tried changing the dtypes of both x_data and y_data with .astype(np.float16). However, each time I am returned the same TypeError stating that I have a float32 type.
If it helps, here are the shapes of my data:
xtrain.shape: (32, 24, 67), ytrain.shape: (32, 24, 1), xtest.shape: (38, 67), ytest.shape: (38, 1)
Does anyone have any idea what might be wrong? I've been stuck on this for a while. It would be great if someone could give me a hint.
y_data = y_data.to_numpy().astype(np.float32)
x_data = main_df.to_numpy().astype(np.float32)
num_x_signals = x_data.shape[1]
num_y_signals = y_data.shape[1]
# SPLIT TRAIN TEST DATA
ratio = 0.85
train_ratio = int(ratio * len(x_data))
x_train = x_data[0:train_ratio]
x_test = x_data[train_ratio:]
y_train = y_data[0:train_ratio]
y_test = y_data[train_ratio:]
# GENERATE RANDOM SEQUENCES
batch_size = 32
sequence_length = 24
EPOCHS = 50
def batch_generator(x_train, y_train, batch_size, sequence_length, num_x_signals, num_y_signals, num_train):
    while True:
        x_shape = (batch_size, sequence_length, num_x_signals)
        x_batch = np.zeros(shape = x_shape).astype(np.float32)

        y_shape = (batch_size, sequence_length, num_y_signals)
        y_batch = np.zeros(shape = y_shape).astype(np.float32)

        for i in range(batch_size):
            idx = np.random.randint(num_train - sequence_length)
            x_batch[i] = x_train[idx:idx+sequence_length]
            y_batch[i] = y_train[idx:idx+sequence_length]

        yield (x_batch, y_batch)

generator = batch_generator(x_train, y_train, batch_size, sequence_length, num_x_signals, num_y_signals, train_ratio)

xtrain, ytrain = next(generator)
xtest, ytest = (np.expand_dims(x_test, axis=0),
                np.expand_dims(y_test, axis=0))
# LSTM MODEL
model = Sequential()
model.add(LSTM(32, input_shape = (None, num_x_signals,), return_sequences = True))
model.add(Dropout(0.2))
model.add(BatchNormalization())
model.add(LSTM(128, return_sequences = True))
model.add(Dropout(0.15))
model.add(BatchNormalization())
model.add(LSTM(128))
model.add(Dropout(0.18))
model.add(BatchNormalization())
model.add(Dense(32, activation = 'relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation = 'softmax'))
opt = tf.keras.optimizers.Adam(lr = 0.001, decay = 1e-6)
model.compile(
    loss = 'sparse_categorical_crossentropy',
    optimizer = opt,
    metrics = ['accuracy']
)
name_of_file = f"{to_predict}-{sequence_length}-{future_predict}-{int(time.time())}"
tensorboard = TensorBoard(log_dir = "logs/{}".format(name_of_file))
filepath = "LSTM_Final-{epoch:02d}-{val_acc:.3f}"
checkpoint = ModelCheckpoint("models/{}.model".format(filepath), monitor = 'val_acc', verbose = 1, save_best_only = True, mode = 'max')  # saves only the best ones
history = model.fit(
    xtrain, ytrain,
    epochs = EPOCHS,
    validation_data = (xtest, ytest),
    callbacks = [tensorboard, checkpoint]
)
score = model.evaluate(xtest, ytest, verbose = 0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
model.save("models/{}".format(name_of_file))
I found this issue had to do with the loss function specified.
My code:
import tensorflow as tf
from tensorflow import keras
model = tf.keras.Sequential([
    keras.layers.Dense(64, activation=tf.nn.relu, input_shape=[3]),
    keras.layers.Dense(64, activation=tf.nn.relu),
    keras.layers.Dense(1)
])

# I changed the loss function from 'sparse_categorical_crossentropy' to 'mean_squared_error'
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])
X = train_dataset.to_numpy()
y = train_labels.to_numpy()
model.fit(X,y, epochs=5)
X shape was (920,3) and dtype = float64
y shape was (920,1) and dtype = float64
My problem was the loss function passed when compiling the model: I had taken 'sparse_categorical_crossentropy' from an image-recognition example, while what I was building here was a neural network for house-price prediction, i.e. a regression problem.
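Translated to the LSTM model in the question above (a sketch, assuming the goal is to predict a continuous price rather than a class), the final layer and the compile call would change to something like:

model.add(Dense(1, activation = 'linear'))  # softmax over a single unit always outputs 1.0

model.compile(
    loss = 'mean_squared_error',  # a regression loss instead of a classification loss
    optimizer = opt,
    metrics = ['mae']             # accuracy is not meaningful for regression
)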
I am trying to implement a system that encodes inputs using a CNN. After the CNN, I need to get a vector and use it in another deep learning method.
def get_input_representation(self):
    # get word vectors from embedding
    inputs = tf.nn.embedding_lookup(self.embeddings, self.input_placeholder)

    sequence_length = inputs.shape[1]  # 56
    vocabulary_size = 160  # 18765
    embedding_dim = 256
    filter_sizes = [3, 4, 5]
    num_filters = 3
    drop = 0.5

    epochs = 10
    batch_size = 30

    # this returns a tensor
    print("Creating Model...")
    inputs = Input(shape=(sequence_length,), dtype='int32')
    embedding = Embedding(input_dim=vocabulary_size, output_dim=embedding_dim, input_length=sequence_length)(inputs)
    reshape = Reshape((sequence_length, embedding_dim, 1))(embedding)

    conv_0 = Conv2D(num_filters, kernel_size=(filter_sizes[0], embedding_dim), padding='valid', kernel_initializer='normal', activation='relu')(reshape)
    conv_1 = Conv2D(num_filters, kernel_size=(filter_sizes[1], embedding_dim), padding='valid', kernel_initializer='normal', activation='relu')(reshape)
    conv_2 = Conv2D(num_filters, kernel_size=(filter_sizes[2], embedding_dim), padding='valid', kernel_initializer='normal', activation='relu')(reshape)

    maxpool_0 = MaxPool2D(pool_size=(sequence_length - filter_sizes[0] + 1, 1), strides=(1, 1), padding='valid')(conv_0)
    maxpool_1 = MaxPool2D(pool_size=(sequence_length - filter_sizes[1] + 1, 1), strides=(1, 1), padding='valid')(conv_1)
    maxpool_2 = MaxPool2D(pool_size=(sequence_length - filter_sizes[2] + 1, 1), strides=(1, 1), padding='valid')(conv_2)

    concatenated_tensor = Concatenate(axis=1)([maxpool_0, maxpool_1, maxpool_2])
    flatten = Flatten()(concatenated_tensor)
    dropout = Dropout(drop)(flatten)
    output = Dense(units=2, activation='softmax')(dropout)

    model = Model(inputs=inputs, outputs=output)

    adam = Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])

    print("Training Model...")
    model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, callbacks=[checkpoint], validation_data=(X_test, y_test))  # starts training

    return ??
The above code trains the model using X_train and y_train and then tests it. However, in my system I do not have y_train or y_test; I only need the vector from the last hidden layer, before the softmax layer. How can I obtain it?
For that you can define a backend function to get the output of arbitrary layer(s):
from keras import backend as K
func = K.function([model.input], [model.layers[index_of_layer].output])
You can find the index of your desired layer using model.summary(), where the layers are listed starting from index zero. If you need the layer before the last layer, you can use -2 as the index (the .layers attribute is actually a list, so you can index it like any list in Python). Then you can use the function you have defined by passing a list of input array(s):
outputs = func(inputs)
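For example, to get the activations of the layer just before the final softmax Dense in the model above (a sketch; model and X_train are the objects from the question, and the input array must be wrapped in a list):

func = K.function([model.input], [model.layers[-2].output])
features = func([X_train])[0]  # numpy array of penultimate-layer activations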
Alternatively, you can also define a model for this purpose, as sketched below. This has been covered more thoroughly in the Keras documentation, so I advise you to read that.
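A minimal sketch of that alternative, assuming the trained model from the question:

from keras.models import Model

# a second model that shares the trained layers and stops before the softmax
feature_extractor = Model(inputs=model.input, outputs=model.layers[-2].output)
features = feature_extractor.predict(X_train)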