I am running TensorFlow 2.4 on Colab. I tried to save the model using tf.train.Checkpoint(), since the model uses subclassing, but after restoration I saw it didn't restore any of my model's weights.
Here are a few snippets:
### From tensorflow tutorial nmt_with_attention
class Encoder(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
        ...
        self.gru = tf.keras.layers.GRU(self.enc_units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform')
.
.
.
class NMT_Train(tf.keras.Model):
    def __init__(self, inp_vocab_size, tar_vocab_size, max_length_inp, max_length_tar, emb_dims, units, batch_size, source_tokenizer, target_tokenizer):
        super(NMT_Train, self).__init__()
        self.encoder = Encoder(inp_vocab_size, emb_dims, units, batch_size)
        ...
.
.
.
model = NMT_Train(INP_VOCAB, TAR_VOCAB, MAXLEN, MAXLEN, EMB_DIMS, UNITS, BATCH_SIZE, english_tokenizer, hindi_tokenizer)
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss_object=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))
model.fit(dataset, epochs=2)
checkpoint = tf.train.Checkpoint(model = model)
manager = tf.train.CheckpointManager(checkpoint, './ckpts', max_to_keep=1)
manager.save()
model.encoder.gru.get_weights() ### get the output
##[array([[-0.0627057 , 0.05900152, 0.06614069, ...
model.optimizer.get_weights() ### get the output
##[90, array([[ 6.6851695e-05, -4.6736805e-06, -2.3183979e-05, ...
When I later restored it, I didn't get any GRU weights:
model = NMT_Train(INP_VOCAB, TAR_VOCAB, MAXLEN, MAXLEN, EMB_DIMS, UNITS, BATCH_SIZE, english_tokenizer, hindi_tokenizer)
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss_object=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))
checkpoint = tf.train.Checkpoint(model = model)
manager = tf.train.CheckpointManager(checkpoint, './ckpts', max_to_keep=1)
manager.restore_or_initialize()
model.encoder.gru.get_weights() ### empty list
## []
model.optimizer.get_weights() ### empty list
## []
I also tried checkpoint.restore(manager.latest_checkpoint), but nothing changed.
Is there anything wrong in what I am doing? Or can you suggest another way to save the model so that I can retrain it for further epochs?
You are defining a Keras model, so why not use Keras model checkpoints?
From the Keras documentation:
model.compile(loss=..., optimizer=...,
              metrics=['accuracy'])
EPOCHS = 10
checkpoint_filepath = '/tmp/checkpoint'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True)
# Model weights are saved at the end of every epoch, if it's the best seen
# so far.
model.fit(epochs=EPOCHS, callbacks=[model_checkpoint_callback])
# The model weights (that are considered the best) are loaded into the model.
model.load_weights(checkpoint_filepath)
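For a subclassed model like NMT_Train, a minimal sketch of the same approach might look like the following (the file path is an assumption of this sketch). Note that subclassed models cannot be saved in the HDF5 full-model format, hence save_weights_only=True, and that load_weights on a subclassed model is deferred: the values are only applied once the variables are created, e.g. after the first call to the model.
checkpoint_filepath = './nmt_ckpt'  # assumed path
ckpt_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,  # required for a subclassed model
    save_best_only=False)    # save at the end of every epoch

model.fit(dataset, epochs=2, callbacks=[ckpt_cb])

# Later: rebuild the model and restore the weights before further training.
model = NMT_Train(INP_VOCAB, TAR_VOCAB, MAXLEN, MAXLEN, EMB_DIMS, UNITS,
                  BATCH_SIZE, english_tokenizer, hindi_tokenizer)
model.load_weights(checkpoint_filepath)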
Related
I have a model in TensorFlow that I converted to PyTorch. I want to check if the two models are the same or if I'm mixing things up. Here is my code in TensorFlow:
model = Sequential()
model.add(
    LSTM(5, input_shape=(4, 1000))
)
model.add(
    Dense(1, activation='tanh')
)
model.compile(
    loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']
)
model.fit(
    X_train, y_train, epochs=100, batch_size=27
)
I built the equivalent model in PyTorch this way:
class LSTM1(nn.Module):
    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length):
        super(LSTM1, self).__init__()
        self.num_classes = num_classes  # number of classes
        self.num_layers = num_layers  # number of layers
        self.input_size = input_size  # input size
        self.seq_length = seq_length  # sequence length
        self.hidden_size = hidden_size  # hidden state
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)  # lstm
        self.fc = nn.Linear(self.hidden_size, num_classes)  # fully connected last layer
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))  # hidden state
        c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))  # internal state
        # Propagate input through LSTM
        output, (hn, cn) = self.lstm(x, (h_0, c_0))  # lstm with input, hidden, and internal state
        hn = hn.view(-1, self.hidden_size)  # reshaping the data for the Dense layer
        out = self.sigmoid(hn)
        out = self.fc(out)  # final output
        out = self.sigmoid(out)
        return out
num_epochs = 100  # 100 epochs
learning_rate = 0.001  # 0.001 lr
input_size = 1000  # number of features
num_layers = 5  # number of stacked lstm layers
hidden_size = 1
num_classes = 1  # number of output classes
X_train = np.concatenate((X_phage, X_bac))
y_train = np.concatenate((np.ones(len(X_phage)), np.zeros(len(X_bac))))
X_train_tensors_final = Variable(torch.Tensor(X_train))
y_train_tensors = Variable(torch.Tensor(y_train))
print(X_train_tensors_final.shape)
model = LSTM1(num_classes, input_size, hidden_size, num_layers, X_train_tensors_final.shape[1])  # our lstm class
print("Training Shape", X_train_tensors_final.shape, y_train_tensors.shape)
print(model)
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for epoch in range(num_epochs):
    outputs = model.forward(X_train_tensors_final)  # forward pass
    optimizer.zero_grad()  # calculate the gradient, manually setting to 0
    # obtain the loss function
    outputs = outputs[-20:]
    y_train_tensors = y_train_tensors.type(torch.LongTensor)
    y_train_tensors = torch.reshape(y_train_tensors, (20, 1))
    loss = criterion(outputs, y_train_tensors.float())
    loss.backward()  # calculates the loss of the loss function
    optimizer.step()  # improve from loss, i.e. backprop
    train_acc = torch.sum((outputs > 0.5).bool().float() == y_train_tensors)
    final_train_acc = train_acc / 20
    print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))
    print('Accuracy: ', final_train_acc)
My data is 20 instances, where each instance has 1000 features and 4 timesteps, so it is shaped (20, 4, 1000). However, when I run the two models I get different results (different loss and accuracy), so I'm afraid I have missed something and the models are not the same.
I would appreciate it if someone could validate that these models are the same, and if they are, explain why I'm getting different results.
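For reference, the two definitions do not appear to line up: the Keras model is a single LSTM layer with 5 hidden units, while the PyTorch model stacks num_layers=5 LSTM layers of hidden_size=1, applies an extra sigmoid between the LSTM and the linear layer, and ends in a sigmoid rather than tanh. A sketch of hyperparameters closer to the Keras model (reusing LSTM1 from the question) might be:
# Sketch: hyperparameters that mirror LSTM(5, input_shape=(4, 1000)) in Keras.
input_size = 1000   # features per timestep
hidden_size = 5     # Keras LSTM(5) has 5 hidden units
num_layers = 1      # the Keras model has a single LSTM layer
num_classes = 1

model = LSTM1(num_classes, input_size, hidden_size, num_layers, 4)
# Inside forward(), the extra self.sigmoid(hn) before self.fc would also need
# to be removed, and the final activation would be torch.tanh rather than
# sigmoid to match Dense(1, activation='tanh').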
I have created a model with an LSTM layer as shown below and want to get the internal state (hidden state and cell state) after the training step and save it. After the training step, I will use the network for a prediction and want to reinitialize the LSTM with the saved internal state before the next training step. This way I can continue from the same point after each training step. I haven't been able to find anything helpful for the current version of TensorFlow, i.e. 2.x.
import tensorflow as tf

class LTSMNetwork(object):
    def __init__(self, num_channels, num_hidden_neurons, learning_rate, time_steps, batch_size):
        self.num_channels = num_channels
        self.num_hidden_neurons = num_hidden_neurons
        self.learning_rate = learning_rate
        self.time_steps = time_steps
        self.batch_size = batch_size

    def lstm_model(self):
        self.model = tf.keras.Sequential()
        self.model.add(tf.keras.layers.LSTM(batch_input_shape=(self.batch_size, self.time_steps, self.num_channels),
                                            units=self.num_hidden_neurons[0],
                                            activation='tanh', recurrent_activation='sigmoid',
                                            return_sequences=True, stateful=True))
        #self.model.add(tf.keras.layers.LSTM(units=self.num_hidden_neurons[1], stateful=True))
        hidden_layer = tf.keras.layers.Dense(units=self.num_hidden_neurons[1], activation=tf.nn.sigmoid)
        self.model.add(hidden_layer)
        self.model.add(tf.keras.layers.Dense(units=self.num_channels, name="output_layer", activation=tf.nn.tanh))
        self.model.compile(optimizer=tf.optimizers.Adam(learning_rate=self.learning_rate),
                           loss='mse', metrics=['binary_accuracy'])
        return self.model

if __name__ == '__main__':
    num_channels = 3
    num_hidden_neurons = [150, 100]
    learning_rate = 0.001
    time_steps = 1
    batch_size = 1
    lstm_network = LTSMNetwork(num_channels=num_channels, num_hidden_neurons=num_hidden_neurons,
                               learning_rate=learning_rate, time_steps=time_steps, batch_size=batch_size)
    model = lstm_network.lstm_model()
    model.summary()
You can define a custom Callback and save the hidden and cell states at every epoch, for example. Afterwards, you can choose the epoch from which you want to extract the states, and then use lstm_layer.reset_states(...) to set the initial state again:
import tensorflow as tf

class LTSMNetwork(object):
    def __init__(self, num_channels, num_hidden_neurons, learning_rate, time_steps, batch_size):
        self.num_channels = num_channels
        self.num_hidden_neurons = num_hidden_neurons
        self.learning_rate = learning_rate
        self.time_steps = time_steps
        self.batch_size = batch_size

    def lstm_model(self):
        self.model = tf.keras.Sequential()
        self.model.add(tf.keras.layers.LSTM(batch_input_shape=(self.batch_size, self.time_steps, self.num_channels),
                                            units=self.num_hidden_neurons[0],
                                            activation='tanh', recurrent_activation='sigmoid',
                                            return_sequences=True, stateful=True))
        hidden_layer = tf.keras.layers.Dense(units=self.num_hidden_neurons[1], activation=tf.nn.sigmoid)
        self.model.add(hidden_layer)
        self.model.add(tf.keras.layers.Dense(units=self.num_channels, name="output_layer", activation=tf.nn.tanh))
        self.model.compile(optimizer=tf.optimizers.Adam(learning_rate=self.learning_rate),
                           loss='mse', metrics=['binary_accuracy'])
        return self.model

states = {}

class CustomCallback(tf.keras.callbacks.Callback):
    def __init__(self, lstm_layer):
        self.lstm_layer = lstm_layer

    def on_epoch_end(self, epoch, logs=None):
        states[epoch] = self.lstm_layer.states

num_channels = 3
num_hidden_neurons = [150, 100]
learning_rate = 0.001
time_steps = 1
batch_size = 1
lstm_network = LTSMNetwork(num_channels=num_channels, num_hidden_neurons=num_hidden_neurons,
                           learning_rate=learning_rate, time_steps=time_steps, batch_size=batch_size)
model = lstm_network.lstm_model()
lstm_layer = model.layers[0]
x = tf.random.normal((1, 1, 3))
y = tf.random.normal((1, 1, 3))
model.fit(x, y, epochs=5, callbacks=[CustomCallback(lstm_layer)])
model.summary()
lstm_layer.reset_states(states[0])  # Sets hidden state from first epoch.
The states dict now holds the internal states (hidden and cell state) for each of the 5 epochs.
I have managed to save the internal state of the LSTM after the training step and to reinitialize the LSTM with the saved internal states before the next training step.
You can create a variable and assign the currently stored state value to it (see How can I copy a variable in tensorflow):
import numpy as np
import tensorflow as tf

states_ = {}

# Save the hidden state
internal_state_h = lstm_layer.states[0]
v1 = tf.Variable(initial_value=np.zeros((1, 150)), dtype=tf.float32, shape=(1, 150))
copy_state_h = v1.assign(internal_state_h)

# Save the cell state
internal_state_c = lstm_layer.states[1]
v2 = tf.Variable(initial_value=np.zeros((1, 150)), dtype=tf.float32, shape=(1, 150))
copy_state_c = v2.assign(internal_state_c)

# Create a tuple and add it to the dictionary
states_[0] = (copy_state_h, copy_state_c)

# Reset the internal state
lstm_layer.reset_states(states_[0])
A call for prediction changes the internal states; however, by following these steps, you can restore the internal states of the RNN to what they were before the prediction.
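The same idea can be packaged as small helpers (a sketch, assuming eager execution and the stateful layer from above):
def snapshot_states(lstm_layer):
    # Copy the current hidden and cell state values out of the layer's
    # variables; without the copy they would keep changing during training.
    return [tf.identity(s).numpy() for s in lstm_layer.states]

def restore_states(lstm_layer, snapshot):
    # Re-seed the layer's internal state from a saved snapshot.
    lstm_layer.reset_states(snapshot)

saved = snapshot_states(lstm_layer)
model.predict(x)                   # prediction mutates the internal state
restore_states(lstm_layer, saved)  # back to the pre-prediction state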
I train the following model based on a GRU; note that I am passing the argument stateful=True to the GRU constructor.
class LearningToSurpriseModel(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, rnn_units):
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(rnn_units,
                                       stateful=True,
                                       return_sequences=True,
                                       return_state=True,
                                       reset_after=True
                                       )
        self.dense = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, states=None, return_state=False, training=False):
        x = inputs
        x = self.embedding(x, training=training)
        if states is None:
            states = self.gru.get_initial_state(x)
        x, states = self.gru(x, initial_state=states, training=training)
        x = self.dense(x, training=training)
        if return_state:
            return x, states
        else:
            return x

    @tf.function
    def train_step(self, inputs):
        [defining here my training step]
I instantiate my model
model = LearningToSurpriseModel(
    vocab_size=len(ids_from_chars.get_vocabulary()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units
)
[compile and do stuff]
and train for EPOCHS epochs
for i in range(EPOCHS):
    model.fit(train_dataset, validation_data=validation_dataset, epochs=1, callbacks=[EarlyS], verbose=1)
    model.reset_states()
What is the behavior of this code regarding GRU states: are states updated for each new batch of data, or only at each new epoch? The desired behavior is a reset at each new epoch only. If that is not what happens, how can I implement it?
EDIT
TensorFlow implements the reset_states function for models as
def reset_states(self):
    for layer in self.layers:
        if hasattr(layer, 'reset_states') and getattr(layer, 'stateful', False):
            layer.reset_states()
Does it mean (contrary to what the doc otherwise seems to imply) that states can be reset only if stateful=False? That is what I infer from the condition on getattr(layer, 'stateful', False).
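For what it's worth, the third argument of getattr is only the default used when the attribute is missing, so the condition fires when stateful is True, not False. A minimal sketch of making the per-epoch reset explicit with a callback (assuming the model above) would be:
class ResetStatesAtEpochEnd(tf.keras.callbacks.Callback):
    # Hypothetical helper: reset all stateful layers once per epoch, so a
    # single fit() call over many epochs behaves like the loop above.
    def on_epoch_end(self, epoch, logs=None):
        self.model.reset_states()

model.fit(train_dataset, validation_data=validation_dataset, epochs=EPOCHS,
          callbacks=[EarlyS, ResetStatesAtEpochEnd()], verbose=1)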
I'm using TF2 installed via pip on an Ubuntu 18.04 box:
$ pip freeze | grep "tensorflow"
tensorflow==2.0.0
tensorflow-estimator==2.0.1
And I'm playing with a custom layer.
import tensorflow as tf
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.layers import Input, Concatenate, Dense, Bidirectional, LSTM, Embedding
from tensorflow.keras.models import Model
from tensorflow.keras.datasets import imdb
class Attention(tf.keras.layers.Layer):
    def __init__(self, units):
        super(Attention, self).__init__()
        self.W1 = Dense(units)
        self.W2 = Dense(units)
        self.V = Dense(1)

    def call(self, features, hidden):
        hidden_with_time_axis = tf.expand_dims(hidden, 1)
        score = tf.nn.tanh(self.W1(features) + self.W2(hidden_with_time_axis))
        attention_weights = tf.nn.softmax(self.V(score), axis=1)
        context_vector = attention_weights * features
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector, attention_weights
vocab_size = 10000
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)
max_len = 200
rnn_cell_size = 128
x_train = sequence.pad_sequences(x_train, maxlen=max_len, padding='post')
x_test = sequence.pad_sequences(x_test, maxlen=max_len, truncating='post', padding='post')
# Network
sequence_input = Input(shape=(max_len,), dtype='int32')
embedded_sequences = Embedding(vocab_size, 128, input_length=max_len)(sequence_input)
# lstm = Bidirectional(LSTM(rnn_cell_size, dropout=0.3, return_sequences=True, return_state=True), name="bi_lstm_0")(embedded_sequences)
lstm, forward_h, forward_c, backward_h, backward_c = Bidirectional(LSTM(rnn_cell_size, dropout=0.2, return_sequences=True, return_state=True))(embedded_sequences)
state_h = Concatenate()([forward_h, backward_h])
state_c = Concatenate()([forward_c, backward_c])
attention = Attention(8)
context_vector, attention_weights = attention(lstm, state_h)
output = Dense(1, activation='sigmoid')(context_vector)
model = Model(inputs=sequence_input, outputs=output)
# summarize layers
print(model.summary())
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(x_train, y_train, epochs=10, batch_size=200, validation_split=.3, verbose=1)
result = model.evaluate(x_test, y_test)
print(result)
I would like to debug/inspect the Attention.call() function, but I'm not able to get the tensor values when I set a breakpoint inside the function.
Before I start .fit(), I can verify that eager execution is enabled:
print(tf.executing_eagerly())
True
But inside the Attention.call() function, eager execution is disabled:
print(tf.executing_eagerly())
False
Any reason for eager execution to be false during the call() execution? How can I enable it?
By default, a tf.keras model is compiled to a static graph to deliver the best execution performance. Think of it as @tf.function being applied to the tf.keras model by default.
https://www.tensorflow.org/api_docs/python/tf/keras/Model#run_eagerly
To enable eager mode explicitly for a tf.keras model, compile the model with run_eagerly=True in your code:
model.compile(optimizer='adam', run_eagerly=True, loss='binary_crossentropy', metrics=['accuracy'])
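If you want eager behavior globally rather than per model, there is also a global switch; in TF 2.0 (the version above) it lives under experimental, and it was promoted in later releases:
# TF 2.0:
tf.config.experimental_run_functions_eagerly(True)
# Newer TF 2.x:
# tf.config.run_functions_eagerly(True)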
I am currently doing a project in which I need to predict eye disease in a group of images. I am using the Keras built-in applications. I am getting good results on VGG16 and VGG19, but on the Xception architecture I keep getting an AUC of exactly 0.5 every epoch.
I have tried different optimizers and learning rates, but nothing works. I solved the same problem with VGG19 by switching from RMSProp optimizer to Adam optimizer, but I can't get it to work for Xception.
def buildModel():
    from keras.models import Model
    from keras.layers import Dense, Flatten
    from keras.optimizers import adam

    input_model = applications.xception.Xception(
        include_top=False,
        weights='imagenet',
        input_tensor=None,
        input_shape=input_sizes["xception"],
        pooling=None,
        classes=2)

    base_model = input_model
    x = base_model.output
    x = Flatten()(x)
    predictions = Dense(2, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    for layer in base_model.layers:
        layer.trainable = False

    model.compile(optimizer=adam(lr=0.01), loss='binary_crossentropy', metrics=['accuracy'])
    return model
class Histories(keras.callbacks.Callback):
    def __init__(self, val_data):
        super(Histories, self).__init__()
        self.x_batch = []
        self.y_batch = []
        for i in range(len(val_data)):
            x, y = val_data.__getitem__(i)
            self.x_batch.extend(x)
            self.y_batch.extend(np.ndarray.astype(y, int))
        self.aucs = []
        self.specificity = []
        self.sensitivity = []
        self.losses = []
        return

    def on_train_begin(self, logs={}):
        initFile("results/xception_results_adam_3.txt")
        return

    def on_train_end(self, logs={}):
        return

    def on_epoch_begin(self, epoch, logs={}):
        return

    def on_epoch_end(self, epoch, logs={}):
        self.losses.append(logs.get('loss'))
        y_pred = self.model.predict(np.asarray(self.x_batch))
        con_mat = confusion_matrix(np.asarray(self.y_batch).argmax(axis=-1), y_pred.argmax(axis=-1))
        tn, fp, fn, tp = con_mat.ravel()
        sens = tp / (tp + fn)
        spec = tn / (tn + fp)
        auc_score = roc_auc_score(np.asarray(self.y_batch).argmax(axis=-1), y_pred.argmax(axis=-1))
        print("Specificity: %f Sensitivity: %f AUC: %f" % (spec, sens, auc_score))
        print(con_mat)
        self.sensitivity.append(sens)
        self.specificity.append(spec)
        self.aucs.append(auc_score)
        writeToFile("results/xception_results_adam_3.txt", epoch, auc_score, spec, sens, self.losses[epoch])
        return
# What follows is data from the Jupyter Notebook that I actually use to evaluate
#%% Initialize data
trainDirectory = 'RetinaMasks/train'
valDirectory = 'RetinaMasks/val'
testDirectory = 'RetinaMasks/test'
train_datagen = ImageDataGenerator(rescale=1. / 255)
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_directory(
    trainDirectory,
    target_size=(299, 299),
    batch_size=16,
    class_mode='categorical')
validation_generator = test_datagen.flow_from_directory(
    valDirectory,
    target_size=(299, 299),
    batch_size=16,
    class_mode='categorical')
test_generator = test_datagen.flow_from_directory(
    testDirectory,
    target_size=(299, 299),
    batch_size=16,
    class_mode='categorical')
#%% Create model
model = buildModel("xception")
#%% Initialize metrics
from keras.callbacks import EarlyStopping
from MetricsCallback import Histories
import keras
metrics = Histories(validation_generator)
es = EarlyStopping(monitor='val_loss',
                   min_delta=0,
                   patience=20,
                   verbose=0,
                   mode='auto',
                   baseline=None,
                   restore_best_weights=False)
mcp = keras.callbacks.ModelCheckpoint("saved_models/xception.adam.lr0.1_{epoch:02d}.hdf5",
                                      monitor='val_loss',
                                      verbose=0,
                                      save_best_only=False,
                                      save_weights_only=False,
                                      mode='auto',
                                      period=1)
#%% Train model
from StaticDataAugmenter import superDirectorySize
history = model.fit_generator(
    train_generator,
    steps_per_epoch=superDirectorySize(trainDirectory) // 16,
    epochs=100,
    validation_data=validation_generator,
    validation_steps=superDirectorySize(valDirectory) // 16,
    callbacks=[metrics, es, mcp],
    workers=8,
    shuffle=False
)
I honestly have no idea what causes this behavior, or how to prevent it. Thank you in advance, and I apologize for the long code snippet :)
Your learning rate is too high.
Try lowering the learning rate.
I used to run into this with transfer learning when I was fine-tuning at very high learning rates.
A sustained AUC of 0.5 over multiple epochs in a binary classification means that your (convolutional) neural network is not able to distinguish between the classes at all, which in turn means it is not able to learn anything.
Use learning rates of 0.0001, 0.00001, 0.000001.
At the same time, you should try to unfreeze/make some layers trainable, because your entire feature extractor is frozen; in fact, this could be another reason why the network is incapable of learning anything.
I am quite confident that your problem will be solved if you lower your learning rate :).
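As a sketch of the unfreezing idea (the number of layers to unfreeze and the learning rate here are illustrative assumptions, not tuned values):
from keras.optimizers import Adam

# Keep most of the Xception base frozen and unfreeze only the top layers.
for layer in base_model.layers[:-20]:
    layer.trainable = False
for layer in base_model.layers[-20:]:
    layer.trainable = True

# Recompile with a much smaller learning rate for fine-tuning.
model.compile(optimizer=Adam(lr=1e-5),
              loss='categorical_crossentropy',
              metrics=['accuracy'])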
An AUC of 0.5 implies that your network is randomly guessing the output, which means it didn't learn anything. This was already discussed, for example, here.
As Timbus Calin suggested, you could do a "line search" of the learning rate, starting with 0.000001 and then increasing the learning rate by powers of 10.
I would suggest you directly start with a random search, where you not only optimize the learning rate but also other hyperparameters, such as the batch size. Read more about random search in this paper.
You are not computing the AUC correctly; you currently have this:
auc_score = roc_auc_score(np.asarray(self.y_batch).argmax(axis=-1), y_pred.argmax(axis=-1))
AUC is computed from (probability) scores produced by the model. The argmax of the model output does not provide scores, but class labels. The correct function call is:
auc_score = roc_auc_score(np.asarray(self.y_batch).argmax(axis=-1), y_pred[:, 1])
Note that the score needed to compute ROC is the probability of the positive class, which is the second element of the softmax output. This is why only the second column of the predictions is used to compute the AUC.
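A tiny self-contained illustration of the difference, with made-up labels and scores:
import numpy as np
from sklearn.metrics import roc_auc_score

y_true = np.array([0, 0, 1, 1])
# Softmax outputs: the second column is the positive-class probability.
y_pred = np.array([[0.9, 0.1],
                   [0.8, 0.2],
                   [0.6, 0.4],
                   [0.1, 0.9]])

print(roc_auc_score(y_true, y_pred[:, 1]))            # 1.0: the scores rank perfectly
print(roc_auc_score(y_true, y_pred.argmax(axis=-1)))  # 0.75: argmax discards ranking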
What about this?
def buildModel():
    from keras.models import Model
    from keras.layers import Dense, Flatten
    from keras.optimizers import adam

    input_model = applications.xception.Xception(
        include_top=False,
        weights='imagenet',
        input_tensor=None,
        input_shape=input_sizes["xception"],
        pooling='avg',  # 1
        classes=2)

    base_model = input_model
    x = base_model.output
    # x = Flatten()(x)  # 2
    predictions = Dense(2, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    for layer in base_model.layers:
        layer.trainable = False

    model.compile(optimizer=adam(lr=0.01),
                  loss='categorical_crossentropy',  # 3
                  metrics=['accuracy'])
    return model
class Histories(keras.callbacks.Callback):
    def __init__(self, val_data):
        super(Histories, self).__init__()
        self.x_batch = []
        self.y_batch = []
        for i in range(len(val_data)):
            x, y = val_data.__getitem__(i)
            self.x_batch.extend(x)
            self.y_batch.extend(np.ndarray.astype(y, int))
        self.aucs = []
        self.specificity = []
        self.sensitivity = []
        self.losses = []
        return

    def on_train_begin(self, logs={}):
        initFile("results/xception_results_adam_3.txt")
        return

    def on_train_end(self, logs={}):
        return

    def on_epoch_begin(self, epoch, logs={}):
        return

    def on_epoch_end(self, epoch, logs={}):
        self.losses.append(logs.get('loss'))
        y_pred = self.model.predict(np.asarray(self.x_batch))
        con_mat = confusion_matrix(np.asarray(self.y_batch).argmax(axis=-1), y_pred.argmax(axis=-1))
        tn, fp, fn, tp = con_mat.ravel()
        sens = tp / (tp + fn)
        spec = tn / (tn + fp)
        auc_score = roc_auc_score(np.asarray(self.y_batch).argmax(axis=-1), y_pred.argmax(axis=-1))
        print("Specificity: %f Sensitivity: %f AUC: %f" % (spec, sens, auc_score))
        print(con_mat)
        self.sensitivity.append(sens)
        self.specificity.append(spec)
        self.aucs.append(auc_score)
        writeToFile("results/xception_results_adam_3.txt", epoch, auc_score, spec, sens, self.losses[epoch])
        return
# What follows is data from the Jupyter Notebook that I actually use to evaluate
#%% Initialize data
trainDirectory = 'RetinaMasks/train'
valDirectory = 'RetinaMasks/val'
testDirectory = 'RetinaMasks/test'
train_datagen = ImageDataGenerator(rescale=1. / 255)
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_directory(
    trainDirectory,
    target_size=(299, 299),
    batch_size=16,
    class_mode='categorical')
validation_generator = test_datagen.flow_from_directory(
    valDirectory,
    target_size=(299, 299),
    batch_size=16,
    class_mode='categorical')
test_generator = test_datagen.flow_from_directory(
    testDirectory,
    target_size=(299, 299),
    batch_size=16,
    class_mode='categorical')
#%% Create model
model = buildModel("xception")
#%% Initialize metrics
from keras.callbacks import EarlyStopping
from MetricsCallback import Histories
import keras
metrics = Histories(validation_generator)
es = EarlyStopping(monitor='val_loss',
                   min_delta=0,
                   patience=20,
                   verbose=0,
                   mode='auto',
                   baseline=None,
                   restore_best_weights=False)
mcp = keras.callbacks.ModelCheckpoint("saved_models/xception.adam.lr0.1_{epoch:02d}.hdf5",
                                      monitor='val_loss',
                                      verbose=0,
                                      save_best_only=False,
                                      save_weights_only=False,
                                      mode='auto',
                                      period=1)
#%% Load saved model
from keras.models import load_model
# model = load_model("saved_models/vgg16.10.hdf5") # 4
#%% Train model
from StaticDataAugmenter import superDirectorySize
history = model.fit_generator(
    train_generator,
    steps_per_epoch=superDirectorySize(trainDirectory) // 16,
    epochs=100,
    validation_data=validation_generator,
    validation_steps=superDirectorySize(valDirectory) // 16,
    callbacks=[metrics, es, mcp],
    workers=8,
    shuffle=False
)
For 1 and 2: I think it doesn't make sense to use an FC layer right after ReLU without a pooling layer in between; I have never tried it, so it might not help, but pooling='avg' gives you a pooled output instead.
For 3: why are you using binary cross-entropy when your generators use class_mode='categorical'?
For 4: as I commented above, this would mean you are loading your VGG model and training it, instead of using the Xception model from buildModel().
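If you would rather keep a binary loss instead, a sketch of the matching setup (not what the code above does) would use one sigmoid unit and binary labels:
# Hypothetical binary variant of the head:
predictions = Dense(1, activation='sigmoid')(x)
model.compile(optimizer=adam(lr=0.01), loss='binary_crossentropy', metrics=['accuracy'])

# ...and the generators would then need class_mode='binary':
train_generator = train_datagen.flow_from_directory(
    trainDirectory,
    target_size=(299, 299),
    batch_size=16,
    class_mode='binary')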