I have developed a Deep Q-Learning agent. I have used verbose=0 in model.fit() function and still getting the following line as output:
1/1 [==============================] - 0s 12ms/step
here's how i build the model:
def build_model(self):
# Build the model
model = Sequential()
model.add(Dense(24, input_dim=STATE_SIZE, activation='relu'))
model.add(Dense(24, activation='relu'))
model.add(Dense(ACTION_SIZE, activation='linear'))
model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
#if os.path.isfile(self.weight_backup):
# model.load_weights(self.weight_backup)
# self.epsilon = self.epsilon_min
return model
I fit the data in replay() function:
def replay(self, batch_size):
# training
if len(self.memory) < batch_size:
return
mini_batch = random.sample(self.memory, batch_size)
for state, action, reward, next_state, done in mini_batch:
if done:
target = reward
else:
target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
train_target = self.model.predict(state)
train_target[0][action] = target
#callbacks = [ProgbarLogger()]
self.model.fit(state, train_target, epochs=1, verbose=0)
Is there a way to prevent output?
I have found the solution. It comes from predict() function and after setting verbose to 0 fixed the problem
Related
I have a model in TensorFlow that I converted to Pytorch. I want to check If the two models are the same or if I'm mixing things up. Here is my code in tensofrlow:
model = Sequential()
model.add(
LSTM(5, input_shape=(4, 1000))
)
model.add(
Dense(1, activation='tanh')
)
model.compile(
loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']
)
model.fit(
X_train, y_train, epochs=100, batch_size=27
)
I built the equivalent model in Pytorch in this way:
class LSTM1(nn.Module):
def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length):
super(LSTM1, self).__init__()
self.num_classes = num_classes #number of classes
self.num_layers = num_layers #number of layers
self.input_size = input_size #input size
self.seq_length = seq_length #sequence length
self.hidden_size = hidden_size #hidden state
self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
num_layers=num_layers, batch_first=True) #lstm
self.fc = nn.Linear(self.hidden_size, num_classes) #fully connected last layer
self.sigmoid = nn.Sigmoid()
def forward(self,x):
h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)) #hidden state
c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)) #internal state
# Propagate input through LSTM
output, (hn, cn) = self.lstm(x, (h_0, c_0)) #lstm with input, hidden, and internal state
hn = hn.view(-1, self.hidden_size) #reshaping the data for Dense layer next
out = self.sigmoid(hn)
out = self.fc(out) #Final Output
out = self.sigmoid(out)
return out
num_epochs = 100 #100 epochs
learning_rate = 0.001 #0.001 lr
input_size = 1000 #number of features
num_layers = 5 #number of stacked lstm layers
hidden_size = 1
num_classes = 1 #number of output classes
X_train = np.concatenate((X_phage, X_bac))
y_train = np.concatenate((np.ones(len(X_phage)), np.zeros(len(X_bac))))
X_train_tensors_final = Variable(torch.Tensor(X_train))
y_train_tensors = Variable(torch.Tensor(y_train))
print(X_train_tensors_final.shape)
model = LSTM1(num_classes, input_size, hidden_size, num_layers, X_train_tensors_final.shape[1]) #our lstm class
print("Training Shape", X_train_tensors_final.shape, y_train_tensors.shape)
print(model)
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for epoch in range(num_epochs):
outputs = model.forward(X_train_tensors_final) #forward pass
optimizer.zero_grad() #caluclate the gradient, manually setting to 0
# obtain the loss function
outputs = outputs[-20:]
y_train_tensors = y_train_tensors.type(torch.LongTensor)
y_train_tensors = torch.reshape(y_train_tensors, (20, 1))
loss = criterion(outputs, y_train_tensors.float())
loss.backward() #calculates the loss of the loss function
optimizer.step() #improve from loss, i.e backprop
train_acc = torch.sum((outputs > 0.5).bool().float() == y_train_tensors)
final_train_acc = train_acc/20
print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))
print('Accuracy: ', final_train_acc)
My data is 20 instances, where each instance has 1000 features and 4 timesteps, therefore it is shaped like this (20,4,1000). However, When I run the two models, I got different results (different loss and accuracy). Therefore I'm afraid I have missed something and the models are not the same.
I would appreciate it if someone can validate that these models are the same and if they were then why I'm getting different results?
I have created a model with an LSTM layer as shown below and want to get the internal state (hidden state and cell state) after the training step and save it. After the training step, I will use the network for a prediction and want to reinitialize the LSTM with the saved internal state before the next training step. This way I can continue from the same point after each training step. I haven't been able to find something helpful for the current version of tensoflow, i.e 2.x.
import tensorflow as tf
class LTSMNetwork(object):
def __init__(self, num_channels, num_hidden_neurons, learning_rate, time_steps, batch_size):
self.num_channels = num_channels
self.num_hidden_neurons = num_hidden_neurons
self.learning_rate = learning_rate
self.time_steps = time_steps
self.batch_size =batch_size
def lstm_model(self):
self.model = tf.keras.Sequential()
self.model.add(tf.keras.layers.LSTM(batch_input_shape=(self.batch_size, self.time_steps, self.num_channels),
units=self.num_hidden_neurons[0],
activation='tanh', recurrent_activation='sigmoid',
return_sequences=True, stateful=True))
#self.model.add(tf.keras.layers.LSTM(units=self.num_hidden_neurons[1], stateful=True))
hidden_layer = tf.keras.layers.Dense(units=self.num_hidden_neurons[1], activation=tf.nn.sigmoid)
self.model.add(hidden_layer)
self.model.add(tf.keras.layers.Dense(units=self.num_channels, name="output_layer", activation=tf.nn.tanh))
self.model.compile(optimizer=tf.optimizers.Adam(learning_rate=self.learning_rate),
loss='mse', metrics=['binary_accuracy'])
return self.model
if __name__=='__main__':
num_channels = 3
num_hidden_neurons = [150, 100]
learning_rate = 0.001
time_steps = 1
batch_size = 1
lstm_network = LTSMNetwork(num_channels=num_channels, num_hidden_neurons=num_hidden_neurons,
learning_rate=learning_rate, time_steps=time_steps, batch_size=batch_size)
model = lstm_network.lstm_model()
model.summary()
You can define a custom Callback and save the hidden and cell states at every epoch for example. Afterwards, you can choose from which epoch you want to extract the states and then use lstm_layer.reset_states(*) to set the initial state again:
import tensorflow as tf
class LTSMNetwork(object):
def __init__(self, num_channels, num_hidden_neurons, learning_rate, time_steps, batch_size):
self.num_channels = num_channels
self.num_hidden_neurons = num_hidden_neurons
self.learning_rate = learning_rate
self.time_steps = time_steps
self.batch_size =batch_size
def lstm_model(self):
self.model = tf.keras.Sequential()
self.model.add(tf.keras.layers.LSTM(batch_input_shape=(self.batch_size, self.time_steps, self.num_channels),
units=self.num_hidden_neurons[0],
activation='tanh', recurrent_activation='sigmoid',
return_sequences=True, stateful=True))
hidden_layer = tf.keras.layers.Dense(units=self.num_hidden_neurons[1], activation=tf.nn.sigmoid)
self.model.add(hidden_layer)
self.model.add(tf.keras.layers.Dense(units=self.num_channels, name="output_layer", activation=tf.nn.tanh))
self.model.compile(optimizer=tf.optimizers.Adam(learning_rate=self.learning_rate),
loss='mse', metrics=['binary_accuracy'])
return self.model
states = {}
class CustomCallback(tf.keras.callbacks.Callback):
def __init__(self, lstm_layer):
self.lstm_layer = lstm_layer
def on_epoch_end(self, epoch, logs=None):
states[epoch] = lstm_layer.states
num_channels = 3
num_hidden_neurons = [150, 100]
learning_rate = 0.001
time_steps = 1
batch_size = 1
lstm_network = LTSMNetwork(num_channels=num_channels, num_hidden_neurons=num_hidden_neurons,
learning_rate=learning_rate, time_steps=time_steps, batch_size=batch_size)
model = lstm_network.lstm_model()
lstm_layer = model.layers[0]
x = tf.random.normal((1, 1, 3))
y = tf.random.normal((1, 1, 3))
model.fit(x, y, epochs=5, callbacks=[CustomCallback(lstm_layer)])
model.summary()
lstm_layer.reset_states(states[0]) # Sets hidden state from first epoch.
States consists of 5 internal states for each of the 5 epochs.
I have managed to save the internal state of the LSTM after the training step and reinitialize the LSTM with the saved internal states before the next training step.
You can create a variable and set its value to the currently stored value in a variable. How can I copy a variable in tensorflow
states_ = {}
# Save the hidden state
internal_state_h = lstm_layer.states[0]
v1 = tf.Variable(initial_value=np.zeros((1, 150)), dtype=tf.float32, shape=(1, 150))
copy_state_h = v1.assign(internal_state_h)
# Save the cell state
internal_state_c = lstm_layer.states[1]
v2 = tf.Variable(initial_value=np.zeros((1, 150)), dtype=tf.float32, shape=(1, 150))
copy_state_c = v2.assign(internal_state_c)
# Create a tuple and add it to the dictionary
states_[0] = (copy_state_h, copy_state_c)
# Reset the internal state
lstm_layer.reset_states(states_[0])
A call for prediction changes the internal states, however by following these steps, you can restore the internal states of RNN to what it was before the prediction.
I train the following model based on GRU, note that I am passing the argument stateful=True to the GRU builder.
class LearningToSurpriseModel(tf.keras.Model):
def __init__(self, vocab_size, embedding_dim, rnn_units):
super().__init__(self)
self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
self.gru = tf.keras.layers.GRU(rnn_units,
stateful=True,
return_sequences=True,
return_state=True,
reset_after=True
)
self.dense = tf.keras.layers.Dense(vocab_size)
def call(self, inputs, states=None, return_state=False, training=False):
x = inputs
x = self.embedding(x, training=training)
if states is None:
states = self.gru.get_initial_state(x)
x, states = self.gru(x, initial_state=states, training=training)
x = self.dense(x, training=training)
if return_state:
return x, states
else:
return x
#tf.function
def train_step(self, inputs):
[defining here my training step]
I instantiate my model
model = LearningToSurpriseModel(
vocab_size=len(ids_from_chars.get_vocabulary()),
embedding_dim=embedding_dim,
rnn_units=rnn_units
)
[compile and do stuff]
and train for EPOCHS epochs
for i in range(EPOCHS):
model.fit(train_dataset, validation_data=validation_dataset, epochs=1, callbacks = [EarlyS], verbose=1)
model.reset_states()
What is the behavior of this code regarding GRU states : are states updated for each new batch of data or only for each new epoch ? The desired behavior is a reset for each new epoch only. If not done, how to implement this ?
EDIT
Tensorflow implements the reset_states function for Models as
def reset_states(self):
for layer in self.layers:
if hasattr(layer, 'reset_states') and getattr(layer, 'stateful', False):
layer.reset_states()
Does it means (contrary to what doc otherwise seems to imply) states can be reset only if stateful=False ? It is what I infer from the condition on getattr(layer, 'stateful', False).
I am training a CNN model using Keras on Google Colab for binary image classification, the problem is when i use Sigmoid function i get accuracy fixed on 0.5000, and when i change metrics to 'acc' i get 0.000e+00 as accuracy. Also, when i change the activation function to 'Softmax' my model start learning.
Ps: i am using google colab where Tensorflow version is 2.5.0
My code:
def define_model(input_shape, num_classes):
model=ResNet50(include_top = False, weights = 'imagenet', input_shape = input_shape)
x = model.output
x = GlobalAveragePooling2D()(x)
preds = Dense(num_classes,activation='sigmoid')(x)
model = Model(inputs=model.input,outputs=preds)
return model
def train(epochs):
train_generator = ImageDataGenerator(rescale=1.0/255.0,vertical_flip=True, horizontal_flip=True)
test_generator = ImageDataGenerator(rescale=1.0/255.0)
train_generator = train_generator.flow_from_directory(
'trainset/',
target_size=(image_size, image_size),
batch_size=BATCH_SIZE_TRAINING,
seed = 7)
validation_generator = test_generator.flow_from_directory(
'testset/',
target_size=(image_size, image_size),
batch_size=BATCH_SIZE_VALIDATION,
seed = 7)
input_shape = (CHANNELS, image_size, image_size) if K.image_data_format() == 'channels_first' \
else (image_size, image_size, CHANNELS)
model = define_model(input_shape, NUM_CLASSES)
opt = optimizers.Adam(learning_rate=1e-6, beta_1=0.9, beta_2=0.99, amsgrad=False)
model.summary()
model.compile(loss='binary_crossentropy',
optimizer=opt,
metrics=['acc'])
filepath=path+"weights-improvement-{epoch:02d}-vacc:{val_accuracy:.2f}-tacc:{accuracy:.2f}.hdf5"
'''cb_early_stopper = EarlyStopping(monitor = 'val_accuracy', mode='min', verbose=1, patience = EARLY_STOP_PATIENCE)
cb_checkpointer = ModelCheckpoint(filepath = filepath, monitor = 'val_accuracy', save_best_only = True, mode = 'auto')
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy', factor=0.25, patience=5, min_lr=1e-7)'''
fit_history = model.fit(train_generator,
epochs = NUM_EPOCHS,
validation_data=validation_generator,
verbose=1,
class_weight=class_weights)
# callbacks = [cb_checkpointer, cb_early_stopper, reduce_lr],
return model, fit_history
def main():
start_time = time()
model, fit_history = train(epochs=NUM_EPOCHS)
end_time = time()
seconds_elapsed = end_time - start_time
print('token time: ', seconds_elapsed)
hours, rest = divmod(seconds_elapsed, 3600)
minutes, seconds = divmod(rest, 60)
if __name__ == "__main__":
main()
The problem solved by adding this code to the .flow_from_directory() function:
class_mode='binary',
Thanks to this thread on github:
https://github.com/keras-team/keras/issues/13006
I have a working Step_Decay:
def step_decay(epoch):
initial_lr = 0.01
decay_factor=0.1
step_size=1
new_lr = initial_lr * (decay_factor ** np.floor(epoch / step_size))
print("Learning rate: " + str(new_lr))
return new_lr
lr_sched = keras.callbacks.LearningRateScheduler(step_decay)
But I want to stop reducing the learning rate once loss < 0.1.
How can I access epoch loss number to accomplish this?
By keras.callbacks.Callback to record loss history and learning rate during the training procedure.
learning-rate-schedules - Access to loss by step_decay with history loss
def step_decay(epoch):
initial_lrate = 0.1
drop = 0.5
epochs_drop = 10.0
lrate = initial_lrate * math.pow(drop,
math.floor((1+epoch)/epochs_drop))
return lrate
lrate = LearningRateScheduler(step_decay)
class LossHistory(keras.callbacks.Callback):
def on_train_begin(self, logs={}):
self.losses = []
self.lr = []
def on_epoch_end(self, batch, logs={}):
self.losses.append(logs.get(‘loss’))
self.lr.append(step_decay(len(self.losses)))
loss_history = LossHistory()
lrate = LearningRateScheduler(step_decay)
callbacks_list = [loss_history, lrate]
history = model.fit(X_train, y_train,
validation_data=(X_test, y_test),
epochs=epochs,
batch_size=batch_size,
callbacks=callbacks_list,
verbose=2)
Access epoch loss number:
def get_learningrate_metric(optimizer):
def learningrate(y_true, y_pred):
return optimizer.learningrate
return learningrate
x = Input((50,))
out = Dense(1, activation='sigmoid')(x)
model = Model(x, out)
optimizer = Adam(lr=0.001)
learningrate_metric = get_learningrate_metric(optimizer)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['acc', learningrate_metric])
# reducing the learning rate by half every 2 epochs
cbks = [LearningRateScheduler(lambda epoch: 0.001 * 0.5 ** (epoch // 2)),
TensorBoard(write_graph=False)]
X = np.random.rand(1000, 50)
Y = np.random.randint(2, size=1000)
model.fit(X, Y, epochs=10, callbacks=cbks)
or
Adadelta = optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=1e-08, decay=0.1)
Adadelta is an extension of Adagrad that seeks to reduce its aggressive, monotonically decreasing learning rate.
get the learning rate value after every epoch
Or can it help you?
EarlyStopping