I built a neural network in TensorFlow and trained it, then extracted the weights and biases from the estimator:
weights1 = self.model.get_variable_value('dense/kernel')
bias1 = self.model.get_variable_value('dense/bias')
weights2 = self.model.get_variable_value('dense_1/kernel')
bias2 = self.model.get_variable_value('dense_1/bias')
...
Then I rebuilt the MLP with NumPy in Python:
layer1 = np.dot(inputs, weights1)
layer1 = np.add(layer1, bias1)
layer1 = np.maximum(layer1, layer1 * 0.2, layer1)  # leaky_relu with alpha=0.2, computed in place
...
I used the leaky_relu activation function, so I implemented it as well, but the output is completely different from TensorFlow's. I don't know what's wrong.
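For reference, a minimal NumPy forward pass that mirrors the two dense layers above could look like this (a sketch; weights1, bias1, weights2, bias2 are assumed to be the arrays extracted from the estimator, and alpha=0.2 matches tf.nn.leaky_relu's default):

import numpy as np

def leaky_relu(x, alpha=0.2):
    # same definition as tf.nn.leaky_relu: elementwise max(x, alpha * x)
    return np.maximum(x, alpha * x)

def forward(inputs, weights1, bias1, weights2, bias2):
    hidden = leaky_relu(np.dot(inputs, weights1) + bias1)  # hidden dense layer
    return np.dot(hidden, weights2) + bias2                # linear output layer, no activation

Note that the estimator's first layer sees the output of tf.feature_column.input_layer, which concatenates the feature columns sorted by name, so the NumPy inputs must be preprocessed and ordered the same way; that is a common source of mismatched outputs.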
Edit:
def my_dnn_regression_fn(features, labels, mode, params):
    top = tf.feature_column.input_layer(features, params["feature_columns"])
    for units in params.get("hidden_units", [20]):
        top = tf.layers.dense(inputs=top, units=units, activation=tf.nn.leaky_relu)
    output_layer = tf.layers.dense(inputs=top, units=1)
    predictions = tf.squeeze(output_layer, 1)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode, predictions={"label": predictions})

    average_loss = tf.losses.mean_squared_error(labels, predictions)
    batch_size = tf.shape(labels)[0]
    total_loss = tf.to_float(batch_size) * average_loss

    if mode == tf.estimator.ModeKeys.TRAIN:
        mse = tf.metrics.mean_squared_error(labels, predictions)
        rmse = tf.metrics.root_mean_squared_error(labels, predictions)
        absolute_error = tf.metrics.mean_absolute_error(labels, predictions)
        mre = tf.metrics.mean_relative_error(labels, predictions, labels)
        tf.summary.scalar('mse', mse[1])
        tf.summary.scalar('mre', mre[1])
        tf.summary.scalar('rmse', rmse[1])
        tf.summary.scalar('absolute', absolute_error[1])

        # vars = tf.trainable_variables()
        # lossL2 = tf.add_n([tf.nn.l2_loss(v) for v in vars]) * 0.001
        l1_regularizer = tf.contrib.layers.l1_regularizer(
            scale=0.001, scope=None
        )
        weights = tf.trainable_variables()  # all vars of your graph
        lossL1 = tf.contrib.layers.apply_regularization(l1_regularizer, weights)
        # average_loss = tf.add(average_loss, lossL2)
        average_loss = tf.add(average_loss, lossL1)
        total_loss = tf.to_float(batch_size) * average_loss

        optimizer = params.get("optimizer", tf.train.AdamOptimizer)
        optimizer = optimizer(params.get("learning_rate", None))
        train_op = optimizer.minimize(
            loss=average_loss, global_step=tf.train.get_global_step())

        # eval_metrics = {"rmse": rmse, "absolute": absolute_error, "mre": mre}
        eval_metrics = {"mse": mse, "rmse": rmse, "absolute": absolute_error, "mre": mre}
        return tf.estimator.EstimatorSpec(
            mode=mode, loss=total_loss, train_op=train_op, eval_metric_ops=eval_metrics)

    assert mode == tf.estimator.ModeKeys.EVAL
    mse = tf.metrics.mean_squared_error(labels, predictions)
    rmse = tf.metrics.root_mean_squared_error(labels, predictions)
    absolute_error = tf.metrics.mean_absolute_error(labels, predictions)
    mre = tf.metrics.mean_relative_error(labels, predictions, labels)
    eval_metrics = {"mse": mse, "rmse": rmse, "absolute": absolute_error, "mre": mre}
    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=total_loss,
        eval_metric_ops=eval_metrics)
My DNN regression code!
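As a sanity check, it can help to list every variable the estimator actually stores before extracting them; a quick sketch, assuming self.model is the trained tf.estimator.Estimator:

# print all checkpoint variables, e.g. 'dense/kernel', 'dense_1/kernel', ...
for name in self.model.get_variable_names():
    print(name, self.model.get_variable_value(name).shape)

This makes it easy to confirm which kernel/bias pair belongs to which layer before wiring them into the NumPy forward pass.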
I have a classifier on top of BERT, and I would like to get the predicted probabilities to create a ROC curve. How do I get them? The predicted probabilities will be used to calculate the TPR, FPR, and thresholds for the ROC curve.
Here is the code:
class BertBinaryClassifier(nn.Module):
    def __init__(self, dropout=0.1):
        super(BertBinaryClassifier, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(768, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, tokens, masks=None):
        _, pooled_output = self.bert(tokens, attention_mask=masks, output_all_encoded_layers=False)
        dropout_output = self.dropout(pooled_output)
        linear_output = self.linear(dropout_output)
        prediction = self.sigmoid(linear_output)
        return prediction
# Config setting
BATCH_SIZE = 4
EPOCHS = 5
# Making dataloaders
train_dataset = torch.utils.data.TensorDataset(train_tokens_tensor, train_masks_tensor, train_y_tensor)
train_sampler = torch.utils.data.RandomSampler(train_dataset)
train_dataloader = torch.utils.data.DataLoader(train_dataset, sampler=train_sampler, batch_size=BATCH_SIZE)
test_dataset = torch.utils.data.TensorDataset(test_tokens_tensor, test_masks_tensor, test_y_tensor)
test_sampler = torch.utils.data.SequentialSampler(test_dataset)
test_dataloader = torch.utils.data.DataLoader(test_dataset, sampler=test_sampler, batch_size=BATCH_SIZE)
bert_clf = BertBinaryClassifier()
bert_clf = bert_clf.cuda()
#wandb.watch(bert_clf)
optimizer = torch.optim.Adam(bert_clf.parameters(), lr=3e-6)
# training
for epoch_num in range(EPOCHS):
    bert_clf.train()
    train_loss = 0
    for step_num, batch_data in enumerate(train_dataloader):
        token_ids, masks, labels = tuple(t for t in batch_data)
        token_ids, masks, labels = token_ids.to(device), masks.to(device), labels.to(device)
        preds = bert_clf(token_ids, masks)
        loss_func = nn.BCELoss()
        batch_loss = loss_func(preds, labels)
        train_loss += batch_loss.item()
        bert_clf.zero_grad()
        batch_loss.backward()
        optimizer.step()
        #wandb.log({"Training loss": train_loss})
        print('Epoch: ', epoch_num + 1)
        print("\r" + "{0}/{1} loss: {2} ".format(step_num, len(train_data) / BATCH_SIZE, train_loss / (step_num + 1)))
# evaluating on test
bert_clf.eval()
bert_predicted = []
all_logits = []
probs = []
with torch.no_grad():
    test_loss = 0
    for step_num, batch_data in enumerate(test_dataloader):
        token_ids, masks, labels = tuple(t for t in batch_data)
        token_ids, masks, labels = token_ids.to(device), masks.to(device), labels.to(device)
        logits = bert_clf(token_ids, masks)
        pr = logits.ravel()
        probs += pr
        loss_func = nn.BCELoss()
        loss = loss_func(logits, labels)
        test_loss += loss.item()
        numpy_logits = logits.cpu().detach().numpy()
        #print(numpy_logits)
        #wandb.log({"Testing loss": test_loss})
        bert_predicted += list(numpy_logits[:, 0] > 0.5)
        all_logits += list(numpy_logits[:, 0])
I am able to get the prediction scores to calculate the accuracy or F1 score, but not the probabilities for creating the ROC curve.
Thanks
In your forward, you already turn the logits into probabilities:
def forward(self, tokens, masks=None):
    _, pooled_output = self.bert(...)            # get output of BERT
    dropout_output = self.dropout(pooled_output)
    linear_output = self.linear(dropout_output)  # take linear combination of outputs
                                                 # (unconstrained scores - "logits")
    prediction = self.sigmoid(linear_output)     # normalise scores
                                                 # (constrained to [0,1] - "probabilities")
    return prediction
Hence the result of calling your model can be supplied directly to calculate the False Positive and True Positive rates, e.g.:
from sklearn import metrics
...
test_probs = bert_clf(token_ids, masks)
fpr, tpr, thresholds = metrics.roc_curve(labels, test_probs)
roc_auc = metrics.auc(fpr, tpr)
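To build the curve over the whole test set rather than a single batch, the probabilities and labels can be accumulated across the dataloader first. A sketch based on the eval loop above (it assumes the label tensors live on the CPU and can be flattened to one value per example, matching the BCELoss call):

import numpy as np
from sklearn import metrics

all_probs, all_labels = [], []
with torch.no_grad():
    for token_ids, masks, labels in test_dataloader:
        token_ids, masks = token_ids.to(device), masks.to(device)
        probs = bert_clf(token_ids, masks)        # sigmoid outputs, already in [0, 1]
        all_probs.append(probs.cpu().numpy().ravel())
        all_labels.append(labels.numpy().ravel())

fpr, tpr, thresholds = metrics.roc_curve(np.concatenate(all_labels),
                                         np.concatenate(all_probs))
roc_auc = metrics.auc(fpr, tpr)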
I am trying to fine-tune BERT for multi-label classification. I have my own data processor and am using a pretrained BERT model, to which I add a fine-tuning layer for my task.
I have a create_model function that adds the fine-tuning layer at the end of the existing BERT:
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
    """Creates a classification model."""
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    # In the demo, we are doing a simple classification task on the entire
    # segment.
    #
    # If you want to use the token-level output, use model.get_sequence_output()
    # instead.
    output_layer = model.get_pooled_output()
    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # i.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        # print("labels:", labels, ";logits:", logits, "isinstance(labels,list):", isinstance(labels, list))
        # multi-label classification: multi-hot labels ==> use sigmoid to turn logits into probabilities
        probabilities = tf.nn.sigmoid(logits)
        # log_probs = tf.log(probabilities)
        labels = tf.cast(labels, tf.float32)

        # below is for single-label classification (one-hot labels)
        # probabilities = tf.nn.softmax(logits, axis=-1)
        # log_probs = tf.nn.log_softmax(logits, axis=-1)
        # one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
        tf.logging.debug("num_labels = %s; logits = %s; labels = %s" % (num_labels, logits, labels))
        # print("log_probs:", log_probs)
        # per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)  # sum up with cross-entropy
        per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)
        loss = tf.reduce_mean(per_example_loss)
        return (loss, per_example_loss, logits, probabilities)
I use it inside model_fn_builder, where the returned logits are used by the estimator:
def model_fn_builder(bert_config, num_labels, init_checkpoint, learning_rate,
                     num_train_steps, num_warmup_steps, use_tpu,
                     use_one_hot_embeddings):
    """Returns `model_fn` closure for TPUEstimator."""

    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]
        is_real_example = None
        if "is_real_example" in features:
            is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
        else:
            is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        (total_loss, per_example_loss, logits, probabilities) = create_model(
            bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
            num_labels, use_one_hot_embeddings)

        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        scaffold_fn = None
        if init_checkpoint:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
            if use_tpu:
                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(
                total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
            logging_hook = tf.train.LoggingTensorHook(
                {"loss": total_loss, 'precision:': t_precision, 'recall:': t_recall},
                every_n_iter=10)
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                training_hooks=[logging_hook],
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:
            def metric_fn(per_example_loss, label_ids, logits, is_real_example):
                # print("###metric_fn.logits:", logits.shape)  # (?, 80)
                # predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                # print("###metric_fn.label_ids:", label_ids.shape, ";predictions:", predictions.shape)
                print(logits)
                logits_split = tf.split(logits, args.num_classes,
                                        axis=-1)  # a list; length is num_classes
                label_ids_split = tf.split(logits, args.num_classes,
                                           axis=-1)  # a list; length is num_classes
                accuracy = tf.constant(0.0, dtype=tf.float64)
                for j, logits in enumerate(logits_split):
                    # accuracy = tf.metrics.accuracy(label_ids, predictions)
                    label_id_ = tf.cast(label_ids_split[j], dtype=tf.int32)
                    current_auc, update_op_auc = tf.metrics.auc(label_id_, logits)
                    # TP = tf.count_nonzero(logits * label_id_)
                    # TN = tf.count_nonzero((logits - 1) * (label_id_ - 1))
                    # FP = tf.count_nonzero(logits * (label_id_ - 1))
                    # FN = tf.count_nonzero((logits - 1) * label_id_)
                    # current_precision, update_op_precision = tf.metrics.Precision(label_id_, logits)
                    # current_recall, update_op_recall = tf.metrics.Recall(label_id_, logits)
                    prec, prec_op = precision(label_id_, logits)
                    rec, rec_op = recall(label_id_, logits)
                    f_1 = f1(label_id_, logits)
                eval_loss = tf.metrics.mean(values=per_example_loss)
                return {
                    "eval_precision": (prec, prec_op),
                    "eval_recall": (rec, rec_op),
                    "eval_auc": (current_auc, update_op_auc),
                    "eval_loss": eval_loss,
                }

            eval_metrics = (metric_fn,
                            [per_example_loss, label_ids, logits, is_real_example])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions={"probabilities": probabilities},
                scaffold_fn=scaffold_fn)
        return output_spec

    return model_fn
In my model_fn, when the estimator is in eval mode, I use the logits to calculate various metrics defined in metric_fn (defined inside model_fn_builder).
I am getting the following error in the traceback:
ERROR:tensorflow:Error recorded from evaluation_loop: 2 root error(s) found.
(0) Invalid argument: assertion failed: [`predictions` contains negative values] [Condition x >= 0 did not hold element-wise:] [x (Reshape:0) = ] [0 -1 -2...]
[[node confusion_matrix/assert_non_negative_1/assert_less_equal/Assert/AssertGuard/Assert (defined at /home/aditya_vartak/virtualenvs/anaconda3/envs/tf1/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py:1748) ]]
[[confusion_matrix_2/ones_like/_1429]]
(1) Invalid argument: assertion failed: [`predictions` contains negative values] [Condition x >= 0 did not hold element-wise:] [x (Reshape:0) = ] [0 -1 -2...]
[[node confusion_matrix/assert_non_negative_1/assert_less_equal/Assert/AssertGuard/Assert (defined at /home/aditya_vartak/virtualenvs/anaconda3/envs/tf1/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py:1748) ]]
0 successful operations.
0 derived errors ignored.
I understand the error is due to negative values in the logits. My question is: why, and what is the workaround?
Edit 1: In case the question is vague, I want to add that I apply a sigmoid activation to the weighted sum of the last layer of the pretrained BERT (shape [hidden_dimension, num_classes]) and store the outputs in probabilities, then apply sigmoid_cross_entropy_with_logits (as shown in create_model()). According to the docs, the sigmoid returns a value between 0 and 1 for each input, so how do the probabilities get negative values? I feel the problem is in metric_fn(), but I don't understand what exactly it is.
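One hedged observation, not a verified fix: tf.metrics.auc expects predictions whose values lie in [0, 1], but metric_fn is handed the raw, unconstrained logits; note also that the snippet splits logits a second time where label_ids was presumably intended, so negative logits end up cast to "labels". A sketch of what the metric inputs might look like instead, reusing the names from metric_fn above:

# feed probabilities, not raw logits, to the streaming metrics,
# and split the labels tensor for the per-class labels
probabilities = tf.nn.sigmoid(logits)                              # values in [0, 1]
probs_split = tf.split(probabilities, args.num_classes, axis=-1)
label_ids_split = tf.split(label_ids, args.num_classes, axis=-1)   # not logits

for j, probs_j in enumerate(probs_split):
    label_j = tf.cast(label_ids_split[j], dtype=tf.int32)
    current_auc, update_op_auc = tf.metrics.auc(label_j, probs_j)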
I created a classifier with the TensorFlow (1.14.1) Estimator API and calculate the metrics at every step. I use TensorBoard to show the metrics, and it aggregates them over all steps; I want them to reset each epoch.
This is for HPC, running Python (3.7) and TensorFlow (1.14.1).
def fn_builder(...):
    def model_fn(features, labels, mode, params):
        # create bert-mlp net
        ids = features['ids']
        words = features['words']
        masks = features['mask']
        label_mask = features['label_mask']
        # model
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        loss, prd_label = create_model(...)
        pre = tf.metrics.precision(labels, prd_label, weights=label_mask)
        metrics = {'Precision': pre}
        tf.summary.scalar('p', pre[1])
        if mode == tf.estimator.ModeKeys.TRAIN:
            optimizer = Adam(learning_rate=learning_rate)
            train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op, eval_metric_ops=metrics)

    return model_fn
if __name__ == "__main__":
    batch_size = 5
    epochs = 30
    num_eval_steps = 20 / batch_size + 1
    num_train_steps = 20 * epochs / batch_size
    pi_checkpointing_config = tf.estimator.RunConfig(
        model_dir='data/pi',
        save_checkpoints_secs=10 * 60,  # Save checkpoints every 10 min.
        keep_checkpoint_max=10,         # Retain the 10 most recent checkpoints.
    )
    pi = tf.estimator.Estimator(
        model_fn=fn_builder(...),
        config=pi_checkpointing_config)
    pi.train(
        input_fn=input_fn_creator(...),
        max_steps=num_train_steps
    )
This is mine: https://tva1.sinaimg.cn/large/006y8mN6ly1g77gj8vie2j30m808ut8y.jpg
And this is what I want: https://i.stack.imgur.com/LraT7.png
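One possible workaround, offered as a sketch rather than a verified fix: the tf.metrics.* counters live in local variables that are only initialized when a session starts, and each Estimator.train call runs in a fresh session, so training one epoch per call restarts the streaming precision from zero (steps_per_epoch is a hypothetical name):

steps_per_epoch = 20 // batch_size  # hypothetical: training examples per epoch / batch size
for epoch in range(epochs):
    # each train() call builds a new session, so the streaming
    # precision counters are re-initialized at the start of every epoch
    pi.train(input_fn=input_fn_creator(...), steps=steps_per_epoch)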
I'm trying to implement a network for the MNIST dataset using custom estimators.
Here is my input function:
def input_train_fn():
    train, test = tf.keras.datasets.mnist.load_data()
    mnist_x, mnist_y = train
    mnist_y = tf.cast(mnist_y, tf.int32)
    mnist_x = tf.cast(mnist_x, tf.int32)
    features = {'image': mnist_x}
    labels = mnist_y
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    return dataset
Here is how I define my model:
def my_model(features, labels, mode, params):
    # create net
    net = tf.feature_column.input_layer(features, params['feature_columns'])
    # create hidden layers
    for unit in params['hidden_units']:
        net = tf.layers.dense(net, unit, tf.nn.relu)
    # create output layer
    legits = tf.layers.dense(net, params['n_classes'], activation=None)
    # predict (if in predict mode)
    predicted_classes = tf.arg_max(legits, 1)
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'class_ids': predicted_classes,
            'probabilities': tf.nn.softmax(legits),
            'logits': legits
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    # define loss function
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=legits)
    # evaluation metrics
    accuracy = tf.metrics.accuracy(labels=labels,
                                   predictions=predicted_classes,
                                   name='acc_op')
    metrics = {'accuracy': accuracy}
    tf.summary.scalar('accuracy', accuracy[1])
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
And this is how I call the train function:
feature_columns = [tf.feature_column.numeric_column('image', shape=[28, 28], dtype=tf.int32)]
classifier = tf.estimator.Estimator(model_fn=my_model,
                                    params={
                                        'feature_columns': feature_columns,
                                        'hidden_units': [10, 10],
                                        'n_classes': 10,
                                    }, model_dir='/model')
classifier.train(input_fn=input_train_fn, steps=10)
As far as I can see, I'm doing everything by the book, both for estimators and feature columns, but I get the error:
ValueError: Cannot reshape a tensor with 784 elements to shape [28,784] (21952 elements) for 'input_layer/image/Reshape' (op: 'Reshape') with input shapes: [28,28], 2 and with input tensors computed as partial shapes: input1 = [28,784].
Is there anything I'm missing?
Thanks in advance; any help is appreciated.
First, you need to produce batches; without batching, input_layer treats your single [28, 28] image as a batch of 28 examples and tries to reshape each 28-element row into the declared 28*28 = 784 features, which is exactly the reshape error above. For more detail see https://www.tensorflow.org/guide/datasets
...
dataset = tf.data.Dataset.from_tensor_slices((features, labels))
dataset = dataset.batch(size)
return dataset
Then reshape your image and cast it to float; -1 stands for the batch size and will be filled in during training. Casting the labels is optional, depending on the datatype provided.
net = tf.cast(tf.reshape(features, [-1, 28*28]), tf.float32)
labels = tf.cast(labels, tf.int64)
net = tf.layers.dense(net, 10, tf.nn.relu)
legits = tf.layers.dense(net, 10, activation=None)
predicted_classes = tf.arg_max(legits, 1)
if mode == tf.estimator.ModeKeys.PREDICT:
    predictions = {
        'class_ids': predicted_classes,
        'probabilities': tf.nn.softmax(legits),
        'logits': legits
    }
    return tf.estimator.EstimatorSpec(mode, predictions=predictions)
loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=legits)
if mode == tf.estimator.ModeKeys.EVAL:
    return tf.estimator.EstimatorSpec(mode, loss=loss)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
classifier = tf.estimator.Estimator(model_fn=my_model)
classifier.train(input_fn=lambda: input_train_fn(), steps=10)
Following the instructions on tf custom estimator, I created a CNN estimator and tried to train it. While training, I launched TensorBoard and was hoping to see some visualizations of the training steps. However, TensorBoard only showed the graph of my custom estimator and none of the scalar values I had defined.
Here's roughly what I have in code:
def model_fn(features, labels, mode, params=None):
    tf.logging.set_verbosity(tf.logging.INFO)
    n_classes = params['n_classes']
    base_learning_rate = params['learning_rate']
    decay_rate = params['decay_rate']
    embedding_dim = params['embedding_dim']
    x = VGG_block1(features, (3, 3), 64, name='block1_1')
    x = VGG_block1(x, (3, 3), 128, name='block1_2')
    x = VGG_block1(x, (3, 3), 256, name='block1_3', regularizer=tf.contrib.layers.l1_regularizer(.1))
    x = VGG_block2(x, (3, 3), 512, name='block2_4')
    x = VGG_block2(x, (3, 3), 1024, name='block2_5')
    x = conv2d(x, 512, (5, 5), padding='valid', normalizer_fn=batch_norm, activation_fn=tf.nn.leaky_relu,
               weights_initializer=he_uniform())
    x = flatten(x)
    embedding = fully_connected(x, embedding_dim)
    logits = fully_connected(embedding, n_classes)

    # make predictions
    predictions = {
        'classes': tf.argmax(logits, axis=1, name='classes'),
        'probabilities': tf.nn.softmax(logits, name='softmax'),
        'embeddings': embedding
    }
    # if we are in prediction mode
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # otherwise define losses for training
    c_loss, center = center_loss(embedding, labels, .9, n_classes)
    xent_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))
    total_loss = xent_loss + 0.5 * c_loss

    # evaluation methods
    accuracy, update_op = tf.metrics.accuracy(labels=labels, predictions=predictions['classes'], name='accuracy')
    batch_acc = tf.reduce_mean(tf.cast(tf.equal(tf.cast(labels, tf.int64), predictions['classes']), tf.float32))
    tf.summary.scalar('batch_acc', batch_acc)
    tf.summary.scalar('streaming_acc', update_op)
    tf.summary.scalar('total_loss', total_loss)
    tf.summary.scalar('center_loss', c_loss)
    tf.summary.scalar('xent_loss', xent_loss)

    # training mode
    if mode == tf.estimator.ModeKeys.TRAIN:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        global_step = tf.Variable(0, trainable=False)
        global_step_op = tf.assign(global_step, global_step + 1)
        learning_rate = tf.train.exponential_decay(base_learning_rate, global_step, 8000, decay_rate, staircase=True)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        with tf.control_dependencies(update_ops + [global_step_op]):
            objective = optimizer.minimize(total_loss)
        return tf.estimator.EstimatorSpec(mode=mode, loss=total_loss, train_op=objective)

    eval_metric_ops = {
        'accuracy': (accuracy, update_op)
    }
    return tf.estimator.EstimatorSpec(mode=mode, loss=total_loss, eval_metric_ops=eval_metric_ops)


X_train, X_test, y_train, y_test = load_data()
epochs = 10
batch_size = 64
n_classes = len(classes)
model_params = {'n_classes': n_classes,
                'learning_rate': 0.0001,
                'decay_rate': 0.5,
                'embedding_dim': 128}
model_dir = 'output'
face_classifier = tf.estimator.Estimator(model_fn=model_fn, params=model_params, model_dir=model_dir)
My Tensorflow version is 1.12.0
Edit
Forgot to mention I was using eager execution for this exercise; for reasons unknown at the time, that was the cause of this bug.
As mentioned in the edit, disabling eager execution solved the problem.
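For completeness, a minimal sketch of what disabling eager execution looks like; in TF 1.x graph mode is the default, so the fix usually just means not calling tf.enable_eager_execution() before building the Estimator, and newer releases that expose the v1 compat API can turn it off explicitly:

import tensorflow as tf

# make sure nothing calls tf.enable_eager_execution() before this point;
# on releases that provide the compat API, eager mode can also be disabled explicitly:
tf.compat.v1.disable_eager_execution()

face_classifier = tf.estimator.Estimator(model_fn=model_fn, params=model_params, model_dir=model_dir)

With graph mode restored, the tf.summary.scalar calls inside model_fn are collected and written to model_dir, where TensorBoard picks them up.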