As the title the error I'm getting is:
ValueError: Expected scalar shape for SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits:0, saw shape: (52692,).
The shapes I'm passing:
logits - [52692, 4] - rank 2
labels - [52692] - rank 1
There are 4 classes
Here is how I pass my input to the model basically a tuple of (features, labels)
def _input_fn(training_dir, training_filename):
def getFeatureLabelOH(file):
features, labels = csvGetColumns(file)
count = len(features)
# converting features and labels to integers
featuresVec, labelsVec = convCharToVec(features, alphabetDict, maxFeatureLen), \
[conventions[label] for label in labels]
featuresVec = tf.convert_to_tensor(featuresVec, dtype=tf.int32)
labelsVec = tf.convert_to_tensor(labelsVec, dtype=tf.int32)
labelsVec = tf.reshape(labelsVec, [-1])
return {"featuresVec": featuresVec, "labelsVec": labelsVec, "count": count}
data = getFeatureLabelOH(os.path.join(training_dir,
return (data["featuresVec"], data["labelsVec"])
And my actual model
def model_fn(features, labels, mode, params):
net = keras.layers.Embedding(alphabetLen + 1, 8, input_length=maxFeatureLen)(features)
net = keras.layers.LSTM(12)(net)
logits = keras.layers.Dense(len(conventions), activation=tf.nn.softmax)(net) #output
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
train_op = tf.contrib.layers.optimize_loss(
loss=loss,
global_step=tf.train.get_global_step(),
learning_rate=0.001,
optimizer="AdamOptimizer")
eval_metric_ops = {}
return tf.estimator.EstimatorSpec(
mode=mode,
loss=loss,
train_op=train_op,
eval_metric_ops=eval_metric_ops)
Related
I am trying to fine tune BERT for multi-label classification.I have my own data processor,and using a pretrained BERT.I add a finetuning layer at end of pretrained BERT for my task.
I have a create model function that adds a finetuning layer at end of existing BERT.
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
labels, num_labels, use_one_hot_embeddings):
"""Creates a classification model."""
model = modeling.BertModel(
config=bert_config,
is_training=is_training,
input_ids=input_ids,
input_mask=input_mask,
token_type_ids=segment_ids,
use_one_hot_embeddings=use_one_hot_embeddings)
# In the demo, we are doing a simple classification task on the entire
# segment.
#
# If you want to use the token-level output, use model.get_sequence_output()
# instead.
output_layer = model.get_pooled_output()
hidden_size = output_layer.shape[-1].value
output_weights = tf.get_variable(
"output_weights", [num_labels, hidden_size],
initializer=tf.truncated_normal_initializer(stddev=0.02))
output_bias = tf.get_variable(
"output_bias", [num_labels], initializer=tf.zeros_initializer())
with tf.variable_scope("loss"):
if is_training:
# I.e., 0.1 dropout
output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
logits = tf.matmul(output_layer, output_weights, transpose_b=True)
logits = tf.nn.bias_add(logits, output_bias)
# print("labels:",labels,";logits:",logits,"isinstance(labels,list):",isinstance(labels,list))
# mulit-label classification: 1.multi-hot==> then use sigmoid to transform it to possibility
probabilities = tf.nn.sigmoid(logits)
# log_probs=tf.log(probabilities)
labels = tf.cast(labels, tf.float32)
# below is for single label classification
# one-hot for single label classification
# probabilities = tf.nn.softmax(logits, axis=-1)
# log_probs = tf.nn.log_softmax(logits, axis=-1)
# one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
tf.logging.debug("num_labels = %s; logits = %s; labels = %s" % (num_labels, logits, labels))
# print("log_probs:",log_probs)
# per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1) # 利用交叉熵就和
per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)
loss = tf.reduce_mean(per_example_loss)
return (loss, per_example_loss, logits, probabilities)
I use it inside model_fn_builder, where the logits returned are used in the estimator
def model_fn_builder(bert_config, num_labels, init_checkpoint, learning_rate,
num_train_steps, num_warmup_steps, use_tpu,
use_one_hot_embeddings):
"""Returns `model_fn` closure for TPUEstimator."""
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
"""The `model_fn` for TPUEstimator."""
tf.logging.info("*** Features ***")
for name in sorted(features.keys()):
tf.logging.info(" name = %s, shape = %s" % (name, features[name].shape))
input_ids = features["input_ids"]
input_mask = features["input_mask"]
segment_ids = features["segment_ids"]
label_ids = features["label_ids"]
is_real_example = None
if "is_real_example" in features:
is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
else:
is_real_example = tf.ones(tf.shape(label_ids), dtype=tf.float32)
is_training = (mode == tf.estimator.ModeKeys.TRAIN)
(total_loss, per_example_loss, logits, probabilities) = create_model(
bert_config, is_training, input_ids, input_mask, segment_ids, label_ids,
num_labels, use_one_hot_embeddings)
tvars = tf.trainable_variables()
initialized_variable_names = {}
scaffold_fn = None
if init_checkpoint:
(assignment_map, initialized_variable_names
) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
if use_tpu:
def tpu_scaffold():
tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
return tf.train.Scaffold()
scaffold_fn = tpu_scaffold
else:
tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
tf.logging.info("**** Trainable Variables ****")
for var in tvars:
init_string = ""
if var.name in initialized_variable_names:
init_string = ", *INIT_FROM_CKPT*"
tf.logging.info(" name = %s, shape = %s%s", var.name, var.shape,
init_string)
output_spec = None
if mode == tf.estimator.ModeKeys.TRAIN:
train_op = optimization.create_optimizer(
total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)
logging_hook = tf.train.LoggingTensorHook({"loss": total_loss,'precision:': t_precision,'recall:': t_recall}, every_n_iter=10)
output_spec = tf.contrib.tpu.TPUEstimatorSpec(
mode=mode,
loss=total_loss,
train_op=train_op,
training_hooks=[logging_hook],
scaffold_fn=scaffold_fn)
elif mode == tf.estimator.ModeKeys.EVAL:
def metric_fn(per_example_loss, label_ids, logits, is_real_example):
# print("###metric_fn.logits:",logits.shape) # (?,80)
# predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
# print("###metric_fn.label_ids:",label_ids.shape,";predictions:",predictions.shape) # label_ids: (?,80);predictions:(?,)
print(logits)
logits_split = tf.split(logits, args.num_classes,
axis=-1) # a list. length is num_classes
label_ids_split = tf.split(logits, args.num_classes,
axis=-1) # a list. length is num_classes
accuracy = tf.constant(0.0, dtype=tf.float64)
for j, logits in enumerate(logits_split): #
# accuracy = tf.metrics.accuracy(label_ids, predictions)
label_id_ = tf.cast(label_ids_split[j], dtype=tf.int32)
current_auc, update_op_auc = tf.metrics.auc(label_id_, logits)
# TP = tf.count_nonzero(logits * label_id_)
# TN = tf.count_nonzero((logits - 1) * (label_id_ - 1))
# FP = tf.count_nonzero(logits * (label_id_ - 1))
# FN = tf.count_nonzero((logits - 1) * label_id_)
# current_precision,update_op_precision = tf.metrics.Precision(label_id_, logits)
# current_recall,update_op_recall = tf.metrics.Recall(label_id_, logits)
prec,prec_op=precision(label_id_,logits)
rec,rec_op=recall(label_id_,logits)
f_1=f1(label_id_,logits)
eval_loss = tf.metrics.mean(values=per_example_loss)
return {
"eval_precision":(prec,prec_op),
"eval_recall" : (rec_op,rec_op),
"eval_auc" : (current_auc, update_op_auc),
"eval_loss": eval_loss,
}
eval_metrics = (metric_fn,
[per_example_loss, label_ids, logits, is_real_example])
output_spec = tf.contrib.tpu.TPUEstimatorSpec(
mode=mode,
loss=total_loss,
eval_metrics=eval_metrics,
scaffold_fn=scaffold_fn)
else:
output_spec = tf.contrib.tpu.TPUEstimatorSpec(
mode=mode,
predictions={"probabilities": probabilities},
scaffold_fn=scaffold_fn)
return output_spec
return model_fn
In my model_fn when estimator is in eval mode, I use the logits to calculate various metrics defined in metric_fn (defined inside model_fn_builder)
I am getting the following error in traceback:
ERROR:tensorflow:Error recorded from evaluation_loop: 2 root error(s) found.
(0) Invalid argument: assertion failed: [`predictions` contains negative values] [Condition x >= 0 did not holdelement-wise:] [x (Reshape:0) = ] [0 -1 -2...]
[[node confusion_matrix/assert_non_negative_1/assert_less_equal/Assert/AssertGuard/Assert (defined at /home/aditya_vartak/virtualenvs/anaconda3/envs/tf1/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py:1748) ]]
[[confusion_matrix_2/ones_like/_1429]]
(1) Invalid argument: assertion failed: [`predictions` contains negative values] [Condition x >= 0 did not holdelement-wise:] [x (Reshape:0) = ] [0 -1 -2...]
[[node confusion_matrix/assert_non_negative_1/assert_less_equal/Assert/AssertGuard/Assert (defined at /home/aditya_vartak/virtualenvs/anaconda3/envs/tf1/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py:1748) ]]
0 successful operations.
0 derived errors ignored.
I understand error is due to negative values in logits. My question is Why? and what is the workaround?
Edit 1: If question is vague, I want to add that I did apply sigmoid activation on the weighted sum of the last layer of pretrained BERT with shape [hidden_dimension,num_classes] , the outputs are stored in probablities, following which applied sigmoid_cross_entropy_with_logits.(as showin in create_model()). According to docs it returns value between 0,1 for each input. So how do probablities get negative value?I feel the problem is in metric_fn(). but not understanding what exactly it is
I'm trying to implement a network for MNIST dataset using custom estimators.
Here is my input function:
def input_train_fn():
train, test = tf.keras.datasets.mnist.load_data()
mnist_x, mnist_y = train
mnist_y = tf.cast(mnist_y, tf.int32)
mnist_x = tf.cast(mnist_x, tf.int32)
features = {'image': mnist_x}
labels = mnist_y
dataset = tf.data.Dataset.from_tensor_slices((features, labels))
return dataset
Here is how I define my model:
def my_model(features, labels, mode, params):
# create net
net = tf.feature_column.input_layer(features, params['feature_columns'])
# create hidden layers
for unit in params['hidden_units']:
net = tf.layers.dense(net, unit, tf.nn.relu)
# create output layer
legits = tf.layers.dense(net, params['n_classes'], activation=None)
# predict (if in predict mode)
predicted_classes = tf.arg_max(legits, 1)
if mode == tf.estimator.ModeKeys.PREDICT:
predictions = {
'class_ids': predicted_classes,
'probabilities': tf.nn.softmax(legits),
'logits': legits
}
return tf.estimator.EstimatorSpec(mode, predictions=predictions)
# define loss function
loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=legits)
# evaluation metrics
accuracy = tf.metrics.accuracy(labels=labels,
predictions=predicted_classes,
name='acc_op')
metrics = {'accuracy': accuracy}
tf.summary.scalar('accuracy', accuracy[1])
if mode == tf.estimator.ModeKeys.EVAL:
return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
And this is how I call the train function:
feature_columns = [tf.feature_column.numeric_column('image', shape=[28, 28], dtype=tf.int32), ]
classifier = tf.estimator.Estimator(model_fn=my_model,
params={
'feature_columns': feature_columns,
'hidden_units': [10, 10],
'n_classes': 10,
}, model_dir='/model')
classifier.train(input_fn=input_train_fn, steps=10)
As far as I can see i'm doing everything by the book both for estimators and feature_columns but I get the error:
ValueError: Cannot reshape a tensor with 784 elements to shape [28,784] (21952 elements) for 'input_layer/image/Reshape' (op: 'Reshape') with input shapes: [28,28], 2 and with input tensors computed as partial shapes: input1 = [28,784].
Is there anything I'm missing?
thanks in advance and any help appreciated.
First, you need to produce batches. For more detail see https://www.tensorflow.org/guide/datasets
...
dataset = tf.data.Dataset.from_tensor_slices((features, labels))
dataset = dataset.batch(size)
return dataset
Then reshape your image and cast to float. -1 is for batch_size, it will be substituted during training. Cast labels to float is optional depending on the datatype provided.
net = tf.cast(tf.reshape(features, [-1, 28*28]), tf.float32)
labels = tf.cast(labels, tf.int64)
net = tf.layers.dense(net, 10, tf.nn.relu)
legits = tf.layers.dense(net, 10, activation=None)
predicted_classes = tf.arg_max(legits, 1)
if mode == tf.estimator.ModeKeys.PREDICT:
predictions = {
'class_ids': predicted_classes,
'probabilities': tf.nn.softmax(legits),
'logits': legits
}
return tf.estimator.EstimatorSpec(mode, predictions=predictions)
loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=legits)
if mode == tf.estimator.ModeKeys.EVAL:
return tf.estimator.EstimatorSpec(mode, loss=loss)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
classifier = tf.estimator.Estimator(model_fn=my_model)
classifier.train(input_fn=lambda: input_train_fn(), steps=10)
I build Neural network in tensorflow and trained it.
I extracted weights and bias from estimator
weights1 = self.model.get_variable_value('dense/kernel')
bias1 = self.model.get_variable_value('dense/bias')
weights2 = self.model.get_variable_value('dense_1/kernel')
bias1 = self.model.get_variable_value('dense_1/bias')
...
And I build MLP with numpy in python
layer1 = np.dot(inputs, weight1)
layer1 = np.add(layer1, bias1)
layer1 = np.maximum(layer1, layer1 * 0.2, layer1)
...
I used leaky_relu activation function, so I was implemented it too but the output is completely different with from tensorflow. I don't know what's wrong about it.
Edit)
def my_dnn_regression_fn(features, labels, mode, params):
top = tf.feature_column.input_layer(features, params["feature_columns"])
for units in params.get("hidden_units", [20]):
top = tf.layers.dense(inputs=top, units=units, activation=tf.nn.leaky_relu)
output_layer = tf.layers.dense(inputs=top, units=1)
predictions = tf.squeeze(output_layer, 1)
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(
mode=mode, predictions={"label": predictions})
average_loss = tf.losses.mean_squared_error(labels, predictions)
batch_size = tf.shape(labels)[0]
total_loss = tf.to_float(batch_size) * average_loss
if mode == tf.estimator.ModeKeys.TRAIN:
mse = tf.metrics.mean_squared_error(labels, predictions)
rmse = tf.metrics.root_mean_squared_error(labels, predictions)
absolute_error = tf.metrics.mean_absolute_error(labels, predictions)
mre = tf.metrics.mean_relative_error(labels, predictions, labels)
tf.summary.scalar('mse', mse[1])
tf.summary.scalar('mre', mre[1])
tf.summary.scalar('rmse', rmse[1])
tf.summary.scalar('absolute', absolute_error[1])
# vars = tf.trainable_variables()
# lossL2 = tf.add_n([tf.nn.l2_loss(v) for v in vars]) * 0.001
l1_regularizer = tf.contrib.layers.l1_regularizer(
scale=0.001, scope=None
)
weights = tf.trainable_variables() # all vars of your graph
lossL1 = tf.contrib.layers.apply_regularization(l1_regularizer, weights)
# average_loss = tf.add(average_loss, lossL2)
average_loss = tf.add(average_loss, lossL1)
total_loss = tf.to_float(batch_size) * average_loss
optimizer = params.get("optimizer", tf.train.AdamOptimizer)
optimizer = optimizer(params.get("learning_rate", None))
train_op = optimizer.minimize(
loss=average_loss, global_step=tf.train.get_global_step())
# eval_metrics = {"rmse": rmse, "absolute": absolute_error, "mre": mre}
eval_metrics = {"mse": mse, "rmse": rmse, "absolute": absolute_error, "mre": mre}
return tf.estimator.EstimatorSpec(
mode=mode, loss=total_loss, train_op=train_op, eval_metric_ops=eval_metrics)
assert mode == tf.estimator.ModeKeys.EVAL
mse = tf.metrics.mean_squared_error(labels, predictions)
rmse = tf.metrics.root_mean_squared_error(labels, predictions)
absolute_error = tf.metrics.mean_absolute_error(labels, predictions)
mre = tf.metrics.mean_relative_error(labels, predictions, labels)
eval_metrics = {"mse": mse, "rmse": rmse, "absolute": absolute_error, "mre": mre}
return tf.estimator.EstimatorSpec(
mode=mode,
loss=total_loss,
eval_metric_ops=eval_metrics)
My dnn regrassion code!!
I'm trying to build a model that uses sampled_softmax_loss and I can't seem to get the input tensors shaped properly for the function. Here is an example that as best I can tell should match the documentation, but throws this exception:
ValueError: Shape must be rank 2 but is rank 1 for
'sampled_softmax_loss/LogUniformCandidateSampler' (op:
'LogUniformCandidateSampler') with input shapes: [?].
Code:
import tensorflow as tf
import numpy as np
f1 = np.random.randint(low = 0, high = 4,size = 100)
labels = np.random.randint(low = 0, high = 5,size = 100)
f1_t = tf.feature_column.categorical_column_with_vocabulary_list('f1', vocabulary_list = [0,1,2,3])
base_columns = [f1_t]
feat_dict = {'f1' : f1}
def my_model_fn(
features,
labels,
mode,
params):
logits = tf.feature_column.linear_model(features, base_columns,units = params["n_classes"])
if mode == tf.estimator.ModeKeys.PREDICT:
predictions = {
'probabilities': tf.nn.softmax(logits),
}
return tf.estimator.EstimatorSpec(mode, predictions=predictions)
weights = [v for v in tf.global_variables() if v.name == 'linear_model/f1/weights:0'][0]
biases = [v for v in tf.global_variables() if v.name == 'linear_model/bias_weights:0'][0]
if mode == "train":
loss = tf.nn.sampled_softmax_loss(
weights=tf.transpose(weights),
biases=biases,
labels=labels,
inputs=logits,
num_classes = 5,
num_sampled= 11,
num_true=1,
partition_strategy="div")
elif mode == "eval":
None
# implement later
if mode == tf.estimator.ModeKeys.EVAL:
return tf.estimator.EstimatorSpec(
mode,
loss=loss,
)
optimizer = tf.train.FtrlOptimizer(learning_rate=.1,l2_regularization_strength=0.1)#AdagradOptimizer(0.001)
train_op = optimizer.minimize(
loss,
global_step=tf.train.get_global_step())
return tf.estimator.EstimatorSpec(
mode,
loss=loss,
train_op=train_op)
classifier = tf.estimator.Estimator(
model_fn = my_model_fn,
params = {
"feature_columns" : base_columns,
"n_classes" : 5
})
classifier.train(
input_fn = tf.estimator.inputs.numpy_input_fn(feat_dict,
labels,
batch_size = 3,
num_epochs=2,
shuffle=True))
If anyone could give me some pointers, I'll owe you virtual beer eternally.
Labels should be in the shape [batch, one_hot], e.g., [100, 6]
I have a model function which accepts Features, targets and mode but when I add tf.keras layers I'm currently getting Exception pred must be a Tensor, a Variable, or a Python bool.
But, When I run the same code with out using tf.keras but directly from keras(i.e. from keras.layers), It's working.
Code :
def model_fn(features, labels, mode):
if mode == tf.estimator.ModeKeys.TRAIN:
tf.keras.backend.set_learning_phase(1)
else:
tf.keras.backend.set_learning_phase(0)
input_feature = features['x']
table = lookup.index_table_from_file(vocabulary_file='vocab.txt', num_oov_buckets=1, default_value=-1)
text = tf.squeeze(input_feature, [1])
words = tf.string_split(text)
densewords = tf.sparse_tensor_to_dense(words, default_value=PADWORD)
numbers = table.lookup(densewords)
padding = tf.constant([[0, 0], [0, MAX_FEATURES]])
padded = tf.pad(numbers, padding)
sliced = tf.slice(padded, [0, 0], [-1, MAX_FEATURES])
print('words_sliced={}'.format(words))
#embeds = tf.keras.layers.Embedding(MAX_FEATURES, 50, input_length=MAX_FEATURES)(sliced)
embeds = tf.contrib.layers.embed_sequence(sliced, vocab_size=MAX_FEATURES, embed_dim=50)
print('words_embed={}'.format(embeds))
f1 = tf.keras.layers.Dropout(0.2)(embeds)
f1 = tf.keras.layers.Conv1D(filters, kernel_size, padding='valid', activation='relu', strides=1)(f1)
f1 = tf.keras.layers.GlobalAveragePooling1D()(f1)
# f1 = layers.BatchNormalization()(f1)
f1 = tf.keras.layers.Dense(hidden_dims)(f1)
f1 = tf.keras.layers.Dropout(0.5)(f1)
f1 = tf.keras.layers.Activation('relu')(f1)
logits = tf.keras.layers.Dense(11)(f1)
predictions_dict = {
'class': tf.argmax(logits, 1),
'prob': tf.nn.softmax(logits)
}
prediction_output = tf.estimator.export.PredictOutput({"classes": tf.argmax(input=logits, axis=1),
"probabilities": tf.nn.softmax(logits,
name="softmax_tensor")})
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions_dict, export_outputs={
tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: prediction_output
})
# one_hot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=11)
loss = tf.losses.sparse_softmax_cross_entropy(labels, logits=logits)
if mode == tf.contrib.learn.ModeKeys.TRAIN:
train_op = tf.contrib.layers.optimize_loss(loss, tf.contrib.framework.get_global_step(), optimizer='Adam',
learning_rate=0.001)
return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
eval_metrics_ops = {
'accuracy': tf.metrics.accuracy(labels=labels, predictions=predictions_dict['class'])
}
return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metrics_ops)
When I execute the above script I'm getting an exception
TypeError: pred must be a Tensor, a Variable, or a Python bool.
But also when I used keras(from keras) directly without tf.keras it's working. What is going wrong here ?
Code :
if mode == tf.estimator.ModeKeys.TRAIN:
tf.keras.backend.set_learning_phase(True)
else:
tf.keras.backend.set_learning_phase(False)
Setting learning_phase = True or False is solving the problem.