Tensorflow: Global step must be from the same graph as loss - python

I'm trying to use TensorFlow to do some classification with the tf.contrib.layers package, and I've run into a problem I can't quite figure out. As far as I can tell from examples (e.g. this and its tutorial), everything with the graph is handled by the API. I can download and run the same code in my environment perfectly well.
However, when I run my code, I get an error that my global step is not from the same graph as my loss, which seems bizarre: ValueError: Tensor("global_step:0", shape=(), dtype=int64_ref) must be from the same graph as Tensor("softmax_cross_entropy_loss/value:0", shape=(), dtype=float32). The error occurs during the construction of the train_op.
Here's my TensorFlow code (I do have some other code for handling the loading of the data, but it doesn't use anything from TensorFlow). Sorry that the code is sort of messy right now: I've been tearing it apart trying to figure this error out.
import numpy as np
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib
import data  # my data loading module

def train(training_file, vocab_path, hidden_units=[10, 20, 10], estimator=tf.contrib.learn.DNNClassifier):
    """
    Given a training CSV file, train a Tensorflow neural network
    """
    training_set = data.load(training_file)
    vocab = tf.contrib.learn.preprocessing.VocabularyProcessor(data.DOC_LENGTH)
    vocab = vocab.restore(vocab_path)
    training_data = tf.one_hot(training_set.data, len(vocab.vocabulary_._mapping), dtype=tf.float32)
    training_targets = tf.constant(np.array(training_set.targets, dtype=np.int32))
    classifier = tf.contrib.learn.Estimator(model_fn=lambda features, targets, mode, params: model_fn(features, targets, mode, params, hidden_units))
    classifier.fit(input_fn=lambda: (training_data, training_targets), steps=2000)
    return classifier

def model_fn(features, targets, mode, params, hidden_units):
    if len(hidden_units) <= 0:
        raise ValueError("Hidden units must be a iterable of ints of length >= 1")
    # Define the network
    network = tf.contrib.layers.relu(features, hidden_units[0])
    for i in range(1, len(hidden_units)):
        network = tf.contrib.layers.relu(network, hidden_units[i])
    # Flatten the network
    network = tf.reshape(network, [-1, hidden_units[-1] * data.DOC_LENGTH])
    # Add dropout to enhance feature use
    network = tf.layers.dropout(inputs=network, rate=0.5, training=mode == tf.contrib.learn.ModeKeys.TRAIN)
    # Calculate the logits
    logits = tf.contrib.layers.fully_connected(network, 15)
    loss = None
    train_op = None
    if mode != tf.contrib.learn.ModeKeys.INFER:
        targets = tf.cast(tf.one_hot(targets, 15, 1, 0), dtype=tf.float32)
        loss = tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=targets)
    if mode == tf.contrib.learn.ModeKeys.TRAIN:
        # This train_op causes the error
        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.train.get_global_step(),
            optimizer='Adam',
            learning_rate=0.01)
    predictions = {
        "classes": tf.argmax(input=logits, axis=1),
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }
    return model_fn_lib.ModelFnOps(mode=mode, predictions=predictions, loss=loss, train_op=train_op)

def main(unusedargv):
    # ... parses arguments
    classifier = train(args.train_data, args.vocab)
    print(evaluate(classifier, args.train_data))
    print(evaluate(classifier, args.test_data))

if __name__ == "__main__":
    tf.app.run()
Here's the full stack trace:
File "categorize.py", line 126, in main
classifier = train(args.train_data, args.vocab)
File "categorize.py", line 39, in train
classifier.fit(input_fn=lambda: (training_data, training_targets), steps=2000)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 280, in new_func
return func(*args, **kwargs)
File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 426, in fit
loss = self._train_model(input_fn=input_fn, hooks=hooks)
File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 934, in _train_model
model_fn_ops = self._call_legacy_get_train_ops(features, labels)
File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 1003, in _call_legacy_get_train_ops
train_ops = self._get_train_ops(features, labels)
File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 1162, in _get_train_ops
return self._call_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN)
File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 1133, in _call_model_fn
model_fn_results = self._model_fn(features, labels, **kwargs)
File "categorize.py", line 37, in <lambda>
classifier = tf.contrib.learn.Estimator(model_fn=lambda features, targets, mode, params: model_fn(features, targets, mode, params, hidden_units))
File "categorize.py", line 73, in model_fn
learning_rate=0.01)
File "/usr/local/lib/python3.6/site-packages/tensorflow/contrib/layers/python/layers/optimizers.py", line 152, in optimize_loss
with vs.variable_scope(name, "OptimizeLoss", [loss, global_step]):
File "/usr/local/Cellar/python3/3.6.0_1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/contextlib.py", line 82, in __enter__
return next(self.gen)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 1410, in variable_scope
g = ops._get_graph_from_inputs(values) # pylint: disable=protected-access
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3968, in _get_graph_from_inputs
_assert_same_graph(original_graph_element, graph_element)
File "/usr/local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3907, in _assert_same_graph
"%s must be from the same graph as %s." % (item, original_item))
ValueError: Tensor("global_step:0", shape=(), dtype=int64_ref) must be from the same graph as Tensor("softmax_cross_entropy_loss/value:0", shape=(), dtype=float32).

The contexts of the two functions are different, so you need to use tf.Graph() in the calling function to set the default graph, as follows.
def train(...):
    with tf.Graph().as_default():
        ...
        ...
        training_data = tf.one_hot(training_set.data, len(vocab.vocabulary_._mapping), dtype=tf.float32)
        training_targets = tf.constant(np.array(training_set.targets, dtype=np.int32))
        classifier = tf.contrib.learn.Estimator(model_fn=lambda features, targets, mode, params: model_fn(features, targets, mode, params, hidden_units))
        classifier.fit(input_fn=lambda: (training_data, training_targets), steps=2000)
        return classifier
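To see what as_default() buys you, here is a standalone TF 1.x snippet (not from the question's code) showing that tensors created inside the context share one graph, while a tensor created outside lands in a different one:
import tensorflow as tf

with tf.Graph().as_default() as g:
    a = tf.constant(1.0)
    b = tf.constant(2.0)
    assert a.graph is b.graph is g  # built inside the context: same graph

c = tf.constant(3.0)  # built outside: goes to the process-wide default graph
assert c.graph is not g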

I figured out the problem! This may be specific to the Estimator interface, but basically I needed to move my TensorFlow variable definitions into the Estimator. I ended up making a method to do this, but it also worked when I defined the variables in the lambda:
def train(training_file, vocab_path, hidden_units=[10, 20, 10]):
    """
    Given a training CSV file, train a Tensorflow neural network
    """
    training_set = data.load(training_file)
    vocab = tf.contrib.learn.preprocessing.VocabularyProcessor(data.DOC_LENGTH)
    vocab = vocab.restore(vocab_path)
    # Note: not defining the tensors here
    training_data = training_set.data
    training_targets = np.array(training_set.targets, dtype=np.int32)
    classifier = tf.contrib.learn.Estimator(model_fn=lambda features, targets, mode, params: model_fn(features, targets, mode, params, hidden_units))
    # Note: the tensor definitions happen here, inside input_fn
    classifier.fit(
        input_fn=lambda: (
            tf.one_hot(training_data, len(vocab.vocabulary_._mapping), dtype=tf.float32),
            tf.constant(training_targets)),
        steps=2000)
    return classifier
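The underlying rule: the Estimator builds its own graph when fit() is called, so every tensor the model consumes has to be created inside input_fn (or model_fn). A minimal sketch of that pattern, with hypothetical numpy arrays and a made-up depth standing in for the real data:
import numpy as np
import tensorflow as tf

def make_input_fn(features_np, targets_np, depth):
    # Capture only plain numpy arrays here; no tensors yet.
    def input_fn():
        # These ops are created when the Estimator invokes input_fn,
        # so they land in the graph the Estimator builds for fit().
        features = tf.one_hot(features_np, depth, dtype=tf.float32)
        targets = tf.constant(targets_np)
        return features, targets
    return input_fn

# Hypothetical usage with dummy data:
input_fn = make_input_fn(np.array([[1, 2], [3, 0]]), np.array([0, 1], dtype=np.int32), depth=4)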

Related

Optimising model.parameters and custom learnable parameter together using torch.optim gives non-leaf tensor error

Framework: PyTorch
I am trying to optimise a custom nn.Parameter (temperature) used in a softmax calculation, along with the model parameters, using a single Adam optimiser during model training. But doing so gives the following error:
ValueError: can't optimize a non-leaf Tensor
Here is my custom loss function:
class CrossEntropyLoss2d(torch.nn.Module):
    def __init__(self, weight=None):
        super().__init__()
        self.temperature = torch.nn.Parameter(torch.ones(1, requires_grad=True, device=device))
        self.loss = torch.nn.NLLLoss(weight)
        self.loss.to(device)

    def forward(self, outputs, targets):
        T_logits = self.temp_scale(outputs)
        return self.loss(torch.nn.functional.log_softmax(T_logits, dim=1), targets)

    def temp_scale(self, logits):
        temp = self.temperature.unsqueeze(1).expand(logits.size(1), logits.size(2), logits.size(3))
        return logits / temp
...
Here is the relevant part of the training code:
criterion = CrossEntropyLoss2d(weight)
params = list(model.parameters()) + list(criterion.temperature)
optimizer = Adam(params, 5e-4, (0.9, 0.999), eps=1e-08, weight_decay=1e-4)
Error:
File "train_my_net_city.py", line 270, in train
optimizer = Adam(params, 5e-4, (0.9, 0.999), eps=1e-08, weight_decay=1e-4)
File "/home/saquib/anaconda3/lib/python3.8/site-packages/torch/optim/adam.py", line 48, in __init__
super(Adam, self).__init__(params, defaults)
File "/home/saquib/anaconda3/lib/python3.8/site-packages/torch/optim/optimizer.py", line 54, in __init__
self.add_param_group(param_group)
File "/home/saquib/anaconda3/lib/python3.8/site-packages/torch/optim/optimizer.py", line 257, in add_param_group
raise ValueError("can't optimize a non-leaf Tensor")
ValueError: can't optimize a non-leaf Tensor
Checking whether the variable is a leaf prints True:
print(criterion.temperature.is_leaf)
True
The error arises due to the criterion.temperature parameter and not due to model.parameters: list(criterion.temperature) iterates over the tensor, and the indexed elements it yields are non-leaf views. Got it working by appending the Parameter itself:
params = list(model.parameters())
params.append(criterion.temperature)
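A quick standalone check of why list() was the culprit: iterating a Parameter indexes into it, and the indexed views are non-leaf tensors, which the optimiser rejects:
import torch

p = torch.nn.Parameter(torch.ones(1))
print(p.is_leaf)           # True: the Parameter itself is a leaf
print(list(p)[0].is_leaf)  # False: indexing produces a non-leaf view

torch.optim.Adam([p], lr=5e-4)        # fine: the leaf Parameter is passed
# torch.optim.Adam(list(p), lr=5e-4)  # ValueError: can't optimize a non-leaf Tensor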

TensorFlow graph error in Estimator (ValueError: Tensor (...) must be from the same graph as Tensor(...))

UPDATE: Testing the same code with tensorflow-gpu 1.13.1 works both on my PC and on Google Cloud.
Using TensorFlow Estimator and running train_and_evaluate gives me the following error message:
"ValueError: Tensor("Const:0", shape=(3,), dtype=float32) must be from the same graph as Tensor("ParallelMapDataset:0", shape=(), dtype=variant, device=/device:CPU:0)." (see the full error output near bottom)
This happens when training the CNN on my PC with a GPU (GeForge RTX 2070). I am using Python 3.7 with tensorflow-gpu/tensorflow 1.14.0, Keras 2.2.4, running in a Conda environment.
It happens after the following log message "... Saving checkpoints for 2716 into C:/EstimatorOutput/10/model.ckpt." and appears to occur when the input function for the evaluation step is being processed.
The code, as it is now, has run previously with no issue, but this has suddenly changed for reasons that are unclear to me.
I ran similar code on Google Cloud (which also previously ran fine), and the same problem occurs (see error output near bottom; run on GPU (BASIC_GPU); TensorFlow 1.14; Keras 2.2.4).
The error seems to be related to the evaluation step: when the graph is created for evaluation, the new graph is for some reason not compatible.
Here is my code:
My task module:
import tensorflow as tf
from train_model import model  # ("train_model" is a local folder)
from train_model.model import create_estimator

if __name__ == '__main__':
    model_num = 10
    # Throw properties into params dict to pass to other functions
    params = {}
    params['train csv'] = "train_set_local.csv"
    params['eval csv'] = "eval_set_local.csv"
    params['output path'] = "C:/EstimatorOutput/" + str(model_num) + "/"
    params['data path'] = "C:/Databases/Birds_dB/Images"
    params['image size'] = [244, 224]
    params["batch size"] = 16*2
    params['use random flip'] = True
    params['learning rate'] = 0.000001
    params['dropout rate'] = 0.50
    params['num classes'] = 123
    params['train steps'] = 65000
    params['eval steps'] = 20
    params['eval_throttle_secs'] = 600
    params['num parallel calls'] = 4
    # Run the training job
    model.go_train(params)  # (See "go_train" below in the model script ->)
My model module:
import tensorflow as tf
from tensorflow.python.keras import estimator as kes
from tensorflow.python.keras.applications.vgg16 import VGG16
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dropout, Flatten, Dense
from train_model.input_fn import make_input_fn

def create_estimator(params):
    # Import VGG16 model for transfer learning
    base_model = VGG16(weights='imagenet')
    base_model.summary()
    x = base_model.get_layer('fc2').output
    x = Dropout(params['dropout rate'])(x)
    predictions = Dense(params['num classes'], activation="sigmoid", name="sm_out")(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    for layer in model.layers:
        layer.trainable = True
    model.compile(
        loss="binary_crossentropy",
        optimizer=tf.train.AdamOptimizer(params['learning rate'],
                                         beta1=0.9,
                                         beta2=0.999),
        metrics=["categorical_accuracy"]
    )
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.95
    run_config = tf.estimator.RunConfig(
        session_config=config,
        model_dir=params['output path']
    )
    # Convert to Estimator
    estimator_model = kes.model_to_estimator(
        keras_model=model,
        config=run_config
    )
    return estimator_model

def go_train(params):
    # Create the estimator
    Est = create_estimator(params)
    # Set up Estimator train and evaluation specifications
    train_spec = tf.estimator.TrainSpec(
        input_fn=make_input_fn(params['train csv'], tf.estimator.ModeKeys.TRAIN, params, augment=True),
        max_steps=params['train steps']
    )
    eval_spec = tf.estimator.EvalSpec(
        input_fn=make_input_fn(params['eval csv'], tf.estimator.ModeKeys.EVAL, params, augment=True),
        steps=params['eval steps'],  # Evaluates on "eval steps" batches
        throttle_secs=params['eval_throttle_secs']
    )
    # Run training and evaluation
    tf.estimator.train_and_evaluate(Est, train_spec, eval_spec)
My input module:
import tensorflow as tf
from keras.applications.vgg16 import preprocess_input

tf.logging.set_verbosity(v=tf.logging.INFO)

HEIGHT = 224
WIDTH = 224
NUM_CHANNELS = 3
NCLASSES = 123

def read_and_preprocess_with_augment(image_bytes, label=None):
    return read_and_preprocess(image_bytes, label, augment=True)

def read_and_preprocess(image_bytes, label=None, augment=False):
    image = tf.image.decode_jpeg(contents=image_bytes, channels=NUM_CHANNELS)
    image = tf.image.convert_image_dtype(image=image, dtype=tf.float32)  # 0-1
    image = tf.expand_dims(input=image, axis=0)  # resize_bilinear needs batches
    if augment:
        # Resize to slightly larger than target size
        image = tf.image.resize_bilinear(images=image, size=[HEIGHT + 50, WIDTH + 50], align_corners=False)
        # Image random rotation
        degree_angle = tf.random.uniform((), minval=-25, maxval=25, dtype=tf.dtypes.float32)
        radian = degree_angle * 3.14 / 180
        image = tf.contrib.image.rotate(image, radian, interpolation='NEAREST')
        # Remove batch dimension
        image = tf.squeeze(input=image, axis=0)
        # Random crop
        image = tf.random_crop(value=image, size=[HEIGHT, WIDTH, NUM_CHANNELS])
        # Random L-R flip
        image = tf.image.random_flip_left_right(image=image)
        # Random brightness
        image = tf.image.random_brightness(image=image, max_delta=63.0 / 255.0)
        # Random contrast
        image = tf.image.random_contrast(image=image, lower=0.2, upper=1.8)
    else:
        image = tf.image.resize_bilinear(images=image, size=[HEIGHT, WIDTH], align_corners=False)
        image = tf.squeeze(input=image, axis=0)  # remove batch dimension
    image = tf.cast(tf.round(image * 255), tf.int32)
    image = preprocess_input(image)
    label = tf.one_hot(tf.strings.to_number(label, out_type=tf.int32), depth=NCLASSES)
    return {"input_1": image}, label

def make_input_fn(csv_of_filenames, mode, params, augment=False):
    def _input_fn():
        def decode_csv(csv_row):
            filename, label = tf.decode_csv(records=csv_row, record_defaults=[[""], [""]])
            image_bytes = tf.read_file(filename=filename)
            return image_bytes, label
        # Create tf.data.Dataset from filenames
        dataset = tf.data.TextLineDataset(filenames=csv_of_filenames).map(map_func=decode_csv, num_parallel_calls=params['num parallel calls'])
        if augment:
            dataset = dataset.map(map_func=read_and_preprocess_with_augment, num_parallel_calls=params['num parallel calls'])
        else:
            dataset = dataset.map(map_func=read_and_preprocess, num_parallel_calls=params['num parallel calls'])
        if mode == tf.estimator.ModeKeys.TRAIN:
            num_epochs = None
            dataset = dataset.shuffle(buffer_size=10*params["batch size"])
        else:
            num_epochs = 1
        dataset = dataset.repeat(count=num_epochs).batch(batch_size=params["batch size"]).prefetch(4)
        images, labels = dataset.make_one_shot_iterator().get_next()
        return images, labels
    return _input_fn
Error output on PC
As mentioned, when running locally on my GPU, the above code results in this series of error messages (abbreviated):
Saving checkpoints for 2716 into ....
...
...
File "C:...\estimator.py", line 501, in _evaluate
self._evaluate_build_graph(input_fn, hooks, checkpoint_path))
File "C:...\estimator.py", line 1501, in _evaluate_build_graph
self._call_model_fn_eval(input_fn, self.config))
File "C:...\estimator.py", line 1534, in _call_model_fn_eval
input_fn, ModeKeys.EVAL)
File "C:...\estimator.py", line 1022, in _get_features_and_labels_from_input_fn
self._call_input_fn(input_fn, mode))
File "C:...\estimator.py", line 1113, in _call_input_fn
return input_fn(**kwargs)
File "C:...\input_fn.py", line 71, in _input_fn
dataset = dataset.map(map_func=read_and_preprocess_with_augment, num_parallel_calls=params['num parallel calls'])
File "C:...dataset_ops.py", line 1776, in map
self, map_func, num_parallel_calls, preserve_cardinality=False))
File "C:...\dataset_ops.py", line 3239, in init
**flat_structure(self))
File "C:...\gen_dataset_ops.py", line 4179, in parallel_map_dataset
name=name)
File "C:...\op_def_library.py", line 366, in _apply_op_helper
g = ops._get_graph_from_inputs(_Flatten(keywords.values()))
File "C:...\ops.py", line 6135, in _get_graph_from_inputs
_assert_same_graph(original_graph_element, graph_element)
File "C:...ops.py", line 6071, in _assert_same_graph
(item, original_item))
ValueError: Tensor("Const:0", shape=(3,), dtype=float32) must be from the same graph as Tensor("ParallelMapDataset:0", shape=(), dtype=variant, device=/device:CPU:0).
Error output on Google Cloud
The replica master 0 exited with a non-zero status of 1.
Traceback (most recent call last): [...]
File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1534, in _call_model_fn_eval input_fn, ModeKeys.EVAL)
File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1022, in _get_features_and_labels_from_input_fn self._call_input_fn(input_fn, mode))
File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1113, in _call_input_fn return input_fn(**kwargs)
File "/root/.local/lib/python3.5/site-packages/train_model/input_fn.py", line 87, in _input_fn dataset = dataset.map(map_func=read_and_preprocess_with_augment, num_parallel_calls=params['num parallel calls'])
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 1776, in map self, map_func, num_parallel_calls, preserve_cardinality=False))
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 3239, in init **flat_structure(self)) File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_dataset_ops.py", line 4179, in parallel_map_dataset name=name) File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 366, in _apply_op_helper g = ops._get_graph_from_inputs(_Flatten(keywords.values()))
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 6135, in _get_graph_from_inputs _assert_same_graph(original_graph_element, graph_element)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 6071, in _assert_same_graph (item, original_item))
ValueError: Tensor("Const_1:0", shape=(3,), dtype=float32, device=/device:CPU:0) must be from the same graph as Tensor("ParallelMapDataset:0", shape=(), dtype=variant, device=/device:CPU:0).
Any help/hint is much appreciated. I am stuck at this point and do not know how to debug this one!
Use this preprocess function:
from tensorflow.keras.applications.mobilenet import preprocess_input
It has the same functionality as VGG's preprocess_input.
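As a quick standalone sanity check of what you would be swapping in (not from the answer's code): MobileNet's preprocess_input rescales pixel values from [0, 255] to [-1, 1].
import numpy as np
from tensorflow.keras.applications.mobilenet import preprocess_input

x = np.array([[0.0, 127.5, 255.0]], dtype=np.float32)
print(preprocess_input(x))  # [[-1. 0. 1.]]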

How to read multiple .mat files (which are too large to fit in memory) using tensorflow dataset

I have around 550K samples, each sample being 200x50x1. The size of this dataset is around 57GB.
I want to train a network on this set but I am having trouble reading it.
batch_size = 8

def _read_py_function(filename, labels_slice):
    with h5py.File(filename, 'r') as f:
        data_slice = np.asarray(f['feats'])
        print(data_slice.shape)
    return data_slice, labels_slice

placeholder_files = tf.placeholder(tf.string, [None])
placeholder_labels = tf.placeholder(tf.int32, [None])
dataset = tf.data.Dataset.from_tensor_slices((placeholder_files, placeholder_labels))
dataset = dataset.map(
    lambda filename, label: tuple(tf.py_func(
        _read_py_function, [filename, label], [tf.uint8, tf.int32])))
dataset = dataset.shuffle(buffer_size=50000)
dataset = dataset.batch(batch_size)
iterator = tf.data.Iterator.from_structure(dataset.output_types, dataset.output_shapes)
data_X, data_y = iterator.get_next()
data_y = tf.cast(data_y, tf.int32)

net = conv_layer(inputs=data_X, num_outputs=8, kernel_size=3, stride=2, scope='rcl_0')
net = pool_layer(inputs=net, kernel_size=2, scope='pl_0')
net = dropout_layer(inputs=net, scope='dl_0')
net = flatten_layer(inputs=net, scope='flatten_0')
net = dense_layer(inputs=net, num_outputs=256, scope='dense_0')
net = dense_layer(inputs=net, num_outputs=64, scope='dense_1')
out = dense_layer(inputs=net, num_outputs=10, scope='dense_2')
And I run the session using:
sess.run(train_iterator, feed_dict={placeholder_files: filenames, placeholder_labels: ytrain})
try:
    while True:
        _, loss, acc = sess.run([train_op, loss_op, accuracy_op])
        train_loss += loss
        train_accuracy += acc
except tf.errors.OutOfRangeError:
    pass
But I am getting the error even before running the session:
Traceback (most recent call last):
File "SFCC-trial-134.py", line 297, in <module>
net = rcnn_layer(inputs=data_X,num_outputs=8, kernel_size=3, stride=2, scope='rcl_0')
File "SFCC-trial-134.py", line 123, in rcnn_layer
reuse=False)
File "SFCC-trial-134.py", line 109, in conv_layer
reuse = reuse
File "/home/priyam.jain/tensorflow-gpu-python3/lib/python3.5/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 183, in func_with_args
return func(*args, **current_args)
File "/home/priyam.jain/tensorflow-gpu-python3/lib/python3.5/site-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1154, in convolution2d
conv_dims=2)
File "/home/priyam.jain/tensorflow-gpu-python3/lib/python3.5/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 183, in func_with_args
return func(*args, **current_args)
File "/home/priyam.jain/tensorflow-gpu-python3/lib/python3.5/site-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1025, in convolution
(conv_dims + 2, input_rank))
TypeError: %d format: a number is required, not NoneType
I thought about using TFRecords but had a hard time creating those; I couldn't find a good post explaining how to create them for my kind of dataset.
conv_layer is defined as follows:
def conv_layer(inputs, num_outputs, kernel_size, stride, normalizer_fn=None, activation_fn=nn.relu, trainable=True, scope='noname', reuse=False):
    net = slim.conv2d(inputs=inputs,
                      num_outputs=num_outputs,
                      kernel_size=kernel_size,
                      stride=stride,
                      normalizer_fn=normalizer_fn,
                      activation_fn=activation_fn,
                      trainable=trainable,
                      scope=scope,
                      reuse=reuse)
    return net
Do not pass tf.py_func inside your map function. You can read the image file by passing the function name directly inside your map function. I am posting only the relevant parts of the code.
def _read_py_function(filename, label):
    return tf.zeros((224, 224, 3), dtype=tf.float32), tf.ones((1,), dtype=tf.int32)

dataset = dataset.map(lambda filename, label: _read_py_function(filename, label))
Another change: your iterator will expect floating-point input, so you will have to change your tf.uint8 output type to float.
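Putting the answer's pieces together, a minimal sketch of the resulting pipeline (TF 1.x). The filenames and the stand-in reader are hypothetical; real HDF5 reading would still need tf.py_func or an offline TFRecord conversion:
import tensorflow as tf

def _read_function(filename, label):
    # Stand-in reader: emits one float32 feature block shaped like a
    # sample (200x50x1 in the question) plus the label.
    return tf.zeros((200, 50, 1), dtype=tf.float32), tf.cast(label, tf.int32)

filenames = tf.constant(["sample_0.h5", "sample_1.h5"])  # hypothetical names
labels = tf.constant([3, 7], dtype=tf.int32)

dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
dataset = dataset.map(_read_function)  # note: float32 features, not tf.uint8
dataset = dataset.shuffle(buffer_size=100).batch(8)
iterator = dataset.make_one_shot_iterator()
data_X, data_y = iterator.get_next()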

Tensorflow estimator error: 'Tensor' object is not iterable

My code uses a Keras model and a tf.data dataset, and loads image files from disk.
When running this code:
import tensorflow as tf
import os

# Reads the image file corresponding to filename and resizes it to a uniform size
def _parse_function(filename, label):
    image_string = tf.read_file(filename)
    # image_string = tf.gfile.FastGFile(filename).read()
    image_decoded = tf.image.decode_png(image_string)
    image_resized = tf.image.resize_images(image_decoded, [28, 28])
    return image_resized, label

# List of image files
pics = os.listdir("D:/kaggle/flower/data/train/daisy")
print(pics)
filenames = tf.constant(["D:/kaggle/flower/data/train/daisy/"+e for e in pics])
# label[i] is the label of image filenames[i]
labels = tf.constant([0]*len(pics))
# At this point each element of the dataset is (filename, label)
dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
dataset = dataset.map(_parse_function)
# Now each element of the dataset is (image_resized, label)

def from_dataset(ds):
    return lambda: ds.make_one_shot_iterator().get_next()

# Now each element of the dataset is (image_resized_batch, label_batch)
dataset = dataset.shuffle(buffer_size=1000).batch(32).repeat()
print(dataset.output_shapes)
# iterator = dataset.make_one_shot_iterator()
# one_element = iterator.get_next()
keras_inception_v3 = tf.keras.applications.inception_v3.InceptionV3(weights=None)
# Compile model with the optimizer, loss, and metrics you'd like to train with.
keras_inception_v3.compile(optimizer=tf.keras.optimizers.SGD(lr=0.0001, momentum=0.9),
                           loss='categorical_crossentropy',
                           metric='accuracy')
# Create an Estimator from the compiled Keras model. Note the initial model
# state of the keras model is preserved in the created Estimator.
est_inception_v3 = tf.keras.estimator.model_to_estimator(keras_model=keras_inception_v3)
est_inception_v3.train(input_fn=from_dataset(dataset), steps=10)
eval_result = est_inception_v3.evaluate(input_fn=from_dataset(dataset))
print(eval_result)
I get the following error:
Traceback (most recent call last):
File "C:/Users/lxm1042642197/PycharmProjects/models/samples/dt.py", line 54, in <module>
est_inception_v3.train(input_fn=from_dataset(dataset),steps=10)
File "C:\Anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 302, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "C:\Anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 711, in _train_model
features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
File "C:\Anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 694, in _call_model_fn
model_fn_results = self._model_fn(features=features, **kwargs)
File "C:\Anaconda3\lib\site-packages\tensorflow\python\keras\_impl\keras\estimator.py", line 145, in model_fn
labels)
File "C:\Anaconda3\lib\site-packages\tensorflow\python\keras\_impl\keras\estimator.py", line 92, in _clone_and_build_model
keras_model, features)
File "C:\Anaconda3\lib\site-packages\tensorflow\python\keras\_impl\keras\estimator.py", line 58, in _create_ordered_io
for key in estimator_io_dict:
File "C:\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 505, in __iter__
raise TypeError("'Tensor' object is not iterable.")
TypeError: 'Tensor' object is not iterable.

SystemError: <built-in function TF_Run> returned a result with an error set

I want to implement an NER task with TensorFlow, so I divided the conll2003 corpus into a train_set and a valid_set, which have the same format. Then I ran the code below on the train_set (in the code it is called train_reader):
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import numpy as np
from nlpfromscratch.vocab import MultiVocab
from nlpfromscratch.csvreader import CSVReader
from nlpfromscratch.embeds import Embeddings
from nlpfromscratch.mlp import linear
from nlpfromscratch.loss import reg_softmax_loss
from nlpfromscratch.args import parser
from nlpfromscratch.convnet import conv_max
from nlpfromscratch.evaluation import metrics, prf_eval
import os

train_path = "E:\\github\\nlpfromscratch-tf\\data\\conll2003\\train_ner_w3.csv"
valid_path = "E:\\github\\nlpfromscratch-tf\\data\\conll2003\\valid_ner_w3.csv"
log_dir = "E:\\github\\nlpfromscratch-tf\\data\\conll2003\\"
batch_size = 32
vocab_path = "E:\\github\\nlpfromscratch-tf\\data\\conll2003\\vocab.json"
word_dim = 50
feat_dim = 8
n_hidden = 300
lambda_ = 0.001
init = 0.01
max_epochs = 20

train_reader = CSVReader(train_path, batch_size)  # read the data
valid_reader = CSVReader(valid_path, batch_size)
seq_len = train_reader.seq_len
num_feats = train_reader.num_feats
chkpt = os.path.join(log_dir, 'nlpfromscratch')

with tf.Graph().as_default():
    sess = tf.Session()
    global_step = tf.contrib.framework.get_or_create_global_step()
    tokens_pl = tf.placeholder(tf.string, (batch_size, seq_len))
    features_pl = tf.placeholder(tf.string, (batch_size, seq_len, num_feats))
    labels_pl = tf.placeholder(tf.string, (batch_size, ))
    multi_vocab = MultiVocab(vocab_path)
    label_lookup = multi_vocab.labels.lookup(labels_pl)
    metrics(multi_vocab._labels)
    embeddings = Embeddings(multi_vocab, word_dim, feat_dim, num_feats)
    encoded_input = embeddings.encode(tokens_pl, features_pl)
    wx_plus_b = linear(encoded_input, n_hidden, 'hidden', lambda_)
    hidden = tf.nn.relu(wx_plus_b)
    logits = linear(hidden, multi_vocab.num_classes, 'classify', lambda_)
    with tf.name_scope('predict'):
        predict = multi_vocab.labels_inv.lookup(
            tf.argmax(tf.nn.softmax(logits), axis=1))
    loss = reg_softmax_loss(logits, label_lookup)
    grad = tf.train.GradientDescentOptimizer(init)
    train_op = grad.minimize(loss, global_step=global_step)
    summary = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
    embeddings.embed_visualization(summary_writer, log_dir, vocab_path)
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())
    sess.run(tf.tables_initializer())
    for tokens, features, labels in train_reader.batcher(max_epochs):
        feed_dict = {tokens_pl: tokens, features_pl: features, labels_pl: labels}
        summ, step_loss, gs, _ = sess.run([summary, loss, global_step, train_op],
                                          feed_dict=feed_dict)
        if gs % 10 == 0:
            print(gs, train_reader.epoch, max_epochs, step_loss)
It ran with no problem at all. But when I changed the train_set to the valid_set:
predict = multi_vocab.labels_inv.lookup(tf.argmax(tf.nn.softmax(logits), axis=1))
for tokens, features, truths in valid_reader.batcher(1):  # only one epoch for validation
    feed_dict = {tokens_pl: tokens, features_pl: features}
    predictions = sess.run(predict, feed_dict=feed_dict)
it went wrong and returned:
Traceback (most recent call last):
File "<ipython-input-21-a79489f6e284>", line 1, in <module>
runfile('E:/github/nlpfromscratch-tf/train.py',wdir='E:/github/nlpfromscratch-tf')
File "F:\Anaconda\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 714, in runfile
execfile(filename, namespace)
File "F:\Anaconda\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 89, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "E:/github/nlpfromscratch-tf/train.py", line 97, in <module>
features_pl)
File "E:\github\nlpfromscratch-tf\nlpfromscratch\evaluation.py", line 35, in prf_eval
predictions = sess.run(predict, feed_dict=feed_dict)
File "F:\Anaconda\lib\site-packages\tensorflow\python\client\session.py", line 767, in run
run_metadata_ptr)
File "F:\Anaconda\lib\site-packages\tensorflow\python\client\session.py", line 965, in _run
feed_dict_string, options, run_metadata)
File "F:\Anaconda\lib\site-packages\tensorflow\python\client\session.py", line 1015, in _do_run
target_list, options, run_metadata)
File "F:\Anaconda\lib\site-packages\tensorflow\python\client\session.py", line 1022, in _do_call
return fn(*args)
File "F:\Anaconda\lib\site-packages\tensorflow\python\client\session.py", line 1004, in _run_fn
status, run_metadata)
SystemError: <built-in function TF_Run> returned a result with an error set
First I thought something might be wrong with my validation code, so I used the valid_set for training like this:
for tokens, features, labels in valid_reader.batcher(max_epochs):
    feed_dict = {tokens_pl: tokens, features_pl: features, labels_pl: labels}
    summ, step_loss, gs, _ = sess.run([summary, loss, global_step, train_op],
                                      feed_dict=feed_dict)
It returned the same error! So I am quite confused why the train_set has no problems but the valid_set goes wrong when they have exactly the same format.
