Related
I'm working on a simple MLP model. The input shapes used for model fitting are listed below.
fea_train_np.shape = (6000, 1, 15, 21, 512)
fea_val_np.shape = (1500, 1, 15, 21, 512)
y_train_np.shape = (6000, 2)
y_val_np.shape = (1500, 2)
And here is the MLP I'm working on. The last layer uses a linear activation because I want to do regression instead of classification.
mlp1 = keras.Sequential(
[
layers.Flatten(),
layers.Dense(256, activation='relu'), # Add a fully-connecte layer with 16 units and relu activation function as the hidden layer
layers.Dense(10, activation='linear')
],
)
mlp1.compile(optimizer = optimizers.Adam(learning_rate = 0.001),
loss = keras.losses.MeanSquaredError(),
metrics = [keras.metrics.MeanSquaredError()])
mlp = mlp1.fit(fea_train_np, y_train_np, epochs=20, batch_size=8, validation_data=(fea_val_np, y_val_np))
result = mlp.predict(fea_val_np, y_val_np)
And I got this error when I was trying to fit my model:
Train on 6000 samples, validate on 1500 samples
Epoch 1/20
8/6000 [..............................] - ETA: 12s
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\framework\ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
1618 try:
-> 1619 c_op = c_api.TF_FinishOperation(op_desc)
1620 except errors.InvalidArgumentError as e:
InvalidArgumentError: Dimensions must be equal, but are 10 and 2 for 'loss/output_1_loss/SquaredDifference' (op: 'SquaredDifference') with input shapes: [8,10], [8,2].
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-32-37335a6a8cd3> in <module>
11 metrics = [keras.metrics.MeanSquaredError()])
12
---> 13 mlp = mlp1.fit(fea_train_np, y_train_np, epochs=20, batch_size=8, validation_data=(fea_val_np, y_val_np))
14 result = mlp.predict(fea_val_np, y_val_np)
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
817 max_queue_size=max_queue_size,
818 workers=workers,
--> 819 use_multiprocessing=use_multiprocessing)
820
821 def evaluate(self,
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
340 mode=ModeKeys.TRAIN,
341 training_context=training_context,
--> 342 total_epochs=epochs)
343 cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
344
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
126 step=step, mode=mode, size=current_batch_size) as batch_logs:
127 try:
--> 128 batch_outs = execution_function(iterator)
129 except (StopIteration, errors.OutOfRangeError):
130 # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in execution_function(input_fn)
96 # `numpy` translates Tensors to values in Eager mode.
97 return nest.map_structure(_non_none_constant_value,
---> 98 distributed_function(input_fn))
99
100 return execution_function
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\eager\def_function.py in __call__(self, *args, **kwds)
566 xla_context.Exit()
567 else:
--> 568 result = self._call(*args, **kwds)
569
570 if tracing_count == self._get_tracing_count():
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\eager\def_function.py in _call(self, *args, **kwds)
613 # This is the first call of __call__, so we have to initialize.
614 initializers = []
--> 615 self._initialize(args, kwds, add_initializers_to=initializers)
616 finally:
617 # At this point we know that the initialization is complete (or less
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\eager\def_function.py in _initialize(self, args, kwds, add_initializers_to)
495 self._concrete_stateful_fn = (
496 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 497 *args, **kwds))
498
499 def invalid_creator_scope(*unused_args, **unused_kwds):
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\eager\function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
2387 args, kwargs = None, None
2388 with self._lock:
-> 2389 graph_function, _, _ = self._maybe_define_function(args, kwargs)
2390 return graph_function
2391
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\eager\function.py in _maybe_define_function(self, args, kwargs)
2701
2702 self._function_cache.missed.add(call_context_key)
-> 2703 graph_function = self._create_graph_function(args, kwargs)
2704 self._function_cache.primary[cache_key] = graph_function
2705 return graph_function, args, kwargs
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\eager\function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
2591 arg_names=arg_names,
2592 override_flat_arg_shapes=override_flat_arg_shapes,
-> 2593 capture_by_value=self._capture_by_value),
2594 self._function_attributes,
2595 # Tell the ConcreteFunction to clean up its graph once it goes out of
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\framework\func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
976 converted_func)
977
--> 978 func_outputs = python_func(*func_args, **func_kwargs)
979
980 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\eager\def_function.py in wrapped_fn(*args, **kwds)
437 # __wrapped__ allows AutoGraph to swap in a converted function. We give
438 # the function a weak reference to itself to avoid a reference cycle.
--> 439 return weak_wrapped_fn().__wrapped__(*args, **kwds)
440 weak_wrapped_fn = weakref.ref(wrapped_fn)
441
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in distributed_function(input_iterator)
83 args = _prepare_feed_values(model, input_iterator, mode, strategy)
84 outputs = strategy.experimental_run_v2(
---> 85 per_replica_function, args=args)
86 # Out of PerReplica outputs reduce or pick values to return.
87 all_outputs = dist_utils.unwrap_output_dict(
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py in experimental_run_v2(self, fn, args, kwargs)
761 fn = autograph.tf_convert(fn, ag_ctx.control_status_ctx(),
762 convert_by_default=False)
--> 763 return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
764
765 def reduce(self, reduce_op, value, axis):
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py in call_for_each_replica(self, fn, args, kwargs)
1817 kwargs = {}
1818 with self._container_strategy().scope():
-> 1819 return self._call_for_each_replica(fn, args, kwargs)
1820
1821 def _call_for_each_replica(self, fn, args, kwargs):
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py in _call_for_each_replica(self, fn, args, kwargs)
2162 self._container_strategy(),
2163 replica_id_in_sync_group=constant_op.constant(0, dtypes.int32)):
-> 2164 return fn(*args, **kwargs)
2165
2166 def _reduce_to(self, reduce_op, value, destinations):
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\autograph\impl\api.py in wrapper(*args, **kwargs)
290 def wrapper(*args, **kwargs):
291 with ag_ctx.ControlStatusCtx(status=ag_ctx.Status.DISABLED):
--> 292 return func(*args, **kwargs)
293
294 if inspect.isfunction(func) or inspect.ismethod(func):
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in train_on_batch(model, x, y, sample_weight, class_weight, reset_metrics, standalone)
431 y,
432 sample_weights=sample_weights,
--> 433 output_loss_metrics=model._output_loss_metrics)
434
435 if reset_metrics:
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in train_on_batch(model, inputs, targets, sample_weights, output_loss_metrics)
310 sample_weights=sample_weights,
311 training=True,
--> 312 output_loss_metrics=output_loss_metrics))
313 if not isinstance(outs, list):
314 outs = [outs]
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in _process_single_batch(model, inputs, targets, output_loss_metrics, sample_weights, training)
251 output_loss_metrics=output_loss_metrics,
252 sample_weights=sample_weights,
--> 253 training=training))
254 if total_loss is None:
255 raise ValueError('The model cannot be run '
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in _model_loss(model, inputs, targets, output_loss_metrics, sample_weights, training)
165
166 if hasattr(loss_fn, 'reduction'):
--> 167 per_sample_losses = loss_fn.call(targets[i], outs[i])
168 weighted_losses = losses_utils.compute_weighted_loss(
169 per_sample_losses,
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\keras\losses.py in call(self, y_true, y_pred)
219 y_pred, y_true = tf_losses_util.squeeze_or_expand_dimensions(
220 y_pred, y_true)
--> 221 return self.fn(y_true, y_pred, **self._fn_kwargs)
222
223 def get_config(self):
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\keras\losses.py in mean_squared_error(y_true, y_pred)
769 y_pred = ops.convert_to_tensor(y_pred)
770 y_true = math_ops.cast(y_true, y_pred.dtype)
--> 771 return K.mean(math_ops.squared_difference(y_pred, y_true), axis=-1)
772
773
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\ops\gen_math_ops.py in squared_difference(x, y, name)
10037 try:
10038 _, _, _op, _outputs = _op_def_library._apply_op_helper(
> 10039 "SquaredDifference", x=x, y=y, name=name)
10040 except (TypeError, ValueError):
10041 result = _dispatch.dispatch(
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\framework\op_def_library.py in _apply_op_helper(op_type_name, name, **keywords)
740 op = g._create_op_internal(op_type_name, inputs, dtypes=None,
741 name=scope, input_types=input_types,
--> 742 attrs=attr_protos, op_def=op_def)
743
744 # `outputs` is returned as a separate return value so that the output
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\framework\func_graph.py in _create_op_internal(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_device)
593 return super(FuncGraph, self)._create_op_internal( # pylint: disable=protected-access
594 op_type, inputs, dtypes, input_types, name, attrs, op_def,
--> 595 compute_device)
596
597 def capture(self, tensor, name=None, shape=None):
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\framework\ops.py in _create_op_internal(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_device)
3320 input_types=input_types,
3321 original_op=self._default_original_op,
-> 3322 op_def=op_def)
3323 self._create_op_helper(ret, compute_device=compute_device)
3324 return ret
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\framework\ops.py in __init__(self, node_def, g, inputs, output_types, control_inputs, input_types, original_op, op_def)
1784 op_def, inputs, node_def.attr)
1785 self._c_op = _create_c_op(self._graph, node_def, grouped_inputs,
-> 1786 control_input_ops)
1787 name = compat.as_str(node_def.name)
1788 # pylint: enable=protected-access
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\framework\ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
1620 except errors.InvalidArgumentError as e:
1621 # Convert to ValueError for backwards compatibility.
-> 1622 raise ValueError(str(e))
1623
1624 return c_op
ValueError: Dimensions must be equal, but are 10 and 2 for 'loss/output_1_loss/SquaredDifference' (op: 'SquaredDifference') with input shapes: [8,10], [8,2].
I tried changing loss = keras.losses.MeanSquaredError() to loss = [keras.losses.MeanSquaredError()], but the error stays the same.
Can someone tell me what I did wrong here? Any suggestion will be appreciated.
I think the problem doesn't have to do with the loss function you use but with the dimensions of your data.
y_val_np.shape is (1500, 2), so each target has 2 values (shape[1]), but the last layer of mlp1 produces an output with 10 values per sample.
If 2 outputs is what you need, I believe changing the number of units in the last layer of mlp1 from 10 to 2 will solve the problem.
I am trying to evaluate my deep learning RNN model but I keep getting this one error. I have absolutely no idea what the error means and I am unable to solve it. Any help is appreciated. Thanks.
This is the code I am using to evaluate my model:
model = keras.models.load_model('D:/Semester 3.2 OFFICIAL/Deep Learning/Assignment 2/test_model_14 files/Model14Checkpoint-188-0.60.h5')
model.load_weights('D:/Semester 3.2 OFFICIAL/Deep Learning/Assignment 2/test_model_14 files/Model14Checkpoint-188-0.60.h5')
evaluate = model.evaluate(X_test_seq_padded, y_test, batch_size=128)
loss = evaluate[0]
acc = evaluate[1] * 100
print("Loss: {:0.3f} - Accuracy: {:0.3f}%".format(loss,acc))
And this is the error that it returns:
128/8510 [..............................] - ETA: 1:05
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-8-8b3cba2991da> in <module>
13 model = keras.models.load_model('D:/Semester 3.2 OFFICIAL/Deep Learning/Assignment 2/test_model_14 files/Model14Checkpoint-188-0.60.h5')
14 model.load_weights('D:/Semester 3.2 OFFICIAL/Deep Learning/Assignment 2/test_model_14 files/Model14Checkpoint-188-0.60.h5')
---> 15 evaluate = model.evaluate(X_test_seq_padded, y_test, batch_size=128)
16
17 loss = evaluate[0]
~\anaconda3\envs\three point seven\lib\site-packages\tensorflow_core\python\keras\engine\training.py in evaluate(self, x, y, batch_size, verbose, sample_weight, steps, callbacks, max_queue_size, workers, use_multiprocessing)
928 max_queue_size=max_queue_size,
929 workers=workers,
--> 930 use_multiprocessing=use_multiprocessing)
931
932 def predict(self,
~\anaconda3\envs\three point seven\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in evaluate(self, model, x, y, batch_size, verbose, sample_weight, steps, callbacks, max_queue_size, workers, use_multiprocessing, **kwargs)
488 sample_weight=sample_weight, steps=steps, callbacks=callbacks,
489 max_queue_size=max_queue_size, workers=workers,
--> 490 use_multiprocessing=use_multiprocessing, **kwargs)
491
492 def predict(self, model, x, batch_size=None, verbose=0, steps=None,
~\anaconda3\envs\three point seven\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in _model_iteration(self, model, mode, x, y, batch_size, verbose, sample_weight, steps, callbacks, max_queue_size, workers, use_multiprocessing, **kwargs)
473 mode=mode,
474 training_context=training_context,
--> 475 total_epochs=1)
476 cbks.make_logs(model, epoch_logs, result, mode)
477
~\anaconda3\envs\three point seven\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
126 step=step, mode=mode, size=current_batch_size) as batch_logs:
127 try:
--> 128 batch_outs = execution_function(iterator)
129 except (StopIteration, errors.OutOfRangeError):
130 # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?
~\anaconda3\envs\three point seven\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in execution_function(input_fn)
96 # `numpy` translates Tensors to values in Eager mode.
97 return nest.map_structure(_non_none_constant_value,
---> 98 distributed_function(input_fn))
99
100 return execution_function
~\anaconda3\envs\three point seven\lib\site-packages\tensorflow_core\python\eager\def_function.py in __call__(self, *args, **kwds)
566 xla_context.Exit()
567 else:
--> 568 result = self._call(*args, **kwds)
569
570 if tracing_count == self._get_tracing_count():
~\anaconda3\envs\three point seven\lib\site-packages\tensorflow_core\python\eager\def_function.py in _call(self, *args, **kwds)
636 *args, **kwds)
637 # If we did not create any variables the trace we have is good enough.
--> 638 return self._concrete_stateful_fn._filtered_call(canon_args, canon_kwds) # pylint: disable=protected-access
639
640 def fn_with_cond(*inner_args, **inner_kwds):
~\anaconda3\envs\three point seven\lib\site-packages\tensorflow_core\python\eager\function.py in _filtered_call(self, args, kwargs)
1609 if isinstance(t, (ops.Tensor,
1610 resource_variable_ops.BaseResourceVariable))),
-> 1611 self.captured_inputs)
1612
1613 def _call_flat(self, args, captured_inputs, cancellation_manager=None):
~\anaconda3\envs\three point seven\lib\site-packages\tensorflow_core\python\eager\function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1690 # No tape is watching; skip to running the function.
1691 return self._build_call_outputs(self._inference_function.call(
-> 1692 ctx, args, cancellation_manager=cancellation_manager))
1693 forward_backward = self._select_forward_and_backward_functions(
1694 args,
~\anaconda3\envs\three point seven\lib\site-packages\tensorflow_core\python\eager\function.py in call(self, ctx, args, cancellation_manager)
543 inputs=args,
544 attrs=("executor_type", executor_type, "config_proto", config),
--> 545 ctx=ctx)
546 else:
547 outputs = execute.execute_with_cancellation(
~\anaconda3\envs\three point seven\lib\site-packages\tensorflow_core\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
65 else:
66 message = e.message
---> 67 six.raise_from(core._status_to_exception(e.code, message), None)
68 except TypeError as e:
69 keras_symbolic_tensors = [
~\anaconda3\envs\three point seven\lib\site-packages\six.py in raise_from(value, from_value)
InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: indices[28,27] = 10792 is not in [0, 10000)
[[node sequential_3/embedding_3/embedding_lookup (defined at <ipython-input-8-8b3cba2991da>:15) ]]
[[sequential_3/embedding_3/embedding_lookup/_17]]
(1) Invalid argument: indices[28,27] = 10792 is not in [0, 10000)
[[node sequential_3/embedding_3/embedding_lookup (defined at <ipython-input-8-8b3cba2991da>:15) ]]
0 successful operations.
0 derived errors ignored. [Op:__inference_distributed_function_8770]
Errors may have originated from an input operation.
Input Source operations connected to node sequential_3/embedding_3/embedding_lookup:
sequential_3/embedding_3/embedding_lookup/7696 (defined at C:\Users\acer\anaconda3\envs\three point seven\lib\contextlib.py:112)
Input Source operations connected to node sequential_3/embedding_3/embedding_lookup:
sequential_3/embedding_3/embedding_lookup/7696 (defined at C:\Users\acer\anaconda3\envs\three point seven\lib\contextlib.py:112)
Function call stack:
distributed_function -> distributed_function
I am fairly new to TensorFlow and I am having trouble running this simple CNN.
I have my images separated into separate directories for each class, which I load into train_dataset using image_dataset_from_directory.
From the documentation, this should yield a tuple (images, labels), where images has shape (batch_size, image_size[0], image_size[1], num_channels), and labels is a float32 tensor of shape (batch_size, num_classes). num_channels is 3 because the images are RGB.
However, when I try to fit my model, I get an error saying that the predictions have shape [32,5] and the labels have shape [160]. It seems to me that the batch dimension of the labels has 'collapsed'.
Here are some snippets:
BATCH_SIZE = 32
EPOCHS = 1
IMG_SIZE=(300, 300)
SEED = 1
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
directory='train/train_images/', label_mode='categorical', class_names=class_names, color_mode='rgb', batch_size=BATCH_SIZE, image_size=IMG_SIZE)
IMG_SHAPE = IMG_SIZE + (3,)
n_classes = len(train_dataset.class_names)
def build_model():
model = tf.keras.Sequential([
tf.keras.layers.Conv2D(input_shape=IMG_SHAPE, kernel_size=(5, 5), filters=32, activation='relu'),
tf.keras.layers.MaxPool2D(pool_size=(3, 3)),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(256, activation='relu'),
tf.keras.layers.Dropout(0.25),
tf.keras.layers.Dense(units=n_classes, activation='softmax')
])
return model
model = build_model()
model.compile(optimizer=tf.keras.optimizers.Adam(),
loss=tf.keras.losses.sparse_categorical_crossentropy,
metrics=['accuracy'])
model.fit(train_dataset, epochs = EPOCHS, batch_size = BATCH_SIZE)
Error Message:
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-19-86d96e744ef0> in <module>
----> 1 model.fit(train_dataset, epochs = EPOCHS, batch_size = BATCH_SIZE)
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py in _method_wrapper(self, *args, **kwargs)
106 def _method_wrapper(self, *args, **kwargs):
107 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
--> 108 return method(self, *args, **kwargs)
109
110 # Running inside `run_distribute_coordinator` already.
/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1096 batch_size=batch_size):
1097 callbacks.on_train_batch_begin(step)
-> 1098 tmp_logs = train_function(iterator)
1099 if data_handler.should_sync:
1100 context.async_wait()
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in __call__(self, *args, **kwds)
778 else:
779 compiler = "nonXla"
--> 780 result = self._call(*args, **kwds)
781
782 new_tracing_count = self._get_tracing_count()
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py in _call(self, *args, **kwds)
805 # In this case we have created variables on the first call, so we run the
806 # defunned version which is guaranteed to never create variables.
--> 807 return self._stateless_fn(*args, **kwds) # pylint: disable=not-callable
808 elif self._stateful_fn is not None:
809 # Release the lock early so that multiple threads can perform the call
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in __call__(self, *args, **kwargs)
2827 with self._lock:
2828 graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
-> 2829 return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
2830
2831 @property
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _filtered_call(self, args, kwargs, cancellation_manager)
1846 resource_variable_ops.BaseResourceVariable))],
1847 captured_inputs=self.captured_inputs,
-> 1848 cancellation_manager=cancellation_manager)
1849
1850 def _call_flat(self, args, captured_inputs, cancellation_manager=None):
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1922 # No tape is watching; skip to running the function.
1923 return self._build_call_outputs(self._inference_function.call(
-> 1924 ctx, args, cancellation_manager=cancellation_manager))
1925 forward_backward = self._select_forward_and_backward_functions(
1926 args,
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/function.py in call(self, ctx, args, cancellation_manager)
548 inputs=args,
549 attrs=attrs,
--> 550 ctx=ctx)
551 else:
552 outputs = execute.execute_with_cancellation(
/opt/conda/lib/python3.7/site-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
58 ctx.ensure_initialized()
59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
62 if name is not None:
InvalidArgumentError: logits and labels must have the same first dimension, got logits shape [32,5] and labels shape [160]
[[node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits (defined at <ipython-input-18-1904262c6a7b>:1) ]] [Op:__inference_train_function_928]
Function call stack:
train_function
I think you should explicitly compile your model before executing. For that, you may omit the 'input_shape' parameter in the first layer. See model.compile in the Keras documentation. Make sure that you set the loss to "categorical_crossentropy" or tf.keras.losses.CategoricalCrossentropy(), because label_mode='categorical' yields one-hot encoded labels, whereas sparse_categorical_crossentropy expects integer class indices. Then try again. I hope this helps.
Also it would help if you share your file structure.
I'm building a multi-input model using the functional API in tf.keras.
The training input has shape (n_examples = 58667, n_dim = 2748). Each example is the concatenation of a 2048-dimensional vector and a 700-dimensional vector.
But I'm getting an error message that I don't understand:
InvalidArgumentError: Dimension 0 in both shapes must be equal, but are 1 and 0. Shapes are [1] and [0]. for 'model_27/concatenate_28/concat' (op: 'ConcatV2') with input shapes: [1,100], [0,100], [] and with computed input tensors: input[2] = <1>.
Here are dummy inputs and imports to make it runnable:
from tensorflow.keras import models, layers, losses, metrics, optimizers
from tensorflow.keras.layers import Dense, Concatenate, Input, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from sklearn.model_selection import train_test_split
import numpy as np
fake_train = np.random.rand(10000,2748)
fake_test = np.random.randint(0,1,(10000,1))
x_train, x_dev, y_train, y_dev = train_test_split(fake_train, fake_test, test_size = 0.2)
My model is created with this function:
def build_model():
    """Build and compile the two-branch binary classifier.

    Each 2748-wide example is split into its first 2048 features and its
    remaining 700 features. Every part goes through its own stack of Dense
    layers; the two 100-unit outputs are then concatenated and reduced to a
    single sigmoid unit.

    ``recall_m`` and ``precision_m`` are custom metrics that must be defined
    elsewhere; they are not part of this snippet.

    Returns:
        The compiled Keras ``Model``.
    """
    input0 = Input(shape=(2748,))

    # Axis 0 of the tensor is the batch axis, so the features have to be
    # split along axis 1. Slicing with x[:2048] / x[2048:] would cut the
    # batch instead, leaving the branches with different batch sizes and
    # making the concatenation below fail.
    branch1 = Lambda(lambda x: x[:, :2048])(input0)
    branch1 = Dense(1000, activation='relu')(branch1)
    branch1 = Dense(100, activation='relu')(branch1)
    branch1 = Dense(100, activation='relu')(branch1)

    branch2 = Lambda(lambda x: x[:, 2048:])(input0)
    branch2 = Dense(1000, activation='relu')(branch2)
    branch2 = Dense(100, activation='relu')(branch2)
    branch2 = Dense(100, activation='relu')(branch2)

    # Join the two branches along the feature axis.
    out = layers.concatenate([branch1, branch2], axis=-1)
    out = Dense(10, activation='relu')(out)
    out = Dense(1, activation='sigmoid')(out)

    model = Model(inputs=input0, outputs=out)
    # `learning_rate` is the current argument name; `lr` is a deprecated alias.
    model.compile(optimizer=optimizers.Adam(learning_rate=0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy', recall_m, precision_m])
    return model
Here are parameters for cross validation for the dummy data:
# Settings for the k-fold cross-validation on the dummy data.
k = 3  # number of cross-validation folds
num_epochs = 4  # kept small because this is only a test run
batch_size = 1
And this is my cross-validation for the model, which started the error:
# Size of one validation fold. Samples left over by the integer division
# are never used for validation; they stay in the training part of every fold.
num_val_samples = len(x_train) // k

# Per-fold validation curves, one list entry per fold.
all_loss_histories = []
all_recall_histories = []
all_precision_histories = []

for i in range(k):
    val_start = i * num_val_samples
    val_end = (i + 1) * num_val_samples

    # Fold i is held out for validation ...
    val_data = x_train[val_start:val_end]
    val_targets = y_train[val_start:val_end]

    # ... and everything before and after it is used for training.
    partial_train_data = np.concatenate(
        [x_train[:val_start], x_train[val_end:]],
        axis=0)
    partial_train_targets = np.concatenate(
        [y_train[:val_start], y_train[val_end:]],
        axis=0)

    # Build a fresh model for each fold.
    model = build_model()
    history = model.fit(partial_train_data,
                        partial_train_targets,
                        epochs=num_epochs,
                        batch_size=batch_size,
                        verbose=1,
                        validation_data=(val_data, val_targets),
                        use_multiprocessing=False)
    print(f'Finished training fold {i + 1}')

    # Keep the validation curves of this fold.
    all_loss_histories.append(history.history['val_loss'])
    all_recall_histories.append(history.history['val_recall_m'])
    all_precision_histories.append(history.history['val_precision_m'])
Any idea why there is an error?
Using python3.7 and tf 2.0 on a MacBook Pro 2018 (on OSX, not on a linux VM)
Thanks!
The complete error:
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
1609 try:
-> 1610 c_op = c_api.TF_FinishOperation(op_desc)
1611 except errors.InvalidArgumentError as e:
InvalidArgumentError: Dimension 0 in both shapes must be equal, but are 1 and 0. Shapes are [1] and [0]. for 'model_33/concatenate_34/concat' (op: 'ConcatV2') with input shapes: [1,100], [0,100], [] and with computed input tensors: input[2] = <1>.
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<timed exec> in <module>
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
726 max_queue_size=max_queue_size,
727 workers=workers,
--> 728 use_multiprocessing=use_multiprocessing)
729
730 def evaluate(self,
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
322 mode=ModeKeys.TRAIN,
323 training_context=training_context,
--> 324 total_epochs=epochs)
325 cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
326
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
121 step=step, mode=mode, size=current_batch_size) as batch_logs:
122 try:
--> 123 batch_outs = execution_function(iterator)
124 except (StopIteration, errors.OutOfRangeError):
125 # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in execution_function(input_fn)
84 # `numpy` translates Tensors to values in Eager mode.
85 return nest.map_structure(_non_none_constant_value,
---> 86 distributed_function(input_fn))
87
88 return execution_function
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in __call__(self, *args, **kwds)
455
456 tracing_count = self._get_tracing_count()
--> 457 result = self._call(*args, **kwds)
458 if tracing_count == self._get_tracing_count():
459 self._call_counter.called_without_tracing()
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in _call(self, *args, **kwds)
501 # This is the first call of __call__, so we have to initialize.
502 initializer_map = object_identity.ObjectIdentityDictionary()
--> 503 self._initialize(args, kwds, add_initializers_to=initializer_map)
504 finally:
505 # At this point we know that the initialization is complete (or less
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in _initialize(self, args, kwds, add_initializers_to)
406 self._concrete_stateful_fn = (
407 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 408 *args, **kwds))
409
410 def invalid_creator_scope(*unused_args, **unused_kwds):
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
1846 if self.input_signature:
1847 args, kwargs = None, None
-> 1848 graph_function, _, _ = self._maybe_define_function(args, kwargs)
1849 return graph_function
1850
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _maybe_define_function(self, args, kwargs)
2148 graph_function = self._function_cache.primary.get(cache_key, None)
2149 if graph_function is None:
-> 2150 graph_function = self._create_graph_function(args, kwargs)
2151 self._function_cache.primary[cache_key] = graph_function
2152 return graph_function, args, kwargs
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
2039 arg_names=arg_names,
2040 override_flat_arg_shapes=override_flat_arg_shapes,
-> 2041 capture_by_value=self._capture_by_value),
2042 self._function_attributes,
2043 # Tell the ConcreteFunction to clean up its graph once it goes out of
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
913 converted_func)
914
--> 915 func_outputs = python_func(*func_args, **func_kwargs)
916
917 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in wrapped_fn(*args, **kwds)
356 # __wrapped__ allows AutoGraph to swap in a converted function. We give
357 # the function a weak reference to itself to avoid a reference cycle.
--> 358 return weak_wrapped_fn().__wrapped__(*args, **kwds)
359 weak_wrapped_fn = weakref.ref(wrapped_fn)
360
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in distributed_function(input_iterator)
71 strategy = distribution_strategy_context.get_strategy()
72 outputs = strategy.experimental_run_v2(
---> 73 per_replica_function, args=(model, x, y, sample_weights))
74 # Out of PerReplica outputs reduce or pick values to return.
75 all_outputs = dist_utils.unwrap_output_dict(
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/distribute/distribute_lib.py in experimental_run_v2(self, fn, args, kwargs)
758 fn = autograph.tf_convert(fn, ag_ctx.control_status_ctx(),
759 convert_by_default=False)
--> 760 return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
761
762 def reduce(self, reduce_op, value, axis):
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/distribute/distribute_lib.py in call_for_each_replica(self, fn, args, kwargs)
1785 kwargs = {}
1786 with self._container_strategy().scope():
-> 1787 return self._call_for_each_replica(fn, args, kwargs)
1788
1789 def _call_for_each_replica(self, fn, args, kwargs):
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/distribute/distribute_lib.py in _call_for_each_replica(self, fn, args, kwargs)
2130 self._container_strategy(),
2131 replica_id_in_sync_group=constant_op.constant(0, dtypes.int32)):
-> 2132 return fn(*args, **kwargs)
2133
2134 def _reduce_to(self, reduce_op, value, destinations):
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/autograph/impl/api.py in wrapper(*args, **kwargs)
290 def wrapper(*args, **kwargs):
291 with ag_ctx.ControlStatusCtx(status=ag_ctx.Status.DISABLED):
--> 292 return func(*args, **kwargs)
293
294 if inspect.isfunction(func) or inspect.ismethod(func):
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in train_on_batch(model, x, y, sample_weight, class_weight, reset_metrics)
262 y,
263 sample_weights=sample_weights,
--> 264 output_loss_metrics=model._output_loss_metrics)
265
266 if reset_metrics:
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_eager.py in train_on_batch(model, inputs, targets, sample_weights, output_loss_metrics)
309 sample_weights=sample_weights,
310 training=True,
--> 311 output_loss_metrics=output_loss_metrics))
312 if not isinstance(outs, list):
313 outs = [outs]
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_eager.py in _process_single_batch(model, inputs, targets, output_loss_metrics, sample_weights, training)
250 output_loss_metrics=output_loss_metrics,
251 sample_weights=sample_weights,
--> 252 training=training))
253 if total_loss is None:
254 raise ValueError('The model cannot be run '
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_eager.py in _model_loss(model, inputs, targets, output_loss_metrics, sample_weights, training)
125 inputs = nest.map_structure(ops.convert_to_tensor, inputs)
126
--> 127 outs = model(inputs, **kwargs)
128 outs = nest.flatten(outs)
129
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
845 outputs = base_layer_utils.mark_as_return(outputs, acd)
846 else:
--> 847 outputs = call_fn(cast_inputs, *args, **kwargs)
848
849 except errors.OperatorNotAllowedInGraphError as e:
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/network.py in call(self, inputs, training, mask)
706 return self._run_internal_graph(
707 inputs, training=training, mask=mask,
--> 708 convert_kwargs_to_constants=base_layer_utils.call_context().saving)
709
710 def compute_output_shape(self, input_shape):
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/network.py in _run_internal_graph(self, inputs, training, mask, convert_kwargs_to_constants)
858
859 # Compute outputs.
--> 860 output_tensors = layer(computed_tensors, **kwargs)
861
862 # Update tensor_dict.
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
845 outputs = base_layer_utils.mark_as_return(outputs, acd)
846 else:
--> 847 outputs = call_fn(cast_inputs, *args, **kwargs)
848
849 except errors.OperatorNotAllowedInGraphError as e:
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/layers/merge.py in call(self, inputs)
180 return y
181 else:
--> 182 return self._merge_function(inputs)
183
184 #tf_utils.shape_type_conversion
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/layers/merge.py in _merge_function(self, inputs)
392
393 def _merge_function(self, inputs):
--> 394 return K.concatenate(inputs, axis=self.axis)
395
396 #tf_utils.shape_type_conversion
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/backend.py in concatenate(tensors, axis)
2706 return sparse_ops.sparse_concat(axis, tensors)
2707 else:
-> 2708 return array_ops.concat([to_dense(x) for x in tensors], axis)
2709
2710
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/util/dispatch.py in wrapper(*args, **kwargs)
178 """Call target, and fall back on dispatchers if there is a TypeError."""
179 try:
--> 180 return target(*args, **kwargs)
181 except (TypeError, ValueError):
182 # Note: convert_to_eager_tensor currently raises a ValueError, not a
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/ops/array_ops.py in concat(values, axis, name)
1429 dtype=dtypes.int32).get_shape().assert_has_rank(0)
1430 return identity(values[0], name=name)
-> 1431 return gen_array_ops.concat_v2(values=values, axis=axis, name=name)
1432
1433
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/ops/gen_array_ops.py in concat_v2(values, axis, name)
1255 _attr_N = len(values)
1256 _, _, _op = _op_def_lib._apply_op_helper(
-> 1257 "ConcatV2", values=values, axis=axis, name=name)
1258 _result = _op.outputs[:]
1259 _inputs_flat = _op.inputs
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
791 op = g.create_op(op_type_name, inputs, dtypes=None, name=scope,
792 input_types=input_types, attrs=attr_protos,
--> 793 op_def=op_def)
794 return output_structure, op_def.is_stateful, op
795
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/func_graph.py in create_op(***failed resolving arguments***)
546 return super(FuncGraph, self)._create_op_internal( # pylint: disable=protected-access
547 op_type, inputs, dtypes, input_types, name, attrs, op_def,
--> 548 compute_device)
549
550 def capture(self, tensor, name=None):
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py in _create_op_internal(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_device)
3427 input_types=input_types,
3428 original_op=self._default_original_op,
-> 3429 op_def=op_def)
3430 self._create_op_helper(ret, compute_device=compute_device)
3431 return ret
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py in __init__(self, node_def, g, inputs, output_types, control_inputs, input_types, original_op, op_def)
1771 op_def, inputs, node_def.attr)
1772 self._c_op = _create_c_op(self._graph, node_def, grouped_inputs,
-> 1773 control_input_ops)
1774 # pylint: enable=protected-access
1775
~/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
1611 except errors.InvalidArgumentError as e:
1612 # Convert to ValueError for backwards compatibility.
-> 1613 raise ValueError(str(e))
1614
1615 return c_op
ValueError: Dimension 0 in both shapes must be equal, but are 1 and 0. Shapes are [1] and [0]. for 'model_33/concatenate_34/concat' (op: 'ConcatV2') with input shapes: [1,100], [0,100], [] and with computed input tensors: input[2] = <1>.
I believe you want
Lambda(lambda x:x[:, :2048])(input0)
and
Lambda(lambda x:x[:,2048:])(input0)
since there is a batch axis which should be preserved. Your current code is splitting around 2048 along the batch axis, which results in one input to concatenate that has batch size 0, and one input that has batch size 1. Since the batch sizes don't match, they can't be concatenated.
Making this correction allows me to run your code without the error.
While reading the Google TensorFlow 2.0 tutorials, I ran into a surprising error when I tried to run one of them in my local Jupyter notebook. Strangely, the same code runs fine in Google Colab! The tutorial is this
My computer's GPU is a GTX 1060 6G and it has 16 GB of memory, so I think it should be able to run this tutorial.
When I run it in Jupyter it fails with an error, but it runs without problems in Google Colab!
You can see the code that produces the error below, or go to the web page:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_datasets as tfds
# tfds.disable_progress_bar()
# Load the IMDB reviews dataset ('subwords8k' config) as (text, label) pairs,
# together with the dataset info that carries the text encoder.
(train_data, test_data), info = tfds.load(
    'imdb_reviews/subwords8k',
    split=(tfds.Split.TRAIN, tfds.Split.TEST),
    with_info=True,
    as_supervised=True,
)
encoder = info.features['text'].encoder

# Reviews have different lengths, so each batch of 10 is padded to a common
# length; the labels are scalars and need no padding.
padded_shapes = ([None], ())
train_batches = train_data.shuffle(1000).padded_batch(
    10, padded_shapes=padded_shapes)
test_batches = test_data.shuffle(1000).padded_batch(
    10, padded_shapes=padded_shapes)

embedding_dim = 16

# Embedding -> bidirectional LSTM -> single sigmoid unit.
model = keras.Sequential([
    layers.Embedding(encoder.vocab_size, embedding_dim, mask_zero=True),
    layers.Bidirectional(layers.LSTM(32)),
    layers.Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Only 20 validation batches are evaluated per epoch.
history = model.fit(
    train_batches,
    epochs=10,
    validation_data=test_batches,
    validation_steps=20,
    verbose=2,
)
This is the first time I have seen this error and I don't know how to fix it. The same code runs fine in Google Colab, and I don't know why it fails on my machine.
The error is as follows:
Epoch 1/10
---------------------------------------------------------------------------
CancelledError Traceback (most recent call last)
<ipython-input-2-8f27353fef79> in <module>
31 train_batches,
32 epochs=10,
---> 33 validation_data=test_batches, validation_steps=20,verbose=2)
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
726 max_queue_size=max_queue_size,
727 workers=workers,
--> 728 use_multiprocessing=use_multiprocessing)
729
730 def evaluate(self,
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
322 mode=ModeKeys.TRAIN,
323 training_context=training_context,
--> 324 total_epochs=epochs)
325 cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
326
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
121 step=step, mode=mode, size=current_batch_size) as batch_logs:
122 try:
--> 123 batch_outs = execution_function(iterator)
124 except (StopIteration, errors.OutOfRangeError):
125 # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in execution_function(input_fn)
84 # `numpy` translates Tensors to values in Eager mode.
85 return nest.map_structure(_non_none_constant_value,
---> 86 distributed_function(input_fn))
87
88 return execution_function
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\tensorflow_core\python\eager\def_function.py in __call__(self, *args, **kwds)
455
456 tracing_count = self._get_tracing_count()
--> 457 result = self._call(*args, **kwds)
458 if tracing_count == self._get_tracing_count():
459 self._call_counter.called_without_tracing()
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\tensorflow_core\python\eager\def_function.py in _call(self, *args, **kwds)
485 # In this case we have created variables on the first call, so we run the
486 # defunned version which is guaranteed to never create variables.
--> 487 return self._stateless_fn(*args, **kwds) # pylint: disable=not-callable
488 elif self._stateful_fn is not None:
489 # Release the lock early so that multiple threads can perform the call
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\tensorflow_core\python\eager\function.py in __call__(self, *args, **kwargs)
1821 """Calls a graph function specialized to the inputs."""
1822 graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
-> 1823 return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
1824
1825 #property
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\tensorflow_core\python\eager\function.py in _filtered_call(self, args, kwargs)
1139 if isinstance(t, (ops.Tensor,
1140 resource_variable_ops.BaseResourceVariable))),
-> 1141 self.captured_inputs)
1142
1143 def _call_flat(self, args, captured_inputs, cancellation_manager=None):
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\tensorflow_core\python\eager\function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1222 if executing_eagerly:
1223 flat_outputs = forward_function.call(
-> 1224 ctx, args, cancellation_manager=cancellation_manager)
1225 else:
1226 gradient_name = self._delayed_rewrite_functions.register()
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\tensorflow_core\python\eager\function.py in call(self, ctx, args, cancellation_manager)
509 inputs=args,
510 attrs=("executor_type", executor_type, "config_proto", config),
--> 511 ctx=ctx)
512 else:
513 outputs = execute.execute_with_cancellation(
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\tensorflow_core\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
65 else:
66 message = e.message
---> 67 six.raise_from(core._status_to_exception(e.code, message), None)
68 except TypeError as e:
69 keras_symbolic_tensors = [
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\six.py in raise_from(value, from_value)
CancelledError: [_Derived_]RecvAsync is cancelled.
[[{{node Reshape_11/_38}}]] [Op:__inference_distributed_function_16087]
Function call stack:
distributed_function
Thanks to anyone who can help me!
Try to reduce the batch_size, it should work.