I'm working on a simple MLP model. The input shapes for model fitting are as follows:
fea_train_np.shape = (6000, 1, 15, 21, 512)
fea_val_np.shape = (1500, 1, 15, 21, 512)
y_train_np.shape = (6000, 2)
y_val_np.shape = (1500, 2)
And here is the MLP I'm working on. The last layer uses a linear activation because I want to do regression instead of classification.
mlp1 = keras.Sequential(
    [
        layers.Flatten(),
        layers.Dense(256, activation='relu'),  # fully-connected hidden layer with 256 units and ReLU activation
        layers.Dense(10, activation='linear')
    ],
)
mlp1.compile(optimizer=optimizers.Adam(learning_rate=0.001),
             loss=keras.losses.MeanSquaredError(),
             metrics=[keras.metrics.MeanSquaredError()])
mlp = mlp1.fit(fea_train_np, y_train_np, epochs=20, batch_size=8, validation_data=(fea_val_np, y_val_np))
result = mlp.predict(fea_val_np, y_val_np)
And I got this error when I was trying to fit my model:
Train on 6000 samples, validate on 1500 samples
Epoch 1/20
8/6000 [..............................] - ETA: 12s
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\framework\ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
1618 try:
-> 1619 c_op = c_api.TF_FinishOperation(op_desc)
1620 except errors.InvalidArgumentError as e:
InvalidArgumentError: Dimensions must be equal, but are 10 and 2 for 'loss/output_1_loss/SquaredDifference' (op: 'SquaredDifference') with input shapes: [8,10], [8,2].
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-32-37335a6a8cd3> in <module>
11 metrics = [keras.metrics.MeanSquaredError()])
12
---> 13 mlp = mlp1.fit(fea_train_np, y_train_np, epochs=20, batch_size=8, validation_data=(fea_val_np, y_val_np))
14 result = mlp.predict(fea_val_np, y_val_np)
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
817 max_queue_size=max_queue_size,
818 workers=workers,
--> 819 use_multiprocessing=use_multiprocessing)
820
821 def evaluate(self,
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
340 mode=ModeKeys.TRAIN,
341 training_context=training_context,
--> 342 total_epochs=epochs)
343 cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
344
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
126 step=step, mode=mode, size=current_batch_size) as batch_logs:
127 try:
--> 128 batch_outs = execution_function(iterator)
129 except (StopIteration, errors.OutOfRangeError):
130 # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in execution_function(input_fn)
96 # `numpy` translates Tensors to values in Eager mode.
97 return nest.map_structure(_non_none_constant_value,
---> 98 distributed_function(input_fn))
99
100 return execution_function
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\eager\def_function.py in __call__(self, *args, **kwds)
566 xla_context.Exit()
567 else:
--> 568 result = self._call(*args, **kwds)
569
570 if tracing_count == self._get_tracing_count():
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\eager\def_function.py in _call(self, *args, **kwds)
613 # This is the first call of __call__, so we have to initialize.
614 initializers = []
--> 615 self._initialize(args, kwds, add_initializers_to=initializers)
616 finally:
617 # At this point we know that the initialization is complete (or less
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\eager\def_function.py in _initialize(self, args, kwds, add_initializers_to)
495 self._concrete_stateful_fn = (
496 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 497 *args, **kwds))
498
499 def invalid_creator_scope(*unused_args, **unused_kwds):
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\eager\function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
2387 args, kwargs = None, None
2388 with self._lock:
-> 2389 graph_function, _, _ = self._maybe_define_function(args, kwargs)
2390 return graph_function
2391
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\eager\function.py in _maybe_define_function(self, args, kwargs)
2701
2702 self._function_cache.missed.add(call_context_key)
-> 2703 graph_function = self._create_graph_function(args, kwargs)
2704 self._function_cache.primary[cache_key] = graph_function
2705 return graph_function, args, kwargs
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\eager\function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
2591 arg_names=arg_names,
2592 override_flat_arg_shapes=override_flat_arg_shapes,
-> 2593 capture_by_value=self._capture_by_value),
2594 self._function_attributes,
2595 # Tell the ConcreteFunction to clean up its graph once it goes out of
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\framework\func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
976 converted_func)
977
--> 978 func_outputs = python_func(*func_args, **func_kwargs)
979
980 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\eager\def_function.py in wrapped_fn(*args, **kwds)
437 # __wrapped__ allows AutoGraph to swap in a converted function. We give
438 # the function a weak reference to itself to avoid a reference cycle.
--> 439 return weak_wrapped_fn().__wrapped__(*args, **kwds)
440 weak_wrapped_fn = weakref.ref(wrapped_fn)
441
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in distributed_function(input_iterator)
83 args = _prepare_feed_values(model, input_iterator, mode, strategy)
84 outputs = strategy.experimental_run_v2(
---> 85 per_replica_function, args=args)
86 # Out of PerReplica outputs reduce or pick values to return.
87 all_outputs = dist_utils.unwrap_output_dict(
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py in experimental_run_v2(self, fn, args, kwargs)
761 fn = autograph.tf_convert(fn, ag_ctx.control_status_ctx(),
762 convert_by_default=False)
--> 763 return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
764
765 def reduce(self, reduce_op, value, axis):
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py in call_for_each_replica(self, fn, args, kwargs)
1817 kwargs = {}
1818 with self._container_strategy().scope():
-> 1819 return self._call_for_each_replica(fn, args, kwargs)
1820
1821 def _call_for_each_replica(self, fn, args, kwargs):
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py in _call_for_each_replica(self, fn, args, kwargs)
2162 self._container_strategy(),
2163 replica_id_in_sync_group=constant_op.constant(0, dtypes.int32)):
-> 2164 return fn(*args, **kwargs)
2165
2166 def _reduce_to(self, reduce_op, value, destinations):
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\autograph\impl\api.py in wrapper(*args, **kwargs)
290 def wrapper(*args, **kwargs):
291 with ag_ctx.ControlStatusCtx(status=ag_ctx.Status.DISABLED):
--> 292 return func(*args, **kwargs)
293
294 if inspect.isfunction(func) or inspect.ismethod(func):
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in train_on_batch(model, x, y, sample_weight, class_weight, reset_metrics, standalone)
431 y,
432 sample_weights=sample_weights,
--> 433 output_loss_metrics=model._output_loss_metrics)
434
435 if reset_metrics:
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in train_on_batch(model, inputs, targets, sample_weights, output_loss_metrics)
310 sample_weights=sample_weights,
311 training=True,
--> 312 output_loss_metrics=output_loss_metrics))
313 if not isinstance(outs, list):
314 outs = [outs]
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in _process_single_batch(model, inputs, targets, output_loss_metrics, sample_weights, training)
251 output_loss_metrics=output_loss_metrics,
252 sample_weights=sample_weights,
--> 253 training=training))
254 if total_loss is None:
255 raise ValueError('The model cannot be run '
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in _model_loss(model, inputs, targets, output_loss_metrics, sample_weights, training)
165
166 if hasattr(loss_fn, 'reduction'):
--> 167 per_sample_losses = loss_fn.call(targets[i], outs[i])
168 weighted_losses = losses_utils.compute_weighted_loss(
169 per_sample_losses,
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\keras\losses.py in call(self, y_true, y_pred)
219 y_pred, y_true = tf_losses_util.squeeze_or_expand_dimensions(
220 y_pred, y_true)
--> 221 return self.fn(y_true, y_pred, **self._fn_kwargs)
222
223 def get_config(self):
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\keras\losses.py in mean_squared_error(y_true, y_pred)
769 y_pred = ops.convert_to_tensor(y_pred)
770 y_true = math_ops.cast(y_true, y_pred.dtype)
--> 771 return K.mean(math_ops.squared_difference(y_pred, y_true), axis=-1)
772
773
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\ops\gen_math_ops.py in squared_difference(x, y, name)
10037 try:
10038 _, _, _op, _outputs = _op_def_library._apply_op_helper(
> 10039 "SquaredDifference", x=x, y=y, name=name)
10040 except (TypeError, ValueError):
10041 result = _dispatch.dispatch(
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\framework\op_def_library.py in _apply_op_helper(op_type_name, name, **keywords)
740 op = g._create_op_internal(op_type_name, inputs, dtypes=None,
741 name=scope, input_types=input_types,
--> 742 attrs=attr_protos, op_def=op_def)
743
744 # `outputs` is returned as a separate return value so that the output
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\framework\func_graph.py in _create_op_internal(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_device)
593 return super(FuncGraph, self)._create_op_internal( # pylint: disable=protected-access
594 op_type, inputs, dtypes, input_types, name, attrs, op_def,
--> 595 compute_device)
596
597 def capture(self, tensor, name=None, shape=None):
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\framework\ops.py in _create_op_internal(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_device)
3320 input_types=input_types,
3321 original_op=self._default_original_op,
-> 3322 op_def=op_def)
3323 self._create_op_helper(ret, compute_device=compute_device)
3324 return ret
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\framework\ops.py in __init__(self, node_def, g, inputs, output_types, control_inputs, input_types, original_op, op_def)
1784 op_def, inputs, node_def.attr)
1785 self._c_op = _create_c_op(self._graph, node_def, grouped_inputs,
-> 1786 control_input_ops)
1787 name = compat.as_str(node_def.name)
1788 # pylint: enable=protected-access
C:\ForHDD\Anaconda\envs\CV\lib\site-packages\tensorflow_core\python\framework\ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
1620 except errors.InvalidArgumentError as e:
1621 # Convert to ValueError for backwards compatibility.
-> 1622 raise ValueError(str(e))
1623
1624 return c_op
ValueError: Dimensions must be equal, but are 10 and 2 for 'loss/output_1_loss/SquaredDifference' (op: 'SquaredDifference') with input shapes: [8,10], [8,2].
I tried changing loss = keras.losses.MeanSquaredError() to loss = [keras.losses.MeanSquaredError()], but the error stays the same.
Can someone tell me what I did wrong here? Any suggestions would be appreciated.
I think the problem doesn't have to do with the loss function you use but with the dimensions of your data.
y_val_np has 2 values per sample (shape[1] == 2), but the last layer of mlp1 returns a 10-dimensional output.
If that is what you need to do, I believe changing the number of units in the last layer of mlp1 from 10 to 2 will solve the problem.
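For reference, here is a minimal sketch of that change (assuming, as the shapes above suggest, that each sample really has a 2-value regression target):

mlp1 = keras.Sequential(
    [
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        # The output size must match y_train_np.shape[1] == 2 for the MSE loss
        layers.Dense(2, activation='linear')
    ],
)

As a side note, fit returns a History object rather than the model, and predict takes only the inputs, so the last line should be result = mlp1.predict(fea_val_np).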
This question already has answers here:
Failed to get convolution algorithm. This is probably because cuDNN failed to initialize (30 answers)
Closed 2 years ago.
Here is my code:
model = Sequential()
# CONVOLUTIONAL LAYER
model.add(Conv2D(filters=32, kernel_size=(4,4), input_shape=(28, 28, 1), activation='relu'))
# POOLING LAYER
model.add(MaxPool2D(pool_size=(2, 2)))
# FLATTEN IMAGES FROM 28 x 28 TO 784 BEFORE THE FINAL LAYERS
model.add(Flatten())
# 128 NEURONS IN DENSE HIDDEN LAYER (YOU CAN CHANGE THIS NUMBER OF NEURONS)
model.add(Dense(128, activation='relu'))
# LAST LAYER IS THE CLASSIFIER, THUS 10 POSSIBLE CLASSES
model.add(Dense(10, activation='softmax'))
# we can add additional metrics: https://keras.io/metrics/
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.fit(x_train, y_cat_train, epochs=10, validation_data=(x_test, y_cat_test))
And I get the following when I run the model.fit line:
UnknownError Traceback (most recent call last)
<ipython-input-37-0eafbb732ade> in <module>
----> 1 model.fit(x_train,y_cat_train,epochs=10,validation_data=(x_test,y_cat_test))
~\anaconda3\envs\GPU\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
817 max_queue_size=max_queue_size,
818 workers=workers,
--> 819 use_multiprocessing=use_multiprocessing)
820
821 def evaluate(self,
~\anaconda3\envs\GPU\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
340 mode=ModeKeys.TRAIN,
341 training_context=training_context,
--> 342 total_epochs=epochs)
343 cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
344
~\anaconda3\envs\GPU\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
126 step=step, mode=mode, size=current_batch_size) as batch_logs:
127 try:
--> 128 batch_outs = execution_function(iterator)
129 except (StopIteration, errors.OutOfRangeError):
130 # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?
~\anaconda3\envs\GPU\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in execution_function(input_fn)
96 # `numpy` translates Tensors to values in Eager mode.
97 return nest.map_structure(_non_none_constant_value,
---> 98 distributed_function(input_fn))
99
100 return execution_function
~\anaconda3\envs\GPU\lib\site-packages\tensorflow_core\python\eager\def_function.py in __call__(self, *args, **kwds)
566 xla_context.Exit()
567 else:
--> 568 result = self._call(*args, **kwds)
569
570 if tracing_count == self._get_tracing_count():
~\anaconda3\envs\GPU\lib\site-packages\tensorflow_core\python\eager\def_function.py in _call(self, *args, **kwds)
597 # In this case we have created variables on the first call, so we run the
598 # defunned version which is guaranteed to never create variables.
--> 599 return self._stateless_fn(*args, **kwds) # pylint: disable=not-callable
600 elif self._stateful_fn is not None:
601 # Release the lock early so that multiple threads can perform the call
~\anaconda3\envs\GPU\lib\site-packages\tensorflow_core\python\eager\function.py in __call__(self, *args, **kwargs)
2361 with self._lock:
2362 graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
-> 2363 return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
2364
2365 #property
~\anaconda3\envs\GPU\lib\site-packages\tensorflow_core\python\eager\function.py in _filtered_call(self, args, kwargs)
1609 if isinstance(t, (ops.Tensor,
1610 resource_variable_ops.BaseResourceVariable))),
-> 1611 self.captured_inputs)
1612
1613 def _call_flat(self, args, captured_inputs, cancellation_manager=None):
~\anaconda3\envs\GPU\lib\site-packages\tensorflow_core\python\eager\function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1690 # No tape is watching; skip to running the function.
1691 return self._build_call_outputs(self._inference_function.call(
-> 1692 ctx, args, cancellation_manager=cancellation_manager))
1693 forward_backward = self._select_forward_and_backward_functions(
1694 args,
~\anaconda3\envs\GPU\lib\site-packages\tensorflow_core\python\eager\function.py in call(self, ctx, args, cancellation_manager)
543 inputs=args,
544 attrs=("executor_type", executor_type, "config_proto", config),
--> 545 ctx=ctx)
546 else:
547 outputs = execute.execute_with_cancellation(
~\anaconda3\envs\GPU\lib\site-packages\tensorflow_core\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
65 else:
66 message = e.message
---> 67 six.raise_from(core._status_to_exception(e.code, message), None)
68 except TypeError as e:
69 keras_symbolic_tensors = [
~\anaconda3\envs\GPU\lib\site-packages\six.py in raise_from(value, from_value)
UnknownError: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
[[node sequential/conv2d/Conv2D (defined at <ipython-input-36-6ec5ba2e7ade>:1) ]] [Op:__inference_distributed_function_788]
Function call stack:
distributed_function
I would just like to clarify that I have run this CNN before on regular TensorFlow without issues, and this newly installed tensorflow-gpu works on ANNs and RNNs. Additionally, here are my specs and installed versions:
Tensorflow 2.1
Keras 2.3.1
cuDNN 7.6.5
cudatoolkit 10.1.243
GPU: Nvidia 2070
Let me know if any other information is necessary, thank you!
I have had this error happen periodically. I'm not sure of the exact cause, but it happens when I have too many instances of Jupyter notebook or Python running TensorFlow at the same time. If you are on Windows, open Task Manager and check how many instances of notebook.exe or python are running, shut them all down, and then reload the notebook. This always fixes the problem for me.
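If shutting down the other processes doesn't help, enabling GPU memory growth often avoids this particular error as well. This is a common TensorFlow 2.x workaround, not something the original setup requires; run it once before building the model:

import tensorflow as tf

# Allocate GPU memory on demand instead of reserving the whole card at startup.
# This often prevents "Failed to get convolution algorithm" when another
# process (e.g. a second notebook) already holds most of the GPU memory.
for gpu in tf.config.experimental.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)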
Closed. This question is not reproducible or was caused by typos. It is not currently accepting answers.
This question was caused by a typo or a problem that can no longer be reproduced. While similar questions may be on-topic here, this one was resolved in a way less likely to help future readers.
Closed 2 years ago.
I'm sure this code doesn't have any problems.
I got this code from my professor, and I have verified that it works.
But for some reason, it doesn't work on my system (Anaconda).
history = model.fit(train_data, train_labels, epochs=500, validation_split=0.2, callbacks=[early_stop])
Train on 323 samples, validate on 81 samples
Epoch 1/500
32/323 [=>............................] - ETA: 5sWARNING:tensorflow:Early stopping conditioned on metric `val_loss` which is not available. Available metrics are:
---------------------------------------------------------------------------
InternalError Traceback (most recent call last)
<ipython-input-15-b496e4002de6> in <module>
----> 1 history = model.fit(train_data, train_labels, epochs=500, validation_split=0.2, callbacks=[early_stop])
~\anaconda3\envs\AINN\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
817 max_queue_size=max_queue_size,
818 workers=workers,
--> 819 use_multiprocessing=use_multiprocessing)
820
821 def evaluate(self,
~\anaconda3\envs\AINN\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
340 mode=ModeKeys.TRAIN,
341 training_context=training_context,
--> 342 total_epochs=epochs)
343 cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
344
~\anaconda3\envs\AINN\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
126 step=step, mode=mode, size=current_batch_size) as batch_logs:
127 try:
--> 128 batch_outs = execution_function(iterator)
129 except (StopIteration, errors.OutOfRangeError):
130 # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?
~\anaconda3\envs\AINN\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in execution_function(input_fn)
96 # `numpy` translates Tensors to values in Eager mode.
97 return nest.map_structure(_non_none_constant_value,
---> 98 distributed_function(input_fn))
99
100 return execution_function
~\anaconda3\envs\AINN\lib\site-packages\tensorflow_core\python\eager\def_function.py in __call__(self, *args, **kwds)
566 xla_context.Exit()
567 else:
--> 568 result = self._call(*args, **kwds)
569
570 if tracing_count == self._get_tracing_count():
~\anaconda3\envs\AINN\lib\site-packages\tensorflow_core\python\eager\def_function.py in _call(self, *args, **kwds)
630 # Lifting succeeded, so variables are initialized and we can run the
631 # stateless function.
--> 632 return self._stateless_fn(*args, **kwds)
633 else:
634 canon_args, canon_kwds = \
~\anaconda3\envs\AINN\lib\site-packages\tensorflow_core\python\eager\function.py in __call__(self, *args, **kwargs)
2361 with self._lock:
2362 graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
-> 2363 return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
2364
2365 #property
~\anaconda3\envs\AINN\lib\site-packages\tensorflow_core\python\eager\function.py in _filtered_call(self, args, kwargs)
1609 if isinstance(t, (ops.Tensor,
1610 resource_variable_ops.BaseResourceVariable))),
-> 1611 self.captured_inputs)
1612
1613 def _call_flat(self, args, captured_inputs, cancellation_manager=None):
~\anaconda3\envs\AINN\lib\site-packages\tensorflow_core\python\eager\function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1690 # No tape is watching; skip to running the function.
1691 return self._build_call_outputs(self._inference_function.call(
-> 1692 ctx, args, cancellation_manager=cancellation_manager))
1693 forward_backward = self._select_forward_and_backward_functions(
1694 args,
~\anaconda3\envs\AINN\lib\site-packages\tensorflow_core\python\eager\function.py in call(self, ctx, args, cancellation_manager)
543 inputs=args,
544 attrs=("executor_type", executor_type, "config_proto", config),
--> 545 ctx=ctx)
546 else:
547 outputs = execute.execute_with_cancellation(
~\anaconda3\envs\AINN\lib\site-packages\tensorflow_core\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
65 else:
66 message = e.message
---> 67 six.raise_from(core._status_to_exception(e.code, message), None)
68 except TypeError as e:
69 keras_symbolic_tensors = [
~\anaconda3\envs\AINN\lib\site-packages\six.py in raise_from(value, from_value)
InternalError: Blas GEMM launch failed : a.shape=(32, 13), b.shape=(13, 64), m=32, n=64, k=13
[[node sequential/dense/MatMul (defined at <ipython-input-15-b496e4002de6>:1) ]] [Op:__inference_distributed_function_747]
Function call stack:
distributed_function
Any ideas on how to solve this problem?
Thank you.
That error often occurs (at least on my machine) when there are memory issues during training on the GPU, as jakub already mentioned in the comments. You might want to restart your kernel and try again. If that does not help, try to decrease the batch size. Also check your GPU memory usage to see whether the GPU has any memory left (which would confirm this).
Your professor's machine might have more GPU memory than yours; unfortunately, TensorFlow does not always raise a proper out-of-memory exception.
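As a rough sketch of the memory angle (the 4096 MB cap below is only an illustrative value, not a recommendation from this answer):

import tensorflow as tf

# Limit TensorFlow to 4 GB of GPU memory so other processes keep some headroom;
# combine this with a smaller batch_size in model.fit if the error persists.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    tf.config.experimental.set_virtual_device_configuration(
        gpus[0],
        [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4096)])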
I created and trained a model to classify beer names from invoice strings encoded as integer sequences from characters.
batch_size = 512 # Batch size for training.
epochs = 5 # Number of epochs to train for.
model = Sequential()
model.add(Dense(512, activation='relu'))
model.add(Dropout(rate=0.2, noise_shape=None, seed=None))
model.add(Dense(512, activation='relu'))
model.add(Dropout(rate=0.2, noise_shape=None, seed=None))
model.add(Dense(train_beer['product_name'].nunique(), activation='softmax'))
optimizer = RMSprop(learning_rate=0.001)
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer=optimizer, metrics=['accuracy'])
model.fit(train_encoded, train_labels, epochs=epochs, batch_size=batch_size, validation_data=(test_encoded,test_labels))
Now I want to use the first two layers as pre-training for another model, so I remove the final layer, add a new one, and re-compile the model. (Note: for testing purposes I re-added the same layer.)
model.pop()
model.add(Dense(train_beer['product_name'].nunique(), activation='softmax'))
optimizer = RMSprop(learning_rate=0.001)
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(), optimizer=optimizer, metrics=['accuracy'])
batch_size = 512 # Batch size for training.
epochs = 5 # Number of epochs to train for.
model.fit(train_encoded, train_labels, epochs=epochs, batch_size=batch_size, validation_data=(test_encoded,test_labels))
but I get the error:
Train on 313213 samples, validate on 16323 samples
Epoch 1/5
 512/313213 [..............................] - ETA: 29s
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-13-e341e0cd9a82> in <module>
2 epochs = 5 # Number of epochs to train for.
3
----> 4 model.fit(train_encoded, train_labels, epochs=epochs, batch_size=batch_size, validation_data=(test_encoded,test_labels))
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
726 max_queue_size=max_queue_size,
727 workers=workers,
--> 728 use_multiprocessing=use_multiprocessing)
729
730 def evaluate(self,
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
322 mode=ModeKeys.TRAIN,
323 training_context=training_context,
--> 324 total_epochs=epochs)
325 cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
326
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
121 step=step, mode=mode, size=current_batch_size) as batch_logs:
122 try:
--> 123 batch_outs = execution_function(iterator)
124 except (StopIteration, errors.OutOfRangeError):
125 # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in execution_function(input_fn)
84 # `numpy` translates Tensors to values in Eager mode.
85 return nest.map_structure(_non_none_constant_value,
---> 86 distributed_function(input_fn))
87
88 return execution_function
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\eager\def_function.py in __call__(self, *args, **kwds)
455
456 tracing_count = self._get_tracing_count()
--> 457 result = self._call(*args, **kwds)
458 if tracing_count == self._get_tracing_count():
459 self._call_counter.called_without_tracing()
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\eager\def_function.py in _call(self, *args, **kwds)
501 # This is the first call of __call__, so we have to initialize.
502 initializer_map = object_identity.ObjectIdentityDictionary()
--> 503 self._initialize(args, kwds, add_initializers_to=initializer_map)
504 finally:
505 # At this point we know that the initialization is complete (or less
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\eager\def_function.py in _initialize(self, args, kwds, add_initializers_to)
406 self._concrete_stateful_fn = (
    407         self._stateful_fn._get_concrete_function_internal_garbage_collected(  # pylint: disable=protected-access
--> 408             *args, **kwds))
409
410 def invalid_creator_scope(*unused_args, **unused_kwds):
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\eager\function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
   1846     if self.input_signature:
   1847       args, kwargs = None, None
-> 1848     graph_function, _, _ = self._maybe_define_function(args, kwargs)
   1849     return graph_function
   1850
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\eager\function.py in _maybe_define_function(self, args, kwargs)
   2148     graph_function = self._function_cache.primary.get(cache_key, None)
   2149     if graph_function is None:
-> 2150       graph_function = self._create_graph_function(args, kwargs)
   2151       self._function_cache.primary[cache_key] = graph_function
   2152     return graph_function, args, kwargs
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\eager\function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
   2039             arg_names=arg_names,
   2040             override_flat_arg_shapes=override_flat_arg_shapes,
-> 2041             capture_by_value=self._capture_by_value),
   2042         self._function_attributes,
   2043         # Tell the ConcreteFunction to clean up its graph once it goes out of
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\framework\func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
913 converted_func)
914
--> 915 func_outputs = python_func(*func_args, **func_kwargs)
916
917 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\eager\def_function.py in wrapped_fn(*args, **kwds)
356 # __wrapped__ allows AutoGraph to swap in a converted function. We give
357 # the function a weak reference to itself to avoid a reference cycle.
--> 358 return weak_wrapped_fn().__wrapped__(*args, **kwds)
359 weak_wrapped_fn = weakref.ref(wrapped_fn)
360
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in distributed_function(input_iterator)
71 strategy = distribution_strategy_context.get_strategy()
72 outputs = strategy.experimental_run_v2(
---> 73 per_replica_function, args=(model, x, y, sample_weights))
74 # Out of PerReplica outputs reduce or pick values to return.
75 all_outputs = dist_utils.unwrap_output_dict(
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py in experimental_run_v2(self, fn, args, kwargs)
758 fn = autograph.tf_convert(fn, ag_ctx.control_status_ctx(),
759 convert_by_default=False)
--> 760 return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
761
762 def reduce(self, reduce_op, value, axis):
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py in call_for_each_replica(self, fn, args, kwargs)
   1785       kwargs = {}
   1786     with self._container_strategy().scope():
-> 1787       return self._call_for_each_replica(fn, args, kwargs)
   1788
   1789   def _call_for_each_replica(self, fn, args, kwargs):
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\distribute\distribute_lib.py in _call_for_each_replica(self, fn, args, kwargs)
   2130         self._container_strategy(),
   2131         replica_id_in_sync_group=constant_op.constant(0, dtypes.int32)):
-> 2132       return fn(*args, **kwargs)
   2133
   2134   def _reduce_to(self, reduce_op, value, destinations):
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\autograph\impl\api.py in wrapper(*args, **kwargs)
290 def wrapper(*args, **kwargs):
291 with ag_ctx.ControlStatusCtx(status=ag_ctx.Status.DISABLED):
--> 292 return func(*args, **kwargs)
293
294 if inspect.isfunction(func) or inspect.ismethod(func):
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in train_on_batch(model, x, y, sample_weight, class_weight, reset_metrics)
262 y,
263 sample_weights=sample_weights,
--> 264 output_loss_metrics=model._output_loss_metrics)
265
266 if reset_metrics:
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in train_on_batch(model, inputs, targets, sample_weights, output_loss_metrics)
309 sample_weights=sample_weights,
310 training=True,
--> 311 output_loss_metrics=output_loss_metrics))
312 if not isinstance(outs, list):
313 outs = [outs]
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in _process_single_batch(model, inputs, targets, output_loss_metrics, sample_weights, training)
250 output_loss_metrics=output_loss_metrics,
251 sample_weights=sample_weights,
--> 252 training=training))
253 if total_loss is None:
254 raise ValueError('The model cannot be run '
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in _model_loss(model, inputs, targets, output_loss_metrics, sample_weights, training)
125 inputs = nest.map_structure(ops.convert_to_tensor, inputs)
126
--> 127 outs = model(inputs, **kwargs)
128 outs = nest.flatten(outs)
129
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py in __call__(self, inputs, *args, **kwargs)
845 outputs = base_layer_utils.mark_as_return(outputs, acd)
846 else:
--> 847 outputs = call_fn(cast_inputs, *args, **kwargs)
848
849 except errors.OperatorNotAllowedInGraphError as e:
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\sequential.py in call(self, inputs, training, mask)
268 kwargs['training'] = training
269
--> 270 outputs = layer(inputs, **kwargs)
271
272 # `outputs` will be the inputs to the next layer.
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py in __call__(self, inputs, *args, **kwargs)
810 # are casted, not before.
811 input_spec.assert_input_compatibility(self.input_spec, inputs,
--> 812 self.name)
813 graph = backend.get_graph()
814 with graph.as_default(), backend.name_scope(self._name_scope()):
~\.conda\envs\fintech_ml\lib\site-packages\tensorflow_core\python\keras\engine\input_spec.py in assert_input_compatibility(input_spec, inputs, layer_name)
211 ' incompatible with the layer: expected axis ' + str(axis) +
212 ' of input shape to have value ' + str(value) +
--> 213 ' but received input with shape ' + str(shape))
214 # Check shape.
215 if spec.shape is not None:
ValueError: Input 0 of layer dense_3 is incompatible with the layer: expected axis -1 of input shape to have value 6022 but received input with shape [None, 512]
I can't really tell you why that is happening (I'd probably have to look into the source code); I suspect the layers get re-wired incorrectly somewhere down the line. But you can do the following to get this working:
# Rebuild the model, copying every layer except the old output layer,
# then attach a fresh softmax classifier head.
new_model = Sequential()
for l in model.layers[:-1]:
    new_model.add(l)
new_model.add(Dense(100, activation='softmax'))
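If the goal is to treat the first two layers as fixed pre-trained features, you could also freeze them while rebuilding; the trainable flag here is my addition, not part of the original fix:

new_model = Sequential()
for l in model.layers[:-1]:
    l.trainable = False  # keep the pre-trained weights fixed during fine-tuning
    new_model.add(l)
new_model.add(Dense(100, activation='softmax'))
# Re-compile so the trainable change takes effect before calling fit
new_model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
                  optimizer=RMSprop(learning_rate=0.001),
                  metrics=['accuracy'])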
While reading the Google TensorFlow 2.0 tutorials, I ran into a surprising error when trying the code in my Jupyter notebook. It is so strange: it runs fine in Google Colab! The tutorial is this one.
My computer's GPU is a GTX 1060 6 GB and it has 16 GB of memory, so I think my machine should be able to run this tutorial.
I tried to run it in Jupyter and it raised an error, but it runs fine on Google Colab.
You can see the code below, or go to the tutorial page:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_datasets as tfds
# tfds.disable_progress_bar()
(train_data, test_data), info = tfds.load(
    'imdb_reviews/subwords8k',
    split=(tfds.Split.TRAIN, tfds.Split.TEST),
    with_info=True, as_supervised=True)
encoder = info.features['text'].encoder
padded_shapes = ([None], ())
train_batches = train_data.shuffle(1000).padded_batch(10, padded_shapes=padded_shapes)
test_batches = test_data.shuffle(1000).padded_batch(10, padded_shapes=padded_shapes)
embedding_dim = 16
model = keras.Sequential([
    layers.Embedding(encoder.vocab_size, embedding_dim, mask_zero=True),
    layers.Bidirectional(tf.keras.layers.LSTM(32)),
    layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
history = model.fit(
    train_batches,
    epochs=10,
    validation_data=test_batches, validation_steps=20, verbose=2)
This is the first time I have met this error and I don't know how to fix it. It runs fine on Google Colab, so I don't know why it fails locally.
The error following:
Epoch 1/10
---------------------------------------------------------------------------
CancelledError Traceback (most recent call last)
<ipython-input-2-8f27353fef79> in <module>
31 train_batches,
32 epochs=10,
---> 33 validation_data=test_batches, validation_steps=20,verbose=2)
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
726 max_queue_size=max_queue_size,
727 workers=workers,
--> 728 use_multiprocessing=use_multiprocessing)
729
730 def evaluate(self,
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
322 mode=ModeKeys.TRAIN,
323 training_context=training_context,
--> 324 total_epochs=epochs)
325 cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
326
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
121 step=step, mode=mode, size=current_batch_size) as batch_logs:
122 try:
--> 123 batch_outs = execution_function(iterator)
124 except (StopIteration, errors.OutOfRangeError):
125 # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in execution_function(input_fn)
84 # `numpy` translates Tensors to values in Eager mode.
85 return nest.map_structure(_non_none_constant_value,
---> 86 distributed_function(input_fn))
87
88 return execution_function
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\tensorflow_core\python\eager\def_function.py in __call__(self, *args, **kwds)
455
456 tracing_count = self._get_tracing_count()
--> 457 result = self._call(*args, **kwds)
458 if tracing_count == self._get_tracing_count():
459 self._call_counter.called_without_tracing()
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\tensorflow_core\python\eager\def_function.py in _call(self, *args, **kwds)
485 # In this case we have created variables on the first call, so we run the
486 # defunned version which is guaranteed to never create variables.
--> 487 return self._stateless_fn(*args, **kwds) # pylint: disable=not-callable
488 elif self._stateful_fn is not None:
489 # Release the lock early so that multiple threads can perform the call
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\tensorflow_core\python\eager\function.py in __call__(self, *args, **kwargs)
1821 """Calls a graph function specialized to the inputs."""
1822 graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
-> 1823 return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
1824
1825 #property
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\tensorflow_core\python\eager\function.py in _filtered_call(self, args, kwargs)
1139 if isinstance(t, (ops.Tensor,
1140 resource_variable_ops.BaseResourceVariable))),
-> 1141 self.captured_inputs)
1142
1143 def _call_flat(self, args, captured_inputs, cancellation_manager=None):
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\tensorflow_core\python\eager\function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
1222 if executing_eagerly:
1223 flat_outputs = forward_function.call(
-> 1224 ctx, args, cancellation_manager=cancellation_manager)
1225 else:
1226 gradient_name = self._delayed_rewrite_functions.register()
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\tensorflow_core\python\eager\function.py in call(self, ctx, args, cancellation_manager)
509 inputs=args,
510 attrs=("executor_type", executor_type, "config_proto", config),
--> 511 ctx=ctx)
512 else:
513 outputs = execute.execute_with_cancellation(
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\tensorflow_core\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
65 else:
66 message = e.message
---> 67 six.raise_from(core._status_to_exception(e.code, message), None)
68 except TypeError as e:
69 keras_symbolic_tensors = [
c:\users\sha\anaconda3\envs\tensorflow2\lib\site-packages\six.py in raise_from(value, from_value)
CancelledError: [_Derived_]RecvAsync is cancelled.
[[{{node Reshape_11/_38}}]] [Op:__inference_distributed_function_16087]
Function call stack:
distributed_function
Thanks to anyone who can help me!
Try reducing the batch_size; it should work.
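For example, halving the padded batch size in the input pipeline above (5 is just an illustrative value; pick the largest size that fits in your GPU's 6 GB):

# Smaller batches reduce the memory the bidirectional LSTM needs per step
train_batches = train_data.shuffle(1000).padded_batch(5, padded_shapes=padded_shapes)
test_batches = test_data.shuffle(1000).padded_batch(5, padded_shapes=padded_shapes)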