Model.fit tensorflow Issue - python

# Train for 30 epochs in mini-batches of 128 samples.
# NOTE(review): the traceback below fails in 'sequential/Cast' while the model
# input is cast to float, so X_train presumably contains string values --
# confirm its dtype before fitting.
model.fit(X_train, y_train, batch_size=128, epochs=30)
I am using this and I got this error:
Epoch 1/30
Output exceeds the size limit. Open the full output data in a text editor
UnimplementedError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_1768\4221927022.py in <module>
----> 1 model.fit(X_train, y_train, batch_size=128, epochs=30)
c:\Users\decil\anaconda3\lib\site-packages\keras\utils\traceback_utils.py in error_handler(*args, **kwargs)
68 # To get the full stack trace, call:
69 # tf.debugging.disable_traceback_filtering()
---> 70 raise e.with_traceback(filtered_tb) from None
71 finally:
72 del filtered_tb
c:\Users\decil\anaconda3\lib\site-packages\tensorflow\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
50 try:
51 ctx.ensure_initialized()
---> 52 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
53 inputs, attrs, num_outputs)
54 except core._NotOkStatusException as e:
UnimplementedError: Graph execution error:
Detected at node 'sequential/Cast' defined at (most recent call last):
File "c:\Users\decil\anaconda3\lib\runpy.py", line 197, in _run_module_as_main
return _run_code(code, main_globals, None,
File "c:\Users\decil\anaconda3\lib\runpy.py", line 87, in _run_code
...
File "c:\Users\decil\anaconda3\lib\site-packages\keras\engine\functional.py", line 762, in _conform_to_reference_input
tensor = tf.cast(tensor, dtype=ref_input.dtype)
Node: 'sequential/Cast'
Cast string to float is not supported
[[{{node sequential/Cast}}]] [Op:__inference_train_function_529]
Please help me with this issue.

I see the problem is here: "Cast string to float is not supported". Basically, you're trying to pass a string (maybe the labels?) where the model expects a number (float). But I don't have enough info to help you any further.

Related

InvalidArgumentError: Graph execution error: Detected at node

My idea is to train a collaborative filter model for arts. I'm trying to train my model like this:
def utils_plot_keras_training(training):
    """Plot the loss and metric curves of a Keras training run.

    Draws two panels side by side: training curves on the left and
    validation curves on the right. In each panel the loss is drawn in
    black against the left axis and every remaining metric is drawn
    against a twin right-hand axis labelled "Score".

    Args:
        training: Object exposing a ``history`` dict of per-epoch value
            lists (presumably the ``History`` returned by ``model.fit``).
            The dict must hold ``"loss"`` and ``"val_loss"``, plus a
            ``"val_<name>"`` entry for every plotted metric, so the
            model has to be fit with validation data.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    history = training.history
    # Keep only the plain training metrics: skip losses and "val_" copies.
    metrics = [k for k in history if "loss" not in k and "val" not in k]
    _, axes = plt.subplots(nrows=1, ncols=2, sharey=True, figsize=(15, 3))

    # Both panels are built the same way; only the title and the key
    # prefix used to look the curves up differ.
    panels = (("Training", ""), ("Validation", "val_"))
    for loss_axis, (title, prefix) in zip(axes, panels):
        loss_axis.set(title=title)
        score_axis = loss_axis.twinx()
        loss_axis.plot(history[prefix + "loss"], color="black")
        loss_axis.set_xlabel("Epochs")
        loss_axis.set_ylabel("Loss", color="black")
        for metric in metrics:
            score_axis.plot(history[prefix + metric], label=metric)
        score_axis.set_ylabel("Score", color="steelblue")
        if metrics:
            # A legend with no labelled curves only produces a matplotlib
            # warning, so draw it only when at least one metric is plotted.
            score_axis.legend()

    plt.show()
# Train on (user_id, art_id) pairs against the target column "y";
# validation_split=0.3 reserves 30% of the rows for validation.
# NOTE(review): the traceback below reports index 1000 outside [0, 1000) in
# the xusers_emb lookup -- presumably user_id reaches 1000 while that
# embedding only has 1000 rows; confirm against the model definition.
training = model.fit(x=[train["user_id"], train["art_id"]], y=train["y"],
epochs=100, batch_size=128, shuffle=True, verbose=0, validation_split=0.3)
# Rebind the model from the object returned by fit().
model = training.model
# Plot the loss / metric curves of the run.
utils_plot_keras_training(training)
And I am getting the following error:
--------------------------------------------------------------------------- InvalidArgumentError Traceback (most recent call
last) Input In [30], in <cell line: 2>()
1 # train
----> 2 training = model.fit(x=[train["user_id"], train["art_id"]], y=train["y"],
3 epochs=100, shuffle=True, verbose=0, validation_split=0.3)
4 model = training.model
5 utils_plot_keras_training(training)
File
~\DataspellProjects\Arts\venv\lib\site-packages\keras\utils\traceback_utils.py:67,
in filter_traceback.<locals>.error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
File
~\DataspellProjects\Arts\venv\lib\site-packages\tensorflow\python\eager\execute.py:54,
in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
52 try:
53 ctx.ensure_initialized()
---> 54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
57 if name is not None:
InvalidArgumentError: Graph execution error:
Detected at node 'CollaborativeFiltering/xusers_emb/embedding_lookup'
defined at (most recent call last):
File "C:\Python310\lib\runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "C:\Python310\lib\runpy.py", line 86, in _run_code
exec(code, run_globals) ........
Node: 'CollaborativeFiltering/xusers_emb/embedding_lookup'
indices[28,0] = 1000 is not in [0, 1000) [[{{node
CollaborativeFiltering/xusers_emb/embedding_lookup}}]]
[Op:__inference_test_function_2209]
Any thoughts on how to resolve it? Full code and datasets are here: Github.

how to pass train_generator and test_generator to autoencoder.fit

I have a data generator as follows:
def datagenerator(x1, x2, batchsize):
    """Yield an endless stream of random batches built from two arrays.

    Each step draws ``batchsize`` random indices (with replacement) into
    ``x1`` and, independently, into ``x2``.

    Args:
        x1: First array, indexed along axis 0.
        x2: Second array, indexed along axis 0.
            (assumes both are image stacks shaped (N, H, W) -- TODO confirm)
        batchsize: Number of samples drawn from each array per step.

    Yields:
        A ``(x_data, y_data)`` tuple: ``x_data`` is the element-wise mean
        of the two sampled batches and ``y_data`` is the same two batches
        concatenated along axis 2.
    """
    size_first, size_second = x1.shape[0], x2.shape[0]
    while True:
        # Indices for x1 are drawn first, then those for x2.
        picks_first = np.random.randint(0, size_first, batchsize)
        picks_second = np.random.randint(0, size_second, batchsize)
        batch_first = x1[picks_first]
        batch_second = x2[picks_second]
        blended = (batch_first + batch_second) / 2.0
        side_by_side = np.concatenate((batch_first, batch_second), axis=2)
        yield blended, side_by_side
which takes two images and returns their average as the input, together with the two images concatenated as the target. Then I pass two datasets to this datagenerator ('mnist' and 'fashion_mnist') as follows:
# One generator per split; each step emits a single sampled pair.
train_generator = datagenerator(
    mnist_x_train, fashion_mnist_x_train, batchsize=1
)
test_generator = datagenerator(
    mnist_x_test, fashion_mnist_x_test, batchsize=1
)
but when I want to fit them using autoencoder:
# Fit the autoencoder on the training generator, validating on the test one.
# NOTE(review): both generators loop forever and already emit batches of
# size 1 -- presumably steps_per_epoch / validation_steps are required and
# batch_size should not be passed; confirm against the Model.fit docs.
autoencoder.fit(
    train_generator,
    validation_data=test_generator,
    epochs=100,
    batch_size=128,
    shuffle=True,
)
it throws the error below:
quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
53 ctx.ensure_initialized()
54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
57 if name is not None:
InvalidArgumentError: Graph execution error:
Detected at node 'binary_crossentropy/mul' defined at (most recent call last):
File "/usr/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py", line 16, in
<module>
app.launch_new_instance()
what is wrong with my code?

AttributeError: 'int' object has no attribute 'ndim' when doing model.fit()

def tf_data(path, batch_size=32):
    """Build a repeating, batched ``tf.data`` pipeline over matching files.

    Args:
        path: File pattern handed to ``tf.data.Dataset.list_files``.
        batch_size: Size of the batches in the returned dataset.

    Returns:
        A dataset that repeats indefinitely and yields batches of
        ``batch_size`` elements produced by ``prepare_data``.
    """
    autotune = tf.data.experimental.AUTOTUNE
    # prepare_data (defined outside this snippet) receives 64 file paths
    # at a time.
    path_batches = tf.data.Dataset.list_files(path).batch(64)
    return (
        path_batches.map(prepare_data, autotune)
        .prefetch(autotune)
        # Undo the 64-path loading batches, then regroup into the
        # requested batch size.
        .unbatch()
        .batch(batch_size)
        .repeat()
    )
# Training and test pipelines over the JPEG files of each split;
# batch_size is defined outside this snippet.
data_train = tf_data(
    'C:/Users/krajat/Desktop/New folder/FYP/New folder/output/train/*/*.jpg',
    batch_size=batch_size,
)
data_test = tf_data(
    'C:/Users/krajat/Desktop/New folder/FYP/New folder/output/test/*/*.jpg',
    batch_size=batch_size,
)
data_train is of RepeatDataset Type.
# Train for 5 epochs on the repeating dataset. Because tf_data() ends with
# repeat(), the length of each epoch is set by steps_per_epoch and
# validation_steps. p, batch_size and the callbacks are defined outside
# this snippet.
history = model.fit(data_train,
epochs=5,
steps_per_epoch = p[0]//batch_size,
validation_data = data_test,
validation_steps = p[2]//batch_size,
callbacks=[cp, csv_logger, reduce_lr])
After running model.fit(), it throws an error :
Epoch 1/5
---------------------------------------------------------------------------
> UnknownError Traceback (most recent call last) ~\AppData\Local\Temp/ipykernel_20152/1127474368.py in <module>
----> 1 history = model.fit(data_train,
2 epochs=5,
3 steps_per_epoch = p[0]//batch_size,
4 validation_data = data_test,
5 validation_steps = p[2]//batch_size,
> ~\Anaconda3\lib\site-packages\keras\utils\traceback_utils.py in error_handler(*args, **kwargs)
> ~\Anaconda3\lib\site-packages\tensorflow\python\eager\execute.py in
quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
52 try:
53 ctx.ensure_initialized()
---> 54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
> **UnknownError: Graph execution error:**
> **AttributeError: 'int' object has no attribute 'ndim'** Traceback (most recent call last):
> File "C:\Users\krajat\Anaconda3\lib\site-packages\tensorflow\python\ops\script_ops.py", line 271, in __call__
ret = func(*args)
> File "C:\Users\krajat\Anaconda3\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 642, in wrapper
return func(*args, **kwargs)
> File "C:\Users\krajat\Anaconda3\lib\site-packages\numpy\lib\function_base.py", line 2113, in __call__
return self._vectorize_call(func=func, args=vargs)
> File "C:\Users\krajat\Anaconda3\lib\site-packages\numpy\lib\function_base.py", line 2187, in _vectorize_call
res = self._vectorize_call_with_signature(func, args)
> File "C:\Users\krajat\Anaconda3\lib\site-packages\numpy\lib\function_base.py", line 2242, in _vectorize_call_with_signature
_update_dim_sizes(dim_sizes, result, core_dims)
> File "C:\Users\krajat\Anaconda3\lib\site-packages\numpy\lib\function_base.py", line 1841, in _update_dim_sizes
if arg.ndim < num_core_dims:
> **AttributeError: 'int' object has no attribute 'ndim'**
[[{{node PyFunc}}]] [[IteratorGetNext]] [Op:__inference_train_function_400484]

Training a CNN model UnimplementedError: Graph execution error:

I am a bit confused as I have never encountered such an error before. I am trying to train my CNN model on images. Below you can see a picture of my code, and then the error message. As you can see, it starts at epoch 1 and then it stops :(
Does anyone have any idea where the problem comes from? Has anyone had a similar error message before when training a CNN?
Any help is welcome,
Thanks
# Fit modelA for 60 epochs with test_data as the validation data.
# best_model, reduce_lr and es are callbacks defined outside this snippet.
history = modelA.fit(train_data,
validation_data = test_data,
epochs = 60,
callbacks = [best_model, reduce_lr, es])
ERROR MESSAGE
Epoch 1/60
---------------------------------------------------------------------------
UnimplementedError Traceback (most recent call last)
<ipython-input-68-4b47ff852a2a> in <module>()
2 validation_data = test_data,
3 epochs = 60,
----> 4 callbacks = [best_model, reduce_lr, es])
1 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
53 ctx.ensure_initialized()
54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
57 if name is not None:
UnimplementedError: Graph execution error:

Tensorflow Keras - UnknownError: UnidentifiedImageError

I've downloaded the dataset for malaria detection from this website. Afterwards, I uploaded the images to my Google Drive and tried to train a neural net with the built-in fit() function as follows:
# Directory-backed batch iterators for training and validation. Both use
# 64x64 RGB targets with categorical labels; only the training iterator
# shuffles.
train_gen = train_aug.flow_from_directory(
    training_data_dir,
    target_size=(64, 64),
    color_mode="rgb",
    class_mode="categorical",
    batch_size=BATCH_SIZE,
    shuffle=True,
)
val_gen = val_aug.flow_from_directory(
    validation_data_dir,
    target_size=(64, 64),
    color_mode="rgb",
    class_mode="categorical",
    batch_size=BATCH_SIZE,
    shuffle=False,
)
# Train from the directory iterators; the step counts, the number of epochs
# and the callbacks are defined outside this snippet.
history = model.fit(x=train_gen, steps_per_epoch=steps_per_epoch, epochs=EPOCH_NUM,
validation_data=val_gen, validation_steps=val_steps, callbacks=CALLBACKS)
In the middle of training I am getting following error message:
Epoch 1/100
302/603 [==============>...............] - ETA: 44:54 - loss: 8.3442 - binary_accuracy: 0.4935
---------------------------------------------------------------------------
UnknownError Traceback (most recent call last)
<ipython-input-45-2fe1e94cba86> in <module>()
1 history = model.fit(x=train_gen, steps_per_epoch=steps_per_epoch, epochs=EPOCH_NUM,
----> 2 validation_data=val_gen, validation_steps=val_steps, callbacks=CALLBACKS)
8 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
58 ctx.ensure_initialized()
59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
62 if name is not None:
UnknownError: UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f42ff5c2518>
Traceback (most recent call last):
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/script_ops.py", line 243, in __call__
ret = func(*args)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/autograph/impl/api.py", line 309, in wrapper
return func(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 785, in generator_py_func
values = next(generator_state.get_iterator(iterator_id))
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/data_adapter.py", line 801, in wrapped_generator
for data in generator_fn():
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/data_adapter.py", line 932, in generator_fn
yield x[i]
File "/usr/local/lib/python3.6/dist-packages/keras_preprocessing/image/iterator.py", line 65, in __getitem__
return self._get_batches_of_transformed_samples(index_array)
File "/usr/local/lib/python3.6/dist-packages/keras_preprocessing/image/iterator.py", line 230, in _get_batches_of_transformed_samples
interpolation=self.interpolation)
File "/usr/local/lib/python3.6/dist-packages/keras_preprocessing/image/utils.py", line 114, in load_img
img = pil_image.open(io.BytesIO(f.read()))
File "/usr/local/lib/python3.6/dist-packages/PIL/Image.py", line 2862, in open
"cannot identify image file %r" % (filename if filename else fp)
PIL.UnidentifiedImageError: cannot identify image file <_io.BytesIO object at 0x7f42ff5c2518>
[[{{node PyFunc}}]]
[[IteratorGetNext]] [Op:__inference_train_function_35711]
Function call stack:
train_function
What is this error exactly, and how can I properly handle it? Do I need to use custom training loops with GradientTape object and then use try/catch blocks or is there another way?
The thing that confuses me is that it seems like some image cannot be decoded or something like that. But how come ImageDataGenerator did not report any error before training?
Deleting all images and re-uploading them did the trick for me. Closing this question.
Reducing the number of workers in model.fit helped me solve this issue.

Categories

Resources