Invalid Argument Error: Graph Execution Error on Tensorflow NN - python

I'm trying to fit the following neural network:
def make_model():
    """Build and compile the dense 8-class classifier.

    The input shape is read from the module-level ``train_df`` (every
    dimension after the first). The input is flattened, passed through three
    ReLU dense layers (128, 64 and 32 units) and ends in an 8-unit softmax
    layer.

    Returns:
        The compiled ``tf.keras`` model: Adam optimizer with learning rate
        1e-3, categorical cross-entropy loss, and categorical-accuracy,
        false-negative and precision-recall AUC (named ``prc``) metrics.
    """
    # Named ``inputs`` rather than ``input`` so the builtin is not shadowed.
    inputs = tf.keras.Input(shape=train_df.shape[1:])
    x = tf.keras.layers.Flatten()(inputs)
    x = tf.keras.layers.Dense(128, activation='relu')(x)
    x = tf.keras.layers.Dense(64, activation='relu')(x)
    x = tf.keras.layers.Dense(32, activation='relu')(x)
    output = tf.keras.layers.Dense(8, activation='softmax')(x)
    model = tf.keras.models.Model(inputs, output)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=[
            tf.keras.metrics.CategoricalAccuracy(),
            tf.keras.metrics.FalseNegatives(),
            tf.keras.metrics.AUC(name='prc', curve='PR'),
        ],
    )
    return model
model = make_model()
model.fit(x=train_features, y=train_labels, epochs=2)
where:
model.summary()
print(train_features.shape, train_labels.shape)
outputs the following:
Model: "model_8"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_9 (InputLayer) [(None, 17)] 0
flatten_8 (Flatten) (None, 17) 0
dense_32 (Dense) (None, 128) 2304
dense_33 (Dense) (None, 64) 8256
dense_34 (Dense) (None, 32) 2080
dense_35 (Dense) (None, 8) 264
=================================================================
Total params: 12,904
Trainable params: 12,904
Non-trainable params: 0
_________________________________________________________________
Train Features Shape: (64140, 17)
Train Labels Shape: (64140, 8)
However, it keeps getting this error mid-epoch:
Epoch 1/2
444/2005 [=====>........................] - ETA: 1s - loss: 1.1139 - categorical_accuracy: 0.4904 - false_negatives_6: 10143.0000 - prc: 0.5232
Output exceeds the size limit. Open the full output data in a text editor
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
c:\Users\nrtc\OneDrive\Documentos\AI Summer School\Competition\Cópia_de_imbalanced_data.ipynb Cell 34' in <module>
----> 1 model.fit(x=train_features, y=train_labels, epochs=2)
File c:\Python39\lib\site-packages\keras\utils\traceback_utils.py:67, in filter_traceback.<locals>.error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
File ~\AppData\Roaming\Python\Python39\site-packages\tensorflow\python\eager\execute.py:54, in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
52 try:
53 ctx.ensure_initialized()
---> 54 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
55 inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
57 if name is not None:
InvalidArgumentError: Graph execution error:
Detected at node 'assert_greater_equal/Assert/AssertGuard/Assert' defined at (most recent call last):
File "c:\Python39\lib\runpy.py", line 197, in _run_module_as_main
return _run_code(code, main_globals, None,
...
File "c:\Python39\lib\site-packages\keras\utils\metrics_utils.py", line 602, in update_confusion_matrix_variables
tf.debugging.assert_greater_equal(
Node: 'assert_greater_equal/Assert/AssertGuard/Assert'
assertion failed: [predictions must be >= 0] [Condition x >= y did not hold element-wise:] [x (model_6/dense_27/Softmax:0) = ] [[0.101199746 0.358387947 0.118633337...]...] [y (Cast_4/x:0) = ] [0]
[[{{node assert_greater_equal/Assert/AssertGuard/Assert}}]] [Op:__inference_train_function_10341]
Any idea what might be the error? I've seen other threads for the same error (*), but I do think that the last layer is correctly set up for 8 output labels.
*Other stack overflow threads for a similar problem
https://stackoverflow.com/questions/62606345/tensorflow-2-2-0-error-predictions-must-be-0-condition-x-y-did-not-hold
https://stackoverflow.com/questions/71153492/invalid-argument-error-graph-execution-error

Some labels in the training set are NaN values, and plotting the labels does not make that problem obvious. Dropping the rows that contain NaN with
train_df.dropna(inplace=True)
does the trick.

Related

Internal Error: EagerConst: Dst tensor is not initialized

I tried to use EfficientNet as shown below, but before training starts the model throws the error shown after the code.
from tensorflow.keras import layers
from tensorflow.keras.applications import EfficientNetB0
# Five output classes; input images are IMG_SIZE x IMG_SIZE with 3 channels.
NUM_CLASSES = 5
IMG_SIZE = 224
# NOTE(review): `size` is not used anywhere in this excerpt.
size = (IMG_SIZE, IMG_SIZE)
inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
# Using model without transfer learning
# (weights=None, so no pretrained weights are requested; the classification
# head is kept and sized to NUM_CLASSES).
outputs = EfficientNetB0(include_top=True, weights=None, classes=NUM_CLASSES)(inputs)
model = tf.keras.Model(inputs, outputs)
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"] )
model.summary()
# train_x / train_y are defined outside this excerpt and are handed to fit() whole.
# NOTE(review): the InternalError is raised while an input tensor is copied from
# CPU to GPU — presumably the arrays do not fit in GPU memory; confirm.
hist = model.fit(train_x, train_y, epochs=30,batch_size=5, verbose=2)
The output and error of the above code is as follows
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 224, 224, 3)] 0
efficientnetb0 (Functional) (None, 5) 4055976
=================================================================
Total params: 4,055,976
Trainable params: 4,013,953
Non-trainable params: 42,023
_________________________________________________________________
---------------------------------------------------------------------------
InternalError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_400/3980470361.py in <module>
5 model.summary()
6
----> 7 hist = model.fit(train_x, train_y, epochs=30,batch_size=5, verbose=2)
c:\Users\Hp\anaconda3\envs\myenv\lib\site-packages\keras\utils\traceback_utils.py in error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
c:\Users\Hp\anaconda3\envs\myenv\lib\site-packages\tensorflow\python\framework\constant_op.py in convert_to_eager_tensor(value, ctx, dtype)
104 dtype = dtypes.as_dtype(dtype).as_datatype_enum
105 ctx.ensure_initialized()
--> 106 return ops.EagerTensor(value, ctx.device_name, dtype)
107
108
InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.
I also tried running this code after clearing the RAM, but it made no difference.

Keras ValueError: Dimensions must be equal LSTM

I'm creating a Bidirectional LSTM, but I'm facing the following error:
ValueError: Dimensions must be equal, but are 5 and 250 for '{{node Equal}} = Equal[T=DT_INT64, incompatible_shape_error=true](ArgMax, ArgMax_1)' with input shapes: [?,5], [?,250]
I have no idea what is wrong or how to fix it!
I have a text dataset with 59k rows for training the model, and I would like to divide them into 15 classes, which I would then use for class-based text similarity on newly received text.
Based on the other post, I experimented with the loss, but that still doesn't solve the issue.
Here is the model plot:
Also sequential model would be as follow:
# Stacked bidirectional-LSTM text model built with the Sequential API.
model_lstm = Sequential()
model_lstm.add(InputLayer(250,))
# Embedding initialised from embedding_matrix and left trainable.
model_lstm.add(Embedding(input_dim=max_words+1, output_dim=200, weights=[embedding_matrix],
mask_zero=True, trainable= True, name='corpus_embed'))
enc_lstm = Bidirectional(LSTM(128, activation='sigmoid', return_sequences=True, name='LSTM_Encod'))
model_lstm.add(enc_lstm)
model_lstm.add(Dropout(0.25))
# return_sequences=True keeps the time axis here as well, so the Dense layer
# below is applied per time step (the summary reports output (None, 250, 15)).
model_lstm.add(Bidirectional(LSTM( 128, activation='sigmoid',dropout=0.25, return_sequences=True, name='LSTM_Decod')))
model_lstm.add(Dropout(0.25))
model_lstm.add(Dense(15, activation='softmax'))
model_lstm.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['Accuracy'])
## Feed the model
# NOTE(review): the model output is (None, 250, 15) while target_seq_train is
# reported as (59597, 5, 8205); the "Dimensions must be equal, but are 5 and 250"
# error raised in the accuracy metric appears to come from this mismatch.
history = model_lstm.fit(x=corpus_seq_train,
y=target_seq_train,
batch_size=128,
epochs=50,
validation_data=(corpus_seq_test,target_seq_test),
callbacks=[tensorboard],
sample_weight= sample_wt_mat)
This is the model summary:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
corpus_embed (Embedding) (None, 250, 200) 4000200
bidirectional (Bidirectiona (None, 250, 256) 336896
l)
dropout (Dropout) (None, 250, 256) 0
bidirectional_1 (Bidirectio (None, 250, 256) 394240
nal)
dropout_1 (Dropout) (None, 250, 256) 0
dense (Dense) (None, 250, 15) 3855
=================================================================
Total params: 4,735,191
Trainable params: 4,735,191
Non-trainable params: 0
_________________________________
and dataset shape:
corpus_seq_train.shape, target_seq_train.shape
((59597, 250), (59597, 5, 8205))
Finally, here is the error:
Epoch 1/50
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
C:\Users\AMIRSH~1\AppData\Local\Temp/ipykernel_10004/3838451254.py in <module>
9 ## Feed the model
10
---> 11 history = model_lstm.fit(x=corpus_seq_train,
12 y=target_seq_train,
13 batch_size=128,
C:\ProgramData\Anaconda3\lib\site-packages\keras\utils\traceback_utils.py in error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py in tf__train_function(iterator)
13 try:
14 do_return = True
---> 15 retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
16 except:
17 do_return = False
ValueError: in user code:
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 1051, in train_function *
return step_function(self, iterator)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 1040, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 1030, in run_step **
outputs = model.train_step(data)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 894, in train_step
return self.compute_metrics(x, y, y_pred, sample_weight)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 987, in compute_metrics
self.compiled_metrics.update_state(y, y_pred, sample_weight)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\compile_utils.py", line 501, in update_state
metric_obj.update_state(y_t, y_p, sample_weight=mask)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\utils\metrics_utils.py", line 70, in decorated
update_op = update_state_fn(*args, **kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\metrics\base_metric.py", line 140, in update_state_fn
return ag_update_state(*args, **kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\metrics\base_metric.py", line 646, in update_state **
matches = ag_fn(y_true, y_pred, **self._fn_kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\metrics\metrics.py", line 3295, in categorical_accuracy
return metrics_utils.sparse_categorical_matches(
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\utils\metrics_utils.py", line 893, in sparse_categorical_matches
matches = tf.cast(tf.equal(y_true, y_pred), backend.floatx())
ValueError: Dimensions must be equal, but are 5 and 250 for '{{node Equal}} = Equal[T=DT_INT64, incompatible_shape_error=true](ArgMax, ArgMax_1)' with input shapes: [?,5], [?,250].
The problem is caused by the loss function and the shape of the y labels.
The y labels should not be padded; they should be fed to the model directly, without any other processing.

tensorflow: error with parallel channel model

I'm debugging a model I created to accept a variable number of input channels (each channel is an RGB image). I suspect that not all the channels are properly connected.
Model code:
IMG_SHAPE = (160, 160, 3)
def get_ch_model_simple():
    """Build the feature extractor applied to a single image of IMG_SHAPE.

    Pipeline: rescale by 1/255, 3x3 convolution with 32 ReLU filters,
    2x2 max pooling.

    Returns:
        A ``tf.keras.Model`` mapping the image input to the pooled feature map.
    """
    image_in = tf.keras.Input(shape=IMG_SHAPE)
    # Scale pixels to float.
    scaled = tf.keras.layers.Rescaling(1.0 / 255)(image_in)
    convolved = tf.keras.layers.Conv2D(
        32, kernel_size=(3, 3), activation="relu"
    )(scaled)
    pooled = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(convolved)
    return tf.keras.Model(image_in, pooled)
def get_model(n_chan=2):
    """Build a 2-class classifier over ``n_chan`` images stacked on axis 1.

    Args:
        n_chan: Number of images per sample; the model input has shape
            ``(n_chan, *IMG_SHAPE)`` plus the batch axis.

    Returns:
        A ``tf.keras.Model`` from the stacked-image input to a 2-unit softmax.
    """
    # Derive the per-image shape from IMG_SHAPE instead of repeating the
    # literal, so it cannot drift from what get_ch_model_simple() is built for.
    inputs = tf.keras.Input(shape=(n_chan, *IMG_SHAPE))
    ch_features = []
    for ch in range(n_chan):
        # A separate sub-model is built for every channel.
        ch_model = get_ch_model_simple()
        # Select the ch-th image; every slice reads from the same `inputs`
        # tensor and differs only in the index taken on axis 1.
        ch_model_input = inputs[:, ch, :, :, :]
        i_ch_features = tf.keras.layers.Flatten()(ch_model(ch_model_input))
        i_ch_features = tf.keras.layers.Dropout(0.5)(i_ch_features)
        ch_features.append(i_ch_features)
    all_ch_features = tf.keras.layers.concatenate(ch_features)
    outputs = tf.keras.layers.Dense(2, activation="softmax")(all_ch_features)
    return tf.keras.Model(inputs, outputs)
Checking the model:
m = get_model(n_chan=2)
m.summary()
Summary output:
Model: "model_49"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_25 (InputLayer) [(None, 2, 160, 160 0 []
, 3)]
tf.__operators__.getitem_16 (S (None, 160, 160, 3) 0 ['input_25[0][0]']
licingOpLambda)
tf.__operators__.getitem_17 (S (None, 160, 160, 3) 0 ['input_25[0][0]']
licingOpLambda)
model_47 (Functional) (None, 79, 79, 32) 896 ['tf.__operators__.getitem_16[0][
0]']
model_48 (Functional) (None, 79, 79, 32) 896 ['tf.__operators__.getitem_17[0][
0]']
flatten_19 (Flatten) (None, 199712) 0 ['model_47[0][0]']
flatten_20 (Flatten) (None, 199712) 0 ['model_48[0][0]']
dropout_19 (Dropout) (None, 199712) 0 ['flatten_19[0][0]']
dropout_20 (Dropout) (None, 199712) 0 ['flatten_20[0][0]']
concatenate_8 (Concatenate) (None, 399424) 0 ['dropout_19[0][0]',
'dropout_20[0][0]']
dense_10 (Dense) (None, 2) 798850 ['concatenate_8[0][0]']
==================================================================================================
Total params: 800,642
Trainable params: 800,642
Non-trainable params: 0
__________________________________________________________________________________________________
I'm concerned that the 2 slicing operators are connected to input_25[0][0], they seem to be getting the same channel slice instead of getting a different channel each.
In addition if I try to create a submodel to check the ch_model input and output I get error:
r_m = tf.keras.Model(model.inputs, model.layers[3].input) works fine
however r_m2 = tf.keras.Model(model.inputs, model.layers[3].output) errors out:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [175], in <cell line: 1>()
----> 1 r_m2 = tf.keras.Model(model.inputs, model.layers[3].output)
File ~/PycharmProjects/venv39/lib/python3.9/site-packages/tensorflow/python/training/tracking/base.py:629, in no_automatic_dependency_tracking.<locals>._method_wrapper(self, *args, **kwargs)
627 self._self_setattr_tracking = False # pylint: disable=protected-access
628 try:
--> 629 result = method(self, *args, **kwargs)
630 finally:
631 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
File ~/PycharmProjects/venv39/lib/python3.9/site-packages/keras/engine/functional.py:146, in Functional.__init__(self, inputs, outputs, name, trainable, **kwargs)
143 if not all([functional_utils.is_input_keras_tensor(t)
144 for t in tf.nest.flatten(inputs)]):
145 inputs, outputs = functional_utils.clone_graph_nodes(inputs, outputs)
--> 146 self._init_graph_network(inputs, outputs)
File ~/PycharmProjects/venv39/lib/python3.9/site-packages/tensorflow/python/training/tracking/base.py:629, in no_automatic_dependency_tracking.<locals>._method_wrapper(self, *args, **kwargs)
627 self._self_setattr_tracking = False # pylint: disable=protected-access
628 try:
--> 629 result = method(self, *args, **kwargs)
630 finally:
631 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
File ~/PycharmProjects/venv39/lib/python3.9/site-packages/keras/engine/functional.py:229, in Functional._init_graph_network(self, inputs, outputs)
226 self._input_coordinates.append((layer, node_index, tensor_index))
228 # Keep track of the network's nodes and layers.
--> 229 nodes, nodes_by_depth, layers, _ = _map_graph_network(
230 self.inputs, self.outputs)
231 self._network_nodes = nodes
232 self._nodes_by_depth = nodes_by_depth
File ~/PycharmProjects/venv39/lib/python3.9/site-packages/keras/engine/functional.py:1036, in _map_graph_network(inputs, outputs)
1034 for x in tf.nest.flatten(node.keras_inputs):
1035 if id(x) not in computable_tensors:
-> 1036 raise ValueError(
1037 f'Graph disconnected: cannot obtain value for tensor {x} '
1038 f'at layer "{layer.name}". The following previous layers '
1039 f'were accessed without issue: {layers_with_complete_input}')
1040 for x in tf.nest.flatten(node.outputs):
1041 computable_tensors.add(id(x))
ValueError: Graph disconnected: cannot obtain value for tensor KerasTensor(type_spec=TensorSpec(shape=(None, 160, 160, 3), dtype=tf.float32, name='input_8'), name='input_8', description="created by layer 'input_8'") at layer "rescaling_4". The following previous layers were accessed without issue: []

image captioning with deep learning

Can anyone help me find the error here? I am trying to train a model for image captioning in the Persian language. Below are the code I use to build my model and the model summary.
`embeddings_dim = 256
# Width of the image feature vector, taken from the training array.
input_image_dim = X_train_image.shape[1]
keras.backend.clear_session()
# image model
input_image = keras.layers.Input(shape=(input_image_dim,), name='input_image')
#input_image_dropout = keras.layers.Dropout(0.4)(input_image)
# Projects the image features to embeddings_dim so they can seed the GRU state.
image_embeddings = keras.layers.Dense(embeddings_dim, activation='tanh', name='image_embeddings') (input_image)
# text model
# Set up the decoder, using `image_embeddings` as initial state.
decoder_inputs = keras.layers.Input(shape=(max_len,))
embeddings = keras.layers.Embedding(len(word2index), embeddings_dim, mask_zero=True)(decoder_inputs)
#embeddings_dropout = keras.layers.Dropout(0.3)(embeddings)
gru = keras.layers.GRU(embeddings_dim)(embeddings, initial_state=image_embeddings) # , return_sequences=True
#flat = keras.layers.Flatten()(gru)
dense = keras.layers.Dense(embeddings_dim, activation='relu')(gru)
#dense_dropout = keras.layers.Dropout(0.3)(dense)
# One softmax unit per entry of word2index.
decoder_outputs = keras.layers.Dense(len(word2index), activation='softmax')(dense)
seq2seq = keras.Model([input_image, decoder_inputs], decoder_outputs)
seq2seq.summary()
# prepare callback
#histories = My_Callback()
# NOTE(review): the filename template uses {b1}; presumably Bleu_Callback adds
# a `b1` entry to the epoch logs — confirm.
model_checkpoint_path = 'models/model.{epoch:02d}-{val_loss:.3f}--{b1:.3f}.hdf5'
# `period` triggers the deprecation warning shown in the output (use `save_freq`).
checkpoint_callback = keras.callbacks.ModelCheckpoint(model_checkpoint_path, monitor='val_loss', verbose=1, save_best_only=False, save_weights_only=False, mode='auto', period=1)
# NOTE(review): the KeyError: 'metrics' in the traceback is raised inside
# keras_tqdm's callback (self.params['metrics']), i.e. by TQDMNotebookCallback.
callbacks = [TQDMNotebookCallback(), Bleu_Callback(), checkpoint_callback] #[checkpoint_callback, TQDMNotebookCallback(), My_Callback()]
seq2seq.compile(optimizer=keras.optimizers.Adam(), loss='categorical_crossentropy')
seq2seq.fit([X_train_image, X_train_text], y_train_text,
validation_data=([X_test_image, X_test_text], y_test_text),
batch_size=1024,
epochs=10,
verbose=2,
callbacks=callbacks) # add My_Callback() to callbacks to calculate & display BLEU score after each epoch`
This is the error I get after running my code; it gives me KeyError: 'metrics'. Thanks.
` Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) [(None, 20)] 0 []
input_image (InputLayer) [(None, 1000)] 0 []
embedding (Embedding) (None, 20, 256) 730112 ['input_1[0][0]']
image_embeddings (Dense) (None, 256) 256256 ['input_image[0][0]']
gru (GRU) (None, 256) 394752 ['embedding[0][0]',
'image_embeddings[0][0]']
dense (Dense) (None, 256) 65792 ['gru[0][0]']
dense_1 (Dense) (None, 2852) 732964 ['dense[0][0]']
==================================================================================================
Total params: 2,179,876
Trainable params: 2,179,876
Non-trainable params: 0
__________________________________________________________________________________________________
WARNING:tensorflow:`period` argument is deprecated. Please use `save_freq` to specify the frequency in number of batches seen.
Training: 0%
0/20 [00:00<?, ?it/s]
Epoch 0: 0%
0/237 [00:00<?, ?it/s]
Epoch 1/20
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-32-0fc16141d8f6> in <module>
32 seq2seq.compile(optimizer=keras.optimizers.Adam(), loss='categorical_crossentropy')
33
---> 34 seq2seq.fit([X_train_image, X_train_text], y_train_text,
35 validation_data=([X_test_image, X_test_text], y_test_text),
36 batch_size=1024,
~\anaconda3\lib\site-packages\keras\utils\traceback_utils.py in error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
~\anaconda3\lib\site-packages\keras_tqdm\tqdm_callback.py in append_logs(self, logs)
134
135 def append_logs(self, logs):
--> 136 metrics = self.params['metrics']
137 for metric, value in six.iteritems(logs):
138 if metric in metrics:
KeyError: 'metrics'
`

error when using Mirrored strategy in Tensorflow

I read the data and processed it using the following code :
# Load the CSV and shuffle its rows (sample(frac=1)), then renumber the index.
data = pd.read_csv('Step1_output.csv')
data = data.sample(frac=1).reset_index(drop=True)
data1 = pd.DataFrame(data, columns=['Res_pair'])
# creating instance of labelencoder
labelencoder = LabelEncoder()
# Assigning numerical values and storing in another column
data1['Res_pair_ID'] = labelencoder.fit_transform(data1['Res_pair'])
# Replace the original Res_pair column with its integer encoding.
data['Res_pair'] = data1['Res_pair_ID']
data = data.to_numpy()
# Columns 0..565 are the features; every remaining column is a target.
# NOTE(review): nothing here checks for NaN/inf values, and the training log
# shows loss: nan at the first step — verify the array contents.
train_X = data[0:data.shape[0],0:566]
train_y = data[0:data.shape[0],566:data.shape[1]]
# Add a trailing axis of size 1 (the shape is later used as the Conv1D input_shape).
train_X = train_X.reshape((train_X.shape[0], train_X.shape[1], 1))
I build the model using following code where I have tried to distribute the dataset using mirrored strategy of Tensorflow :
# NOTE(review): indentation was lost in this excerpt, so it is not visible which
# statements sit inside `with strategy.scope():` or inside the trailing `for`
# loop — confirm against the original script.
print("Hyper-parameter values:\n")
print('Momentum Rate =',momentum_rate,'\n')
print('learning rate =',learning_rate,'\n')
print('Number of neurons =',neurons,'\n')
# NOTE(review): both `tensorflow` and `tf` are used as module names below;
# presumably both are imported — confirm.
strategy = tensorflow.distribute.MirroredStrategy()
with strategy.scope():
# Conv1D front end followed by four dense layers of `neurons` units and a
# 10-unit softmax output.
model = tf.keras.Sequential([
tf.keras.layers.Conv1D(64,kernel_size = 3,activation='relu',input_shape=train_X.shape[1:]),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(neurons,activation='relu'),
tf.keras.layers.Dense(neurons,activation='relu'),
tf.keras.layers.Dense(neurons,activation='relu'),
tf.keras.layers.Dense(neurons,activation='relu'),
tf.keras.layers.Dense(10, activation='softmax'),])
sgd = optimizers.SGD(lr=learning_rate, decay=1e-6, momentum=momentum_rate, nesterov=True)
model.compile(loss='categorical_crossentropy',optimizer=sgd,metrics=['accuracy',tensorflow.keras.metrics.Precision()])
# The last 20% of the data is held out for validation (validation_split = 0.2).
results = model.fit(train_X,train_y,validation_split = 0.2,epochs=10,batch_size = 100)
print(results)
path = 'saved_model/'
model.save(path, save_format='tf')
# NOTE(review): presumably this loop is meant to wrap the build/train code above
# as a random hyper-parameter search (100 draws) — confirm.
for k in range(100):
momentum_rate = random.random()
learning_rate = random.uniform(0,0.2)
neurons = random.randint(10,50)
I tried to run the code on GPU but it runs for some time and then throws this error :
Hyper-parameter values:
Momentum Rate = 0.6477407029392913
learning rate = 0.03988890117492503
Number of neurons = 35
Epoch 1/10
1/270110 [..............................] - ETA: 28s - loss: nan - accuracy: 0.0100 - precision: 0.0100Traceback (most recent call last):
File "parallelised_script_realdata2.py", line 56, in <module>
results = model.fit(train_X,train_y,validation_split = 0.2,epochs=10,batch_size = 100)
File "/usr/local/lib64/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 108, in _method_wrapper
return method(self, *args, **kwargs)
File "/usr/local/lib64/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 1098, in fit
tmp_logs = train_function(iterator)
File "/usr/local/lib64/python3.6/site-packages/tensorflow/python/eager/def_function.py", line 780, in __call__
result = self._call(*args, **kwds)
File "/usr/local/lib64/python3.6/site-packages/tensorflow/python/eager/def_function.py", line 807, in _call
return self._stateless_fn(*args, **kwds) # pylint: disable=not-callable
File "/usr/local/lib64/python3.6/site-packages/tensorflow/python/eager/function.py", line 2829, in __call__
return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
File "/usr/local/lib64/python3.6/site-packages/tensorflow/python/eager/function.py", line 1848, in _filtered_call
cancellation_manager=cancellation_manager)
File "/usr/local/lib64/python3.6/site-packages/tensorflow/python/eager/function.py", line 1924, in _call_flat
ctx, args, cancellation_manager=cancellation_manager))
File "/usr/local/lib64/python3.6/site-packages/tensorflow/python/eager/function.py", line 550, in call
ctx=ctx)
File "/usr/local/lib64/python3.6/site-packages/tensorflow/python/eager/execute.py", line 60, in quick_execute
inputs, attrs, num_outputs)
tensorflow.python.framework.errors_impl.InvalidArgumentError: assertion failed: [predictions must be >= 0] [Condition x >= y did not hold element-wise:] [x (sequential/dense_4/Softmax:0) = ] [[nan nan nan...]...] [y (Cast_6/x:0) = ] [0]
[[{{node assert_greater_equal/Assert/AssertGuard/else/_21/assert_greater_equal/Assert/AssertGuard/Assert}}]] [Op:__inference_train_function_1270]
Function call stack:
train_function
Update: The code works well if I don't use strategy = tensorflow.distribute.MirroredStrategy(). Like the code below (but will fail for larger datasets for memory shortage):
def convolutional_neural_network(x, y):
    """Build, compile and train the Conv1D classifier on ``x`` and ``y``.

    The hyper-parameters ``momentum_rate``, ``learning_rate`` and ``neurons``
    are read from module scope and printed before the model is built.

    Args:
        x: Training inputs; ``x.shape[1:]`` is used as the Conv1D input shape.
        y: Training targets passed to ``model.fit``.

    Returns:
        The object returned by ``model.fit``.
    """
    print("Hyper-parameter values:\n")
    print('Momentum Rate =',momentum_rate,'\n')
    print('learning rate =',learning_rate,'\n')
    print('Number of neurons =',neurons,'\n')
    model = Sequential()
    # Use the arguments rather than the module-level train_X / train_y so the
    # function trains on the data it is given.
    model.add(Conv1D(filters=64, input_shape=x.shape[1:], activation='relu', kernel_size=3))
    model.add(Flatten())
    model.add(Dense(neurons, activation='relu'))  # first hidden layer
    model.add(Dense(neurons, activation='relu'))  # second hidden layer
    model.add(Dense(neurons, activation='relu'))
    model.add(Dense(neurons, activation='relu'))
    model.add(Dense(10, activation='softmax'))
    sgd = optimizers.SGD(lr=learning_rate, decay=1e-6, momentum=momentum_rate, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy', tensorflow.keras.metrics.Precision()])
    history = model.fit(x, y, validation_split=0.2, epochs=10, batch_size=100)
    # Return the fit result so the caller's print(...) shows it instead of None.
    return history
momentum_rate = 0.09
learning_rate = 0.01
neurons = 40
print(convolutional_neural_network(train_X, train_y))
Update 2: Still facing a similar issue with smaller dataset
_________________________________________________________________
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv1d (Conv1D) (None, 564, 64) 256
_________________________________________________________________
flatten (Flatten) (None, 36096) 0
_________________________________________________________________
dense (Dense) (None, 50) 1804850
_________________________________________________________________
dense_1 (Dense) (None, 50) 2550
_________________________________________________________________
dense_2 (Dense) (None, 50) 2550
_________________________________________________________________
dense_3 (Dense) (None, 50) 2550
_________________________________________________________________
dense_4 (Dense) (None, 10) 510
=================================================================
Total params: 1,813,266
Trainable params: 1,813,266
Non-trainable params: 0
The model definition seems fine, so does the strategy.
Can you verify train_y as a sanity check? I'm fairly sure the error lies there.
If that's not the case, try running model.fit and the calls after it outside the strategy scope.

Categories

Resources