Related
I have a simple 2 layer Tensorflow model that I am trying to train on a dataset of equal-sized stereo audio files to tell me if the sound is coming more from the left side or the right side. This means the input is an array of 3072 by 2 arrays and the output is an array of 1's and 0's to represent left and right.
The problem is that when I run the program, it fails at model.fit() with an invalid argument error.
Code:
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 18 15:51:56 2022
#author: andre
"""
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
from datetime import datetime
from sklearn import metrics
from scipy.io import wavfile
import os
import glob
# Load in Right Side .WAV Data.
X1 = []
count1 = 0
database_path = "C:\\Users\\andre\\OneDrive\\Documents\\ESI2022\\MLDatabases\\Right\\"
for filename in glob.glob(os.path.join(database_path, '*.wav')):
X1.append(wavfile.read(filename)[1])
count1 = count1 + 1
# Load in Left side .WAV Data.
X2 = []
count2 = 0
database_path2 = "C:\\Users\\andre\\OneDrive\\Documents\\ESI2022\\MLDatabases\\Right\\"
for filename2 in glob.glob(os.path.join(database_path2, '*.wav')):
X2.append(wavfile.read(filename2)[1])
count2 = count2 + 1
# Get the smallest size audio file (this will be sample size input to model)
sample_size = len(X1[0])
for data in X1:
if len(data) < sample_size:
sample_size = len(data)
# Make audio data into equal size chunks
X1e = []
for i in X1:
num_chunks = len(i)//sample_size
for j in range(num_chunks):
X1e.append(i[(j+1)*sample_size-sample_size:(j+1)*sample_size])
X1 = X1e
X2e = []
for i in X2:
num_chunks = len(i)//sample_size
for j in range(num_chunks):
X2e.append(i[(j+1)*sample_size-sample_size:(j+1)*sample_size])
X2=X2e
del X1e
del X2e
# Create Output data that is the same length as the input data.
Y1 = np.ones([X1.__len__()],dtype='float32').tolist()
Y2 = np.zeros([X2.__len__()],dtype='float32').tolist()
# Concatenate Left and Right .WAV data and output data as numpy arrays.
X1.extend(X2)
X = np.asarray(X1)
Y = np.asarray(Y1+Y2).astype(np.int16)
#X=list(X)
#Y=list(Y)
# Split data into test training data.
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.2,random_state=0,shuffle=True)
'''
print(X[1])
time = np.linspace(0.,33792, 33792)
plt.plot(time, X[1][:,1], label="Left channel")
plt.plot(time, X[1][:,0], label="Right channel")
plt.legend()
plt.xlabel("Time [s]")
plt.ylabel("Amplitude")
plt.show()
'''
# Create the Model
model = Sequential()
# Add a LSTM layer with 1 output, and ambiguous input data length.
model.add(layers.LSTM(1,batch_input_shape=(1,sample_size,2),return_sequences=True))
model.add(layers.LSTM(1,return_sequences=False))
# Compile Model
#history = model.compile(loss='mean_absolute_error', metrics=['accuracy'],optimizer='adam',output='sparse_categorical_crossentropy')
optimizer = Adam(learning_rate=2*1e-4)
'''
history = model.compile(optimizer=optimizer, loss={
'output': 'sparse_categorical_crossentropy', },
metrics={
'output': 'sparse_categorical_accuracy', },
sample_weight_mode='temporal')
'''
history = model.compile(
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
optimizer="adam",
metrics=["accuracy"],
)
model.summary()
# Define Training Parameters
num_epochs = 200
num_batch_size = 1
# Save the most accurate model to file. (Verbosity Gives more information)
checkpointer = ModelCheckpoint(filepath="SavedModels/checkpointModel.hdf5", verbose=1,save_best_only=True)
# Start the timer
start = datetime.now()
# Train the model
model.fit(X_train,Y_train,batch_size=num_batch_size, epochs=num_epochs, validation_data=(X_test,Y_test), callbacks=[checkpointer],verbose=1)
# Get and Print Model Validation Accuracy
test_accuracy=model.evaluate(X_test,Y_test,verbose=0)
print(test_accuracy[1])
Output & error:
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm_2 (LSTM) (1, 3072, 1) 16
lstm_3 (LSTM) (1, 1) 12
=================================================================
Total params: 28
Trainable params: 28
Non-trainable params: 0
_________________________________________________________________
Epoch 1/200
2022-02-07 09:40:36.348127: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'cudart64_110.dll'; dlerror: cudart64_110.dll not found
2022-02-07 09:40:36.348459: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2022-02-07 09:40:43.978976: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'nvcuda.dll'; dlerror: nvcuda.dll not found
2022-02-07 09:40:43.979029: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-02-07 09:40:43.985710: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: DESKTOP-0FFTIDB
2022-02-07 09:40:43.986092: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: DESKTOP-0FFTIDB
2022-02-07 09:40:43.990164: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-02-07 09:40:48.470415: W tensorflow/core/framework/op_kernel.cc:1745] OP_REQUIRES failed at sparse_xent_op.cc:103 : INVALID_ARGUMENT: Received a label value of 1 which is outside the valid range of [0, 1). Label values: 1
2022-02-07 09:58:29.070767: W tensorflow/core/framework/op_kernel.cc:1745] OP_REQUIRES failed at sparse_xent_op.cc:103 : INVALID_ARGUMENT: Received a label value of 1 which is outside the valid range of [0, 1). Label values: 1
Traceback (most recent call last):
File "C:\Users\andre\OneDrive\Documents\ESI2022\PythonScripts\BeltML\testML.py", line 127, in <module>
model.fit(X_train,Y_train,batch_size=num_batch_size, epochs=num_epochs, validation_data=(X_test,Y_test), callbacks=[checkpointer],verbose=1)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
raise e.with_traceback(filtered_tb) from None
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\eager\execute.py", line 58, in quick_execute
tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
InvalidArgumentError: Received a label value of 1 which is outside the valid range of [0, 1). Label values: 1
[[node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits
(defined at C:\ProgramData\Anaconda3\lib\site-packages\keras\backend.py:5113)
]] [Op:__inference_train_function_9025]
Errors may have originated from an input operation.
Input Source operations connected to node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits:
In[0] sparse_categorical_crossentropy/Reshape_1 (defined at C:\ProgramData\Anaconda3\lib\site-packages\keras\backend.py:5109)
In[1] sparse_categorical_crossentropy/Reshape (defined at C:\ProgramData\Anaconda3\lib\site-packages\keras\backend.py:3561)
Operation defined at: (most recent call last)
File "C:\ProgramData\Anaconda3\lib\runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "C:\ProgramData\Anaconda3\lib\runpy.py", line 87, in _run_code
exec(code, run_globals)
File "C:\ProgramData\Anaconda3\lib\site-packages\spyder_kernels\console\__main__.py", line 23, in <module>
start.main()
File "C:\ProgramData\Anaconda3\lib\site-packages\spyder_kernels\console\start.py", line 328, in main
kernel.start()
File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 677, in start
self.io_loop.start()
File "C:\ProgramData\Anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
self.asyncio_loop.run_forever()
File "C:\ProgramData\Anaconda3\lib\asyncio\base_events.py", line 570, in run_forever
self._run_once()
File "C:\ProgramData\Anaconda3\lib\asyncio\base_events.py", line 1859, in _run_once
handle._run()
File "C:\ProgramData\Anaconda3\lib\asyncio\events.py", line 81, in _run
self._context.run(self._callback, *self._args)
File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 457, in dispatch_queue
await self.process_one()
File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 446, in process_one
await dispatch(*args)
File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 353, in dispatch_shell
await result
File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 648, in execute_request
reply_content = await reply_content
File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 353, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2901, in run_cell
result = self._run_cell(
File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2947, in _run_cell
return runner(coro)
File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
coro.send(None)
File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3172, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3364, in run_ast_nodes
if (await self.run_code(code, result, async_=asy)):
File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3444, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "C:\Users\andre\AppData\Local\Temp/ipykernel_3604/1229251547.py", line 1, in <module>
runfile('C:/Users/andre/OneDrive/Documents/ESI2022/PythonScripts/BeltML/testML.py', wdir='C:/Users/andre/OneDrive/Documents/ESI2022/PythonScripts/BeltML')
File "C:\ProgramData\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 577, in runfile
exec_code(file_code, filename, ns_globals, ns_locals,
File "C:\ProgramData\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 465, in exec_code
exec(compiled, ns_globals, ns_locals)
File "C:\Users\andre\OneDrive\Documents\ESI2022\PythonScripts\BeltML\testML.py", line 127, in <module>
model.fit(X_train,Y_train,batch_size=num_batch_size, epochs=num_epochs, validation_data=(X_test,Y_test), callbacks=[checkpointer],verbose=1)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
return fn(*args, **kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 1216, in fit
tmp_logs = self.train_function(iterator)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 878, in train_function
return step_function(self, iterator)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 867, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 860, in run_step
outputs = model.train_step(data)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 809, in train_step
loss = self.compiled_loss(
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\compile_utils.py", line 201, in __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\losses.py", line 141, in __call__
losses = call_fn(y_true, y_pred)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\losses.py", line 245, in call
return ag_fn(y_true, y_pred, **self._fn_kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\losses.py", line 1737, in sparse_categorical_crossentropy
return backend.sparse_categorical_crossentropy(
File "C:\ProgramData\Anaconda3\lib\site-packages\keras\backend.py", line 5113, in sparse_categorical_crossentropy
res = tf.nn.sparse_softmax_cross_entropy_with_logits(
According to the documentation, the argument labels must be a batch_size vector with values in [0, num_classes)
From your logs:
received label value of 1 which is outside the valid range of [0, 1)
Perhaps the framework thinks that you have only one class because I also see that your Neural Network also has just 1 output.
Maybe for applying that SparseSoftmaxCrossEntropyWithLogits loss function you need 2 outputs. And your labels must be either 0 or 1.
I'm trying to use a sigmoid to join the output of two models with different embedding matrix. but I keep getting the error at the concatenate line. I have tried other suggestions from similar questions but it keeps giving the same error. I feel I'm missing something but I can't find it. please help explain. Thanks
############################ MODEL 1 ######################################
input_tensor=Input(shape=(35,))
input_layer= Embedding(vocab_size, 300, input_length=35, weights=[embedding_matrix],trainable=True)(input_tensor)
conv_blocks = []
filter_sizes = (2,3,4)
for fx in filter_sizes:
conv_layer= Conv1D(100, kernel_size=fx, activation='relu', data_format='channels_first')(input_layer) #filters=100, kernel_size=3
maxpool_layer = MaxPooling1D(pool_size=4)(conv_layer)
flat_layer= Flatten()(maxpool_layer)
conv_blocks.append(flat_layer)
conc_layer=concatenate(conv_blocks, axis=1)
graph = Model(inputs=input_tensor, outputs=conc_layer)
model = Sequential()
model.add(graph)
model.add(Dropout(0.2))
############################ MODEL 2 ######################################
input_tensor_1=Input(shape=(35,))
input_layer_1= Embedding(vocab_size, 300, input_length=35, weights=[embedding_matrix_1],trainable=True)(input_tensor_1)
conv_blocks_1 = []
filter_sizes_1 = (2,3,4)
for fx in filter_sizes_1:
conv_layer_1= Conv1D(100, kernel_size=fx, activation='relu', data_format='channels_first')(input_layer_1) #filters=100, kernel_size=3
maxpool_layer_1 = MaxPooling1D(pool_size=4)(conv_layer_1)
flat_layer_1= Flatten()(maxpool_layer_1)
conv_blocks_1.append(flat_layer_1)
conc_layer_1=concatenate(conv_blocks_1, axis=1)
graph_1 = Model(inputs=input_tensor_1, outputs=conc_layer_1)
model_1 = Sequential()
model_1.add(graph_1)
model_1.add(Dropout(0.2))
fused = concatenate([graph, graph_1], axis=-1)
prediction = Dense(3, activation='sigmoid')(fused)
model = Model(inputs=[input_tensor,input_tensor_1], outputs=[prediction])
model.compile(loss='sparse_categorical_crossentropy',optimizer='Adagrad', metrics=['accuracy'])
model.summary()
This is the error trace
Traceback (most recent call last):
File "DL_Ensemble.py", line 145, in <module>
fused = concatenate([graph, graph_1], axis= 1 )
File "/usr/pkg/lib/python3.8/site-
packages/tensorflow_core/python/keras/layers/merge.py", line 705, in concatenate
return Concatenate(axis=axis, **kwargs)(inputs)
File "/usr/pkg/lib/python3.8/site-packages/tensorflow_core/python/keras/engine/base_layer.py", line 887, in __call__
self._maybe_build(inputs)
File "/usr/pkg/lib/python3.8/site-packages/tensorflow_core/python/keras/engine/base_layer.py", line 2141, in _maybe_build
self.build(input_shapes)
File "/usr/pkg/lib/python3.8/site-
packages/tensorflow_core/python/keras/utils/tf_utils.py", line 306, in wrapper
output_shape = fn(instance, input_shape)
File "/usr/pkg/lib/python3.8/site-
packages/tensorflow_core/python/keras/layers/merge.py", line 378, in build
raise ValueError('A `Concatenate` layer should be called '
ValueError: A `Concatenate` layer should be called on a list of at least 2 inputs
UPDATE: I have reflected the answers given by #VivekMehta, however, I have this error.
File "DL_Ensemble.py", line 165, in <module>
model.fit([train_sequences,train_sequences], train_y, epochs=10,
verbose=False, batch_size=32, class_weight={0: 6.0, 1: 1.0, 2: 2.0})
File "/usr/pkg/lib/python3.8/site-
packages/tensorflow_core/python/keras/engine/training.py", line 709, in fit
return func.fit(
File "/usr/pkg/lib/python3.8/site-
packages/tensorflow_core/python/keras/engine/training_v2.py", line 313, in fit
training_result = run_one_epoch(
File "/usr/pkg/lib/python3.8/site-
packages/tensorflow_core/python/keras/engine/training_v2.py", line 123, in run_one_epoch
batch_outs = execution_function(iterator)
File "/usr/pkg/lib/python3.8/site-
packages/tensorflow_core/python/keras/engine/training_v2_utils.py",
line
86, in execution_function
distributed_function(input_fn))
File "/usr/pkg/lib/python3.8/site-
packages/tensorflow_core/python/eager/def_function.py", line 457, in __call__
result = self._call(*args, **kwds)
File "/usr/pkg/lib/python3.8/site-
packages/tensorflow_core/python/eager/def_function.py", line 520, in _call
return self._stateless_fn(*args, **kwds)
File "/usr/pkg/lib/python3.8/site-
packages/tensorflow_core/python/eager/function.py", line 1823, in __call__
return graph_function._filtered_call(args, kwargs) # pylint:
disable=protected-access
File "/usr/pkg/lib/python3.8/site-
packages/tensorflow_core/python/eager/function.py", line 1137, in _filtered_call
return self._call_flat(
File "/usr/pkg/lib/python3.8/site-
packages/tensorflow_core/python/eager/function.py", line 1223, in _call_flat
flat_outputs = forward_function.call(
File "/usr/pkg/lib/python3.8/site-
packages/tensorflow_core/python/eager/function.py", line 506, in call
outputs = execute.execute(
File "/usr/pkg/lib/python3.8/site-
packages/tensorflow_core/python/eager/execute.py", line 67, in quick_execute
six.raise_from(core._status_to_exception(e.code, message), None)
File "<string>", line 3, in raise_from
tensorflow.python.framework.errors_impl.InvalidArgumentError:
Conv2DCustomBackpropInputOp only supports NHWC.
[[node Conv2DBackpropInput (defined at /usr/pkg/lib/python3.8/site-
packages/tensorflow_core/python/framework/ops.py:1751) ]] [Op:__inference_distributed_function_2250]
Function call stack:
distributed_function
I also wanted to add that when the code is run on a GPU as opposed to a CPU, the error occurs on the same line as before but the message changes to :
File "DL_Ensemble.py", line 166, in <module>
model.fit([train_sequences,train_sequences], train_y, epochs=10, verbose=False, batch_size=32, class_weight={0: 6.0, 1: 1.0, 2: 2.0})
File "/home/kosimadukwe/.local/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 880, in fit
validation_steps=validation_steps)
File "/home/kosimadukwe/.local/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_arrays.py", line 329, in model_iteration
batch_outs = f(ins_batch)
File "/home/kosimadukwe/.local/lib/python3.7/site-packages/tensorflow/python/keras/backend.py", line 3073, in __call__
self._make_callable(feed_arrays, feed_symbols, symbol_vals, session)
File "/home/kosimadukwe/.local/lib/python3.7/site-packages/tensorflow/python/keras/backend.py", line 3019, in _make_callable
callable_fn = session._make_callable_from_options(callable_opts)
File "/home/kosimadukwe/.local/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 1471, in _make_callable_from_options
return BaseSession._Callable(self, callable_options)
File "/home/kosimadukwe/.local/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 1425, in __init__
session._session, options_ptr, status)
File "/home/kosimadukwe/.local/lib/python3.7/site-packages/tensorflow/python/framework/errors_impl.py", line 528, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: Conv2DCustomBackpropInputOp only supports NHWC.
[[{{node training/Adagrad/gradients/conv1d_5/conv1d/Conv2D_grad/Conv2DBackpropInput}}]]
Exception ignored in: <function BaseSession._Callable.__del__ at 0x7fe4dd06a730>
Traceback (most recent call last):
File "/home/kosimadukwe/.local/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 1455, in __del__
self._session._session, self._handle, status)
File "/home/kosimadukwe/.local/lib/python3.7/site-packages/tensorflow/python/framework/errors_impl.py", line 528, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: No such callable handle: 94697914208640
So from you stack trace, code is throwing error at:
fused = concatenate([graph, graph_1], axis= 1 )
print(type(graph))
# output: <class 'tensorflow.python.keras.engine.training.Model'>
This error is coming because concatenate expects list of tensors to be concatenated. While you are passing graph and graph_1 which is not tensor but a Model instance.
So from your code I assume that you want to concatenate output of these two models. In that case you'll have to change above line to:
fused = concatenate([graph.outputs[0], graph_1.outputs[0]], axis=-1)
Here, graph.outputs gives list of outputs by given by Model. Since each model is giving us one output, we will take 0th index from each output.
Change this part and you'll get model summary as you are expecting.
I want to pass an intermediate tensor result into a fully connected neural network inside the body of a while-loop.
My issues are with the instantiating of said neural network.
My first try involved making a placeholder for the NN, then feed it using a Session instance. It does not work, because the feed_dict doesn't accept tensors. Fair enough, given the data-flow nature of the graph.
On my second attempt, I instantiated my NN inside the loop body and pass the intermediate tensor directly.
However, when I do so, the following stacktrace shows up:
Caused by op u'net/fc1_W/read', defined at:
File "./main.py", line 176, in <module>
offline_indexing(sys.argv[1])
File "./main.py", line 128, in offline_indexing
test.run(a, f)
File "/home/lsv/Desktop/gitlab/Gencoding/test.py", line 86, in run
print sess.run(graph_embed(), feed_dict={adj: x, features: y})
File "/home/lsv/Desktop/gitlab/Gencoding/test.py", line 73, in graph_embed
final_mus = tf.while_loop(cond, body, [mus, features, adj, 0])[0]
File "/home/lsv/.local/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 3291, in while_loop
return_same_structure)
File "/home/lsv/.local/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 3004, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "/home/lsv/.local/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2939, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "/home/lsv/Desktop/gitlab/Gencoding/test.py", line 63, in body
h2 = fc_NN(mu_neigh_sum)
File "/home/lsv/Desktop/gitlab/Gencoding/test.py", line 53, in fc_NN
fc1 = fc_layer(ac2, 64, "fc1")
File "/home/lsv/Desktop/gitlab/Gencoding/test.py", line 43, in fc_layer
W = tf.get_variable(name+'_W', dtype=tf.float32, shape=[embedding_size, embedding_size], initializer=initer)
File "/home/lsv/.local/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 1487, in get_variable
aggregation=aggregation)
File "/home/lsv/.local/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 1237, in get_variable
aggregation=aggregation)
File "/home/lsv/.local/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 540, in get_variable
aggregation=aggregation)
File "/home/lsv/.local/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 492, in _true_getter
aggregation=aggregation)
File "/home/lsv/.local/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 922, in _get_single_variable
aggregation=aggregation)
File "/home/lsv/.local/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 183, in __call__
return cls._variable_v1_call(*args, **kwargs)
File "/home/lsv/.local/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 146, in _variable_v1_call
aggregation=aggregation)
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value net/fc1_W
[[node net/fc1_W/read (defined at /home/lsv/Desktop/gitlab/Gencoding/test.py:43) = Identity[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](net/fc1_W)]]
Note that the use of Dense layers result in the same error as well.
Here is my code:
def fc_layer(bottom, n_weight, name):
assert len(bottom.get_shape()) == 2
n_prev_weight = bottom.get_shape()[1]
initer = tf.truncated_normal_initializer(stddev=0.01)
W = tf.get_variable(name+'_W', dtype=tf.float32, shape=[embedding_size, embedding_size], initializer=initer)
b = tf.get_variable(name+'_b', dtype=tf.float32, shape=[embedding_size], initializer=tf.zeros_initializer)
fc = tf.nn.bias_add(tf.matmul(bottom, W), b)
return fc
def fc_NN(x):
fc2 = fc_layer(x, 64, "fc2")
ac2 = tf.nn.relu(fc2)
fc1 = fc_layer(ac2, 64, "fc1")
return fc1
fcnn = fc_NN()
def cond(m, f, a, i):
return tf.less(i, T)
def body(m, f, a, i):
mu_neigh_sum = tf.tensordot(a, m, 1)
h1 = tf.matmul(f, W1)
# First incriminating try - with x as a placeholder
h2 = tf.Session().run(fcnn, {x: mu_neigh_sum})
# Second incriminating try - with x as a tensor
h2bis = fc_NN(mu_neigh_sum)
return tf.tanh(h1 + h2), f, a, i+1
final_mus = tf.while_loop(cond, body, [mus, features, adj, 0])[0]
I was trying to build multi-scale object detection inference code based on Tensorflow Object Detection API. However I don't quite know how to get around with variable reuse issue when predicting boxes on difference scale of a same image in one session. Here's what I did and where I don't understand:
In https://github.com/tensorflow/models/blob/master/research/object_detection/evaluator.py, I duplicate the line 'prediction_dict = model.predict(preprocessed_image, true_image_shapes)' one more time as you can see below.
def _extract_predictions_and_losses(model,
create_input_dict_fn,
ignore_groundtruth=False):
"""Constructs tensorflow detection graph and returns output tensors.
Args:
model: model to perform predictions with.
create_input_dict_fn: function to create input tensor dictionaries.
ignore_groundtruth: whether groundtruth should be ignored.
Returns:
prediction_groundtruth_dict: A dictionary with postprocessed tensors (keyed
by standard_fields.DetectionResultsFields) and optional groundtruth
tensors (keyed by standard_fields.InputDataFields).
losses_dict: A dictionary containing detection losses. This is empty when
ignore_groundtruth is true.
"""
input_dict = create_input_dict_fn()
prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
input_dict = prefetch_queue.dequeue()
original_image = tf.expand_dims(input_dict[fields.InputDataFields.image], 0)
preprocessed_image, true_image_shapes = model.preprocess(
tf.to_float(original_image))
prediction_dict1 = model.predict(preprocessed_image, true_image_shapes)
/****Some code to resize preprocessed_image****/
prediction_dict2 = model.predict(preprocessed_image, true_image_shapes)
detections = model.postprocess(prediction_dict, true_image_shapes)
groundtruth = None
losses_dict = {}
if not ignore_groundtruth:
This gives me the following error:
Traceback (most recent call last):
File "object_detection/eval_fddb.py", line 167, in <module>
tf.app.run()
File "/local/mnt/workspace/chris/anaconda2/lib/python2.7/site-packages/tensorflow/python/platform/app.py", line 48, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough))
File "object_detection/eval_fddb.py", line 163, in main
FLAGS.checkpoint_dir, FLAGS.eval_dir)
File "/local/mnt/workspace/chris/projects/models/object_detection/evaluator_fddb.py", line 261, in evaluate
create_input_dict_fn=create_input_dict_fn)
File "/local/mnt/workspace/chris/projects/models/object_detection/evaluator_fddb.py", line 187, in _extract_prediction_tensors
prediction_dict = model.predict(preprocessed_image)
File "/local/mnt/workspace/chris/projects/models/object_detection/meta_architectures/umd_meta_arch.py", line 362, in predict
image_shape) = self._extract_rpn_feature_maps(preprocessed_inputs)
File "/local/mnt/workspace/chris/projects/models/object_detection/meta_architectures/umd_meta_arch.py", line 278, in _extract_rpn_feature_maps
preprocessed_inputs, scope=self.first_stage_feature_extractor_scope)
File "/local/mnt/workspace/chris/projects/models/object_detection/meta_architectures/faster_rcnn_meta_arch.py", line 154, in extract_proposal_features_w_internal_layers
return self._extract_proposal_features_w_internal_layers(preprocessed_inputs, scope)
File "/local/mnt/workspace/chris/projects/models/object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py", line 173, in _extract_proposal_features_w_internal_layers
scope=var_scope)
File "/local/mnt/workspace/chris/projects/models/slim/nets/resnet_v1.py", line 300, in resnet_v1_101
reuse=reuse, scope=scope)
File "/local/mnt/workspace/chris/projects/models/slim/nets/resnet_v1.py", line 214, in resnet_v1
net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
File "/local/mnt/workspace/chris/projects/models/slim/nets/resnet_utils.py", line 122, in conv2d_same
rate=rate, padding='VALID', scope=scope)
File "/local/mnt/workspace/chris/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 181, in func_with_args
return func(*args, **current_args)
File "/local/mnt/workspace/chris/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1027, in convolution
outputs = layer.apply(inputs)
File "/local/mnt/workspace/chris/anaconda2/lib/python2.7/site-packages/tensorflow/python/layers/base.py", line 503, in apply
return self.__call__(inputs, *args, **kwargs)
File "/local/mnt/workspace/chris/anaconda2/lib/python2.7/site-packages/tensorflow/python/layers/base.py", line 443, in __call__
self.build(input_shapes[0])
File "/local/mnt/workspace/chris/anaconda2/lib/python2.7/site-packages/tensorflow/python/layers/convolutional.py", line 137, in build
dtype=self.dtype)
File "/local/mnt/workspace/chris/anaconda2/lib/python2.7/site-packages/tensorflow/python/layers/base.py", line 383, in add_variable
trainable=trainable and self.trainable)
File "/local/mnt/workspace/chris/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 1065, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/local/mnt/workspace/chris/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 962, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/local/mnt/workspace/chris/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 360, in get_variable
validate_shape=validate_shape, use_resource=use_resource)
File "/local/mnt/workspace/chris/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1561, in layer_variable_getter
return _model_variable_getter(getter, *args, **kwargs)
File "/local/mnt/workspace/chris/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1553, in _model_variable_getter
custom_getter=getter, use_resource=use_resource)
File "/local/mnt/workspace/chris/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 181, in func_with_args
return func(*args, **current_args)
File "/local/mnt/workspace/chris/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/framework/python/ops/variables.py", line 261, in model_variable
use_resource=use_resource)
File "/local/mnt/workspace/chris/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 181, in func_with_args
return func(*args, **current_args)
File "/local/mnt/workspace/chris/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/framework/python/ops/variables.py", line 216, in variable
use_resource=use_resource)
File "/local/mnt/workspace/chris/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 352, in _true_getter
use_resource=use_resource)
File "/local/mnt/workspace/chris/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 664, in _get_single_variable
name, "".join(traceback.format_list(tb))))
ValueError: Variable FirstStageFeatureExtractor/resnet_v1_101/conv1/weights already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:
File "/local/mnt/workspace/chris/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/framework/python/ops/variables.py", line 216, in variable
use_resource=use_resource)
File "/local/mnt/workspace/chris/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 181, in func_with_args
return func(*args, **current_args)
File "/local/mnt/workspace/chris/anaconda2/lib/python2.7/site-packages/tensorflow/contrib/framework/python/ops/variables.py", line 261, in model_variable
use_resource=use_resource)
My understanding is that when each time I call model.predict(), it creates a model with all variables. But in the second time, I can't create all the variables because all of them have been existing with the same name. I tried to add an variable scope with 'reuse=True' for the second model.predict(), but it wouldn't load the variables at all because mismatch names.
My questions is:
How to get around with this by running the same model on two different scale images in one sess.run()?
Thank you very much.
Have you tried using tf.variable_scope() before you build the model?
with tf.variable_scope('first_prediction'):
prediction_dict1 = model.predict(preprocessed_image, true_image_shapes)
with tf.variable_scope('second_prediction'):
prediction_dict2 = model.predict(preprocessed_image, true_image_shapes)
This way both model should have different prefix to their original variable name, hence preventing the reuse problem.
EDIT: After trying several things, I have added the following to my code:
with tf.Session(graph=self.graph) as session:
session.run(tf.initialize_all_variables())
try:
session.run(tf.assert_variables_initialized())
except tf.errors.FailedPreconditionError:
raise RuntimeError("Not all variables initialized!")
Now, occasionally this fails, i.e. tf.assert_variables_initialized() will raise FailedPreconditionError, even though immediately before it, tf.initialize_all_variables() was executed. Does anyone have any idea how this can happen?
Original question:
Background
I'm running cross-validated (CV) hyperparameter search on a basic neural net created through Tensorflow, with GradientDescentOptimizer. At seemingly random moments I'm getting a FailedPreconditionError, for different Variables. For example (full stack trace at end of post):
FailedPreconditionError: Attempting to use uninitialized value Variable_5
[[Node: Variable_5/read = Identity[T=DT_FLOAT, _class=["loc:#Variable_5"], _device="/job:localhost/replica:0/task:0/gpu:0"](Variable_5)]]
Some runs fail fairly fast, others not -- one has been running for 15 hours now without problems. I'm running this in parallel on multiple GPUs - not the optimization itself, but each CV fold.
What I've checked
From this and this post I understand that this error occurs when attempting to use Variables that haven't been initialized using tf.initialize_all_variables(). However, I am 99% certain that I'm doing this (and if not, I'd expect it to always fail) - I'll post code below.
The API doc says that
This exception is most commonly raised when running an operation that
reads a tf.Variable before it has been initialized.
"Most commonly" suggests that it can also be raised in different scenarios. So, for now the main question:
Question:
are there other scenarios under which this exception may be raised, and what are they?
Code
MLP class:
class MLP(object):
def __init__(self, n_in, hidden_config, n_out, optimizer, f_transfer=tf.nn.tanh, f_loss=mean_squared_error,
f_out=tf.identity, seed=None, global_step=None, graph=None, dropout_keep_ratio=1):
self.graph = tf.Graph() if graph is None else graph
# all variables defined below
with self.graph.as_default():
self.X = tf.placeholder(tf.float32, shape=(None, n_in))
self.y = tf.placeholder(tf.float32, shape=(None, n_out))
self._init_weights(n_in, hidden_config, n_out, seed)
self._init_computations(f_transfer, f_loss, f_out)
self._init_optimizer(optimizer, global_step)
def fit_validate(self, X, y, val_X, val_y, val_f, iters=100, val_step=1):
[snip]
with tf.Session(graph=self.graph) as session:
VAR INIT HERE-->tf.initialize_all_variables().run() #<-- VAR INIT HERE
for i in xrange(iters):
[snip: get minibatch here]
_, l = session.run([self.optimizer, self.loss], feed_dict={self.X:X_batch, self.y:y_batch})
# validate
if i % val_step == 0:
val_yhat = self.validation_yhat.eval(feed_dict=val_feed_dict, session=session)
As you can see, tf.init_all_variables().run() is always called before anything else is done. The net is initialized as:
def estimator_getter(params):
[snip]
graph = tf.Graph()
with graph.as_default():
global_step = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(params.get('learning_rate',0.1), global_step, decay_steps, decay_rate)
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
net = MLP(config_num_inputs[config_id], hidden, 1, optimizer, seed=params.get('seed',100), global_step=global_step, graph=graph, dropout_keep_ratio=dropout)
Full example stack trace:
FailedPreconditionError: Attempting to use uninitialized value Variable_5
[[Node: Variable_5/read = Identity[T=DT_FLOAT, _class=["loc:#Variable_5"], _device="/job:localhost/replica:0/task:0/gpu:0"](Variable_5)]]
Caused by op u'Variable_5/read', defined at:
File "tf_paramsearch.py", line 373, in <module>
randomized_search_params(int(sys.argv[1]))
File "tf_paramsearch.py", line 356, in randomized_search_params
hypersearch.fit()
File "/home/centos/ODQ/main/python/odq/cv.py", line 430, in fit
return self._fit(sampled_params)
File "/home/centos/ODQ/main/python/odq/cv.py", line 190, in _fit
for train_key, test_key in self.cv)
File "/home/centos/miniconda2/envs/tensorflow/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 766, in __call__
n_jobs = self._initialize_pool()
File "/home/centos/miniconda2/envs/tensorflow/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 537, in _initialize_pool
self._pool = MemmapingPool(n_jobs, **poolargs)
File "/home/centos/miniconda2/envs/tensorflow/lib/python2.7/site-packages/sklearn/externals/joblib/pool.py", line 580, in __init__
super(MemmapingPool, self).__init__(**poolargs)
File "/home/centos/miniconda2/envs/tensorflow/lib/python2.7/site-packages/sklearn/externals/joblib/pool.py", line 418, in __init__
super(PicklingPool, self).__init__(**poolargs)
File "/home/centos/miniconda2/envs/tensorflow/lib/python2.7/multiprocessing/pool.py", line 159, in __init__
self._repopulate_pool()
File "/home/centos/miniconda2/envs/tensorflow/lib/python2.7/multiprocessing/pool.py", line 223, in _repopulate_pool
w.start()
File "/home/centos/miniconda2/envs/tensorflow/lib/python2.7/multiprocessing/process.py", line 130, in start
self._popen = Popen(self)
File "/home/centos/miniconda2/envs/tensorflow/lib/python2.7/multiprocessing/forking.py", line 126, in __init__
code = process_obj._bootstrap()
File "/home/centos/miniconda2/envs/tensorflow/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/home/centos/miniconda2/envs/tensorflow/lib/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "/home/centos/miniconda2/envs/tensorflow/lib/python2.7/multiprocessing/pool.py", line 113, in worker
result = (True, func(*args, **kwds))
File "/home/centos/miniconda2/envs/tensorflow/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 130, in __call__
return self.func(*args, **kwargs)
File "/home/centos/miniconda2/envs/tensorflow/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py", line 72, in __call__
return [func(*args, **kwargs) for func, args, kwargs in self.items]
File "/home/centos/ODQ/main/python/odq/cv.py", line 131, in _fold_runner
estimator = estimator_getter(parameters)
File "tf_paramsearch.py", line 264, in estimator_getter
net = MLP(config_num_inputs[config_id], hidden, 1, optimizer, seed=params.get('seed',100), global_step=global_step, graph=graph, dropout_keep_ratio=dropout)
File "tf_paramsearch.py", line 86, in __init__
self._init_weights(n_in, hidden_config, n_out, seed)
File "tf_paramsearch.py", line 105, in _init_weights
self.out_weights = tf.Variable(tf.truncated_normal([hidden_config[-1], n_out], stddev=stdev))
File "/home/centos/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 206, in __init__
dtype=dtype)
File "/home/centos/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 275, in _init_from_args
self._snapshot = array_ops.identity(self._variable, name="read")
File "/home/centos/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 523, in identity
return _op_def_lib.apply_op("Identity", input=input, name=name)
File "/home/centos/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/ops/op_def_library.py", line 655, in apply_op
op_def=op_def)
File "/home/centos/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2117, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/home/centos/miniconda2/envs/tensorflow/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1128, in __init__
self._traceback = _extract_stack()
Ok, I've found the problem. There was a rare condition in my code that resulted in one of the hidden layers to be created with shape (0, N), i.e. no inputs. In this case, Tensorflow apparently fails to initialize the variables pertaining to that layer.
While this makes sense, it might be useful for Tensorflow to log a warning message in such cases (btw, I also tried to set Tensorflow logging to debug mode, but couldn't find how -- tf.logging.set_verbosity() didn't seem to have an effect).
BTW, for efficiency/less bugs, you could follow following pattern.
tf.reset_default_graph()
a = tf.constant(1)
<add more operations to your graph>
b = tf.Variable(1)
init_op = tf.initialize_all_variables()
tf.get_default_graph().finalize()
sess = tf.InteractiveSession()
sess.run(init_op)
sess.run(compute_op)
The finalize prevents you from modifying graph between runs which is slow in the current version. Also, because there's one session/one graph, you don't need with blocks.
For me the solution was
with sess.as_default():
result = compute_fn([seed_input,1])
check FailedPreconditionError: Attempting to use uninitialized in Tensorflow
for other options and my explanation
Strangely session.run() is not the same as running a function with sess.as_default(), I tried both.