I'm trying to recreate the results in "Cleaning Up Dirty Scanned Documents with Deep Learning", using ImageDataGenerator to read in the images and model.fit_generator to train my model, but when I try to fit the model, I get the following error and stack trace:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-34-2dedade68c7a> in <module>()
4 epochs=epochs,
5 validation_data=validation_generator,
----> 6 validation_steps=nb_validation_samples // batch_size)
/usr/local/lib/python3.6/dist-packages/keras/legacy/interfaces.py in wrapper(*args, **kwargs)
89 warnings.warn('Update your `' + object_name + '` call to the ' +
90 'Keras 2 API: ' + signature, stacklevel=2)
---> 91 return func(*args, **kwargs)
92 wrapper._original_function = func
93 return wrapper
/usr/local/lib/python3.6/dist-packages/keras/engine/training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
1656 use_multiprocessing=use_multiprocessing,
1657 shuffle=shuffle,
-> 1658 initial_epoch=initial_epoch)
1659
1660 #interfaces.legacy_generator_methods_support
/usr/local/lib/python3.6/dist-packages/keras/engine/training_generator.py in fit_generator(model, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
213 outs = model.train_on_batch(x, y,
214 sample_weight=sample_weight,
--> 215 class_weight=class_weight)
216
217 outs = to_list(outs)
/usr/local/lib/python3.6/dist-packages/keras/engine/training.py in train_on_batch(self, x, y, sample_weight, class_weight)
1441 x, y,
1442 sample_weight=sample_weight,
-> 1443 class_weight=class_weight)
1444 if self._uses_dynamic_learning_phase():
1445 ins = x + y + sample_weights + [1.]
/usr/local/lib/python3.6/dist-packages/keras/engine/training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, check_array_lengths, batch_size)
793 feed_output_shapes,
794 check_batch_axis=False, # Don't enforce the batch size.
--> 795 exception_prefix='target')
796
797 # Generate sample-wise weight values given the `sample_weight` and
/usr/local/lib/python3.6/dist-packages/keras/engine/training_utils.py in standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
139 ': expected ' + names[i] + ' to have shape ' +
140 str(shape) + ' but got array with shape ' +
--> 141 str(data_shape))
142 return data
143
ValueError: Error when checking target: expected conv2d_10 to have shape (1, 1, 1) but got array with shape (258, 540, 3)
Here is my script:
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Conv2D, Activation, BatchNormalization, LeakyReLU, MaxPooling2D, UpSampling2D
import numpy as np
from keras import backend as K
img_width, img_height = 258,540
train_data_dir = 'drive/My Drive/train'
validation_data_dir = 'drive/My Drive/train_cleaned'
nb_train_samples = 144
nb_validation_samples = 144
epochs = 200
batch_size = 20
if K.image_data_format() == 'channels_first':
input_shape = (3, img_width, img_height)
else:
input_shape = (img_width, img_height, 3)
model = Sequential([
Conv2D(input_shape=input_shape, filters=64, kernel_size=(258, 540), padding='same'),
LeakyReLU(),
BatchNormalization(),
Conv2D(filters=64, kernel_size=(258, 540), padding='same'),
LeakyReLU(),
MaxPooling2D((2,2), padding='same'),
Conv2D(filters=64, kernel_size=(129, 270), padding='same'),
LeakyReLU(),
BatchNormalization(),
Conv2D(filters=64, kernel_size=(129, 270), padding='same'),
LeakyReLU(),
UpSampling2D((2, 2)),
Conv2D(filters=1, kernel_size=(258,540), activation='sigmoid')
])
model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
train_datagen = ImageDataGenerator(
rescale=1. / 255,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_directory(
train_data_dir,
target_size=(img_width, img_height),
class_mode='input',
batch_size=batch_size)
validation_generator = test_datagen.flow_from_directory(
validation_data_dir,
target_size=(img_width, img_height),
class_mode='input',
batch_size=batch_size)
model.fit_generator(
train_generator,
steps_per_epoch=nb_train_samples // batch_size,
epochs=epochs,
validation_data=validation_generator,
validation_steps=nb_validation_samples // batch_size)
The output of model.summary() can be found below:
Model: "sequential_2"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_6 (Conv2D) (None, 258, 540, 64) 26749504
_________________________________________________________________
leaky_re_lu_5 (LeakyReLU) (None, 258, 540, 64) 0
_________________________________________________________________
batch_normalization_3 (Batch (None, 258, 540, 64) 256
_________________________________________________________________
conv2d_7 (Conv2D) (None, 258, 540, 64) 570654784
_________________________________________________________________
leaky_re_lu_6 (LeakyReLU) (None, 258, 540, 64) 0
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 129, 270, 64) 0
_________________________________________________________________
conv2d_8 (Conv2D) (None, 129, 270, 64) 142663744
_________________________________________________________________
leaky_re_lu_7 (LeakyReLU) (None, 129, 270, 64) 0
_________________________________________________________________
batch_normalization_4 (Batch (None, 129, 270, 64) 256
_________________________________________________________________
conv2d_9 (Conv2D) (None, 129, 270, 64) 142663744
_________________________________________________________________
leaky_re_lu_8 (LeakyReLU) (None, 129, 270, 64) 0
_________________________________________________________________
up_sampling2d_2 (UpSampling2 (None, 258, 540, 64) 0
_________________________________________________________________
conv2d_10 (Conv2D) (None, 1, 1, 1) 8916481
=================================================================
Total params: 891,648,769
Trainable params: 891,648,513
Non-trainable params: 256
_________________________________________________________________
None
I'm not very familiar with Keras and it seems like the error is with my last layer's input shape, but I don't know how to change the input size that's expected. I tried passing in the parameter input_shape=input_shape to the last layer and that didn't change anything.
I'd appreciate any other critique on the code that I've written, even if it's not directly related to answering my question. Thanks!
EDIT: Here is an image of the network I'm trying to recreate:
Change all these:
kernel_size=(258, 540)
to this:
kernel_size=(3, 3)
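Keras reports this mismatch because your convolutional filters are the same size as your images. The layers built with padding='same' keep their spatial size, but the last Conv2D uses the default 'valid' padding, so a 258×540 filter fits the 258×540 feature map in exactly one position and the layer's output is (1, 1, 1) — which is what model.summary() shows for conv2d_10. That is because a filter slides over its input, multiplying every corresponding pixel and then summing up; if the input and the filter are the same size, all pixels and filter weights are multiplied and summed to one number. When you switch to (3, 3) kernels, also pass padding='same' to the last layer, otherwise its output shrinks to 256×538 instead of matching the 258×540 target.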
Related
I was trying to train a simple Keras network for classification when I faced the following error. I know there is something wrong with my inputs but I couldn't figure out how to fix it. Here is my code
my data set shape :
x_train : float32 0.0 1.0 (2444, 64, 64, 1)
y_train : float32 0.0 1.0 (2444, 2)
x_test : float32 0.0 1.0 (9123, 64, 64, 1)
y_test : float32 0.0 1.0 (9123, 2)
the model :
inputs = keras.Input(shape=(64,64,1), dtype='float32')
x = keras.layers.Conv2D(12,(9,9), padding="same",input_shape=(64,64,1), dtype='float32',activation='relu')(inputs)
x = keras.layers.Conv2D(18,(7,7), padding="same", activation='relu')(x)
x = keras.layers.MaxPool2D(pool_size=(2,2))(x)
x = keras.layers.Dropout(0.25)(x)
x = keras.layers.Dense(50, activation='relu')(x)
x = keras.layers.Dropout(0.4)(x)
outputs = keras.layers.Dense(2, activation='softmax')(x)
model = keras.Model(inputs, outputs)
model summary :
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) [(None, 64, 64, 1)] 0
_________________________________________________________________
conv2d_2 (Conv2D) (None, 64, 64, 12) 984
_________________________________________________________________
conv2d_3 (Conv2D) (None, 64, 64, 18) 10602
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 32, 18) 0
_________________________________________________________________
dropout_2 (Dropout) (None, 32, 32, 18) 0
_________________________________________________________________
dense_2 (Dense) (None, 32, 32, 50) 950
_________________________________________________________________
dropout_3 (Dropout) (None, 32, 32, 50) 0
_________________________________________________________________
dense_3 (Dense) (None, 32, 32, 2) 102
=================================================================
Total params: 12,638
Trainable params: 12,638
Non-trainable params: 0
________________________
Here are the compile and fit calls; the error occurs when I try to fit the model:
model.compile(
loss=keras.losses.SparseCategoricalCrossentropy(),
optimizer=keras.optimizers.Adam(0.01),
metrics=["acc"],
)
model.fit(x_train, y_train, batch_size=32, epochs = 20, validation_split= 0.3,
callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)])
and finally the error:
ValueError Traceback (most recent call last)
<ipython-input-31-e4cade46a08c> in <module>()
1 model.fit(x_train, y_train, batch_size=32, epochs = 20, validation_split= 0.3,
----> 2 callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)])
9 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
992 except Exception as e: # pylint:disable=broad-except
993 if hasattr(e, "ag_error_metadata"):
--> 994 raise e.ag_error_metadata.to_exception(e)
995 else:
996 raise
ValueError: in user code:
/usr/local/lib/python3.7/dist-packages/keras/engine/training.py:853 train_function *
return step_function(self, iterator)
/usr/local/lib/python3.7/dist-packages/keras/engine/training.py:842 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:1286 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:3632 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/keras/engine/training.py:835 run_step **
outputs = model.train_step(data)
/usr/local/lib/python3.7/dist-packages/keras/engine/training.py:792 train_step
self.compiled_metrics.update_state(y, y_pred, sample_weight)
/usr/local/lib/python3.7/dist-packages/keras/engine/compile_utils.py:457 update_state
metric_obj.update_state(y_t, y_p, sample_weight=mask)
/usr/local/lib/python3.7/dist-packages/keras/utils/metrics_utils.py:73 decorated
update_op = update_state_fn(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/keras/metrics.py:177 update_state_fn
return ag_update_state(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/keras/metrics.py:681 update_state **
matches = ag_fn(y_true, y_pred, **self._fn_kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/keras/metrics.py:3537 sparse_categorical_accuracy
return tf.cast(tf.equal(y_true, y_pred), backend.floatx())
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:206 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/math_ops.py:1864 equal
return gen_math_ops.equal(x, y, name=name)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/ops/gen_math_ops.py:3219 equal
name=name)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/op_def_library.py:750 _apply_op_helper
attrs=attr_protos, op_def=op_def)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py:601 _create_op_internal
compute_device)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py:3569 _create_op_internal
op_def=op_def)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py:2042 __init__
control_input_ops, op_def)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/ops.py:1883 _create_c_op
raise ValueError(str(e))
ValueError: Dimensions must be equal, but are 2 and 32 for '{{node Equal}} = Equal[T=DT_FLOAT, incompatible_shape_error=true](IteratorGetNext:1, Cast_1)' with input shapes: [?,2], [?,32,32].
As you can see in the model summary, the output shape of the model is (None, 32, 32, 2), while based on the target values it should be (None, 2). Try adding a Flatten layer before the Dense layers:
x = keras.layers.Dropout(0.25)(x)
x = keras.layers.Flatten()(x) # Add this
x = keras.layers.Dense(50, activation='relu')(x)
I am trying to implement an image classifier using "The Street View House Numbers (SVHN) Dataset" from this link. I am using format 2 which contains 32x32 RGB centered digit images from 0 to 9. When I try to compile and fit the model I get the following error:
Epoch 1/10
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-37-31870b6986af> in <module>()
3
4 model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
----> 5 model.fit(trainX, trainY, validation_data=(validX, validY), batch_size=128, epochs=10)
9 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
975 except Exception as e: # pylint:disable=broad-except
976 if hasattr(e, "ag_error_metadata"):
--> 977 raise e.ag_error_metadata.to_exception(e)
978 else:
979 raise
ValueError: in user code:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:805 train_function *
return step_function(self, iterator)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:795 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:788 run_step **
outputs = model.train_step(data)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:756 train_step
y, y_pred, sample_weight, regularization_losses=self.losses)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/compile_utils.py:203 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/losses.py:152 __call__
losses = call_fn(y_true, y_pred)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/losses.py:256 call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/losses.py:1537 categorical_crossentropy
return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/backend.py:4833 categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/tensor_shape.py:1134 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (None, 1) and (None, 10) are incompatible
The code is:
model = Sequential([
Conv2D(filters=64, kernel_size=3, strides=2, activation='relu', input_shape=(32,32,3)),
MaxPooling2D(pool_size=(2, 2), strides=1, padding='same'),
Conv2D(filters=32, kernel_size=3, strides=1, activation='relu'),
MaxPooling2D(pool_size=(2, 2), strides=1, padding='same'),
Flatten(),
Dense(10, activation='softmax')
])
model.summary()
Model: "sequential_10"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_23 (Conv2D) (None, 15, 15, 64) 1792
_________________________________________________________________
max_pooling2d_23 (MaxPooling (None, 15, 15, 64) 0
_________________________________________________________________
conv2d_24 (Conv2D) (None, 13, 13, 32) 18464
_________________________________________________________________
max_pooling2d_24 (MaxPooling (None, 13, 13, 32) 0
_________________________________________________________________
flatten_10 (Flatten) (None, 5408) 0
_________________________________________________________________
dense_13 (Dense) (None, 10) 54090
=================================================================
Total params: 74,346
Trainable params: 74,346
Non-trainable params: 0
_________________________________________________________________
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(trainX, trainY, validation_data=(validX, validY), batch_size=128, epochs=10)
I was unable to solve the error, does anyone have any ideas on how to fix it?
Since I could not see the code that builds trainY, I am guessing: it seems your trainY has only one column while your model output has 10 neurons, which is why shapes (None, 1) and (None, 10) are incompatible. You can try one-hot encoding your trainY like this:
from sklearn.preprocessing import LabelBinarizer
label_as_binary = LabelBinarizer()
train__y_labels = label_as_binary.fit_transform(trainY)
The compile and fit calls will then look like this (note train__y_labels):
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(train_X_input, train__y_labels, batch_size=128, epochs=1)
Note: if your validation data also throws the error, the same encoding is needed for every y array.
Change the compile statement so that
# Keras spells this loss identifier with "crossentropy" as a single word.
loss = 'sparse_categorical_crossentropy'
The "sparse" indicates that the y values are integer class indices rather than one-hot vectors.
I read the data and processed it using the following code :
data = pd.read_csv('Step1_output.csv')
data = data.sample(frac=1).reset_index(drop=True)
data1 = pd.DataFrame(data, columns=['Res_pair'])
# creating instance of labelencoder
labelencoder = LabelEncoder()
# Assigning numerical values and storing in another column
data1['Res_pair_ID'] = labelencoder.fit_transform(data1['Res_pair'])
data['Res_pair'] = data1['Res_pair_ID']
data = data.to_numpy()
train_X = data[0:data.shape[0],0:566]
train_y = data[0:data.shape[0],566:data.shape[1]]
train_X = train_X.reshape((train_X.shape[0], train_X.shape[1], 1))
I build the model using following code where I have tried to distribute the dataset using mirrored strategy of Tensorflow :
print("Hyper-parameter values:\n")
print('Momentum Rate =',momentum_rate,'\n')
print('learning rate =',learning_rate,'\n')
print('Number of neurons =',neurons,'\n')
strategy = tensorflow.distribute.MirroredStrategy()
with strategy.scope():
model = tf.keras.Sequential([
tf.keras.layers.Conv1D(64,kernel_size = 3,activation='relu',input_shape=train_X.shape[1:]),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(neurons,activation='relu'),
tf.keras.layers.Dense(neurons,activation='relu'),
tf.keras.layers.Dense(neurons,activation='relu'),
tf.keras.layers.Dense(neurons,activation='relu'),
tf.keras.layers.Dense(10, activation='softmax'),])
sgd = optimizers.SGD(lr=learning_rate, decay=1e-6, momentum=momentum_rate, nesterov=True)
model.compile(loss='categorical_crossentropy',optimizer=sgd,metrics=['accuracy',tensorflow.keras.metrics.Precision()])
results = model.fit(train_X,train_y,validation_split = 0.2,epochs=10,batch_size = 100)
print(results)
path = 'saved_model/'
model.save(path, save_format='tf')
for k in range(100):
momentum_rate = random.random()
learning_rate = random.uniform(0,0.2)
neurons = random.randint(10,50)
I tried to run the code on GPU but it runs for some time and then throws this error :
Hyper-parameter values:
Momentum Rate = 0.6477407029392913
learning rate = 0.03988890117492503
Number of neurons = 35
Epoch 1/10
1/270110 [..............................] - ETA: 28s - loss: nan - accuracy: 0.0100 - precision: 0.0100Traceback (most recent call last):
File "parallelised_script_realdata2.py", line 56, in <module>
results = model.fit(train_X,train_y,validation_split = 0.2,epochs=10,batch_size = 100)
File "/usr/local/lib64/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 108, in _method_wrapper
return method(self, *args, **kwargs)
File "/usr/local/lib64/python3.6/site-packages/tensorflow/python/keras/engine/training.py", line 1098, in fit
tmp_logs = train_function(iterator)
File "/usr/local/lib64/python3.6/site-packages/tensorflow/python/eager/def_function.py", line 780, in __call__
result = self._call(*args, **kwds)
File "/usr/local/lib64/python3.6/site-packages/tensorflow/python/eager/def_function.py", line 807, in _call
return self._stateless_fn(*args, **kwds) # pylint: disable=not-callable
File "/usr/local/lib64/python3.6/site-packages/tensorflow/python/eager/function.py", line 2829, in __call__
return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
File "/usr/local/lib64/python3.6/site-packages/tensorflow/python/eager/function.py", line 1848, in _filtered_call
cancellation_manager=cancellation_manager)
File "/usr/local/lib64/python3.6/site-packages/tensorflow/python/eager/function.py", line 1924, in _call_flat
ctx, args, cancellation_manager=cancellation_manager))
File "/usr/local/lib64/python3.6/site-packages/tensorflow/python/eager/function.py", line 550, in call
ctx=ctx)
File "/usr/local/lib64/python3.6/site-packages/tensorflow/python/eager/execute.py", line 60, in quick_execute
inputs, attrs, num_outputs)
tensorflow.python.framework.errors_impl.InvalidArgumentError: assertion failed: [predictions must be >= 0] [Condition x >= y did not hold element-wise:] [x (sequential/dense_4/Softmax:0) = ] [[nan nan nan...]...] [y (Cast_6/x:0) = ] [0]
[[{{node assert_greater_equal/Assert/AssertGuard/else/_21/assert_greater_equal/Assert/AssertGuard/Assert}}]] [Op:__inference_train_function_1270]
Function call stack:
train_function
Update: The code works well if I don't use strategy = tensorflow.distribute.MirroredStrategy(). Like the code below (but will fail for larger datasets for memory shortage):
def convolutional_neural_network(x, y):
    """Build, compile and fit the Conv1D classifier on ``x`` and ``y``.

    The hyper-parameters ``momentum_rate``, ``learning_rate`` and ``neurons``
    are read from module-level variables and printed before training.

    Args:
        x: Training inputs; ``x.shape[1:]`` is used as the Conv1D input shape.
        y: Training targets passed to ``model.fit``.

    Returns:
        Whatever ``model.fit`` returns, so that the caller's ``print(...)``
        shows the training result instead of ``None``.
    """
    print("Hyper-parameter values:\n")
    print('Momentum Rate =',momentum_rate,'\n')
    print('learning rate =',learning_rate,'\n')
    print('Number of neurons =',neurons,'\n')

    model = Sequential()
    # Use the data handed to the function rather than the train_X global.
    model.add(Conv1D(filters=64, input_shape=x.shape[1:], activation='relu', kernel_size=3))
    model.add(Flatten())
    # Four identical hidden layers of `neurons` units each.
    for _ in range(4):
        model.add(Dense(neurons, activation='relu'))
    model.add(Dense(10, activation='softmax'))

    sgd = optimizers.SGD(lr=learning_rate, decay=1e-6, momentum=momentum_rate, nesterov=True)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=sgd,
        metrics=['accuracy', tensorflow.keras.metrics.Precision()],
    )
    history = model.fit(x, y, validation_split=0.2, epochs=10, batch_size=100)
    return history
momentum_rate = 0.09
learning_rate = 0.01
neurons = 40
print(convolutional_neural_network(train_X, train_y))
Update 2: Still facing a similar issue with smaller dataset
_________________________________________________________________
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv1d (Conv1D) (None, 564, 64) 256
_________________________________________________________________
flatten (Flatten) (None, 36096) 0
_________________________________________________________________
dense (Dense) (None, 50) 1804850
_________________________________________________________________
dense_1 (Dense) (None, 50) 2550
_________________________________________________________________
dense_2 (Dense) (None, 50) 2550
_________________________________________________________________
dense_3 (Dense) (None, 50) 2550
_________________________________________________________________
dense_4 (Dense) (None, 10) 510
=================================================================
Total params: 1,813,266
Trainable params: 1,813,266
Non-trainable params: 0
The model definition seems fine, so does the strategy.
Can you verify train_y as a sanity check? I'm fairly sure the error lies there.
If that's not the case, try running model.fit and the statements after it outside the strategy scope.
I'm using Python 3.7.7. and Tensorflow 2.1.0 with Functional API and Eager Execution.
I'm trying to do custom training, with an encoder extracted from a U-Net pretrained network:
I get the U-Net model without compiling it.
I have loaded the weights into the model.
I have extracted the encoder and decoder from that model.
Then I want to use the encoder with this summary:
Model: "encoder"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 200, 200, 1)] 0
_________________________________________________________________
conv1_1 (Conv2D) (None, 200, 200, 64) 1664
_________________________________________________________________
conv1_2 (Conv2D) (None, 200, 200, 64) 102464
_________________________________________________________________
pool1 (MaxPooling2D) (None, 100, 100, 64) 0
_________________________________________________________________
conv2_1 (Conv2D) (None, 100, 100, 96) 55392
_________________________________________________________________
conv2_2 (Conv2D) (None, 100, 100, 96) 83040
_________________________________________________________________
pool2 (MaxPooling2D) (None, 50, 50, 96) 0
_________________________________________________________________
conv3_1 (Conv2D) (None, 50, 50, 128) 110720
_________________________________________________________________
conv3_2 (Conv2D) (None, 50, 50, 128) 147584
_________________________________________________________________
pool3 (MaxPooling2D) (None, 25, 25, 128) 0
_________________________________________________________________
conv4_1 (Conv2D) (None, 25, 25, 256) 295168
_________________________________________________________________
conv4_2 (Conv2D) (None, 25, 25, 256) 1048832
_________________________________________________________________
pool4 (MaxPooling2D) (None, 12, 12, 256) 0
_________________________________________________________________
conv5_1 (Conv2D) (None, 12, 12, 512) 1180160
_________________________________________________________________
conv5_2 (Conv2D) (None, 12, 12, 512) 2359808
=================================================================
Total params: 5,384,832
Trainable params: 5,384,832
Non-trainable params: 0
_________________________________________________________________
I use this function to do the custom training:
def train_encoder_unet_custom(model, dataset):
    """Run ``num_episodes`` gradient steps on ``model`` using random support sets.

    Each episode draws ``num_shot + num_query`` random sample indices out of
    ``no_of_samples`` and uses the first ``num_shot`` of them as the support
    set. Index 0 along the second axis is taken as the inputs and index 1 as
    the targets.
    """
    adam = tf.keras.optimizers.Adam(learning_rate=0.01)

    for _ in range(num_episodes):
        shuffled = np.random.permutation(no_of_samples)
        picked = shuffled[:num_shot + num_query]

        # Build the support set from the first num_shot picked samples.
        support = np.array(dataset[picked[:num_shot]])
        support_inputs = support[:, 0, :]
        support_targets = support[:, 1, :]

        _, gradients = grad(model, support_inputs, support_targets)
        adam.apply_gradients(zip(gradients, model.trainable_variables))
The grad function is:
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
def loss(model, x, y, training):
    """Score the output of ``model(x)`` against ``y`` with the module-level ``loss_object``.

    ``training`` is forwarded to the model call; it only matters for layers
    that behave differently in training and inference (e.g. Dropout).
    """
    predictions = model(x, training=training)
    return loss_object(y_true=y, y_pred=predictions)
def grad(model, inputs, targets):
    """Return ``(loss_value, gradients)`` for one batch.

    The loss is computed under a ``tf.GradientTape`` with ``training=False``,
    and the gradients are taken with respect to ``model.trainable_variables``.
    """
    with tf.GradientTape() as recorder:
        loss_value = loss(model, inputs, targets, training=False)
    gradients = recorder.gradient(loss_value, model.trainable_variables)
    return loss_value, gradients
But when I try to run it I get the error:
InvalidArgumentError: Shapes of all inputs must match: values[0].shape = [5,12,12,512] != values[1].shape = [5,25,25,256] [Op:Pack] name: packed
In loss function, I have checked the values for y_ variable. y_ is a list of 6 elements with these shapes:
(5, 12, 12, 512)
(5, 25, 25, 256)
(5, 50, 50, 128)
(5, 100, 100, 96)
(5, 200, 200, 64)
(5, 200, 200, 1)
Any idea what is happening?
If you need more details, please ask me.
This is the full call stack:
<ipython-input-133-22827956a9f6> in train_encoder_unet_custom(model, dataset, feat_type, show)
22 y_valid = query_set[:,1,:]
23
---> 24 loss_value, grads = grad(model, X_train, y_train)
25
26 optimizer.apply_gradients(zip(grads, model.trainable_variables))
<ipython-input-143-58ff4de686d6> in grad(model, inputs, targets)
10 def grad(model, inputs, targets):
11 with tf.GradientTape() as tape:
---> 12 loss_value = loss(model, inputs, targets, training=False)
13 return loss_value, tape.gradient(loss_value, model.trainable_variables)
<ipython-input-143-58ff4de686d6> in loss(model, x, y, training)
6 y_ = model(x, training=training)
7
----> 8 return loss_object(y_true=y, y_pred=y_)
9
10 def grad(model, inputs, targets):
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py in __call__(self, y_true, y_pred, sample_weight)
147 with K.name_scope(self._name_scope), graph_ctx:
148 ag_call = autograph.tf_convert(self.call, ag_ctx.control_status_ctx())
--> 149 losses = ag_call(y_true, y_pred)
150 return losses_utils.compute_weighted_loss(
151 losses, sample_weight, reduction=self._get_reduction())
/usr/local/lib/python3.6/dist-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
253 try:
254 with conversion_ctx:
--> 255 return converted_call(f, args, kwargs, options=options)
256 except Exception as e: # pylint:disable=broad-except
257 if hasattr(e, 'ag_error_metadata'):
/usr/local/lib/python3.6/dist-packages/tensorflow/python/autograph/impl/api.py in converted_call(f, args, kwargs, caller_fn_scope, options)
455 if conversion.is_in_whitelist_cache(f, options):
456 logging.log(2, 'Whitelisted %s: from cache', f)
--> 457 return _call_unconverted(f, args, kwargs, options, False)
458
459 if ag_ctx.control_status_ctx().status == ag_ctx.Status.DISABLED:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/autograph/impl/api.py in _call_unconverted(f, args, kwargs, options, update_cache)
337
338 if kwargs is not None:
--> 339 return f(*args, **kwargs)
340 return f(*args)
341
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py in call(self, y_true, y_pred)
251 y_pred, y_true)
252 ag_fn = autograph.tf_convert(self.fn, ag_ctx.control_status_ctx())
--> 253 return ag_fn(y_true, y_pred, **self._fn_kwargs)
254
255 def get_config(self):
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py in wrapper(*args, **kwargs)
199 """Call target, and fall back on dispatchers if there is a TypeError."""
200 try:
--> 201 return target(*args, **kwargs)
202 except (TypeError, ValueError):
203 # Note: convert_to_eager_tensor currently raises a ValueError, not a
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/losses.py in sparse_categorical_crossentropy(y_true, y_pred, from_logits, axis)
1562 Sparse categorical crossentropy loss value.
1563 """
-> 1564 y_pred = ops.convert_to_tensor_v2(y_pred)
1565 y_true = math_ops.cast(y_true, y_pred.dtype)
1566 return K.sparse_categorical_crossentropy(
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py in convert_to_tensor_v2(value, dtype, dtype_hint, name)
1380 name=name,
1381 preferred_dtype=dtype_hint,
-> 1382 as_ref=False)
1383
1384
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py in convert_to_tensor(value, dtype, name, as_ref, preferred_dtype, dtype_hint, ctx, accepted_result_types)
1497
1498 if ret is None:
-> 1499 ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
1500
1501 if ret is NotImplemented:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/array_ops.py in _autopacking_conversion_function(v, dtype, name, as_ref)
1500 elif dtype != inferred_dtype:
1501 v = nest.map_structure(_cast_nested_seqs_to_dtype(dtype), v)
-> 1502 return _autopacking_helper(v, dtype, name or "packed")
1503
1504
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/array_ops.py in _autopacking_helper(list_or_tuple, dtype, name)
1406 # checking.
1407 if all(isinstance(elem, core.Tensor) for elem in list_or_tuple):
-> 1408 return gen_array_ops.pack(list_or_tuple, name=name)
1409 must_pack = False
1410 converted_elems = []
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_array_ops.py in pack(values, axis, name)
6457 return _result
6458 except _core._NotOkStatusException as e:
-> 6459 _ops.raise_from_not_ok_status(e, name)
6460 except _core._FallbackException:
6461 pass
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py in raise_from_not_ok_status(e, name)
6841 message = e.message + (" name: " + name if name is not None else "")
6842 # pylint: disable=protected-access
-> 6843 six.raise_from(core._status_to_exception(e.code, message), None)
6844 # pylint: enable=protected-access
6845
/usr/local/lib/python3.6/dist-packages/six.py in raise_from(value, from_value)
I am using the following, fairly simple code to predict an output variable which may have 3 categories:
n_factors = 20  # passed as n_out to embedding_input for both embeddings
# Seed NumPy's global RNG by calling seed(); assigning to the attribute
# (np.random.seed = 42) would replace the function and seed nothing.
np.random.seed(42)
def embedding_input(name, n_in, n_out, reg):
    """Create a single-id input and the embedding applied to it.

    Args:
        name: Name given to the ``Input`` layer.
        n_in: First positional argument of ``Embedding`` (number of ids).
        n_out: Second positional argument of ``Embedding`` (vector size).
        reg: Factor handed to ``l2`` for the embedding weight regularizer.

    Returns:
        A ``(input_tensor, embedded_tensor)`` tuple.
    """
    id_input = Input(shape=(1,), dtype='int64', name=name)
    embedding_layer = Embedding(n_in, n_out, input_length=1, W_regularizer=l2(reg))
    return id_input, embedding_layer(id_input)
# Embedded id inputs: one for the user, one for the artifact.
user_in, u = embedding_input(name='user_in', n_in=n_users, n_out=n_factors, reg=1e-4)
artifact_in, a = embedding_input(name='artifact_in', n_in=n_artifacts, n_out=n_factors, reg=1e-4)
# Side inputs; fit() feeds them the one-hot encodings of 'modality_type',
# 'roleid' and 'subject' respectively.
mt = Input(shape=(31,))
mr = Input(shape=(1,))
sub = Input(shape=(24,))
def onehot(featurename):
    """One-hot encode one column of ``Modality_Durations`` and split it by ``msk``.

    Args:
        featurename: Key of the column in ``Modality_Durations`` to encode.

    Returns:
        A ``(train, validation)`` tuple: the encoded rows selected by ``msk``
        and the encoded rows selected by ``~msk``.
    """
    # The encoder is fed a 2-D (n_samples, 1) array. Reshape through
    # np.asarray instead of calling .reshape on the column itself:
    # pandas deprecated Series.reshape in 0.19 and later removed it, while
    # np.asarray accepts a Series and an ndarray alike.
    # NOTE(review): assumes Modality_Durations is a DataFrame-like mapping of
    # 1-D columns and msk a boolean row mask -- confirm against their definitions.
    column = np.asarray(Modality_Durations[featurename]).reshape(-1, 1)
    onehot_encoder = OneHotEncoder(sparse=False)
    onehot_encoded = onehot_encoder.fit_transform(column)
    trn_onehot_encoded = onehot_encoded[msk]
    val_onehot_encoded = onehot_encoded[~msk]
    return trn_onehot_encoded, val_onehot_encoded
# One (train, validation) pair of one-hot matrices per encoded column.
trn_onehot_encoded_mt, val_onehot_encoded_mt = onehot(featurename='modality_type')
trn_onehot_encoded_mr, val_onehot_encoded_mr = onehot(featurename='roleid')
trn_onehot_encoded_sub, val_onehot_encoded_sub = onehot(featurename='subject')
trn_onehot_encoded_quartile, val_onehot_encoded_quartile = onehot(featurename='quartile')
# Model
# Concatenate the user and artifact embeddings, flatten the result, append
# the three side inputs one by one, then classify with a 10-unit ReLU layer
# followed by a 3-unit softmax output.
x = merge([u, a], mode='concat')
x = Flatten()(x)
x = merge([x, mt], mode='concat')
x = merge([x, mr], mode='concat')
x = merge([x, sub], mode='concat')
x = Dense(10, activation='relu')(x)
# NOTE(review): this layer is constructed but never applied to `x`, so it is
# not part of the model (the printed summary lists no such layer).
BatchNormalization()
x = Dense(3, activation='softmax')(x)
nn = Model([user_in, artifact_in, mt, mr, sub], x)
# NOTE(review): 'sparse_categorical_crossentropy' takes integer class ids as
# targets, but fit() below is given the one-hot encoded 'quartile' matrix.
# That mismatch is what the "expected dense_14 to have shape (None, 1)" error
# reports; one-hot targets go with 'categorical_crossentropy'.
nn.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# NOTE(review): plain attribute assignment -- rebinds `lr` on the optimizer
# object to a Python float; confirm this is the intended way to set it.
nn.optimizer.lr = 0.001
# Inputs are listed in the same order as the Model's input list above; the
# second positional argument is the training target.
nn.fit([trn.member_id, trn.artifact_id, trn_onehot_encoded_mt, trn_onehot_encoded_mr, trn_onehot_encoded_sub], trn_onehot_encoded_quartile,
batch_size=256,
epochs=2,
validation_data=([val.member_id, val.artifact_id, val_onehot_encoded_mt, val_onehot_encoded_mr, val_onehot_encoded_sub], val_onehot_encoded_quartile)
)
Here's the summary of the model:
____________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
====================================================================================================
user_in (InputLayer) (None, 1) 0
____________________________________________________________________________________________________
artifact_in (InputLayer) (None, 1) 0
____________________________________________________________________________________________________
embedding_9 (Embedding) (None, 1, 20) 5902380 user_in[0][0]
____________________________________________________________________________________________________
embedding_10 (Embedding) (None, 1, 20) 594200 artifact_in[0][0]
____________________________________________________________________________________________________
merge_25 (Merge) (None, 1, 40) 0 embedding_9[0][0]
embedding_10[0][0]
____________________________________________________________________________________________________
flatten_7 (Flatten) (None, 40) 0 merge_25[0][0]
____________________________________________________________________________________________________
input_13 (InputLayer) (None, 31) 0
____________________________________________________________________________________________________
merge_26 (Merge) (None, 71) 0 flatten_7[0][0]
input_13[0][0]
____________________________________________________________________________________________________
input_14 (InputLayer) (None, 1) 0
____________________________________________________________________________________________________
merge_27 (Merge) (None, 72) 0 merge_26[0][0]
input_14[0][0]
____________________________________________________________________________________________________
input_15 (InputLayer) (None, 24) 0
____________________________________________________________________________________________________
merge_28 (Merge) (None, 96) 0 merge_27[0][0]
input_15[0][0]
____________________________________________________________________________________________________
dense_13 (Dense) (None, 10) 970 merge_28[0][0]
____________________________________________________________________________________________________
dense_14 (Dense) (None, 3) 33 dense_13[0][0]
====================================================================================================
Total params: 6,497,583
Trainable params: 6,497,583
Non-trainable params: 0
_____________________________
But on the fit statement, I get the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-71-7de0782d7d5d> in <module>()
5 batch_size=256,
6 epochs=2,
----> 7 validation_data=([val.member_id, val.artifact_id, val_onehot_encoded_mt, val_onehot_encoded_mr, val_onehot_encoded_sub], val_onehot_encoded_quartile)
8 )
9 # nn.fit([trn.member_id, trn.artifact_id, trn_onehot_encoded_mt, trn_onehot_encoded_mr, trn_onehot_encoded_sub], trn.duration_new,
/home/prateek_dl/anaconda3/lib/python3.5/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
1520 class_weight=class_weight,
1521 check_batch_axis=False,
-> 1522 batch_size=batch_size)
1523 # Prepare validation data.
1524 do_validation = False
/home/prateek_dl/anaconda3/lib/python3.5/site-packages/keras/engine/training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, check_batch_axis, batch_size)
1380 output_shapes,
1381 check_batch_axis=False,
-> 1382 exception_prefix='target')
1383 sample_weights = _standardize_sample_weights(sample_weight,
1384 self._feed_output_names)
/home/prateek_dl/anaconda3/lib/python3.5/site-packages/keras/engine/training.py in _standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
142 ' to have shape ' + str(shapes[i]) +
143 ' but got array with shape ' +
--> 144 str(array.shape))
145 return arrays
146
ValueError: Error when checking target: expected dense_14 to have shape (None, 1) but got array with shape (1956554, 3)
How do I resolve this error? Why is the final layer expecting (None,1) when according to the summary() it has to output (None,3)?
Any help would be greatly appreciated.
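I fixed the error by using `categorical_crossentropy` instead of `sparse_categorical_crossentropy` as the loss. The targets are one-hot encoded (shape `(N, 3)`), which is what `categorical_crossentropy` expects; `sparse_categorical_crossentropy` expects integer class indices of shape `(N, 1)`, which is why Keras reported `(None, 1)` as the expected target shape.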