I'm trying to build a model (VGG-16) that uses Fast R-CNN for object detection.
In short, I want to find an object in an image and put a bounding box where the object is.
I've already tried multiple ways of getting this to work, but I keep getting errors, mostly from the RoiPoolingLayer and the loss functions.
Can you guys guide me on what I'm doing wrong?
So let me introduce you: this is my code at the moment:
import pickle
import numpy
import tensorflow
from keras import Input, Model
from keras.initializers.initializers_v1 import RandomNormal
from keras.layers import Flatten, TimeDistributed, Dense, Dropout
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.optimizers import Adam
from tensorflow.python.keras.regularizers import l2
from data import get_data, get_train_data
from rcnn.config import Config
import tensorflow as tf
from tensorflow.keras.layers import Layer


class RoiPoolingConv(Layer):
    def __init__(self, pool_size, **kwargs):
        self.pool_size = pool_size
        super(RoiPoolingConv, self).__init__(**kwargs)

    def build(self, input_shape):
        self.nb_channels = input_shape[0][3]
        super(RoiPoolingConv, self).build(input_shape)

    def compute_output_shape(self, input_shape):
        return None, None, self.pool_size, self.pool_size, self.nb_channels

    def crop_and_resize(self, image, boxes):
        box_ind = tf.range(tf.shape(boxes)[0])
        box_ind = tf.reshape(box_ind, (-1, 1))
        box_ind = tf.tile(box_ind, [1, tf.shape(boxes)[1]])
        boxes = tf.keras.backend.cast(
            tf.reshape(boxes, (-1, 4)), "float32"
        )
        box_ind = tf.reshape(box_ind, (1, -1))[0]
        result = tf.image.crop_and_resize(image, boxes, box_ind, [self.pool_size, self.pool_size])
        result = tf.reshape(result, (tf.shape(image)[0], -1, self.pool_size, self.pool_size, self.nb_channels))
        return result

    def call(self, x, mask=None):
        assert (len(x) == 2)
        img = x[0]
        rois = x[1]
        print(x)
        print(img)
        print(rois)
        x1 = rois[:, 0]
        y1 = rois[:, 1]
        x2 = rois[:, 2]
        y2 = rois[:, 3]
        boxes = tf.stack([y1, x1, y2, x2], axis=-1)
        print(boxes)
        rs = self.crop_and_resize(img, boxes)
        print(rs)
        return rs

    def get_config(self):
        config = {'pool_size': self.pool_size}
        base_config = super(RoiPoolingConv, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
PROPERTIES = Config()


def prepare_model(
        model_path="model\\FastRCNN.h5"
):
    roi_input = Input(shape=(None, 4), name="input_2")
    model_cnn = tensorflow.keras.applications.VGG16(
        include_top=True,
        weights='imagenet'
    )
    model_cnn.trainable = True
    x = model_cnn.layers[17].output
    x = RoiPoolingConv(7)([x, roi_input])
    x = TimeDistributed(Flatten())(x)
    softmaxhead = Dense(4096, activation='relu', kernel_initializer=RandomNormal(stddev=0.01), kernel_regularizer=l2(0.0005), bias_regularizer=l2(0.0005))(x)
    softmaxhead = Dropout(0.5)(softmaxhead)
    softmaxhead = Dense(4096, activation='relu', kernel_initializer=RandomNormal(stddev=0.01), kernel_regularizer=l2(0.0005), bias_regularizer=l2(0.0005))(softmaxhead)
    softmaxhead = Dropout(0.5)(softmaxhead)
    softmaxhead = Dense(20, activation='softmax', kernel_initializer='zero', name='class_label')(softmaxhead)
    bboxhead = Dense(128, activation='relu')(x)
    bboxhead = Dense(64, activation='relu')(bboxhead)
    bboxhead = Dense(32, activation='relu')(bboxhead)
    bboxhead = Dense(4, activation='sigmoid', name='bounding_box')(bboxhead)
    model_final = Model(inputs=[model_cnn.input, roi_input], outputs=(bboxhead, softmaxhead))
    opt = Adam(learning_rate=0.0001)
    losses = {
        "class_label": PROPERTIES.CLASS_LABEL_LOSSES,
        "bounding_box": PROPERTIES.BOUNDING_BOX_LOSSES
    }
    lossWeights = {
        "class_label": PROPERTIES.LOSS_WEIGHTS,
        "bounding_box": PROPERTIES.LOSS_WEIGHTS
    }
    model_final.compile(
        loss=losses,
        optimizer=opt,
        metrics=["accuracy"],
        loss_weights=lossWeights
    )
    tensorflow.keras.utils.plot_model(
        model_final,
        "model.png",
        show_shapes=True,
        show_layer_names=False,
        rankdir='TB'
    )
    model_final.save(model_path)
    return model_final
def train_RCNN_VGG(path):
    # get voc data
    all_data, classes_count, class_mapping = get_data(path)
    tr_images, tr_labels_rois, tr_bboxes_rois, tr_bboxes_gt = get_train_data(all_data)
    #val_images, val_labels, val_bboxes = get_validation_data(all_data)
    # delete unnecessary data
    del classes_count
    del class_mapping
    del all_data
    # convert to numpy arrays
    tr_images = numpy.array(tr_images, dtype="float32")
    tr_bboxes_rois = numpy.array(tr_bboxes_rois, dtype="float32")
    tr_bboxes_gt = numpy.array(tr_bboxes_gt, dtype="float32")
    tr_labels_rois = numpy.array(tr_labels_rois)
    print(tr_images.shape)
    print(tr_bboxes_rois.shape)
    print(tr_bboxes_gt.shape)
    print(tr_labels_rois.shape)
    # same for validation data
    #val_images = numpy.array(val_images, dtype="float32")
    #val_bboxes = numpy.array(val_bboxes, dtype="float32")
    #val_labels = numpy.array(val_labels)
    # use the label binarizer to one-hot encode which class/label belongs to each image
    labelBinarizer = LabelBinarizer()
    tr_labels_rois = labelBinarizer.fit_transform(tr_labels_rois)
    #val_labels = labelBinarizer.fit_transform(val_labels)
    classes = len(labelBinarizer.classes_)
    # load model, provide number of classes
    #model_vgg = load_model_or_construct(classes)
    model_vgg = prepare_model()
    # define a dictionary to set the loss methods
    losses = {
        "class_label": PROPERTIES.CLASS_LABEL_LOSSES,
        "bounding_box": PROPERTIES.BOUNDING_BOX_LOSSES
    }
    # define a dictionary that specifies the weights per loss
    lossWeights = {
        "class_label": PROPERTIES.LOSS_WEIGHTS,
        "bounding_box": PROPERTIES.LOSS_WEIGHTS
    }
    # initialize the optimizer, compile the model, and show the model
    opt = Adam(learning_rate=PROPERTIES.LEARNING_RATE)
    model_vgg.compile(loss=losses, optimizer=opt, metrics=["accuracy"], loss_weights=lossWeights)
    # construct a dictionary for our target training outputs, for our target testing
    trainTargets = {
        "class_label": tr_labels_rois,
        "bounding_box": tr_bboxes_gt
    }
    #validationTargets = {
    #    "class_label": val_labels,
    #    "bounding_box": val_bboxes
    #}
    # train the network for bounding box regression and class label
    H = model_vgg.fit(
        [tr_images, tr_bboxes_rois], trainTargets,
        # validation_data=(val_images, validationTargets),
        batch_size=PROPERTIES.BATCH_SIZE,
        epochs=PROPERTIES.EPOCHS,
        verbose=PROPERTIES.VERBOSE)
    # save model, print summary
    model_vgg.save(PROPERTIES.RCNN_MODEL_NAME, save_format=PROPERTIES.RCNN_MODEL_FORMAT)
    model_vgg.summary()
    # save binarizer
    f = open(PROPERTIES.BINARIZER_NAME, "wb")
    f.write(pickle.dumps(labelBinarizer))
    f.close()


if __name__ == '__main__':
    # load rcnn
    train_RCNN_VGG(PROPERTIES.DATASET_PATH)
I'm creating the RoiPooling layer and the VGG-16 architecture, loading pre-trained weights, and making my own output layers. I have 20 classes (based on the VOC data from 2012), which is why the first output has 20 units; the second has 4, for the bounding box coordinates.
In the train method, you can see I'm printing the shapes of the data I'm delivering; they are:
(1048, 224, 224, 3)
(1048, 4)
(1048, 4)
(1048,)
The first is 1048 images of 224x224 RGB.
The second is 1048 ROI coordinates prepared for 224x224.
The third is 1048 ground-truth bboxes.
The fourth is 1048 sets of 20 labels, like this: [[0, 0, 0, 0, 0, 0, ... 1, 0, 0,] (19 zeros and one 1 for the correct label), [0, ....]]
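To make the label format concrete, here is roughly what the LabelBinarizer step produces (a minimal sketch with made-up class names, not my actual data):
from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer()
onehot = lb.fit_transform(["cat", "dog", "person", "cat"])
print(onehot)       # each row has a single 1 at the index of the correct class
print(lb.classes_)  # with the 20 VOC classes, each row has 19 zeros and one 1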
I was basing this on: https://www.pyimagesearch.com/2020/10/12/multi-class-object-detection-and-bounding-box-regression-with-keras-tensorflow-and-deep-learning/
Currently I have this error:
Traceback (most recent call last):
File "C:\Users\Karol\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
raise e.with_traceback(filtered_tb) from None
File "C:\Users\Karol\anaconda3\lib\site-packages\tensorflow\python\framework\func_graph.py", line 1129, in autograph_handler
raise e.ag_error_metadata.to_exception(e)
ValueError: in user code:
File "C:\Users\Karol\anaconda3\lib\site-packages\keras\engine\training.py", line 878, in train_function *
return step_function(self, iterator)
File "C:\Users\Karol\anaconda3\lib\site-packages\keras\engine\training.py", line 867, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "C:\Users\Karol\anaconda3\lib\site-packages\keras\engine\training.py", line 860, in run_step **
outputs = model.train_step(data)
File "C:\Users\Karol\anaconda3\lib\site-packages\keras\engine\training.py", line 809, in train_step
loss = self.compiled_loss(
File "C:\Users\Karol\anaconda3\lib\site-packages\keras\engine\compile_utils.py", line 201, in __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
File "C:\Users\Karol\anaconda3\lib\site-packages\keras\losses.py", line 141, in __call__
losses = call_fn(y_true, y_pred)
File "C:\Users\Karol\anaconda3\lib\site-packages\keras\losses.py", line 245, in call **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
File "C:\Users\Karol\anaconda3\lib\site-packages\keras\losses.py", line 1664, in categorical_crossentropy
return backend.categorical_crossentropy(
File "C:\Users\Karol\anaconda3\lib\site-packages\keras\backend.py", line 4994, in categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
ValueError: Shapes (None, 20) and (None, None, 20) are incompatible
python-BaseException
So, my question is: what am I missing? Is my data preprocessing incorrect? I'm trying to teach my model to recognize 20 classes and to point out where on the image the object probably is, but I suspect I'm delivering the data wrong.
Just to make something clear: I'm using categorical cross-entropy for the "class label" output and mean average precision for the "bounding boxes".
Maybe I'm just using the wrong loss functions?
Please help.
Try using the loss tf.keras.losses.SparseCategoricalCrossentropy instead, and make sure your labels are integer class indices rather than one-hot encoded, for the reasons pointed out here:
Getting a ValueError in tensorflow saying that my shapes are incompatible
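As a minimal sketch (not from the question; 20 classes assumed), the two losses expect differently shaped labels:
import tensorflow as tf

probs = tf.nn.softmax(tf.random.uniform((3, 20)))  # predicted class probabilities for 3 samples

# CategoricalCrossentropy expects one-hot labels, shape (batch, 20)
onehot = tf.one_hot([4, 0, 19], depth=20)
print(tf.keras.losses.CategoricalCrossentropy()(onehot, probs).numpy())

# SparseCategoricalCrossentropy expects integer indices, shape (batch,)
sparse = tf.constant([4, 0, 19])
print(tf.keras.losses.SparseCategoricalCrossentropy()(sparse, probs).numpy())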
The explanation of the RoiPoolingLayer says that the shapes of the inputs must be:
[(batch_size, pooled_height, pooled_width, n_channels) for the feature map,
and (batch_size, num_rois, 4) for the regions of interest],
but in your work you did not add the batch_size dimension.
try with this:
model_cnn.trainable = True
x = model_cnn.layers[17].output
x = tf.expand_dims(x, axis=0)  # use tf.expand_dims, not np.expand_dims, on a symbolic tensor
x = RoiPoolingConv(7)([x, roi_input])
x = TimeDistributed(Flatten())(x)
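The ROI array you feed at training time needs that num_rois axis as well; a hedged sketch (the variable names are from the question, the reshape is the suggested change):
import numpy as np

tr_bboxes_rois = np.array(tr_bboxes_rois, dtype="float32")
# (1048, 4) -> (1048, 1, 4): one ROI per image, matching
# Input(shape=(None, 4)) plus the batch dimension
tr_bboxes_rois = tr_bboxes_rois.reshape((-1, 1, 4))
print(tr_bboxes_rois.shape)  # (1048, 1, 4)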
I have around 550K samples, each sample being 200x50x1. The size of this dataset is around 57GB.
I want to train a network on this set but I am having trouble reading it.
batch_size = 8

def _read_py_function(filename, labels_slice):
    with h5py.File(filename, 'r') as f:
        data_slice = np.asarray(f['feats'])
        print(data_slice.shape)
    return data_slice, labels_slice

placeholder_files = tf.placeholder(tf.string, [None])
placeholder_labels = tf.placeholder(tf.int32, [None])

dataset = tf.data.Dataset.from_tensor_slices((placeholder_files, placeholder_labels))
dataset = dataset.map(
    lambda filename, label: tuple(tf.py_func(
        _read_py_function, [filename, label], [tf.uint8, tf.int32])))
dataset = dataset.shuffle(buffer_size=50000)
dataset = dataset.batch(batch_size)

iterator = tf.data.Iterator.from_structure(dataset.output_types, dataset.output_shapes)
data_X, data_y = iterator.get_next()
data_y = tf.cast(data_y, tf.int32)

net = conv_layer(inputs=data_X, num_outputs=8, kernel_size=3, stride=2, scope='rcl_0')
net = pool_layer(inputs=net, kernel_size=2, scope='pl_0')
net = dropout_layer(inputs=net, scope='dl_0')
net = flatten_layer(inputs=net, scope='flatten_0')
net = dense_layer(inputs=net, num_outputs=256, scope='dense_0')
net = dense_layer(inputs=net, num_outputs=64, scope='dense_1')
out = dense_layer(inputs=net, num_outputs=10, scope='dense_2')
And I run the session using :
sess.run(train_iterator, feed_dict={placeholder_files: filenames, placeholder_labels: ytrain})
try:
    while True:
        _, loss, acc = sess.run([train_op, loss_op, accuracy_op])
        train_loss += loss
        train_accuracy += acc
except tf.errors.OutOfRangeError:
    pass
But I am getting the error even before running the session :
Traceback (most recent call last):
File "SFCC-trial-134.py", line 297, in <module>
net = rcnn_layer(inputs=data_X,num_outputs=8, kernel_size=3, stride=2, scope='rcl_0')
File "SFCC-trial-134.py", line 123, in rcnn_layer
reuse=False)
File "SFCC-trial-134.py", line 109, in conv_layer
reuse = reuse
File "/home/priyam.jain/tensorflow-gpu-python3/lib/python3.5/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 183, in func_with_args
return func(*args, **current_args)
File "/home/priyam.jain/tensorflow-gpu-python3/lib/python3.5/site-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1154, in convolution2d
conv_dims=2)
File "/home/priyam.jain/tensorflow-gpu-python3/lib/python3.5/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 183, in func_with_args
return func(*args, **current_args)
File "/home/priyam.jain/tensorflow-gpu-python3/lib/python3.5/site-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1025, in convolution
(conv_dims + 2, input_rank))
TypeError: %d format: a number is required, not NoneType
I thought about using TFRecords but had a hard time creating them; I couldn't find a good post explaining how to create them for my kind of dataset.
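For what it's worth, a minimal sketch of writing such samples to a TFRecord file (TF1 API; the array, label, and filename are illustrative, not from the question):
import numpy as np
import tensorflow as tf

def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

sample = np.zeros((200, 50, 1), dtype=np.uint8)  # hypothetical 200x50x1 sample
label = 3                                        # hypothetical label

with tf.python_io.TFRecordWriter('train.tfrecord') as writer:
    example = tf.train.Example(features=tf.train.Features(feature={
        'feats': _bytes_feature(sample.tobytes()),
        'label': _int64_feature(label),
    }))
    writer.write(example.SerializeToString())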
conv_layer is defined as follows:
def conv_layer(inputs, num_outputs, kernel_size, stride, normalizer_fn=None, activation_fn=nn.relu, trainable=True, scope='noname', reuse=False):
    net = slim.conv2d(inputs=inputs,
                      num_outputs=num_outputs,
                      kernel_size=kernel_size,
                      stride=stride,
                      normalizer_fn=normalizer_fn,
                      activation_fn=activation_fn,
                      trainable=trainable,
                      scope=scope,
                      reuse=reuse)
    return net
Do not pass tf.py_func inside your map function. You can read the image file by passing the function name directly inside your map function. I am posting only the relevant parts of the code.
def _read_py_function(filename, label):
    return tf.zeros((224, 224, 3), dtype=tf.float32), tf.ones((1,), dtype=tf.int32)

dataset = dataset.map(lambda filename, label: _read_py_function(filename, label))
Another change: your iterator will expect floating-point input, so you will have to change your tf.uint8 output type to float.
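If you do need Python-side HDF5 reading, a hedged sketch of keeping tf.py_func but emitting float32 so the declared output dtypes match (the dataset and h5py usage are taken from the question):
import h5py
import numpy as np
import tensorflow as tf

def _read_py_function(filename, labels_slice):
    with h5py.File(filename, 'r') as f:
        # cast to float32 up front so it matches the declared output type
        data_slice = np.asarray(f['feats'], dtype=np.float32)
    return data_slice, labels_slice

dataset = dataset.map(
    lambda filename, label: tuple(tf.py_func(
        _read_py_function, [filename, label], [tf.float32, tf.int32])))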
I have built a binary image classifier using convolutional neural networks in TensorFlow. It runs fine; however, each time it takes too long to train from scratch, so I want to save the trained model and load it the next time. I can't seem to understand how to implement the guides from the TensorFlow documentation in my program.
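From what I understand, the pattern those guides describe boils down to something like this (a minimal sketch using tflearn's DNN save/load; the tiny graph and model name are just illustrative):
import os
import tflearn
from tflearn.layers.core import input_data, fully_connected
from tflearn.layers.estimator import regression

net = input_data(shape=[None, 80, 80, 1], name='input')
net = fully_connected(net, 2, activation='softmax')
net = regression(net, optimizer='adam', learning_rate=1e-4,
                 loss='categorical_crossentropy', name='targets')
model = tflearn.DNN(net)

MODEL_NAME = 'saved.model'  # illustrative path
if os.path.exists(MODEL_NAME + '.meta'):
    model.load(MODEL_NAME)  # reload the trained weights instead of retraining
else:
    # model.fit(...) with the training data would go here, then:
    model.save(MODEL_NAME)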
Here's the full code:
# Python program to create
# Image Classifier using CNN

# Importing the required libraries
import cv2
import os
import numpy as np
from random import shuffle
from tqdm import tqdm
from keras.models import Sequential

'''Setting up the env'''
TRAIN_DIR = 'D:\\Project\\Final_Project\\chest_xray\\train\\'
TEST_DIR = 'D:\\Project\\Final_Project\\chest_xray\\test0\\'
check_point = 'D:\\Project\\Final_Project\\chest_xray\\chkpt\\'
IMG_SIZE = 80
LR = 1e-4

'''Setting up the model which will help with tensorflow models'''
MODEL_NAME = 'NormalVsAbnormalXRays-{}-{}.model'.format(LR, '6conv-basic')
'''Labelling the dataset'''
def label_img(img):
    word_label = img.split('.')[-3]
    # DIY one-hot encoder
    if word_label == 'Nor':
        return [1, 0]
    elif word_label == 'Pne':
        return [0, 1]
    else:
        return [0, 0]
'''Creating the training data'''
def create_train_data():
    # Creating an empty list where we store the training data
    # after a little preprocessing of the data
    training_data = []
    # tqdm is only used for interactive loading
    # loading the training data
    for img in tqdm(os.listdir(TRAIN_DIR)):
        # labeling the images
        label = label_img(img)
        path = os.path.join(TRAIN_DIR, img)
        # loading the image from the path and then converting it into
        # greyscale, which is easier for the convnet
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        # resizing the image for processing in the convnet
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        # final step: forming the training data list with numpy arrays of the images
        training_data.append([np.array(img), np.array(label)])
    # shuffling the training data to preserve the random state of our data
    shuffle(training_data)
    # saving our training data for further use if required
    np.save('train_data.npy', training_data)
    return training_data
'''Processing the given test data'''
# Almost the same as processing the training data, but
# we don't have to label it.
def process_test_data():
    testing_data = []
    for img in tqdm(os.listdir(TEST_DIR)):
        path = os.path.join(TEST_DIR, img)
        img_num = img.split('.')[0]
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        testing_data.append([np.array(img), img_num])
    shuffle(testing_data)
    np.save('test_data.npy', testing_data)
    return testing_data

'''Running the training and the testing in the dataset for our model'''
#train_data = create_train_data()
#test_data = process_test_data()
train_data = np.load('train_data.npy')
test_data = np.load('test_data.npy')
'''Creating the neural network using tensorflow'''
# Importing the required libraries
import tflearn
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
import tensorflow as tf

model = Sequential()
tf.reset_default_graph()
saver = tf.train.import_meta_graph('D:\\Project\\Final_Project\\chest_xray\\check_point-78.meta')

convnet = input_data(shape=[None, IMG_SIZE, IMG_SIZE, 1], name='input')
convnet = conv_2d(convnet, 32, 4, activation='relu')
convnet = max_pool_2d(convnet, 2)
convnet = conv_2d(convnet, 64, 4, activation='relu')
convnet = max_pool_2d(convnet, 2)
convnet = conv_2d(convnet, 128, 4, activation='relu')
convnet = max_pool_2d(convnet, 2)
convnet = conv_2d(convnet, 64, 4, activation='relu')
convnet = max_pool_2d(convnet, 2)
convnet = conv_2d(convnet, 32, 4, activation='relu')
convnet = max_pool_2d(convnet, 2)
convnet = fully_connected(convnet, 1024, activation='relu')
convnet = dropout(convnet, 0.3)
convnet = fully_connected(convnet, 2, activation='softmax')
convnet = regression(convnet, optimizer='adam', learning_rate=LR,
                     loss='categorical_crossentropy', name='targets')
model = tflearn.DNN(convnet, tensorboard_dir='log', checkpoint_path='check_point',
                    best_checkpoint_path='check_point', max_checkpoints=5)
# Splitting the testing data and training data
train = train_data
test = train_data

'''Setting up the features and labels'''
# X - features & Y - labels
X = np.array([i[0] for i in train]).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
Y = [i[1] for i in train]
test_x = np.array([i[0] for i in test]).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
test_y = [i[1] for i in test]

'''Fitting the data into our model'''
# epoch = 40 taken
model.fit({'input': X}, {'targets': Y}, n_epoch=1,
          validation_set=0.05,
          snapshot_step=500, show_metric=True, run_id=MODEL_NAME)
model.save(MODEL_NAME)
'''Testing the data'''
import matplotlib.pyplot as plt

# if you need to create the data:
# test_data = process_test_data()
# if you already have some saved:
test_data = np.load('test_data.npy')

fig = plt.figure(figsize=(80, 80))
for num, data in enumerate(test_data[:1]):
    img_num = data[1]
    img_data = data[0]
    y = fig.add_subplot(1, 1, num + 1)
    orig = img_data
    data = img_data.reshape(IMG_SIZE, IMG_SIZE, 1)
    # model_out = model.predict([data])[0]
    model_out = model.predict([data])[0]
    if np.argmax(model_out) == 1:
        str_label = 'Abnormal'
    else:
        str_label = 'Normal'
    y.imshow(orig, cmap='gray')
    plt.title(str_label, fontsize=20)
    y.axes.get_xaxis().set_visible(False)
    y.axes.get_yaxis().set_visible(False)
plt.show()
I have tried to use saver = tf.train.import_meta_graph('D:\\Project\\Final_Project\\chest_xray\\check_point-78.meta') to import the graph, but I get this error:
Traceback (most recent call last):
File "D:/Project/Final_Project/chest_xray/Final_CNN.py", line 104, in <module>
saver = tf.train.import_meta_graph('D:\\Project\\Final_Project\\chest_xray\\check_point-78.meta')
File "C:\Users\waqar\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\training\saver.py", line 1674, in import_meta_graph
meta_graph_or_file, clear_devices, import_scope, **kwargs)[0]
File "C:\Users\waqar\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\training\saver.py", line 1696, in _import_meta_graph_with_return_elements
**kwargs))
File "C:\Users\waqar\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\meta_graph.py", line 852, in import_scoped_meta_graph_with_return_elements
ops.prepend_name_scope(value, scope_to_prepend_to_names))
File "C:\Users\waqar\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 3490, in as_graph_element
return self._as_graph_element_locked(obj, allow_tensor, allow_operation)
File "C:\Users\waqar\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 3550, in _as_graph_element_locked
"graph." % repr(name))
KeyError: "The name 'Adam' refers to an Operation not in the graph."
Process finished with exit code 1
I tried to replace the training and validation data with local images, but when running the training code I got this error:
ValueError: Can not squeeze dim[1], expected a dimension of 1, got 3 for 'sparse_softmax_cross_entropy_loss/remove_squeezable_dimensions/Squeeze' (op: 'Squeeze') with input shapes: [100,3].
I don't know how to fix it. There is no visible variable in the model definition code; the code was modified from the TensorFlow tutorial. The images are JPGs.
Here is the detailed error message:
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_log_step_count_steps': 100, '_is_chief': True, '_model_dir': '/tmp/mnist_convnet_model', '_tf_random_seed': None, '_session_config': None, '_save_checkpoints_secs': 600, '_num_worker_replicas': 1, '_save_checkpoints_steps': None, '_service': None, '_keep_checkpoint_max': 5, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x00000288088D50F0>, '_keep_checkpoint_every_n_hours': 10000, '_task_type': 'worker', '_master': '', '_save_summary_steps': 100, '_num_ps_replicas': 0, '_task_id': 0}
Traceback (most recent call last):
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\framework\common_shapes.py", line 686, in _call_cpp_shape_fn_impl
input_tensors_as_shapes, status)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 473, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: Can not squeeze dim[1], expected a dimension of 1, got 3 for 'sparse_softmax_cross_entropy_loss/remove_squeezable_dimensions/Squeeze' (op: 'Squeeze') with input shapes: [100,3].
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "D:\tf_exe_5_make_image_lables\cnn_mnist.py", line 214, in <module>
tf.app.run()
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\platform\app.py", line 124, in run
_sys.exit(main(argv))
File "D:\tf_exe_5_make_image_lables\cnn_mnist.py", line 203, in main
hooks=[logging_hook])
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\estimator\estimator.py", line 314, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\estimator\estimator.py", line 743, in _train_model
features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\estimator\estimator.py", line 725, in _call_model_fn
model_fn_results = self._model_fn(features=features, **kwargs)
File "D:\tf_exe_5_make_image_lables\cnn_mnist.py", line 67, in cnn_model_fn
loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\losses\losses_impl.py", line 790, in sparse_softmax_cross_entropy
labels, logits, weights, expected_rank_diff=1)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\losses\losses_impl.py", line 720, in _remove_squeezable_dimensions
labels, predictions, expected_rank_diff=expected_rank_diff)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\confusion_matrix.py", line 76, in remove_squeezable_dimensions
labels = array_ops.squeeze(labels, [-1])
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\array_ops.py", line 2490, in squeeze
return gen_array_ops._squeeze(input, axis, name)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 7049, in _squeeze
"Squeeze", input=input, squeeze_dims=axis, name=name)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\framework\ops.py", line 3162, in create_op
compute_device=compute_device)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\framework\ops.py", line 3208, in _create_op_helper
set_shapes_for_outputs(op)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\framework\ops.py", line 2427, in set_shapes_for_outputs
return _set_shapes_for_outputs(op)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\framework\ops.py", line 2400, in _set_shapes_for_outputs
shapes = shape_func(op)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\framework\ops.py", line 2330, in call_with_requiring
return call_cpp_shape_fn(op, require_shape_fn=True)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\framework\common_shapes.py", line 627, in call_cpp_shape_fn
require_shape_fn)
File "C:\Users\ASUS\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\framework\common_shapes.py", line 691, in _call_cpp_shape_fn_impl
raise ValueError(err.message)
ValueError: Can not squeeze dim[1], expected a dimension of 1, got 3 for 'sparse_softmax_cross_entropy_loss/remove_squeezable_dimensions/Squeeze' (op: 'Squeeze') with input shapes: [100,3].
Here is my code:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# imports
import numpy as np
import tensorflow as tf
import glob
import cv2
import random
import matplotlib.pylab as plt
import pandas as pd
import sys as system
from mlxtend.preprocessing import one_hot
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

tf.logging.set_verbosity(tf.logging.INFO)


def cnn_model_fn(features, labels, mode):
    """Model function for CNN"""
    # Input Layer
    input_layer = tf.reshape(features["x"], [-1, 320, 320, 3])
    # Convolutional Layer #1
    conv1 = tf.layers.conv2d(
        inputs=input_layer,
        filters=32,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)
    # Pooling Layer #1
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
    # Convolutional Layer #2 and Pooling Layer #2
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=64,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
    # Dense Layer
    pool2_flat = tf.reshape(pool2, [-1, 80 * 80 * 64])
    dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
    dropout = tf.layers.dropout(
        inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)
    # Logits Layer
    logits = tf.layers.dense(inputs=dropout, units=3)
    predictions = {
        # Generate predictions (for PREDICT and EVAL mode)
        "classes": tf.argmax(input=logits, axis=1),
        # Add 'softmax_tensor' to the graph. It is used for PREDICT and by the
        # 'logging_hook'
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    # Calculate Loss (for both TRAIN and EVAL modes)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
    # Add evaluation metrics (for EVAL mode)
    eval_metric_ops = {
        "accuracy": tf.metrics.accuracy(
            labels=labels, predictions=predictions["classes"])}
    return tf.estimator.EstimatorSpec(
        mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
def main(unused_argv):
    '''
    #Load training and eval data
    mnist = tf.contrib.learn.datasets.load_dataset("mnist")
    train_data = mnist.train.images
    train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
    eval_data = mnist.test.images
    eval_labels = np.asarray(mnist.test.labels, dtype=np.int32)
    '''
    # Load cats, dogs and cars images from the local folder
    X_data = []
    files = glob.glob("data/cats/*.jpg")
    for myFile in files:
        image = cv2.imread(myFile)
        imgR = cv2.resize(image, (320, 320))
        imgNR = imgR / 255
        X_data.append(imgNR)
    files = glob.glob("data/dogs/*.jpg")
    for myFile in files:
        image = cv2.imread(myFile)
        imgR = cv2.resize(image, (320, 320))
        imgNR = imgR / 255
        X_data.append(imgNR)
    files = glob.glob("data/cars/*.jpg")
    for myFile in files:
        image = cv2.imread(myFile)
        imgR = cv2.resize(image, (320, 320))
        imgNR = imgR / 255
        X_data.append(imgNR)
    #print('X_data count:', len(X_data))
    X_data_Val = []
    files = glob.glob("data/Validation/cats/*.jpg")
    for myFile in files:
        image = cv2.imread(myFile)
        imgR = cv2.resize(image, (320, 320))
        imgNR = imgR / 255
        X_data_Val.append(imgNR)
    files = glob.glob("data/Validation/dogs/*.jpg")
    for myFile in files:
        image = cv2.imread(myFile)
        imgR = cv2.resize(image, (320, 320))
        imgNR = imgR / 255
        X_data_Val.append(imgNR)
    files = glob.glob("data/Validation/cars/*.jpg")
    for myFile in files:
        image = cv2.imread(myFile)
        imgR = cv2.resize(image, (320, 320))
        imgNR = imgR / 255
        X_data_Val.append(imgNR)
    # Feed one-hot labels
    Y_Label = np.zeros(shape=(300, 1))
    for el in range(0, 100):
        Y_Label[el] = [0]
    for el in range(101, 200):
        Y_Label[el] = [1]
    for el in range(201, 300):
        Y_Label[el] = [2]
    onehot_encoder = OneHotEncoder(sparse=False)
    #Y_Label_RS = Y_Label.reshape(len(Y_Label), 1)
    Y_Label_Encode = onehot_encoder.fit_transform(Y_Label)
    #print('Y_Label_Encode shape:', Y_Label_Encode.shape)
    Y_Label_Val = np.zeros(shape=(30, 1))
    for el in range(0, 10):
        Y_Label_Val[el] = [0]
    for el in range(11, 20):
        Y_Label_Val[el] = [1]
    for el in range(21, 30):
        Y_Label_Val[el] = [2]
    #Y_Label_Val_RS = Y_Label_Val.reshape(len(Y_Label_Val), 1)
    Y_Label_Val_Encode = onehot_encoder.fit_transform(Y_Label_Val)
    #print('Y_Label_Val_Encode shape:', Y_Label_Val_Encode.shape)
    train_data = np.array(X_data)
    train_data = train_data.astype(np.float32)
    train_labels = np.asarray(Y_Label_Encode, dtype=np.int32)
    eval_data = np.array(X_data_Val)
    eval_data = eval_data.astype(np.float32)
    eval_labels = np.asarray(Y_Label_Val_Encode, dtype=np.int32)
    print(train_data.shape)
    print(train_labels.shape)
    # Create the Estimator
    mnist_classifier = tf.estimator.Estimator(
        model_fn=cnn_model_fn, model_dir="/tmp/mnist_convnet_model")
    # Set up logging for predictions
    tensor_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(
        tensors=tensor_to_log, every_n_iter=50)
    # Train the model
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_data},
        y=train_labels,
        batch_size=100,
        num_epochs=None,
        shuffle=True)
    mnist_classifier.train(
        input_fn=train_input_fn,
        # original steps are 20000
        steps=1,
        hooks=[logging_hook])
    # Evaluate the model and print results
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": eval_data},
        y=eval_labels,
        num_epochs=1,
        shuffle=False)
    eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
    print(eval_results)


if __name__ == "__main__":
    tf.app.run()
The error here is from tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits).
The TensorFlow documentation clearly states that "labels vector must provide a single specific index for the true class for each row of logits". So your labels vector must include only class-indices like 0,1,2 and not their respective one-hot-encodings like [1,0,0], [0,1,0], [0,0,1].
Reproducing the error to explain further:
import numpy as np
import tensorflow as tf

# Create a random array and assign it as the logits tensor
np.random.seed(12345)
logits = tf.convert_to_tensor(np.random.sample((4, 4)))
print(logits.get_shape())  # [4, 4]

# Create random labels (assuming only 4 classes)
labels = tf.convert_to_tensor(np.array([2, 2, 0, 1]))
loss_1 = tf.losses.sparse_softmax_cross_entropy(labels, logits)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
print('Loss: {}'.format(sess.run(loss_1)))  # 1.44836854

# Now giving one-hot encodings in place of class indices for the labels
wrong_labels = tf.convert_to_tensor(np.array([[0,0,1,0], [0,0,1,0], [1,0,0,0], [0,1,0,0]]))
loss_2 = tf.losses.sparse_softmax_cross_entropy(wrong_labels, logits)
# This should give you a similar error as soon as you define it
So try giving class indices instead of one-hot encodings in your Y_Label vector.
Hope this clears your doubt.
If you used Keras' ImageDataGenerator, you can add class_mode="sparse" to obtain the correct labels:
train_datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

train_generator = train_datagen.flow_from_directory(
    'data/train',
    target_size=(150, 150),
    batch_size=32,
    class_mode="sparse")
Alternatively, you might be able to use softmax_cross_entropy, which seems to use onehot encoding for the labels.
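For example, a minimal TF1 sketch of the one-hot variant (values illustrative):
import numpy as np
import tensorflow as tf

logits = tf.convert_to_tensor(np.random.sample((4, 3)))
onehot_labels = tf.convert_to_tensor(
    np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0]], dtype=np.float64))
# softmax_cross_entropy takes one-hot labels directly
loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels, logits=logits)

with tf.Session() as sess:
    print(sess.run(loss))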
Changing
loss='sparse_categorical_crossentropy'
to
loss='categorical_crossentropy'
worked for me.
I have solved this error. The labels were one-hot encoded, so they had dimension [, 10] rather than [, 1]. So I used tf.argmax().
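A one-line sketch of that fix (shapes illustrative):
import tensorflow as tf

onehot_labels = tf.constant([[0., 1., 0.], [1., 0., 0.]])  # shape (batch, num_classes)
class_indices = tf.argmax(onehot_labels, axis=1)           # shape (batch,), integer labels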
You can change to loss='categorical_crossentropy' for one-hot encoding, or the other option mentioned earlier is tf.losses.sparse_softmax_cross_entropy(labels, logits).
In simple English: your loss function should be categorical_crossentropy if you have applied a LabelBinarizer (for one-hot encoding) to your data. If you have not one-hot encoded your data, you should use 'sparse_categorical_crossentropy'.
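A hedged Keras sketch of the two pairings (the model and data are illustrative):
import numpy as np
from tensorflow import keras

model = keras.Sequential([keras.layers.Dense(3, activation='softmax', input_shape=(4,))])
x = np.random.rand(8, 4).astype('float32')

# one-hot labels -> categorical_crossentropy
y_onehot = keras.utils.to_categorical(np.random.randint(0, 3, size=8), num_classes=3)
model.compile(optimizer='adam', loss='categorical_crossentropy')
model.fit(x, y_onehot, epochs=1, verbose=0)

# integer labels -> sparse_categorical_crossentropy
y_sparse = np.random.randint(0, 3, size=8)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
model.fit(x, y_sparse, epochs=1, verbose=0)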
I wrote code that changes [1,0,0], [0,1,0], [0,0,1] to 0, 1, 2:
import numpy as np
import tensorflow as tf

def change_to_right(wrong_labels):
    right_labels = []
    for x in wrong_labels:
        for i in range(0, len(wrong_labels[0])):
            if x[i] == 1:
                right_labels.append(i)
    return right_labels

wrong_labels = np.array([[0,0,1,0], [0,0,1,0], [1,0,0,0], [0,1,0,0]])
right_labels = tf.convert_to_tensor(np.array(change_to_right(wrong_labels)))
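For what it's worth, np.argmax does the same in one line (same imports and wrong_labels as above):
right_labels = tf.convert_to_tensor(np.argmax(wrong_labels, axis=1))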
I was following the CNN MNIST tutorial at https://www.tensorflow.org/tutorials/layers for my personal image classification task. My input image size is 224 * 224 * 3 instead of the tutorial's 28 * 28, and I have only 5 classes rather than 10. I read previous posts on this problem, and many people pointed out that either a too-large learning rate or the use of cross_entropy_loss could potentially be the problem, but I am not sure that is the case here.
When I start training, I immediately get this NaN loss training error:
ERROR:tensorflow:Model diverged with loss = NaN.
Traceback (most recent call last):
File "cnn_model.py", line 75, in <module>
main(sys.argv[1], sys.argv[2])
File "cnn_model.py", line 68, in main
classifier.train(input_fn = train_input_fn, steps = 2000, hooks = [logging_hook])
File "C:\Users\sz\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 241, in train
loss = self._train_model(input_fn=input_fn, hooks=hooks)
File "C:\Users\sz\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 612, in _train_model
_, loss = mon_sess.run([estimator_spec.train_op, estimator_spec.loss])
File "C:\Users\sz\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 505, in run
run_metadata=run_metadata)
File "C:\Users\sz\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 842, in run
run_metadata=run_metadata)
File "C:\Users\sz\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 798, in run
return self._sess.run(*args, **kwargs)
File "C:\Users\sz\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\training\monitored_session.py", line 960, in run
run_metadata=run_metadata))
File "C:\Users\sz\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\training\basic_session_run_hooks.py", line 477, in after_run
raise NanLossDuringTrainingError
tensorflow.python.training.basic_session_run_hooks.NanLossDuringTrainingError: NaN loss during training.
And below is the model code:
import tensorflow as tf
from helper import load_data_and_label
import cv2
import sys
import math


def cnn_model_fn(features, labels, mode):
    # input layer
    input_layer = tf.reshape(features['x'], [-1, 224, 224, 3])
    # conv layer 1
    conv1 = tf.layers.conv2d(inputs=input_layer, filters=32, kernel_size=[5, 5],
                             padding='same', activation=tf.nn.relu)
    # pooling layer 1
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
    # conv2 and pool2 layers
    conv2 = tf.layers.conv2d(inputs=pool1, filters=64, kernel_size=[5, 5], padding='same', activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
    # conv3 and pool3 layers
    conv3 = tf.layers.conv2d(inputs=pool2, filters=64, kernel_size=[5, 5], padding='same', activation=tf.nn.relu)
    pool3 = tf.layers.max_pooling2d(inputs=conv3, pool_size=[2, 2], strides=2)
    # conv4 and pool4 layers
    conv4 = tf.layers.conv2d(inputs=pool3, filters=64, kernel_size=[5, 5], padding='same', activation=tf.nn.relu)
    pool4 = tf.layers.max_pooling2d(inputs=conv4, pool_size=[2, 2], strides=2)
    # conv5 and pool5 layers
    conv5 = tf.layers.conv2d(inputs=pool4, filters=64, kernel_size=[5, 5], padding='same', activation=tf.nn.relu)
    pool5 = tf.layers.max_pooling2d(inputs=conv5, pool_size=[2, 2], strides=2)
    # dense layer
    pool5_flat = tf.reshape(pool5, [-1, 7 * 7 * 64])
    dense = tf.layers.dense(inputs=pool5_flat, units=1024, activation=tf.nn.relu)
    dropout = tf.layers.dropout(inputs=dense, rate=0.5,
                                training=mode == tf.estimator.ModeKeys.TRAIN)
    # logits layer
    logits = tf.layers.dense(inputs=dropout, units=5)
    predictions = {"classes": tf.argmax(input=logits, axis=1),
                   "prob": tf.nn.softmax(logits, name='softmax_tensor')}
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    # calculate loss
    onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=5)
    loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels, logits=logits)
    # configure training operation
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
    # evaluation metrics (note: the EstimatorSpec keyword is eval_metric_ops)
    eval_metric_ops = {"accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])}
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)


def main(imagepath, labelpath):
    train_data, train_labels, eval_data, eval_labels = load_data_and_label(imagepath, labelpath)
    classifier = tf.estimator.Estimator(model_fn=cnn_model_fn, model_dir="/tmp/retina_convnet_model")
    tensors_to_log = {"prob": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=50)
    # train the model
    train_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": train_data}, y=train_labels,
                                                        batch_size=32, num_epochs=None, shuffle=True)
    classifier.train(input_fn=train_input_fn, steps=2000, hooks=[logging_hook])
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": eval_data}, y=eval_labels, num_epochs=1, shuffle=False)
    eval_results = classifier.evaluate(input_fn=eval_input_fn)
    print(eval_results)


if __name__ == "__main__":
    main(sys.argv[1], sys.argv[2])
Thank you so much! Any help would be really appreciated!
Did you do any preprocessing on the images? If not, then maybe try to standardize the images in your helper function and see if that helps.
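A minimal sketch of what that could look like, assuming the images arrive as a uint8 array (the helper name is from the question; the body is illustrative):
import numpy as np

def standardize(images):
    # scale to [0, 1], then zero-mean / unit-variance per image
    images = images.astype(np.float32) / 255.0
    mean = images.mean(axis=(1, 2, 3), keepdims=True)
    std = images.std(axis=(1, 2, 3), keepdims=True) + 1e-7
    return (images - mean) / std

# e.g. inside load_data_and_label, before returning:
# train_data = standardize(train_data)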