I trained a neural network using gtsrb and trying to use the saved model(.h5) but getting the following error when using model.predict:
ValueError: Input 0 of layer sequential is incompatible with the layer: : expected min_ndim=4, found ndim=3. Full shape received: (None, 30, 3)
the training code:
import cv2
import numpy as np
import os
import sys
import tensorflow as tf
from sklearn.model_selection import train_test_split
EPOCHS = 10
IMG_WIDTH = 30
IMG_HEIGHT = 30
NUM_CATEGORIES = 43
TEST_SIZE = 0.4
def main():
# Check command-line arguments
if len(sys.argv) not in [2, 3]:
sys.exit("Usage: python traffic.py data_directory [model.h5]")
# Get image arrays and labels for all image files
images, labels = load_data(sys.argv[1])
# Split data into training and testing sets
labels = tf.keras.utils.to_categorical(labels)
x_train, x_test, y_train, y_test = train_test_split(
np.array(images), np.array(labels), test_size=TEST_SIZE
)
# Get a compiled neural network
model = get_model()
# Fit model on training data
model.fit(x_train, y_train, epochs=EPOCHS)
# Evaluate neural network performance
model.evaluate(x_test, y_test, verbose=2)
# Save model to file
if len(sys.argv) == 3:
filename = sys.argv[2]
model.save(filename)
print(f"Model saved to {filename}.")
def load_data(data_dir):
"""
Load image data from directory `data_dir`.
Assume `data_dir` has one directory named after each category, numbered
0 through NUM_CATEGORIES - 1. Inside each category directory will be some
number of image files.
Return tuple `(images, labels)`. `images` should be a list of all
of the images in the data directory, where each image is formatted as a
numpy ndarray with dimensions IMG_WIDTH x IMG_HEIGHT x 3. `labels` should
be a list of integer labels, representing the categories for each of the
corresponding `images`.
"""
images = list()
labels = list()
for folder in os.listdir(data_dir):
folder_path = os.path.join(data_dir, folder)
if os.path.isdir(folder_path):
for photo in os.listdir(folder_path):
img = cv2.imread(os.path.join(folder_path, photo))
img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))
images.append(img)
labels.append(int(folder))
return images, labels
def get_model():
"""
Returns a compiled convolutional neural network model. Assume that the
`input_shape` of the first layer is `(IMG_WIDTH, IMG_HEIGHT, 3)`.
The output layer should have `NUM_CATEGORIES` units, one for each category.
"""
# Create a convolutional neural network
model = tf.keras.models.Sequential([
# Convolutional layer. Learn 32 filters using a 3x3 kernel
tf.keras.layers.Conv2D(
32, (3, 3), activation="relu", input_shape=(IMG_WIDTH, IMG_HEIGHT, 3)
),
# Max-pooling layer, using 2x2 pool size
tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
tf.keras.layers.Conv2D(
32, (3, 3), activation="relu", input_shape=(IMG_WIDTH, IMG_HEIGHT, 3)
),
# Flatten units
tf.keras.layers.Flatten(),
# Add a hidden layer with dropout
tf.keras.layers.Dense(NUM_CATEGORIES * 16, activation="relu"),
tf.keras.layers.Dropout(0.3),
# Add an output layer with output units for all 10 digits
tf.keras.layers.Dense(NUM_CATEGORIES, activation="softmax")
])
# Train neural network
model.compile(
optimizer="adam",
loss="binary_crossentropy",
metrics=["accuracy"]
)
return model
if __name__ == "__main__":
main()
the code to use the saved model to classify a photo
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
import numpy
import cv2
PHOTO_PATH = r'C:\Users\m\ai50\projects\2020\x\traffic\gtsrb\x.ppm'
MODEL_PATH = r'C:\Users\m\ai50\projects\2020\x\traffic\model.h5'
model = load_model(MODEL_PATH, compile=True)
img = cv2.imread(PHOTO_PATH)
img = cv2.resize(img, (30, 30))
img = numpy.array(img)
print(img.shape)
result = model.predict(img)
print(result)
Add a dimension to your image array. The .predict() method expects input with four dimensions: (batch, height, width, channels). Adding a dimension effectively makes this a batch with one image.
result = model.predict(img[numpy.newaxis])
Related
import numpy as np
import matplotlib.pyplot as plt #allows us to see the image data
import os #to read and write from files
import cv2 #to manipulate data and create arrays
import tensorflow as tf
import time
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
DIRECTORY = "C:/Users/User/Desktop/dataset/train" #where the image data is
data = DIRECTORY #loads in the data
MOLES = ["melanoma", "nevus"] #the two categories were are trying to analyse
training_dataset = [] #we will now need to make an array where all the data is, as opposed to two seperate files, to train the model.
for mole in MOLES: #iterates through the two files
path = os.path.join(data, mole) #creates a path to the file
class_num = MOLES.index(mole) #assigns the index 0 or 1 whether an image file is in the first or second file
for img in os.listdir(path): #in the path
try:
img_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE) #every file is read in
new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE)) #resized, added to an array called new_array...
training_dataset.append([new_array, class_num]) #...and given a number -- 0 or 1 -- based on if it is in the melanma or nevus file
except Exception as ex:
pass
IMG_SIZE = 200 #what we want the max image size to be, both x and y
new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE)) #uses the resize function
plt.imshow(new_array, cmap="gray") #shows the result
plt.show()
import random #as the dataset is simply the number 1 and 0, one after the other they aren't going to train the model
random.shuffle(training_dataset) #the order must be shuffled
X = []
y = []
for features, label in training_dataset:
X.append(features)
y.append(label)
X = np.array(X).reshape(-1, IMG_SIZE, IMG_SIZE, 2)
y = np.array(y)
X = X/255.0
#NAME = "melanoma-nevus-cnn-64x2-{}".format(int(time.time()))
dense_layers = [0]
layer_sizes = [64]
conv_layers = [2]
for dense_layer in dense_layers:
for layer_size in layer_sizes:
for conv_layer in conv_layers:
NAME = "test-{}-conv-{}-nodes-{}-dense-{}".format(conv_layer, layer_size, dense_layer, int(time.time()))
tensorboard = TensorBoard(log_dir='logs/{}'.format(NAME))
print(NAME)
model = Sequential()
model.add(Conv2D(layer_size, (3, 3), input_shape=X.shape[1:]))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
for l in range(conv_layer-1):
model.add(Conv2D(layer_size, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten()) # this converts our 3D feature maps to 1D feature vectors
for _ in range(dense_layer):
model.add(Dense(layer_size))
model.add(Activation('relu'))
model.add(Dense(1))
model.add(Activation("sigmoid"))
#model.compile(loss="categorical_crossentropy")
model.compile(loss="binary_crossentropy",
optimizer='adam',
metrics=['accuracy'])
#According to Kingma et al., 2014, the method [adam] is "computationally efficient, has little memory requirement, invariant to diagonal rescaling of gradients, and is well suited for problems that are large in terms of data/parameters".
model.fit(X, y, batch_size = 32, epochs = 10, validation_split = 0.3, callbacks=[tensorboard])
def prepare(filepath):
IMG_SIZE = 200 # 50 in txt-based
img_array = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE) # read in the image, convert to grayscale
new_array = cv2.resize(img_array, (200, 200)) # resize image to match model's expected sizing
return new_array.reshape(-1, IMG_SIZE, IMG_SIZE, 1) # return the image with shaping that TF wants.
prediction = model.predict([prepare("C:/Users/User/Desktop/dataset/test/melanoma/ISIC_0012758.jpg")])
prediction
ERROR-----------------------------------------------------------------------------------------
ValueError Traceback (most recent call last)
in
2
3
#prediction = model.predict([prepare("C:/Users/User/Desktop/dataset/test/melanoma/ISIC_0012758.jpg")])
----> 4 prediction = model.predict([prepare("C:/Users/User/Desktop/dataset/test/melanoma/ISIC_0012758.jpg")])
5 prediction
in prepare(filepath)
5 img_array = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE) # read in the image, convert to grayscale
6 new_array = cv2.resize(img_array, (200, 200)) # resize image to match model's expected sizing
----> 7 return new_array.reshape(-1, IMG_SIZE, IMG_SIZE, 2) # return the image with shaping that TF wants.
ValueError: cannot reshape array of size 40000 into shape (200,200,2)
I have been working on Python code on JuypterLabs and using Keras and Tensorboard to create a CNN and to try to train a model to sort images based on whether they are melanoma moles (cancerous) or nevus moles (regular). I've run into a problem on testing the model. When ever I try to run the test code it gives me the error:
WARNING:tensorflow:Model was constructed with shape (None, 200, 200, 2) for input KerasTensor(type_spec=TensorSpec(shape=(None, 200, 200, 2), dtype=tf.float32, name='conv2d_input'), name='conv2d_input', description="created by layer 'conv2d_input'"), but it was called on an input with incompatible shape (None, 200, 200, 1).
When I change the shape of the array to (-1, 200, 200, 2), as it was asking me to. I instead get the error:
ValueError: cannot reshape array of size 40000 into shape (200,200,2)
I've tried messing with the CNN shape but that doesn't work, can anyone help?
I am trying to run a deep learning code that I found in a tutorial in order to familiarise myself with resnet50, keras and tensorflow with python 3.7. When I run my code, I get the following error:
TypeError: Cannot convert a symbolic Keras input/output to a numpy array. This error may indicate that you're trying to pass a symbolic value to a NumPy call, which is not supported. Or, you may be trying to pass Keras symbolic inputs/outputs to a TF API that does not register dispatching, preventing Keras from automatically converting the API call to a lambda layer in the Functional Model.
I tried to use the following fix as mentioned on stack overflow:
from tensorflow.python.framework.ops import disable_eager_execution
disable_eager_execution()
Without any success. My full code can be seen below:
from keras.applications.resnet50 import ResNet50
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
import numpy as np
from keras.preprocessing import image
from sklearn.linear_model import LogisticRegression
from tensorflow.python.framework.ops import disable_eager_execution
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Download the architecture of ResNet50 with ImageNet weights
base_model = ResNet50(include_top=False, weights='imagenet')
# Taking the output of the last convolution block in ResNet50
x = base_model.output
# Adding a Global Average Pooling layer
x = GlobalAveragePooling2D()(x)
# Adding a fully connected layer having 1024 neurons
x = Dense(1024, activation='relu')(x)
# Adding a fully connected layer having 2 neurons which will
# give probability of image having either dog or cat
predictions = Dense(2, activation='softmax')(x)
# Model to be trained
model = Model(inputs=base_model.input, outputs=predictions)
# Training only top layers i.e. the layers which we have added in the end
for layer in base_model.layers:
layer.trainable = False
# Compiling the model
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy', metrics = ['accuracy'],
experimental_run_tf_function=False)
# Creating objects for image augmentations
train_datagen = ImageDataGenerator(rescale = 1./255,
shear_range = 0.2,
zoom_range = 0.2,
horizontal_flip = True)
test_datagen = ImageDataGenerator(rescale = 1./255)
# Proving the path of training and test dataset
# Setting the image input size as (224, 224)
# We are using class mode as binary because there are only two classes in our data
training_set = train_datagen.flow_from_directory('training_set',
target_size = (224, 224),
batch_size = 32,
class_mode = 'categorical')
test_set = test_datagen.flow_from_directory('test_set',
target_size = (224, 224),
batch_size = 32,
class_mode = 'categorical')
# Training the model for 5 epochs
model.fit_generator(training_set,
steps_per_epoch = 8000,
epochs = 5,
validation_data = test_set,
validation_steps = 2000)
# We will try to train the last stage of ResNet50
for layer in base_model.layers[0:143]:
layer.trainable = False
for layer in base_model.layers[143:]:
layer.trainable = True
# Training the model for 10 epochs
model.fit_generator(training_set,
steps_per_epoch = 8000,
epochs = 10,
validation_data = test_set,
validation_steps = 2000)
# Saving the weights in the current directory
model.save_weights("resnet50_weights.h5")
# Predicting the final result of image
test_image = image.load_img('cat_or_dog_test.jpg', target_size = (224, 224))
test_image = image.img_to_array(test_image)\
# Expanding the 3-d image to 4-d image.
# The dimensions will be Batch, Height, Width, Channel
test_image = np.expand_dims(test_image, axis = 0)
# Predicting the final class
classifier = LogisticRegression()
result = classifier.predict(test_image)
# Fetching the class labels
labels = training_set.class_indices
labels = list(labels.items())
# Printing the final label
for label, i in labels:
if i == result:
print("The test image has: ", label)
break
I had the same problem when using: from keras import Input;
But, when I change to: from tensorflow.keras import Input, it works!
I assume that the following line is where the error occurs:
test_image = np.expand_dims(test_image, axis = 0)
The reason is probably that you try to apply a numpy function to a tensor. Don't do that. Either convert your tensor to numpy or use a function that work on tensors. Normally, I'd say prefer the second option over the first one (it will prevent unnecessary conversions and make your code more efficient). In your case you will need to convert your tensor to numpy because you are using sklearn afterward:
test_image = np.expand_dims(test_image.numpy(), axis=0)
I am new to DL and I received a similar error a nd the following has helped me.
Try:
del base_model
Before:
base_model = ResNet50(include_top=False, weights='imagenet')
and also simultaneously:
Try:
del model
Before:
model = Model(inputs=base_model.input, outputs=predictions)
Please let me know if this has helped you or hasn't :) .
Try using tensorflow.keras.something instead of keras.something.
It worked for me.
Ofcourse you have to also import tensorlfow
I've been working off of a guide I found for making an object classifier. My classifier's job is to determine if the image it's looking at is anime, as I want to integrate it into a bot that will flag anime down in a group chat. The chart the network outputs after training shows decent but improvable results, but the classifying script does not seem to be decently accurate at all. My dataset is 1000 images, here is the chart of my last training attempt.
As you can see, the val_loss value is fairly workable but turbulent.
I am feeding the trained model 2 images to test it after training and saving weights, an image of a normal houseplant, and a generic anime girl. The model predicts 0.37% Anime for the houseplant, and 0.00% Anime for the anime photo. These photos are visually similar to their respective datasets, those being "Anime" and "Other" (which includes images of cars, plants, houses, and other "random" objects). These images are class labeled by their subfolder.
Here is the code for my model:
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras import backend as K
class SmallerVGGNet:
#staticmethod
def build(width, height, depth, classes):
# initialize the model along with the input shape to be
# "channels last" and the channels dimension itself
model = Sequential()
inputShape = (height, width, depth)
chanDim = -1
# if we are using "channels first", update the input shape
# and channels dimension
if K.image_data_format() == "channels_first":
inputShape = (depth, height, width)
chanDim = 1
# CONV => RELU => POOL
model.add(Conv2D(32, (3, 3), padding="same",
input_shape=inputShape))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Dropout(0.25))
# (CONV => RELU) * 2 => POOL
model.add(Conv2D(64, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(Conv2D(64, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
# (CONV => RELU) * 2 => POOL
model.add(Conv2D(128, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(Conv2D(128, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
# first (and only) set of FC => RELU layers
model.add(Flatten())
model.add(Dense(1024))
model.add(Activation("relu"))
model.add(BatchNormalization())
model.add(Dropout(0.5))
# 1 node
model.add(Dense(1))
model.add(Activation("sigmoid"))
return model
My trainer:
import matplotlib
matplotlib.use("Agg")
# import the necessary packages
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from pyimagesearch.smallervggnet import SmallerVGGNet
import matplotlib.pyplot as plt
from imutils import paths
import numpy as np
import argparse
import random
import pickle
import cv2
import os
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True,
help="path to input dataset (i.e., directory of images)")
ap.add_argument("-m", "--model", required=True,
help="path to output model")
ap.add_argument("-l", "--labelbin", required=True,
help="path to output label binarizer")
ap.add_argument("-p", "--plot", type=str, default="plot.png",
help="path to output accuracy/loss plot")
args = vars(ap.parse_args())
# initialize the number of epochs to train for, initial learning rate,
# batch size, and image dimensions
EPOCHS = 100
INIT_LR = 1e-3
BS = 32
IMAGE_DIMS = (96, 96, 3)
# initialize the data and labels
data = []
labels = []
# grab the image paths and randomly shuffle them
print("Output: loading images...")
imagePaths = sorted(list(paths.list_images(args["dataset"])))
random.seed(42)
random.shuffle(imagePaths)
# loop over the input images
for imagePath in imagePaths:
# load the image, pre-process it, and store it in the data list
image = cv2.imread(imagePath)
image = cv2.resize(image, (IMAGE_DIMS[1], IMAGE_DIMS[0]))
image = img_to_array(image)
data.append(image)
# extract the class label from the image path and update the
# labels list
label = imagePath.split(os.path.sep)[-2]
labels.append(label)
# scale the raw pixel intensities to the range [0, 1]
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)
print("Output: data matrix: {:.2f}MB".format(
data.nbytes / (1024 * 1000.0)))
# binarize the labels
lb = LabelBinarizer()
labels = lb.fit_transform(labels)
# partition the data into training and testing splits using 80% of
# the data for training and the remaining 20% for testing
(trainX, testX, trainY, testY) = train_test_split(data,
labels, test_size=0.2, random_state=42)
# construct the image generator for data augmentation
aug = ImageDataGenerator(rotation_range=45, width_shift_range=0.1,
height_shift_range=0.1, shear_range=0.2, zoom_range=0.2,
horizontal_flip=True, fill_mode="nearest")
# initialize the model
print("Output: compiling model...")
model = SmallerVGGNet.build(width=IMAGE_DIMS[1], height=IMAGE_DIMS[0],
depth=IMAGE_DIMS[2], classes=len(lb.classes_))
opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(loss="binary_crossentropy", optimizer=opt,
metrics=["accuracy"])
# train the network
print("Output: training network...")
H = model.fit(
x=aug.flow(trainX, trainY, batch_size=BS),
validation_data=(testX, testY),
steps_per_epoch=len(trainX) // BS,
epochs=EPOCHS, verbose=1)
# save the model to disk
print("Output: serializing network...")
model.save(args["model"], save_format="h5")
# save the label binarizer to disk
print("Output: serializing label binarizer...")
f = open(args["labelbin"], "wb")
f.write(pickle.dumps(lb))
f.close()
And lastly, my classifier:
import tensorflow
from keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
import numpy as np
import argparse
import imutils
import pickle
import cv2
import os
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-m", "--model", required=True,
help="path to trained model model")
ap.add_argument("-l", "--labelbin", required=True,
help="path to label binarizer")
ap.add_argument("-i", "--image", required=True,
help="path to input image")
args = vars(ap.parse_args())
# load the image
image = cv2.imread(args["image"])
output = image.copy()
# pre-process the image for classification
image = cv2.resize(image, (96, 96))
image = image.astype("float") / 255.0
image = img_to_array(image)
image = np.expand_dims(image, axis=0)
# load the trained convolutional neural network and the label
# binarizer
print("[INFO] loading network...")
model = load_model(args["model"])
lb = pickle.loads(open(args["labelbin"], "rb").read())
# classify the input image
print("[INFO] classifying image...")
proba = model.predict(image)[0]
idx = np.argmax(proba)
label = lb.classes_[idx]
# correct answer
filename = args["image"][args["image"].rfind(os.path.sep) + 1:]
correct = "correct" if filename.rfind(label) != -1 else "incorrect"
# build the label and draw the label on the image
label = "{}: {:.2f}% ({})".format(label, proba[idx] * 100, correct)
output = imutils.resize(output, width=400)
cv2.putText(output, label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX,
0.7, (0, 255, 0), 2)
# show the output image
print("[INFO] {}".format(label))
cv2.imshow("Output", output)
cv2.waitKey(0)
Some things I've tried include smaller epoch counts, alternatives to sigmoid, different loss functions, and more concrete categories.
Your model ends with a single Dense unit with sigmoid activation, however, you later use np.argmax() alongside a list of labels as if you were using softmax activation. You are accidentally predicting whichever class is labeled ”0” for every sample. Your model trained fine, it’s just how you made the predictions afterwards.
I am trying to do transfer learning with InceptionV3 on the MNIST dataset.
The plan is to read in the MNIST dataset, resize the images, and then use these to train, like so:
import numpy as np
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import tensorflow.compat.v2 as tf
import tensorflow.compat.v1 as tfv1
from tensorflow.python.keras.applications import InceptionV3
tfv1.enable_v2_behavior()
print(tf.version.VERSION)
img_size = 299
def preprocess_tf_image(image, label):
image = tf.image.grayscale_to_rgb(image)
image = tf.image.resize(image, [img_size, img_size])
return image, label
#Acquire MNIST data
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
#Convert data to [0,1] range
x_train, x_test = x_train / 255.0, x_test / 255.0
#Add extra dimension to images so that they can be converted to RGB
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape (x_test.shape[0], 28, 28, 1)
x_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
x_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
#Convert images to RGB space and resize
x_train = x_train.map(preprocess_tf_image)
x_test = x_test.map(preprocess_tf_image)
img_shape = (img_size, img_size, 3)
#Get trained model, but leave off the head
base_model = InceptionV3(input_shape = img_shape, weights='imagenet', include_top=False)
base_model.trainable = False
#Make a model with a new head
model = tf.keras.Sequential([
base_model,
tf.keras.layers.GlobalAveragePooling2D(),
tf.keras.layers.Dropout(0.2),
tf.keras.layers.Dense(10, activation='softmax')
])
#Compile model
model.compile(
optimizer='adam', #tf.keras.optimizers.RMSprop(lr=BASE_LEARNING_RATE),
loss='binary_crossentropy',
metrics=['accuracy']
)
model.fit(x_train, epochs=5)
model.evaluate(x_test)
But, when I run this, things stop at model.fit() with the error:
ValueError: Error when checking input: expected inception_v3_input to have 4 dimensions, but got array with shape (299, 299, 3)
What's going on?
After you apply map to a dataset the response has no information about the batch size, you have to invoke the batch function to add it:
x_train = x_train.batch(batch_size = BATCH_SIZE) # adds batch size dimension to train dataset
x_test = x_test.batch(batch_size = BATCH_SIZE) # idem for test.
After that I could fully train and evaluate the model using Google's Colab as you can check here.
(Attempt #2, see below for history)
I'm a total newbie with Deep Learning and Keras, and trying to play with the MNIST sample, following the tutorial at https://elitedatascience.com/keras-tutorial-deep-learning-in-python.
With regard to that sample, I just made some minimal adjustments to the code, to make it compatible with the latest API. Then, I added a sample Python script which loads the saved model and a BMP image file (specified as the script first argument), and outputs the class prediction in the form of an exit code.
I trained the model getting accuracy 0.9911, and saved it.
Yet, when testing with some BMP samples I get inconsistent results: only some of the digits are recognized correctly. Probably I'm missing something obvious in preprocessing my data. After Toyo's reply, I changed both the scripts, following https://nextjournal.com/schmudde/ml4a-mnist, so here I quote the newer versions. The older ones are at the bottom.
I also added a sample visualization of the preprocessed array representing an image, and looking at it I can see the digit, 'white on black', as expected (in the predict preprocessing I added color inversion by setting each value equal to 255-value, as my input BMP are 'black on white').
Yet, the issue is still there.
Thanks!
New Scripts
Training:
# numpy with a specific seed for reproducibility
import numpy as np
np.random.seed(123)
# keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras.datasets import mnist
# load pre-shuffled MNIST data into train and test sets
# MNIST: 60k samples of 28x28 grayscale (0-255) images,
# and additional 10k samples for test.
((x_train, y_train), (x_test, y_test)) = mnist.load_data()
n_train, height, width = x_train.shape
n_test, _, _ = x_test.shape
print('x_train shape: {}'.format(x_train.shape))
print('n_train: {}'.format(n_train))
print('height: {}'.format(height))
print('width: {}'.format(width))
print('n_test: {}'.format(n_test))
# we require shape (count, height, width, channel), where count
# is the count of our samples, and channel is the number of channels,
# here equal to 1 as these are grayscale images (for RGB it would be 3).
x_train = x_train.reshape(n_train, height, width, 1).astype('float32')
x_test = x_test.reshape(n_test, height, width, 1).astype('float32')
# normalize from [0, 255] to [0, 1]
x_train /= 255
x_test /= 255
# show image array
print('Sample array: {}\n'.format(x_train[0].shape))
print(x_train[0])
from matplotlib import pyplot as plt
plt.imshow(x_train[0].reshape(28, 28))
plt.show()
# the labels need to be converted into one-hot vectors,
# which are nc-element arrays (nc is the number of classes),
# which are 0 for all classes except 1 for the class the label
# is assigned to.
# convert integer labels into one-hot vectors
n_classes = 10
y_train = np_utils.to_categorical(y_train, n_classes)
y_test = np_utils.to_categorical(y_test, n_classes)
# define model architecture.
model = Sequential()
# number of convolutional filters
n_filters = 32
# convolution filter size
n_conv = 3
# pooling window size
n_pool = 2
# (1) convolution
model.add(Convolution2D(
n_filters,
kernel_size=(n_conv, n_conv),
# we have a 28x28 single channel (grayscale) image
# so the input shape should be (28, 28, 1)
input_shape=(height, width, 1)
))
model.add(Activation('relu'))
model.add(Convolution2D(n_filters, kernel_size=(n_conv, n_conv)))
model.add(Activation('relu'))
# apply pooling to summarize the features extracted thus far
model.add(MaxPooling2D(pool_size=(n_pool, n_pool)))
# (2) connected layers
model.add(Dropout(0.25))
# flatten the data for the 1D layers
model.add(Flatten())
# Dense(n_outputs)
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))
# (3) the softmax output layer gives us a probablity for each class
model.add(Dense(n_classes))
model.add(Activation('softmax'))
model.compile(
loss='categorical_crossentropy',
optimizer='adam',
metrics=['accuracy']
)
# how many examples to look at during each update step
batch_size = 128
# how many times to run through the full set of examples
n_epochs = 10
model.fit(x_train,
y_train,
batch_size=batch_size,
epochs=n_epochs,
validation_data=(x_test, y_test))
# how'd we do?
loss, accuracy = model.evaluate(x_test, y_test, verbose=0)
print('loss:', loss)
print('accuracy:', accuracy)
# save the model. Use load_model(path) to load it later
model.save('/projects/python/mnist/mnist.h5')
Prediction:
# get the path to the image to be recognized from 1st arg
import sys
if len(sys.argv) < 2:
print('Missing image file path')
exit(-1)
# load the pretrained MNIST model
print('Loading model...')
import numpy as np
import keras
model = keras.models.load_model('/projects/python/mnist/mnist.h5')
# load the image ensuring that its size is 28x28
print('Loading image ' + sys.argv[1])
from keras.preprocessing import image
x = image.load_img(sys.argv[1], color_mode='grayscale', target_size=(28, 28))
# convert image to NumPy array
x = image.img_to_array(x, data_format='channels_last')
print('np from image:\n')
print(x.shape)
print(x)
# add a major dimension to represent the number of samples, i.e. 1
x = x.reshape(1, 28, 28, 1)
# convert data type to float32 and normalize 0-255 values to range [0, 1]
x = x.astype('float32')
# invert colors, we need white on black
x = 255 - x
x /= 255
print('np before predicting:\n')
print(x.shape)
print(x)
from matplotlib import pyplot as plt
plt.imshow(x.reshape(28, 28))
plt.show()
# predict
classes = model.predict_classes(x)
print(classes[0])
exit(classes[0])
Old scripts
# numpy with a specific seed for reproducibility
import numpy as np
np.random.seed(123)
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras.datasets import mnist
from matplotlib import pyplot as plt
# load pre-shuffled MNIST data into train and test sets
# 60k samples of 28x28 images
((x_train, y_train), (x_test, y_test)) = mnist.load_data()
print(x_train.shape)
plt.imshow(x_train[0])
input("Press enter to continue...")
# preprocess input data, as Theano requires explicit depth:
# from shape (n, width, height) to (n, depth=1, width, height)
x_train = x_train.reshape(x_train.shape[0], 1, 28, 28)
x_test = x_test.reshape(x_test.shape[0], 1, 28, 28)
# convert our data type to float32 and normalize our data values to the range [0, 1]
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
# convert 1-dimensional class arrays (with values 0-9)
# to 10-dimensional class matrices (with values 0-1)
y_train = np_utils.to_categorical(y_train, 10)
y_test = np_utils.to_categorical(y_test, 10)
# define model architecture
model = Sequential()
# input_shape is the shape of each single parameter:
# here it is depth=1, width=28, height=28
# 32 convolution filters
# 3 rows in each convolution kernel
# 3 cols in each convolution kernel
model.add(Convolution2D(32, (3, 3), activation='relu', input_shape=(1, 28, 28), data_format="channels_first"))
model.add(Convolution2D(32, (3, 3), activation='relu', data_format="channels_first"))
# reduce the model's parameters by sliding a 2x2 pooling filter
# across the previous layer and taking the max of the 4 values in the 2x2 filter
model.add(MaxPooling2D(pool_size=(2,2)))
# regularize the model to prevent overfitting
model.add(Dropout(0.25))
# flatten (make 1-dimensional)
model.add(Flatten())
# dense: 128=output size of the layer
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
# final layer has size 10 for the 10 digits
model.add(Dense(10, activation='softmax'))
# compile model (loss function and optimizer)
model.compile(loss='categorical_crossentropy',
optimizer='adam',
metrics=['accuracy'])
# fit model on training data
model.fit(x_train, y_train,
batch_size=32, nb_epochs=10, verbose=1)
# evaluate model on test data
score = model.evaluate(x_test, y_test, verbose=0)
# save the model. Use load_model(path) to load it later
model.save('/projects/python/mnist/mnist.h5')
The consuming script:
# get the path to the image to be recognized from 1st arg
import sys
if len(sys.argv) < 2:
print('Missing image file path')
exit(-1)
# load the pretrained MNIST model
print('Loading model...')
import numpy as np
import keras
model = keras.models.load_model('/projects/python/mnist/mnist.h5')
# load the image ensuring that its size is 28x28
print('Loading image ' + sys.argv[1])
from keras.preprocessing import image
x = image.load_img(sys.argv[1], color_mode='grayscale', target_size=(28, 28))
# convert image to NumPy array
x = image.img_to_array(x, data_format='channels_first')
# add a major dimension to represent the number of samples, i.e. 1
x = x.reshape(1, 1, 28, 28)
# convert data type to float32 and normalize 0-255 values to range [0, 1]
x = x.astype('float32')
x /= 255
print(x.shape)
print(x)
# evaluate
classes = model.predict_classes(x)
print(classes[0])
exit(classes[0])