I've been working off of a guide I found for making an object classifier. My classifier's job is to determine if the image it's looking at is anime, as I want to integrate it into a bot that will flag anime down in a group chat. The chart the network outputs after training shows decent but improvable results, but the classifying script does not seem to be decently accurate at all. My dataset is 1000 images, here is the chart of my last training attempt.
As you can see, the val_loss value is fairly workable but turbulent.
I am feeding the trained model 2 images to test it after training and saving weights, an image of a normal houseplant, and a generic anime girl. The model predicts 0.37% Anime for the houseplant, and 0.00% Anime for the anime photo. These photos are visually similar to their respective datasets, those being "Anime" and "Other" (which includes images of cars, plants, houses, and other "random" objects). These images are class labeled by their subfolder.
Here is the code for my model:
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras import backend as K
class SmallerVGGNet:
#staticmethod
def build(width, height, depth, classes):
# initialize the model along with the input shape to be
# "channels last" and the channels dimension itself
model = Sequential()
inputShape = (height, width, depth)
chanDim = -1
# if we are using "channels first", update the input shape
# and channels dimension
if K.image_data_format() == "channels_first":
inputShape = (depth, height, width)
chanDim = 1
# CONV => RELU => POOL
model.add(Conv2D(32, (3, 3), padding="same",
input_shape=inputShape))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Dropout(0.25))
# (CONV => RELU) * 2 => POOL
model.add(Conv2D(64, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(Conv2D(64, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
# (CONV => RELU) * 2 => POOL
model.add(Conv2D(128, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(Conv2D(128, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
# first (and only) set of FC => RELU layers
model.add(Flatten())
model.add(Dense(1024))
model.add(Activation("relu"))
model.add(BatchNormalization())
model.add(Dropout(0.5))
# 1 node
model.add(Dense(1))
model.add(Activation("sigmoid"))
return model
My trainer:
import matplotlib
matplotlib.use("Agg")
# import the necessary packages
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from pyimagesearch.smallervggnet import SmallerVGGNet
import matplotlib.pyplot as plt
from imutils import paths
import numpy as np
import argparse
import random
import pickle
import cv2
import os
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True,
help="path to input dataset (i.e., directory of images)")
ap.add_argument("-m", "--model", required=True,
help="path to output model")
ap.add_argument("-l", "--labelbin", required=True,
help="path to output label binarizer")
ap.add_argument("-p", "--plot", type=str, default="plot.png",
help="path to output accuracy/loss plot")
args = vars(ap.parse_args())
# initialize the number of epochs to train for, initial learning rate,
# batch size, and image dimensions
EPOCHS = 100
INIT_LR = 1e-3
BS = 32
IMAGE_DIMS = (96, 96, 3)
# initialize the data and labels
data = []
labels = []
# grab the image paths and randomly shuffle them
print("Output: loading images...")
imagePaths = sorted(list(paths.list_images(args["dataset"])))
random.seed(42)
random.shuffle(imagePaths)
# loop over the input images
for imagePath in imagePaths:
# load the image, pre-process it, and store it in the data list
image = cv2.imread(imagePath)
image = cv2.resize(image, (IMAGE_DIMS[1], IMAGE_DIMS[0]))
image = img_to_array(image)
data.append(image)
# extract the class label from the image path and update the
# labels list
label = imagePath.split(os.path.sep)[-2]
labels.append(label)
# scale the raw pixel intensities to the range [0, 1]
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)
print("Output: data matrix: {:.2f}MB".format(
data.nbytes / (1024 * 1000.0)))
# binarize the labels
lb = LabelBinarizer()
labels = lb.fit_transform(labels)
# partition the data into training and testing splits using 80% of
# the data for training and the remaining 20% for testing
(trainX, testX, trainY, testY) = train_test_split(data,
labels, test_size=0.2, random_state=42)
# construct the image generator for data augmentation
aug = ImageDataGenerator(rotation_range=45, width_shift_range=0.1,
height_shift_range=0.1, shear_range=0.2, zoom_range=0.2,
horizontal_flip=True, fill_mode="nearest")
# initialize the model
print("Output: compiling model...")
model = SmallerVGGNet.build(width=IMAGE_DIMS[1], height=IMAGE_DIMS[0],
depth=IMAGE_DIMS[2], classes=len(lb.classes_))
opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(loss="binary_crossentropy", optimizer=opt,
metrics=["accuracy"])
# train the network
print("Output: training network...")
H = model.fit(
x=aug.flow(trainX, trainY, batch_size=BS),
validation_data=(testX, testY),
steps_per_epoch=len(trainX) // BS,
epochs=EPOCHS, verbose=1)
# save the model to disk
print("Output: serializing network...")
model.save(args["model"], save_format="h5")
# save the label binarizer to disk
print("Output: serializing label binarizer...")
f = open(args["labelbin"], "wb")
f.write(pickle.dumps(lb))
f.close()
And lastly, my classifier:
import tensorflow
from keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
import numpy as np
import argparse
import imutils
import pickle
import cv2
import os
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-m", "--model", required=True,
help="path to trained model model")
ap.add_argument("-l", "--labelbin", required=True,
help="path to label binarizer")
ap.add_argument("-i", "--image", required=True,
help="path to input image")
args = vars(ap.parse_args())
# load the image
image = cv2.imread(args["image"])
output = image.copy()
# pre-process the image for classification
image = cv2.resize(image, (96, 96))
image = image.astype("float") / 255.0
image = img_to_array(image)
image = np.expand_dims(image, axis=0)
# load the trained convolutional neural network and the label
# binarizer
print("[INFO] loading network...")
model = load_model(args["model"])
lb = pickle.loads(open(args["labelbin"], "rb").read())
# classify the input image
print("[INFO] classifying image...")
proba = model.predict(image)[0]
idx = np.argmax(proba)
label = lb.classes_[idx]
# correct answer
filename = args["image"][args["image"].rfind(os.path.sep) + 1:]
correct = "correct" if filename.rfind(label) != -1 else "incorrect"
# build the label and draw the label on the image
label = "{}: {:.2f}% ({})".format(label, proba[idx] * 100, correct)
output = imutils.resize(output, width=400)
cv2.putText(output, label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX,
0.7, (0, 255, 0), 2)
# show the output image
print("[INFO] {}".format(label))
cv2.imshow("Output", output)
cv2.waitKey(0)
Some things I've tried include smaller epoch counts, alternatives to sigmoid, different loss functions, and more concrete categories.
Your model ends with a single Dense unit with sigmoid activation, however, you later use np.argmax() alongside a list of labels as if you were using softmax activation. You are accidentally predicting whichever class is labeled ”0” for every sample. Your model trained fine, it’s just how you made the predictions afterwards.
Related
I trained a neural network using gtsrb and trying to use the saved model(.h5) but getting the following error when using model.predict:
ValueError: Input 0 of layer sequential is incompatible with the layer: : expected min_ndim=4, found ndim=3. Full shape received: (None, 30, 3)
the training code:
import cv2
import numpy as np
import os
import sys
import tensorflow as tf
from sklearn.model_selection import train_test_split
EPOCHS = 10
IMG_WIDTH = 30
IMG_HEIGHT = 30
NUM_CATEGORIES = 43
TEST_SIZE = 0.4
def main():
# Check command-line arguments
if len(sys.argv) not in [2, 3]:
sys.exit("Usage: python traffic.py data_directory [model.h5]")
# Get image arrays and labels for all image files
images, labels = load_data(sys.argv[1])
# Split data into training and testing sets
labels = tf.keras.utils.to_categorical(labels)
x_train, x_test, y_train, y_test = train_test_split(
np.array(images), np.array(labels), test_size=TEST_SIZE
)
# Get a compiled neural network
model = get_model()
# Fit model on training data
model.fit(x_train, y_train, epochs=EPOCHS)
# Evaluate neural network performance
model.evaluate(x_test, y_test, verbose=2)
# Save model to file
if len(sys.argv) == 3:
filename = sys.argv[2]
model.save(filename)
print(f"Model saved to {filename}.")
def load_data(data_dir):
"""
Load image data from directory `data_dir`.
Assume `data_dir` has one directory named after each category, numbered
0 through NUM_CATEGORIES - 1. Inside each category directory will be some
number of image files.
Return tuple `(images, labels)`. `images` should be a list of all
of the images in the data directory, where each image is formatted as a
numpy ndarray with dimensions IMG_WIDTH x IMG_HEIGHT x 3. `labels` should
be a list of integer labels, representing the categories for each of the
corresponding `images`.
"""
images = list()
labels = list()
for folder in os.listdir(data_dir):
folder_path = os.path.join(data_dir, folder)
if os.path.isdir(folder_path):
for photo in os.listdir(folder_path):
img = cv2.imread(os.path.join(folder_path, photo))
img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))
images.append(img)
labels.append(int(folder))
return images, labels
def get_model():
"""
Returns a compiled convolutional neural network model. Assume that the
`input_shape` of the first layer is `(IMG_WIDTH, IMG_HEIGHT, 3)`.
The output layer should have `NUM_CATEGORIES` units, one for each category.
"""
# Create a convolutional neural network
model = tf.keras.models.Sequential([
# Convolutional layer. Learn 32 filters using a 3x3 kernel
tf.keras.layers.Conv2D(
32, (3, 3), activation="relu", input_shape=(IMG_WIDTH, IMG_HEIGHT, 3)
),
# Max-pooling layer, using 2x2 pool size
tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
tf.keras.layers.Conv2D(
32, (3, 3), activation="relu", input_shape=(IMG_WIDTH, IMG_HEIGHT, 3)
),
# Flatten units
tf.keras.layers.Flatten(),
# Add a hidden layer with dropout
tf.keras.layers.Dense(NUM_CATEGORIES * 16, activation="relu"),
tf.keras.layers.Dropout(0.3),
# Add an output layer with output units for all 10 digits
tf.keras.layers.Dense(NUM_CATEGORIES, activation="softmax")
])
# Train neural network
model.compile(
optimizer="adam",
loss="binary_crossentropy",
metrics=["accuracy"]
)
return model
if __name__ == "__main__":
main()
the code to use the saved model to classify a photo
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
import numpy
import cv2
PHOTO_PATH = r'C:\Users\m\ai50\projects\2020\x\traffic\gtsrb\x.ppm'
MODEL_PATH = r'C:\Users\m\ai50\projects\2020\x\traffic\model.h5'
model = load_model(MODEL_PATH, compile=True)
img = cv2.imread(PHOTO_PATH)
img = cv2.resize(img, (30, 30))
img = numpy.array(img)
print(img.shape)
result = model.predict(img)
print(result)
Add a dimension to your image array. The .predict() method expects input with four dimensions: (batch, height, width, channels). Adding a dimension effectively makes this a batch with one image.
result = model.predict(img[numpy.newaxis])
I am trying to train a convolutional neural network but I get a quite high number of false positive classified objects. I am using two classes, each 10.000 images with quite obvious differences. I would expect a rather easy task for a CNN, also I used some hand crafted features with a random forest classifier before which worked quite well.
This is the model I am using:
def build(width, height, depth, classes):
model = Sequential()
inputShape = (height, width, depth)
chanDim = -1
# if we are using "channels first", update the input shape
# and channels dimension
if K.image_data_format() == "channels_first":
inputShape = (depth, height, width)
chanDim = 1
# CONV => RELU => POOL layer set
model.add(Conv2D(32, (3, 3), padding="same",
input_shape=inputShape))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
# (CONV => RELU) * 2 => POOL layer set
model.add(Conv2D(64, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(Conv2D(64, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
# (CONV => RELU) * 3 => POOL layer set
model.add(Conv2D(128, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(Conv2D(128, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(Conv2D(128, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
# first (and only) set of FC => RELU layers
model.add(Flatten())
model.add(Dense(512))
model.add(Activation("relu"))
model.add(BatchNormalization())
model.add(Dropout(0.25))
# softmax classifier
model.add(Dense(classes))
model.add(Activation("softmax"))
# return the constructed network architecture
return model
After applying data augmentation, training and validation loss look better but still I get to many false positives.
Here is a screenshot of some example images from a validation set (screenshot), green marked are correct classified, the rest are false positives. Any suggestion, how to improve my model?
Edit:
I add also the code for pre-processing the images:
import matplotlib
matplotlib.use("Agg")
from smallvggnet import SmallVGGNet
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import SGD
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
import pickle
import cv2
import os
from keras.utils.np_utils import to_categorical
# initialize the data and labels
print("[INFO] loading images...")
data = []
labels = []
# grab the image paths and randomly shuffle them
imagePaths = sorted(list(paths.list_images("C:/06112020_hyphae/all/")))
random.seed(42)
random.shuffle(imagePaths)
# loop over the input images
for imagePath in imagePaths:
image = cv2.imread(imagePath)
image = cv2.resize(image, (350, 150))
data.append(image)
label = imagePath.split(os.path.sep)[-2].split('/')[-1]
if label == 'pos':
label = 1
else:
label = 0
labels.append(label)
# scale the raw pixel intensities to the range [0, 1]
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)
# partition the data into training and testing splits using 75% of
# the data for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.25, random_state=42)
unique, counts = np.unique(trainY, return_counts=True)
print (dict(zip(unique, counts)))
trainY = to_categorical(trainY)
testY = to_categorical(testY)
# construct the image generator for data augmentation
#aug = ImageDataGenerator(rotation_range=30, width_shift_range=0.1, height_shift_range=0.1, zoom_range=0.2, horizontal_flip=True, fill_mode="nearest")
aug = ImageDataGenerator()
# initialize our VGG-like Convolutional Neural Network
model = SmallVGGNet.build(width=350, height=150, depth=3,
classes=2)
# initialize our initial learning rate, # of epochs to train for,
# and batch size
INIT_LR = 0.01
EPOCHS = 20
BS = 32
# initialize the model and optimizer (you'll want to use
# binary_crossentropy for 2-class classification)
print("[INFO] training network...")
opt = SGD(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(loss="binary_crossentropy", optimizer=opt,
metrics=["accuracy"])
# train the network
H = model.fit(x=aug.flow(trainX, trainY, batch_size=BS),
validation_data=(testX, testY), steps_per_epoch=len(trainX) // BS,
epochs=EPOCHS)
I've watched a tutorial about image recognition in Python, and used written code for training a network. It compiles and learning fine, but how to use it for prediction on new images? Maybe something like: model.predict(y)?
Here is the code:
import numpy
from keras.datasets import cifar10
from keras.models import Sequential
from keras.layers import Dense, Flatten, Activation
from keras.layers import Dropout
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras.optimizers import SGD
numpy.random.seed(42)
#Loading data
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
batch_size = 32
nb_classes = 10
#Number of epochs
epochNumber = 25
#Image size
img_rows, img_cols = 32, 32
#RGB
img_channels = 3
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
#To catogories
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)
#Creating a model
model = Sequential()
#Adding layers
model.add(Conv2D(32, (3, 3), padding='same',
input_shape=(32, 32, 3), activation='relu'))
model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes, activation='softmax'))
#Optimization
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
optimizer=sgd,
metrics=['accuracy'])
#Training model
model.fit(X_train, Y_train,
batch_size=batch_size,
epochs=epochNumber,
validation_split=0.1,
shuffle=True,
verbose=2)
scores = model.evaluate(X_test, Y_test, verbose=0)
print("Accuracy on test data: %.2f%%" % (scores[1]*100))
Then, what to do to predict?
target = "C://Users//Target.png"
print(model.predict(target))
How to correctly use model.predict and how to convert result to user-friendly output?
Note: if you are using keras package instead of tf.keras, replace tf.keras with keras in all the following code snippets.
To load a single image, you can use tf.keras.preprocessing.image.load_img:
image = tf.keras.preprocessing.image.load_img(image_path, target_size=(img_rows, img_cols))
This would load the image into PIL format; therefore, we need to convert it to numpy array before feeding it to our model:
import numpy as np
input_arr = tf.keras.preprocessing.image.img_to_array(image)
input_arr = np.array([input_arr]) # Convert single image to a batch.
Now, you might make a mistake by rushing into using predict method on input_arr. However, you should first perform the same preprocessing steps of training phase in prediction phase as well:
input_arr = input_arr.astype('float32') / 255. # This is VERY important
Now, it's ready to be given to the model for prediction:
predictions = model.predict(input_arr)
Bonus: Since your model is a classifier and it's using Softmax activation at the top, the predictions variable would contain the probabilities for each class. To find out the predicted class, we use argmax from Numpy to find the index of the class with the highest probability:
predicted_class = np.argmax(predictions, axis=-1)
you can use cv2 to read in the image. You want to make sure that what ever processing you did on the input image in training you also do on the image you read in with CV2. Be careful CV2 reads images in BGR format. If you trained your model on rgb images you need to convert the cv2 image to rgb as shown in the code below.Then you want to make the image 32 X 32 X3 so if it is not that size use cv2 to resize the image. I assume you rescaled your training images so you need to rescale the cv2 image as well. Code is below
import cv2
img=cv2.imread(f_path) # where f_path is the path to the image file
img=cv2.resize(img, (32,32), interpolation = cv2.INTER_AREA)
img=img/255
# CV2 inputs images in BGR format in general when you train a model you may have
#trained it with images in rgb format. If so you need to convert the cv2 image.
#uncomment the line below if that is the case.
#img=img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
predictions=model.predict(img)
pre_class=predictions.argmax()
# this will give you an integer value
I have a simple Python code (a Keras tutorial for training). I tried to remove img = img.convert('L') to keep colors when loading images (all my images are RGB colored so data is not the issue), but I encountered this error:
training_images = np.array([i[0] for i in training_data]).reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 3)
ValueError: could not broadcast input array from shape (300,300,3) into shape (300,300)
What's going wrong? How to fix it?
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from PIL import Image
from random import shuffle, choice
import numpy as np
import os
IMAGE_SIZE = 300
IMAGE_DIRECTORY = './data/test_set'
def label_img(name):
if name == 'cats': return np.array([1, 0])
elif name == 'notcats' : return np.array([0, 1])
def load_data():
train_data = []
directories = next(os.walk(IMAGE_DIRECTORY))[1]
for dirname in directories:
file_names = next(os.walk(os.path.join(IMAGE_DIRECTORY, dirname)))[2]
for i in range(200):
image_name = choice(file_names)
image_path = os.path.join(IMAGE_DIRECTORY, dirname, image_name)
label = label_img(dirname)
img = Image.open(image_path)
#img = img.convert('L')
img = img.resize((IMAGE_SIZE, IMAGE_SIZE), Image.ANTIALIAS)
train_data.append([np.array(img), label])
return train_data
def create_model():
model = Sequential()
model.add(Conv2D(32, kernel_size = (3, 3), activation='relu',
input_shape=(IMAGE_SIZE, IMAGE_SIZE, 1)))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(BatchNormalization())
model.add(Conv2D(64, kernel_size=(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dense(2, activation = 'softmax'))
return model
training_data = load_data()
training_images = np.array([i[0] for i in training_data]).reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 1)
training_labels = np.array([i[1] for i in training_data])
model = create_model()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(training_images, training_labels, batch_size=50, epochs=10, verbose=1)
Since I was able to identify the problem after some discussion in the comments, I will post it as an answer.
At the line
training_images = np.array([i[0] for i in training_data]).reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 1)
you are attempting to reshape 3-channel RGB images into single channel (greyscale) images, which is not possible (and also not something you want to do, since you want to keep the colours), hence the ValueError. This part was only necessary before you removed img = img.convert('L'), in order to give the training data the proper shape for the model, which had an input shape of (IMAGE_SIZE, IMAGE_SIZE, 1).
Now that you are working with RGB images, the reshape can be removed, since the images will already have the correct shape (IMAGE_SIZE, IMAGE_SIZE, 3) as returned by load_data(). However, as explained in nneonneo's answer, your model will need to be modified to be able to handle the new input shape.
model.add(Conv2D(32, kernel_size = (3, 3), activation='relu',
input_shape=(IMAGE_SIZE, IMAGE_SIZE, 1)))
Your model wants a grayscale image (1 channel), but you're trying to train on colour images (3 channels). This won't work. You will have to modify your model to take colour images, or pass in grayscale images. The sample code you started with uses .reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 1) in order to convert a grayscale image into the shape required for the first layer of this neural net.
If the model is designed for grayscale, you should simply leave the .convert('L') in, which converts colour images to grayscale. Many image classification tasks work just fine in grayscale.
(Attempt #2, see below for history)
I'm a total newbie with Deep Learning and Keras, and trying to play with the MNIST sample, following the tutorial at https://elitedatascience.com/keras-tutorial-deep-learning-in-python.
With regard to that sample, I just made some minimal adjustments to the code, to make it compatible with the latest API. Then, I added a sample Python script which loads the saved model and a BMP image file (specified as the script first argument), and outputs the class prediction in the form of an exit code.
I trained the model getting accuracy 0.9911, and saved it.
Yet, when testing with some BMP samples I get inconsistent results: only some of the digits are recognized correctly. Probably I'm missing something obvious in preprocessing my data. After Toyo's reply, I changed both the scripts, following https://nextjournal.com/schmudde/ml4a-mnist, so here I quote the newer versions. The older ones are at the bottom.
I also added a sample visualization of the preprocessed array representing an image, and looking at it I can see the digit, 'white on black', as expected (in the predict preprocessing I added color inversion by setting each value equal to 255-value, as my input BMP are 'black on white').
Yet, the issue is still there.
Thanks!
New Scripts
Training:
# numpy with a specific seed for reproducibility
import numpy as np
np.random.seed(123)
# keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras.datasets import mnist
# load pre-shuffled MNIST data into train and test sets
# MNIST: 60k samples of 28x28 grayscale (0-255) images,
# and additional 10k samples for test.
((x_train, y_train), (x_test, y_test)) = mnist.load_data()
n_train, height, width = x_train.shape
n_test, _, _ = x_test.shape
print('x_train shape: {}'.format(x_train.shape))
print('n_train: {}'.format(n_train))
print('height: {}'.format(height))
print('width: {}'.format(width))
print('n_test: {}'.format(n_test))
# we require shape (count, height, width, channel), where count
# is the count of our samples, and channel is the number of channels,
# here equal to 1 as these are grayscale images (for RGB it would be 3).
x_train = x_train.reshape(n_train, height, width, 1).astype('float32')
x_test = x_test.reshape(n_test, height, width, 1).astype('float32')
# normalize from [0, 255] to [0, 1]
x_train /= 255
x_test /= 255
# show image array
print('Sample array: {}\n'.format(x_train[0].shape))
print(x_train[0])
from matplotlib import pyplot as plt
plt.imshow(x_train[0].reshape(28, 28))
plt.show()
# the labels need to be converted into one-hot vectors,
# which are nc-element arrays (nc is the number of classes),
# which are 0 for all classes except 1 for the class the label
# is assigned to.
# convert integer labels into one-hot vectors
n_classes = 10
y_train = np_utils.to_categorical(y_train, n_classes)
y_test = np_utils.to_categorical(y_test, n_classes)
# define model architecture.
model = Sequential()
# number of convolutional filters
n_filters = 32
# convolution filter size
n_conv = 3
# pooling window size
n_pool = 2
# (1) convolution
model.add(Convolution2D(
n_filters,
kernel_size=(n_conv, n_conv),
# we have a 28x28 single channel (grayscale) image
# so the input shape should be (28, 28, 1)
input_shape=(height, width, 1)
))
model.add(Activation('relu'))
model.add(Convolution2D(n_filters, kernel_size=(n_conv, n_conv)))
model.add(Activation('relu'))
# apply pooling to summarize the features extracted thus far
model.add(MaxPooling2D(pool_size=(n_pool, n_pool)))
# (2) connected layers
model.add(Dropout(0.25))
# flatten the data for the 1D layers
model.add(Flatten())
# Dense(n_outputs)
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))
# (3) the softmax output layer gives us a probablity for each class
model.add(Dense(n_classes))
model.add(Activation('softmax'))
model.compile(
loss='categorical_crossentropy',
optimizer='adam',
metrics=['accuracy']
)
# how many examples to look at during each update step
batch_size = 128
# how many times to run through the full set of examples
n_epochs = 10
model.fit(x_train,
y_train,
batch_size=batch_size,
epochs=n_epochs,
validation_data=(x_test, y_test))
# how'd we do?
loss, accuracy = model.evaluate(x_test, y_test, verbose=0)
print('loss:', loss)
print('accuracy:', accuracy)
# save the model. Use load_model(path) to load it later
model.save('/projects/python/mnist/mnist.h5')
Prediction:
# get the path to the image to be recognized from 1st arg
import sys
if len(sys.argv) < 2:
print('Missing image file path')
exit(-1)
# load the pretrained MNIST model
print('Loading model...')
import numpy as np
import keras
model = keras.models.load_model('/projects/python/mnist/mnist.h5')
# load the image ensuring that its size is 28x28
print('Loading image ' + sys.argv[1])
from keras.preprocessing import image
x = image.load_img(sys.argv[1], color_mode='grayscale', target_size=(28, 28))
# convert image to NumPy array
x = image.img_to_array(x, data_format='channels_last')
print('np from image:\n')
print(x.shape)
print(x)
# add a major dimension to represent the number of samples, i.e. 1
x = x.reshape(1, 28, 28, 1)
# convert data type to float32 and normalize 0-255 values to range [0, 1]
x = x.astype('float32')
# invert colors, we need white on black
x = 255 - x
x /= 255
print('np before predicting:\n')
print(x.shape)
print(x)
from matplotlib import pyplot as plt
plt.imshow(x.reshape(28, 28))
plt.show()
# predict
classes = model.predict_classes(x)
print(classes[0])
exit(classes[0])
Old scripts
# numpy with a specific seed for reproducibility
import numpy as np
np.random.seed(123)
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras.datasets import mnist
from matplotlib import pyplot as plt
# load pre-shuffled MNIST data into train and test sets
# 60k samples of 28x28 images
((x_train, y_train), (x_test, y_test)) = mnist.load_data()
print(x_train.shape)
plt.imshow(x_train[0])
input("Press enter to continue...")
# preprocess input data, as Theano requires explicit depth:
# from shape (n, width, height) to (n, depth=1, width, height)
x_train = x_train.reshape(x_train.shape[0], 1, 28, 28)
x_test = x_test.reshape(x_test.shape[0], 1, 28, 28)
# convert our data type to float32 and normalize our data values to the range [0, 1]
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
# convert 1-dimensional class arrays (with values 0-9)
# to 10-dimensional class matrices (with values 0-1)
y_train = np_utils.to_categorical(y_train, 10)
y_test = np_utils.to_categorical(y_test, 10)
# define model architecture
model = Sequential()
# input_shape is the shape of each single parameter:
# here it is depth=1, width=28, height=28
# 32 convolution filters
# 3 rows in each convolution kernel
# 3 cols in each convolution kernel
model.add(Convolution2D(32, (3, 3), activation='relu', input_shape=(1, 28, 28), data_format="channels_first"))
model.add(Convolution2D(32, (3, 3), activation='relu', data_format="channels_first"))
# reduce the model's parameters by sliding a 2x2 pooling filter
# across the previous layer and taking the max of the 4 values in the 2x2 filter
model.add(MaxPooling2D(pool_size=(2,2)))
# regularize the model to prevent overfitting
model.add(Dropout(0.25))
# flatten (make 1-dimensional)
model.add(Flatten())
# dense: 128=output size of the layer
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
# final layer has size 10 for the 10 digits
model.add(Dense(10, activation='softmax'))
# compile model (loss function and optimizer)
model.compile(loss='categorical_crossentropy',
optimizer='adam',
metrics=['accuracy'])
# fit model on training data
model.fit(x_train, y_train,
batch_size=32, nb_epochs=10, verbose=1)
# evaluate model on test data
score = model.evaluate(x_test, y_test, verbose=0)
# save the model. Use load_model(path) to load it later
model.save('/projects/python/mnist/mnist.h5')
The consuming script:
# get the path to the image to be recognized from 1st arg
import sys
if len(sys.argv) < 2:
print('Missing image file path')
exit(-1)
# load the pretrained MNIST model
print('Loading model...')
import numpy as np
import keras
model = keras.models.load_model('/projects/python/mnist/mnist.h5')
# load the image ensuring that its size is 28x28
print('Loading image ' + sys.argv[1])
from keras.preprocessing import image
x = image.load_img(sys.argv[1], color_mode='grayscale', target_size=(28, 28))
# convert image to NumPy array
x = image.img_to_array(x, data_format='channels_first')
# add a major dimension to represent the number of samples, i.e. 1
x = x.reshape(1, 1, 28, 28)
# convert data type to float32 and normalize 0-255 values to range [0, 1]
x = x.astype('float32')
x /= 255
print(x.shape)
print(x)
# evaluate
classes = model.predict_classes(x)
print(classes[0])
exit(classes[0])