When I launch the training of the model, I get an error saying that the truth value of an array with more than one element is ambiguous.
I am trying to detect smiles in faces from RGB images with a CNN, using Python and Keras.
from google.colab import drive
drive.mount('/content/drive')
import os
import cv2
import imutils
import numpy as np
from imutils import paths
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import img_to_array, to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Activation, Flatten, Dense
data = []
labels = []
imagePaths = sorted(list(paths.list_images("/content/drive/MyDrive/dataset/SMILEs")))
for imagePath in imagePaths:
    # load the image, pre-process it, and store it in the data list
    image = cv2.imread(imagePath)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = imutils.resize(image, width=28)
    image = img_to_array(image)
    data.append(image)
    # extract the class label from the image path and update the labels list
    label = imagePath.split(os.path.sep)[-3]
    label = "smiling" if label == "positives" else "not_smiling"
    labels.append(label)
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)
le = LabelEncoder().fit(labels)
labels = to_categorical(le.transform(labels), 2)
classTotals = labels.sum(axis=0)
classWeight = classTotals.max() / classTotals
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.20, stratify=labels, random_state=42)
class LeNet:
    @staticmethod
    def build(width, height, depth, classes):
        model = Sequential()
        inputShape = (height, width, depth)
        #if K.image_data_format() == "channels_first":
        #    inputShape = (depth, height, width)
        # first CONV => RELU => POOL layer set:
        model.add(Conv2D(20, (5, 5), padding="same", input_shape=inputShape))
        model.add(Activation("relu"))
        model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
        # second CONV => RELU => POOL layer set:
        model.add(Conv2D(50, (5, 5), padding="same"))
        model.add(Activation("relu"))
        model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
        # flatten, then the fully connected layers:
        model.add(Flatten())
        model.add(Dense(500))
        model.add(Activation("relu"))
        model.add(Dense(classes))
        model.add(Activation("softmax"))
        return model
model = LeNet.build(28, 28, 1, 2)
from tensorflow import keras
print("[INFO] compiling model...")
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=['accuracy'])
print("[INFO] training network...")
H = model.fit(trainX, trainY, validation_data=(testX, testY), class_weight=classWeight, batch_size=64, epochs=15, verbose=1)
Here is the error that is displayed:
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
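For what it's worth, the usual culprit for this exact message in model.fit is the class_weight argument: tf.keras expects a dict mapping class index to weight, and passing a NumPy array (as classWeight is above) trips an ambiguous truth-value check inside Keras. A minimal sketch of the conversion, assuming two classes:
classWeight = classTotals.max() / classTotals
classWeight = dict(enumerate(classWeight))  # e.g. {0: 1.0, 1: 2.56}
H = model.fit(trainX, trainY, validation_data=(testX, testY), class_weight=classWeight, batch_size=64, epochs=15, verbose=1)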
This is for a self-driving car in Carla.
The goal is to calculate the steering value from the image captured by the camera on the front of the vehicle.
Using Carla, I have collected photos and sensor data.
All my data and code are here (if you want to change the code, please copy the modeltrainig file first):
https://drive.google.com/drive/folders/1lkdkM4Nxx8xLAsx42sEDSIhb2r7gW0IN
https://colab.research.google.com/drive/15EfxGKZSAk1mfZuMBwL9H6YQAlewgE7o#scrollTo=6uFx2miYap4A
I have loaded the data from a CSV file:
columns = ['center', 'throttle', 'steering', 'break', 'speed']
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/example.csv', names = columns)
def load_img_steering(datadir, df):
    image_path = []
    steering = []
    for i in range(len(df)):
        indexed_data = df.iloc[i]
        center = indexed_data[0]
        image_path.append(os.path.join(datadir, center.strip()))
        steering.append(float(indexed_data[2]))
    image_paths = np.asarray(image_path)
    steerings = np.asarray(steering)
    return image_paths, steerings
image_paths, steerings = load_img_steering('/content/drive/MyDrive/images/_out', data)
X_train, X_valid, Y_train, Y_valid = train_test_split(image_paths, steerings, test_size=0.2, random_state=0)
For training, each image is modified into a 100x100x3 shape:
import matplotlib.image as npimg  # assumed alias for the npimg used below

def img_preprocess(img):
    img = npimg.imread(img)
    # Crop image to remove unnecessary features
    img = img[400:700, :, :]
    # Change to YUV image
    img = cv2.cvtColor(img, cv2.COLOR_RGB2YUV)
    # Gaussian blur
    img = cv2.GaussianBlur(img, (3, 3), 0)
    # Decrease size for easier processing
    img = cv2.resize(img, (100, 100))
    # Normalize values
    img = img / 255
    return img
X_train = np.array(list(map(img_preprocess, X_train)))
X_valid = np.array(list(map(img_preprocess, X_valid)))
The modified image looks like this:
(screenshot: original image and simplified image)
I create a model using ResNet50 like this:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout, Flatten, Dense
from tensorflow.keras.optimizers import Adam

resnet = ResNet50(weights='imagenet', include_top=False, input_shape=(100, 100, 3))
def nvidia_model():
    model = Sequential()
    model.add(resnet)
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(100, activation='elu'))
    model.add(Dropout(0.5))
    model.add(Dense(50, activation='elu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='elu'))
    model.add(Dropout(0.5))
    model.add(Dense(1))
    optimizer = Adam(lr=1e-3)
    model.compile(loss='mse', optimizer=optimizer, metrics=['accuracy'])
    return model
model = nvidia_model()
print(model.summary())
And I have trained the model:
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.legend(['training', 'validation'])
plt.title('Loss')
plt.xlabel('Epoch')
plt.show()
model.save("my_model")
model.predict(Y_train)
The model's predictions are very bad, like this:
array([[-2.0958593],
[-2.0967839],
[-2.0957477],
[-2.0983617],
[-2.0957 ],
[-2.0964625],
[-2.0983138],
....
....
[-2.0968916],
[-2.0961847],
[-2.0955198],
[-2.0963144],
[-2.0974863],
[-2.0947154],
[-2.0971653],
[-2.0964508],
[-2.0966938],
[-2.0947733],
[-2.0951297],
[-2.0956593],
[-2.0963016],
[-2.0963776]])
If I increase the number of epochs, they become even more constant.
Why is this happening?
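One thing stands out in the snippet as pasted (it may just be a transcription slip): the prediction is computed from Y_train, i.e. the steering labels, rather than from the preprocessed images. The intended call is presumably:
predictions = model.predict(X_train)  # X_train has shape (N, 100, 100, 3)
Note also that metrics=['accuracy'] is not meaningful for a single-output regression trained with mse, and near-constant outputs like the ones above are the classic symptom of a regressor collapsing to roughly the mean of its targets.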
I am trying to train a convolutional neural network, but I get quite a high number of false-positive classifications. I am using two classes of 10,000 images each, with quite obvious differences between them. I would expect this to be a rather easy task for a CNN; I also used some hand-crafted features with a random forest classifier before, which worked quite well.
This is the model I am using:
# smallvggnet.py (class wrapper and imports inferred from the training script below)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Activation, BatchNormalization, Flatten, Dropout, Dense
from tensorflow.keras import backend as K

class SmallVGGNet:
    @staticmethod
    def build(width, height, depth, classes):
        model = Sequential()
        inputShape = (height, width, depth)
        chanDim = -1
        # if we are using "channels first", update the input shape
        # and channels dimension
        if K.image_data_format() == "channels_first":
            inputShape = (depth, height, width)
            chanDim = 1
        # CONV => RELU => POOL layer set
        model.add(Conv2D(32, (3, 3), padding="same", input_shape=inputShape))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))
        # (CONV => RELU) * 2 => POOL layer set
        model.add(Conv2D(64, (3, 3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Conv2D(64, (3, 3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))
        # (CONV => RELU) * 3 => POOL layer set
        model.add(Conv2D(128, (3, 3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Conv2D(128, (3, 3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Conv2D(128, (3, 3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))
        # first (and only) set of FC => RELU layers
        model.add(Flatten())
        model.add(Dense(512))
        model.add(Activation("relu"))
        model.add(BatchNormalization())
        model.add(Dropout(0.25))
        # softmax classifier
        model.add(Dense(classes))
        model.add(Activation("softmax"))
        # return the constructed network architecture
        return model
After applying data augmentation, the training and validation loss look better, but I still get too many false positives.
Here is a screenshot of some example images from a validation set (screenshot); the images marked green are classified correctly, the rest are false positives. Any suggestions on how to improve my model?
Edit:
I am also adding the code for pre-processing the images:
import matplotlib
matplotlib.use("Agg")
from smallvggnet import SmallVGGNet
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import SGD
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random
import pickle
import cv2
import os
from tensorflow.keras.utils import to_categorical
# initialize the data and labels
print("[INFO] loading images...")
data = []
labels = []
# grab the image paths and randomly shuffle them
imagePaths = sorted(list(paths.list_images("C:/06112020_hyphae/all/")))
random.seed(42)
random.shuffle(imagePaths)
# loop over the input images
for imagePath in imagePaths:
    image = cv2.imread(imagePath)
    image = cv2.resize(image, (350, 150))
    data.append(image)
    label = imagePath.split(os.path.sep)[-2].split('/')[-1]
    if label == 'pos':
        label = 1
    else:
        label = 0
    labels.append(label)
# scale the raw pixel intensities to the range [0, 1]
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)
# partition the data into training and testing splits using 75% of
# the data for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.25, random_state=42)
unique, counts = np.unique(trainY, return_counts=True)
print (dict(zip(unique, counts)))
trainY = to_categorical(trainY)
testY = to_categorical(testY)
# construct the image generator for data augmentation
#aug = ImageDataGenerator(rotation_range=30, width_shift_range=0.1, height_shift_range=0.1, zoom_range=0.2, horizontal_flip=True, fill_mode="nearest")
aug = ImageDataGenerator()
# initialize our VGG-like Convolutional Neural Network
model = SmallVGGNet.build(width=350, height=150, depth=3,
classes=2)
# initialize our initial learning rate, # of epochs to train for,
# and batch size
INIT_LR = 0.01
EPOCHS = 20
BS = 32
# initialize the model and optimizer (you'll want to use
# binary_crossentropy for 2-class classification)
print("[INFO] training network...")
opt = SGD(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(loss="binary_crossentropy", optimizer=opt,
metrics=["accuracy"])
# train the network
H = model.fit(x=aug.flow(trainX, trainY, batch_size=BS),
validation_data=(testX, testY), steps_per_epoch=len(trainX) // BS,
epochs=EPOCHS)
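One detail worth flagging in this trainer (an observation, not a guaranteed fix): SmallVGGNet ends in a two-node softmax and the labels are one-hot via to_categorical, yet the loss is binary_crossentropy. In Keras that combination also makes metrics=["accuracy"] resolve to binary accuracy, which can overstate performance. A minimal sketch of the two self-consistent pairings:
# (a) keep the 2-node softmax head and the one-hot labels:
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])
# (b) or switch to a single sigmoid unit with integer 0/1 labels (drop to_categorical):
#     model.add(Dense(1)); model.add(Activation("sigmoid"))
#     model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"])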
I've been working off of a guide I found for making an object classifier. My classifier's job is to determine whether the image it's looking at is anime, as I want to integrate it into a bot that will flag anime posted in a group chat. The chart the network outputs after training shows decent but improvable results, but the classifying script does not seem to be accurate at all. My dataset is 1000 images; here is the chart from my last training attempt.
As you can see, the val_loss value is fairly workable, but turbulent.
After training and saving the weights, I feed the trained model 2 test images: a normal houseplant, and a generic anime girl. The model predicts 0.37% Anime for the houseplant and 0.00% Anime for the anime photo. These photos are visually similar to their respective datasets, those being "Anime" and "Other" (which includes images of cars, plants, houses, and other "random" objects). The images are class-labeled by their subfolder.
Here is the code for my model:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras import backend as K

class SmallerVGGNet:
    @staticmethod
    def build(width, height, depth, classes):
        # initialize the model along with the input shape to be
        # "channels last" and the channels dimension itself
        model = Sequential()
        inputShape = (height, width, depth)
        chanDim = -1
        # if we are using "channels first", update the input shape
        # and channels dimension
        if K.image_data_format() == "channels_first":
            inputShape = (depth, height, width)
            chanDim = 1
        # CONV => RELU => POOL
        model.add(Conv2D(32, (3, 3), padding="same", input_shape=inputShape))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(MaxPooling2D(pool_size=(3, 3)))
        model.add(Dropout(0.25))
        # (CONV => RELU) * 2 => POOL
        model.add(Conv2D(64, (3, 3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Conv2D(64, (3, 3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))
        # (CONV => RELU) * 2 => POOL
        model.add(Conv2D(128, (3, 3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(Conv2D(128, (3, 3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))
        # first (and only) set of FC => RELU layers
        model.add(Flatten())
        model.add(Dense(1024))
        model.add(Activation("relu"))
        model.add(BatchNormalization())
        model.add(Dropout(0.5))
        # single sigmoid output node
        model.add(Dense(1))
        model.add(Activation("sigmoid"))
        return model
My trainer:
import matplotlib
matplotlib.use("Agg")
# import the necessary packages
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from pyimagesearch.smallervggnet import SmallerVGGNet
import matplotlib.pyplot as plt
from imutils import paths
import numpy as np
import argparse
import random
import pickle
import cv2
import os
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", required=True,
help="path to input dataset (i.e., directory of images)")
ap.add_argument("-m", "--model", required=True,
help="path to output model")
ap.add_argument("-l", "--labelbin", required=True,
help="path to output label binarizer")
ap.add_argument("-p", "--plot", type=str, default="plot.png",
help="path to output accuracy/loss plot")
args = vars(ap.parse_args())
# initialize the number of epochs to train for, initial learning rate,
# batch size, and image dimensions
EPOCHS = 100
INIT_LR = 1e-3
BS = 32
IMAGE_DIMS = (96, 96, 3)
# initialize the data and labels
data = []
labels = []
# grab the image paths and randomly shuffle them
print("Output: loading images...")
imagePaths = sorted(list(paths.list_images(args["dataset"])))
random.seed(42)
random.shuffle(imagePaths)
# loop over the input images
for imagePath in imagePaths:
    # load the image, pre-process it, and store it in the data list
    image = cv2.imread(imagePath)
    image = cv2.resize(image, (IMAGE_DIMS[1], IMAGE_DIMS[0]))
    image = img_to_array(image)
    data.append(image)
    # extract the class label from the image path and update the
    # labels list
    label = imagePath.split(os.path.sep)[-2]
    labels.append(label)
# scale the raw pixel intensities to the range [0, 1]
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)
print("Output: data matrix: {:.2f}MB".format(
data.nbytes / (1024 * 1000.0)))
# binarize the labels
lb = LabelBinarizer()
labels = lb.fit_transform(labels)
# partition the data into training and testing splits using 80% of
# the data for training and the remaining 20% for testing
(trainX, testX, trainY, testY) = train_test_split(data,
labels, test_size=0.2, random_state=42)
# construct the image generator for data augmentation
aug = ImageDataGenerator(rotation_range=45, width_shift_range=0.1,
height_shift_range=0.1, shear_range=0.2, zoom_range=0.2,
horizontal_flip=True, fill_mode="nearest")
# initialize the model
print("Output: compiling model...")
model = SmallerVGGNet.build(width=IMAGE_DIMS[1], height=IMAGE_DIMS[0],
depth=IMAGE_DIMS[2], classes=len(lb.classes_))
opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(loss="binary_crossentropy", optimizer=opt,
metrics=["accuracy"])
# train the network
print("Output: training network...")
H = model.fit(
x=aug.flow(trainX, trainY, batch_size=BS),
validation_data=(testX, testY),
steps_per_epoch=len(trainX) // BS,
epochs=EPOCHS, verbose=1)
# save the model to disk
print("Output: serializing network...")
model.save(args["model"], save_format="h5")
# save the label binarizer to disk
print("Output: serializing label binarizer...")
f = open(args["labelbin"], "wb")
f.write(pickle.dumps(lb))
f.close()
And lastly, my classifier:
import tensorflow
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
import numpy as np
import argparse
import imutils
import pickle
import cv2
import os
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-m", "--model", required=True,
help="path to trained model")
ap.add_argument("-l", "--labelbin", required=True,
help="path to label binarizer")
ap.add_argument("-i", "--image", required=True,
help="path to input image")
args = vars(ap.parse_args())
# load the image
image = cv2.imread(args["image"])
output = image.copy()
# pre-process the image for classification
image = cv2.resize(image, (96, 96))
image = image.astype("float") / 255.0
image = img_to_array(image)
image = np.expand_dims(image, axis=0)
# load the trained convolutional neural network and the label
# binarizer
print("[INFO] loading network...")
model = load_model(args["model"])
lb = pickle.loads(open(args["labelbin"], "rb").read())
# classify the input image
print("[INFO] classifying image...")
proba = model.predict(image)[0]
idx = np.argmax(proba)
label = lb.classes_[idx]
# correct answer
filename = args["image"][args["image"].rfind(os.path.sep) + 1:]
correct = "correct" if filename.rfind(label) != -1 else "incorrect"
# build the label and draw the label on the image
label = "{}: {:.2f}% ({})".format(label, proba[idx] * 100, correct)
output = imutils.resize(output, width=400)
cv2.putText(output, label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX,
0.7, (0, 255, 0), 2)
# show the output image
print("[INFO] {}".format(label))
cv2.imshow("Output", output)
cv2.waitKey(0)
Some things I've tried include smaller epoch counts, alternatives to sigmoid, different loss functions, and more concrete categories.
Your model ends with a single Dense unit with sigmoid activation; however, you later use np.argmax() alongside a list of labels as if you were using softmax activation. You are accidentally predicting whichever class is labeled "0" for every sample. Your model trained fine; it's just how you made the predictions afterwards.
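For completeness, a minimal sketch of prediction code matched to the sigmoid head (assuming the LabelBinarizer was fit on exactly two classes, so lb.classes_ holds them in 0/1 order):
proba = model.predict(image)[0][0]  # single sigmoid output, a scalar in [0, 1]
idx = int(proba > 0.5)              # 0 -> lb.classes_[0], 1 -> lb.classes_[1]
label = lb.classes_[idx]
confidence = proba if idx == 1 else 1 - proba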
I have a simple Python script (from a Keras training tutorial). I tried to remove img = img.convert('L') to keep the colors when loading images (all my images are RGB, so the data is not the issue), but I encountered this error:
training_images = np.array([i[0] for i in training_data]).reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 3)
ValueError: could not broadcast input array from shape (300,300,3) into shape (300,300)
What's going wrong, and how can I fix it?
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import BatchNormalization
from PIL import Image
from random import shuffle, choice
import numpy as np
import os

IMAGE_SIZE = 300
IMAGE_DIRECTORY = './data/test_set'

def label_img(name):
    if name == 'cats': return np.array([1, 0])
    elif name == 'notcats': return np.array([0, 1])

def load_data():
    train_data = []
    directories = next(os.walk(IMAGE_DIRECTORY))[1]
    for dirname in directories:
        file_names = next(os.walk(os.path.join(IMAGE_DIRECTORY, dirname)))[2]
        for i in range(200):
            image_name = choice(file_names)
            image_path = os.path.join(IMAGE_DIRECTORY, dirname, image_name)
            label = label_img(dirname)
            img = Image.open(image_path)
            #img = img.convert('L')
            img = img.resize((IMAGE_SIZE, IMAGE_SIZE), Image.ANTIALIAS)
            train_data.append([np.array(img), label])
    return train_data

def create_model():
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu',
                     input_shape=(IMAGE_SIZE, IMAGE_SIZE, 1)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(2, activation='softmax'))
    return model
training_data = load_data()
training_images = np.array([i[0] for i in training_data]).reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 1)
training_labels = np.array([i[1] for i in training_data])
model = create_model()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(training_images, training_labels, batch_size=50, epochs=10, verbose=1)
Since I was able to identify the problem after some discussion in the comments, I will post it as an answer.
At the line
training_images = np.array([i[0] for i in training_data]).reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 1)
you are attempting to reshape 3-channel RGB images into single channel (greyscale) images, which is not possible (and also not something you want to do, since you want to keep the colours), hence the ValueError. This part was only necessary before you removed img = img.convert('L'), in order to give the training data the proper shape for the model, which had an input shape of (IMAGE_SIZE, IMAGE_SIZE, 1).
Now that you are working with RGB images, the reshape can be removed, since the images will already have the correct shape (IMAGE_SIZE, IMAGE_SIZE, 3) as returned by load_data(). However, as explained in nneonneo's answer, your model will need to be modified to be able to handle the new input shape.
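Concretely, a sketch of the two lines that change for the RGB case (the rest of the script stays as it is):
training_images = np.array([i[0] for i in training_data])  # already (N, 300, 300, 3)
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu',
                 input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)))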
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu',
                 input_shape=(IMAGE_SIZE, IMAGE_SIZE, 1)))
Your model wants a grayscale image (1 channel), but you're trying to train on colour images (3 channels). This won't work. You will have to modify your model to take colour images, or pass in grayscale images. The sample code you started with uses .reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 1) in order to convert a grayscale image into the shape required for the first layer of this neural net.
If the model is designed for grayscale, you should simply leave the .convert('L') in, which converts colour images to grayscale. Many image classification tasks work just fine in grayscale.
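For the grayscale route, that means leaving the tutorial's two lines untouched:
img = img.convert('L')  # PIL: colour image -> single-channel greyscale
training_images = np.array([i[0] for i in training_data]).reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 1)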
I want to feed 8 images at the same time into the same CNN structure using Conv3D. My CNN model is as follows:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv3D, MaxPooling3D, Activation, BatchNormalization, Flatten, Dropout, Dense
from tensorflow.keras import backend as K

class CNN_Network:
    @staticmethod
    def build(sample, frame, height, width, channels, classes):
        model = Sequential()
        inputShape = (sample, frame, height, width, channels)
        chanDim = -1
        if K.image_data_format() == "channels_first":
            inputShape = (sample, frame, channels, height, width)
            chanDim = 1
        model.add(Conv3D(32, (3, 3, 3), padding="same", input_shape=inputShape))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(MaxPooling3D(pool_size=(2, 2, 2), padding="same", data_format="channels_last"))
        model.add(Dropout(0.25))
        model.add(Conv3D(64, (3, 3, 3), padding="same"))
        model.add(Activation("relu"))
        model.add(BatchNormalization(axis=chanDim))
        model.add(MaxPooling3D(pool_size=(2, 2, 2), padding="same", data_format="channels_last"))
        model.add(Dropout(0.25))
        model.add(Flatten())
        model.add(Dense(128))  # (Dense(1024))
        model.add(Activation("relu"))
        model.add(BatchNormalization())
        model.add(Dropout(0.5))
        # softmax classifier
        model.add(Dense(classes))
        model.add(Activation("softmax"))
        return model
The training of the model is as follows:
IMAGE_DIMS = (57, 8, 60, 60, 3)  # since I have 460 images: 57 samples with 8 images each
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)
# binarize the labels
lb = LabelBinarizer()
labels = lb.fit_transform(labels)
# note: data is a list of all dataset images
(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.2, random_state=42)
aug = ImageDataGenerator(rotation_range=25, width_shift_range=0.1, height_shift_range=0.1, shear_range=0.2, zoom_range=0.2, horizontal_flip=True, fill_mode="nearest")
# initialize the model
model = CNN_Network.build(sample= IMAGE_DIMS[0], frame=IMAGE_DIMS[1],
height = IMAGE_DIMS[2], width=IMAGE_DIMS[3],
channels=IMAGE_DIMS[4], classes=len(lb.classes_))
opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(loss="categorical_crossentropy", optimizer= opt, metrics=["accuracy"])
# train the network
model.fit_generator(
    aug.flow(trainX, trainY, batch_size=BS),
    validation_data=(testX, testY),
    steps_per_epoch=len(trainX) // BS,
    epochs=EPOCHS, verbose=1)
I am confused about the input_shape. I know Conv3D requires a 5D input, and that input_shape itself is 4D with the batch dimension added by Keras, but I get the following error:
ValueError: Error when checking input: expected conv3d_1_input to have 5 dimensions, but got array with shape (92, 60, 60, 3)
Can anyone please help me? Where does the 92 come from, given that I set the dimensions with (57, 8, 60, 60, 3)? And what should my input_shape be so that 8 colored images are fed into the same model at the same time?
Edit: updated the link.
Here is a custom ImageDataGenerator for 5D input to Conv3D nets. Hope it helps. Here is an example of how to use it:
from tweaked_ImageGenerator_v2 import ImageDataGenerator
datagen = ImageDataGenerator()
train_data=datagen.flow_from_directory('path/to/data', target_size=(x, y), batch_size=32, frames_per_step=4)
Or, you can build your own 5D tensor:
frames_folder = 'path/to/folder'
X_data = []
y_data = []
list_of_sent = os.listdir(frames_folder)
print(list_of_sent)
class_num = 0
time_steps = 0
frames = []
for i in list_of_sent:
    classes_folder = str(frames_folder + '/' + i)  # path to each class
    print(classes_folder)
    list_of_frames = os.listdir(classes_folder)
    time_steps = 0
    frames = []
    for filename in sorted(list_of_frames):
        if time_steps == 8:
            X_data.append(frames)     # appending each tensor of 8 frames resized to 110,110
            y_data.append(class_num)  # appending a class label to the set of 8 frames
            time_steps = 0
            frames = []
        else:
            time_steps += 1
            filename = cv2.imread(classes_folder + '/' + filename)
            filename = cv2.resize(filename, (110, 110), interpolation=cv2.INTER_AREA)
            frames.append(filename)
    class_num += 1
X_data = np.array(X_data)
y_data = np.array(y_data)
For the snippet above, the folder structure must be like this:
data/
    class0/
        img001.jpg
        img002.jpg
        ...
    class1/
        img001.jpg
        img002.jpg
        ...
The input shape must not include the sample dimension, so instead of
inputShape = (sample, frame, height, width, channels)
try:
inputShape = (frame, height, width, channels)
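To connect this back to the error: the (92, 60, 60, 3) array is simply the test split of individual images (460 × 0.2 = 92); the images were never grouped into 8-frame clips before train_test_split. A minimal sketch of the grouping, assuming consecutive images form a clip (460 is not divisible by 8, so the last 4 images are dropped to get 57 clips of 8):
data = np.array(data, dtype="float") / 255.0  # (460, 60, 60, 3)
data = data[:(len(data) // 8) * 8]            # trim to 456 images
data = data.reshape(-1, 8, 60, 60, 3)         # (57, 8, 60, 60, 3)
# the labels must likewise be reduced to one label per 8-frame clip before splitting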