I am struggling for some days to create my own model for face recognition with Keras in python. I will walk you through my setup and give you my code, I'm sure there is something I'm not doing right but I can't tell what. So, my dataset is composed of 97 people with an average of 10 photos per person. The total number of photos is 1106. The dataset is in a folder "faces" and the pictures for each person are in folders named from 0 to 96. Here is the code I am using to process and split my data into train, validation and test and also to create my CNN using Keras. Any help or suggestions are much appreciated, thank you!
channels = 3
rows = 50
cols = 50
classes = 97
data = []
labels = []
images = sorted(list(paths.list_images("faces")))
for image in images:
img = cv2.imread(image)
img = cv2.resize(img, (rows, cols)).flatten()
data.append(img)
label = image.split(os.path.sep)[-2]
labels.append(label)
data = np.array(data, dtype="float32") / 255.0
labels = [int(i) for i in labels]
labels = np.array(labels)
xtrain, testX, ytrain, testY = train_test_split(data, labels, test_size = 0.3, random_state = 13)
trainX, validX, trainY, validY = train_test_split(xtrain, ytrain, test_size = 0.2, random_state = 14)
lb = LabelBinarizer()
trainY = lb.fit_transform(trainY)
validY = lb.transform(validY)
testY = lb.transform(testY)
trainX = trainX.astype("float32")
validX = validX.astype("float32")
testX = testX.astype("float32")
trainY = trainY.astype("float32")
validY = validY.astype("float32")
testY = testY.astype("float32")
trainX = trainX.reshape([trainX.shape[0], cols, rows, channels])
validX = validX.reshape([validX.shape[0], cols, rows, channels])
testX = testX.reshape([testX.shape[0], cols, rows, channels])
weight_decay = 1e-4
model = Sequential()
model.add(Conv2D(32, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay), input_shape=trainX.shape[1:]))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(Conv2D(32, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.2))
model.add(Conv2D(64, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(Conv2D(64, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.3))
model.add(Conv2D(128, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(Conv2D(128, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.4))
model.add(Flatten())
model.add(Dense(classes))
model.add(Activation('softmax'))
datagen = ImageDataGenerator(horizontal_flip=True,
featurewise_center=False,
samplewise_center=False,
featurewise_std_normalization=False,
samplewise_std_normalization=False,
zca_whitening=False,
rotation_range=20,
width_shift_range=0.2,
height_shift_range=0.2,
vertical_flip=False)
datagen.fit(trainX)
optim = RMSprop(lr=0.001, decay=1e-6)
#sgd = SGD(lr=0.01, momentum=0.9)
model.compile(loss='categorical_crossentropy', optimizer=optim, metrics=['accuracy'])
model.fit_generator(datagen.flow(trainX, trainY, batch_size=64), steps_per_epoch = trainX.shape[0], epochs = 50, validation_data=(validX, validY), verbose = 1)
score = model.evaluate(testX, testY, batch_size = 64, verbose = 1)
print("Test score: ", score[0])
print("Test accuracy: ", score[1])
model_json = model.to_json()
open('face_architecture.json', 'w').write(model_json)
model.save_weights('face_weights.h5', overwrite=True)
Related
I have created a model to predict emotion from a voice sample, the model is made from the code below:
there are total 8 emotions:
neutral, calm, happy, sad, angry, disgust, surprised
i first extracted the features of each and every voice sample and put them in a dataframe, then loaded
them one by one to both X and (labels to Y) then split the data as shown below:
x_train, x_test, y_train, y_test = train_test_split(X, Y, random_state=0, shuffle=True)
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
x_train = np.expand_dims(x_train, axis=2)
x_test = np.expand_dims(x_test, axis=2)
model=Sequential()
model.add(Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu', input_shape=(x_train.shape[1], 1)))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Conv1D(256, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Conv1D(128, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Dropout(0.2))
model.add(Conv1D(64, kernel_size=5, strides=1, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=5, strides = 2, padding = 'same'))
model.add(Flatten())
model.add(Dense(units=32, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(units=8, activation='softmax'))
model.compile(optimizer = 'adam' , loss = 'categorical_crossentropy' , metrics = ['accuracy'])
model.summary()
rlrp = ReduceLROnPlateau(monitor='loss', factor=0.4, verbose=0, patience=2, min_lr=0.0000001)
history=model.fit(x_train, y_train, batch_size=64, epochs=75, validation_data=(x_test, y_test), callbacks=[rlrp])
got total 89% accuracy
But when i try to predict any emotion, even with a voice sample from the training set, it always gives
"surprised" as the predicted emotion.
code below:
class livepredictions:
def __init__(self, path, file):
self.path = path
self.file = file
def loadmodel(self):
self.loaded_model = keras.models.load_model(self.path)
return self.loaded_model.summary()
def makepredictions(self):
x = []
scaler = StandardScaler()
x = extract_features(self.file)
Features = pd.DataFrame(x)
x = np.expand_dims(x, axis=0)
scaler.fit(x)
x = scaler.transform(x)
x = np.expand_dims(x, axis=2)
predictions = self.loaded_model.predict_classes(x)
print(predictions)
print("Prediction is", " ", self.convertclasstoemotion(predictions))
#staticmethod
def convertclasstoemotion(pred):
label_conversion = {'0': 'neutral',
'1': 'calm',
'2': 'happy',
'3': 'sad',
'4': 'angry',
'5': 'fear',
'6': 'disgust',
'7': 'surprised'}
for key, value in label_conversion.items():
if int(key) == pred:
label = value
return label
what am i doing wrong?
I'm new to deep learning. I'm trying to train a model that identifies plant diseases, and I'm using this dataset that has a bunch of images. I'm aware that this is a lot of data, and I'm only using the color subfolder. I want to use all of the data in that subfolder. The thing is, Kaggle only provides 13GB of memory as of now, and my session keeps restarting because my script is trying to use more memory than it has. Here is my code:
### There are some imports here that I removed because there is a lot of them
NUM_CLASSES = 38
IMG_SIZE = 150
x = []
y = []
def train_data_gen(DIR, ID):
for img in os.listdir(DIR):
try:
path = DIR + '/' + img
img = plt.imread(path)
img = cv2.resize(img, (IMG_SIZE,IMG_SIZE))
if img.shape == (IMG_SIZE, IMG_SIZE, 3):
x.append(img)
y.append(ID)
except:
None
#--
for DIR in os.listdir('../input/plantvillage-dataset/color/'):
train_data_gen('../input/plantvillage-dataset/color/' + DIR, DIR)
print(DIR)
#
print('reached label encoder')
le = LabelEncoder()
y = le.fit_transform(y)
x = np.array(x)
y = to_categorical(y, NUM_CLASSES)
print('data split')
x_train,x_test,y_train,y_test = train_test_split(x, y, test_size = 0.15)
x_train,x_val,y_train,y_val = train_test_split(x_train, y_train, test_size = 0.15)
print('datagen')
datagen = ImageDataGenerator(
featurewise_center=False,
samplewise_center=False,
samplewise_std_normalization=False,
rotation_range=60,
zoom_range = 0.1,
width_shift_range=0.1,
height_shift_range=0.1,
shear_range=0.1,
fill_mode = "reflect"
)
print('datagen_fit')
datagen.fit(x_train)
print('model')
model = Sequential()
model.add(Conv2D(64, kernel_size=(3, 3), strides=2, padding='Same', activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)))
model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
model.add(BatchNormalization())
#model.add(Dropout(0.2))
model.add(Conv2D(128, kernel_size=(3, 3), strides=2, padding='Same', activation='relu'))
model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
model.add(BatchNormalization())
#model.add(Dropout(0.3))
model.add(Conv2D(128, kernel_size=(3, 3), strides=2, padding='Same', activation='relu'))
model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
model.add(BatchNormalization())
#model.add(Dropout(0.3))
model.add(Conv2D(128, kernel_size=(3, 3), strides=2, padding='Same', activation='relu'))
#model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2)))
model.add(BatchNormalization())
model.add(Dropout(0.3))
model.add(Flatten())
model.add(Dense(1024, activation='relu'))
#model.add(Dropout(0.5))
model.add(BatchNormalization())
model.add(Dense(NUM_CLASSES, activation='softmax'))
print('Model compile')
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print('Model Fit')
model.fit_generator(datagen.flow(x_train,y_train,batch_size=32), epochs=75, steps_per_epoch=x_train.shape[0]//32, validation_data=(x_val, y_val), verbose=1)
model.save('plantus_model')
I've placed print functions through out my code to see where the actual problem is. The part where it stops is right when I fit datagen. I don't think it's that one thing that uses up so much ram, but all the stuff before it too. How can I reduce RAM usage so I can actually get to training my model?
Thank you in advance for providing answers and constructive feedback.
you are using datagen.fit. This is only necessary if you are setting any of the parameters featurewise_center, samplewise_center, featurewise_std_normalization,
samplewise_std_normalization or zca_whitening to True. Since you are not doing that you do not need to fit the data set. This should avoid you using to much memory.
I use a CNN model to train image classification , it got great accuracy at test and validation (98% and 97%), but when use my image to predict it alway go wrong, here is my code:
BATCH_SIZE = 30
IMG_HEIGHT = 256
IMG_WIDTH = 256
STEPS_PER_EPOCH = np.ceil(image_count/BATCH_SIZE)
train_data_gen = image_generator.flow_from_directory(directory=str(data_dir),
batch_size=BATCH_SIZE,
shuffle=True,
target_size=(IMG_HEIGHT, IMG_WIDTH),
classes = list(CLASS_NAMES))
here is prepare for dataset and data argumentation:
imgDataGen=ImageDataGenerator(
validation_split=0.2,
rescale=1/255,
horizontal_flip=True,
zoom_range=0.3,
rotation_range=15.,
width_shift_range=0.1,
height_shift_range=0.1,
)
prepare data:
train_dataset = imgDataGen.flow_from_directory(
directory=str(data_dir),
target_size = (IMG_HEIGHT, IMG_WIDTH),
classes = list(CLASS_NAMES),
batch_size = BATCH_SIZE,
subset = 'training'
)
val_dataset = imgDataGen.flow_from_directory(
directory=str(data_dir),
target_size = (IMG_HEIGHT, IMG_WIDTH),
classes = list(CLASS_NAMES),
batch_size =BATCH_SIZE,
subset = 'validation'
)
the model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same', input_shape=(256, 256, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(6, activation='sigmoid'))
complie:
model.compile(loss='binary_crossentropy',
optimizer=keras.optimizers.SGD(learning_rate=0.001,momentum=0.9),
metrics=['acc'])
train
history = model.fit_generator(
train_dataset,
validation_data = val_dataset,
workers=10,
epochs=20,
)
It get pretty high accuracy 98% on test and 97% on validation
but when i try with my code to predict
def prepare(filepath):
IMG_SIZE=256
img_array=cv2.imread(filepath)
new_array= cv2.resize(img_array,(IMG_SIZE,IMG_SIZE))
return new_array.reshape(1,IMG_SIZE,IMG_SIZE,3)
model=tf.keras.models.load_model('trained-model.h5',compile=False)
#np.set_printoptions(formatter={'float_kind':'{:f}'.format})
predict=model.predict([prepare('cat.jpg')])
pred_name = CATEGORIES[np.argmax(predict)]
print(pred_name)
it got wrong, with cat image it go for dog and dog for cat, but sometime it go right, just i think 98% is more accurate than this, if i try 5 image of cats it fail 3 or 4 images
so it because dataset or because of code?
please help, thanks
So in your second code-block you have this:
rescale=1/255
This is for normalizing your image into the range [0;1]. So every image gets rescaled (/normalized) before going through the network. But in you las code-block where you test it on an image you didnt add normalization. Try adding that to your "prepare" function:
def prepare(filepath):
IMG_SIZE = 256
img_array = cv2.imread(filepath)
# add this:
img_array = image_array / 255
new_array = cv2.resize(img_array,(IMG_SIZE,IMG_SIZE))
return new_array.reshape(1,IMG_SIZE,IMG_SIZE,3)
In my root folder images I have three folders called 0, 1, 2. In folder 0 there are no smilies. In folder 1 there are happy handwritten smilies and in folder 2 there are sad handwritten smilies.
The images are jpg color images with the dimension 26x26.
This is my code
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten
import numpy as np
import os
import cv2
from sklearn.model_selection import train_test_split
def getImages(path, classes):
folder = os.listdir(path)
classes_counter = 0
images = []
images_classes = []
for x in range (0,len(folder)):
myPicList = os.listdir(path+"/"+ str(classes[classes_counter]))
for pic in myPicList:
img_path = path+"/" + str(classes[classes_counter]) + "/" + pic
img = cv2.imread(img_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
images.append(img)
images_classes.append(classes_counter)
classes_counter +=1
images = np.array(images, dtype="float") / 255
return images, images_classes
def createModel(classes, images_dimension):
classes_amount = len(np.unique(classes))
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu', input_shape=images_dimension))
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, kernel_size=(3, 3), padding='same', activation='relu'))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, kernel_size=(3, 3), padding='same', activation='relu'))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(classes_amount, activation='softmax'))
return model
labels = [0,1,2]
images, images_classes = getImages('training-images', labels)
images_dimension=(26,26,3)
X_train, X_test, Y_train, Y_test = train_test_split(images, images_classes, test_size=0.2) # if 1000 images split will 200 for testing
X_train, X_validation, Y_train, Y_validation = train_test_split(X_train, Y_train, test_size=0.2) # if 1000 images 20% of remaining 800 will be 160 for validation
model = createModel(labels, images_dimension)
batch_size = 20
epochs = 100
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(X_validation, Y_validation))
model.evaluate(X_test,Y_test,verbose=0)
In the Line images = np.array(images, dtype="float") / 255 I get the error:
Traceback (most recent call last):
File "train-nn.py", line 54, in <module>
images, images_classes = getImages('training-images', labels)
File "train-nn.py", line 24, in getImages
images = np.array(images, dtype="float") / 255
ValueError: could not broadcast input array from shape (26,26) into shape (26)
I think something is wrong with the datastructure or with the array structure. I have no clue what I did wrong. Maybe someone known this problem and can give me a hint!
Here you can download the whole project as zip file.
http://fileshare.mynotiz.de/cnn-handwritten-smilies.zip
I found the problem. One of my test-data images was not in the format of 26x26 it was 26x23.
I am training a binary CNN in keras for classifying polarity of emotions (expression) e.g. Smiling/Not_smiling. this is my code. I am training this on multi-GPU machine, but surprised by how long this training takes. Each class binary model is taking 5-6 hours. Is this normal/expected?
I had previously trained a multi-class model combining all the classes and that took about 4 hours in total.
Note: each pos/neg class contains ~5000-10000 images.
Am I doing this right? Is this training duration expected?
class_names = ["smiling","frowning","surprised","sad"]
## set vars!
for cname in class_names:
print("[+] training: ",model_name,cname)
dp_path_train = './emotion_data/{0}/train/{1}'.format(model_name,cname)
dp_path_val = './emotion_data/{0}/val/{1}'.format(model_name,cname)
dir_checkpoint = './models'
G = 2 # no. of gpus to use
batch_size = 32 * G
step_size = 1000//G
print("[*] batch size & step size: ", batch_size,step_size)
model = Sequential()
model.add(Conv2D(32, kernel_size = (3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(BatchNormalization())
model.add(Conv2D(64, kernel_size=(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(BatchNormalization())
model.add(Conv2D(64, kernel_size=(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(BatchNormalization())
model.add(Conv2D(96, kernel_size=(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(BatchNormalization())
model.add(Conv2D(32, kernel_size=(3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(1, activation = 'sigmoid'))
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
train_datagen = ImageDataGenerator(rescale = 1./255,
shear_range = 0.2,
zoom_range = 0.2,
horizontal_flip = True)
test_datagen = ImageDataGenerator(rescale = 1./255)
training_set = train_datagen.flow_from_directory(dp_path_train,
target_size = (224, 224),
batch_size = batch_size,
class_mode = 'binary')
test_set = test_datagen.flow_from_directory(dp_path_val,
target_size = (224, 224),
batch_size = batch_size,
class_mode = 'binary')
model.fit_generator(training_set,
steps_per_epoch = step_size,
epochs = 50,
validation_data = test_set,
validation_steps = 2000)
print("[+] saving model: ",model_name,cname)
model.save("./models2/{0}_{1}.hdf5".format(model_name,cname))
Removing all the BatchNormalization layers should help speed things up, or you can use it less frequently between your network architecture layers