I am training a binary CNN in Keras to classify the polarity of emotions (facial expressions), e.g. smiling/not_smiling. I am running the training on a multi-GPU machine, but I am surprised by how long it takes: each per-class binary model takes 5-6 hours. Is this normal/expected?
I had previously trained a single multi-class model combining all the classes, and that took about 4 hours in total.
Note: each pos/neg class contains ~5000-10000 images.
Am I doing this right? Is this training duration expected? Here is my code:
class_names = ["smiling","frowning","surprised","sad"]
## set vars!
for cname in class_names:
    print("[+] training: ", model_name, cname)
    dp_path_train = './emotion_data/{0}/train/{1}'.format(model_name, cname)
    dp_path_val = './emotion_data/{0}/val/{1}'.format(model_name, cname)
    dir_checkpoint = './models'
    G = 2  # no. of gpus to use
    batch_size = 32 * G
    step_size = 1000 // G
    print("[*] batch size & step size: ", batch_size, step_size)

    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())
    model.add(Conv2D(96, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    train_datagen = ImageDataGenerator(rescale=1./255,
                                       shear_range=0.2,
                                       zoom_range=0.2,
                                       horizontal_flip=True)
    test_datagen = ImageDataGenerator(rescale=1./255)

    training_set = train_datagen.flow_from_directory(dp_path_train,
                                                     target_size=(224, 224),
                                                     batch_size=batch_size,
                                                     class_mode='binary')
    test_set = test_datagen.flow_from_directory(dp_path_val,
                                                target_size=(224, 224),
                                                batch_size=batch_size,
                                                class_mode='binary')

    model.fit_generator(training_set,
                        steps_per_epoch=step_size,
                        epochs=50,
                        validation_data=test_set,
                        validation_steps=2000)

    print("[+] saving model: ", model_name, cname)
    model.save("./models2/{0}_{1}.hdf5".format(model_name, cname))
Removing all the BatchNormalization layers should help speed things up, or you can use them less frequently, i.e. not after every convolutional block in your architecture.
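For illustration, here is a minimal sketch of the same stack with BatchNormalization kept only after the first two blocks (this assumes IMG_SIZE and the rest of the training loop stay as in the question, so treat it as a starting point rather than a tuned architecture):

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Dropout, Flatten, Dense

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())  # kept
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())  # kept
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))  # BatchNormalization dropped here...
model.add(Conv2D(96, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))  # ...and here
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))  # ...and here
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])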
I'm new to deep learning. I'm trying to train a model that identifies plant diseases, and I'm using this dataset that has a bunch of images. I'm aware that this is a lot of data, and I'm only using the color subfolder. I want to use all of the data in that subfolder. The thing is, Kaggle only provides 13GB of memory as of now, and my session keeps restarting because my script is trying to use more memory than it has. Here is my code:
### There are some imports here that I removed because there are a lot of them
NUM_CLASSES = 38
IMG_SIZE = 150
x = []
y = []

def train_data_gen(DIR, ID):
    for img in os.listdir(DIR):
        try:
            path = DIR + '/' + img
            img = plt.imread(path)
            img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
            if img.shape == (IMG_SIZE, IMG_SIZE, 3):
                x.append(img)
                y.append(ID)
        except Exception:
            pass  # skip unreadable images

for DIR in os.listdir('../input/plantvillage-dataset/color/'):
    train_data_gen('../input/plantvillage-dataset/color/' + DIR, DIR)
    print(DIR)
print('reached label encoder')
le = LabelEncoder()
y = le.fit_transform(y)
x = np.array(x)
y = to_categorical(y, NUM_CLASSES)
print('data split')
x_train,x_test,y_train,y_test = train_test_split(x, y, test_size = 0.15)
x_train,x_val,y_train,y_val = train_test_split(x_train, y_train, test_size = 0.15)
print('datagen')
datagen = ImageDataGenerator(
    featurewise_center=False,
    samplewise_center=False,
    samplewise_std_normalization=False,
    rotation_range=60,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    fill_mode="reflect"
)
print('datagen_fit')
datagen.fit(x_train)
print('model')
model = Sequential()
model.add(Conv2D(64, kernel_size=(3, 3), strides=2, padding='Same', activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)))
model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
model.add(BatchNormalization())
#model.add(Dropout(0.2))
model.add(Conv2D(128, kernel_size=(3, 3), strides=2, padding='Same', activation='relu'))
model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
model.add(BatchNormalization())
#model.add(Dropout(0.3))
model.add(Conv2D(128, kernel_size=(3, 3), strides=2, padding='Same', activation='relu'))
model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
model.add(BatchNormalization())
#model.add(Dropout(0.3))
model.add(Conv2D(128, kernel_size=(3, 3), strides=2, padding='Same', activation='relu'))
#model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2)))
model.add(BatchNormalization())
model.add(Dropout(0.3))
model.add(Flatten())
model.add(Dense(1024, activation='relu'))
#model.add(Dropout(0.5))
model.add(BatchNormalization())
model.add(Dense(NUM_CLASSES, activation='softmax'))
print('Model compile')
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print('Model Fit')
model.fit_generator(datagen.flow(x_train,y_train,batch_size=32), epochs=75, steps_per_epoch=x_train.shape[0]//32, validation_data=(x_val, y_val), verbose=1)
model.save('plantus_model')
I've placed print statements throughout my code to see where the actual problem is. It stops right when I fit datagen. I don't think it's that one thing alone that uses up so much RAM, but all the stuff before it too. How can I reduce RAM usage so I can actually get to training my model?
Thank you in advance for providing answers and constructive feedback.
You are using datagen.fit. This is only necessary if you set any of the parameters featurewise_center, samplewise_center, featurewise_std_normalization, samplewise_std_normalization or zca_whitening to True. Since you are not doing that, you do not need to fit the data set, and skipping it should keep you from using too much memory.
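As a minimal sketch (assuming x_train, y_train, x_val, y_val and the model are built as in the question), training would then look like this, with the fit call on the generator removed:

from keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(
    rotation_range=60,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    fill_mode="reflect"
)

# datagen.fit(x_train) is only needed for featurewise_center,
# featurewise_std_normalization or zca_whitening, none of which are enabled

model.fit_generator(datagen.flow(x_train, y_train, batch_size=32),
                    epochs=75,
                    steps_per_epoch=x_train.shape[0] // 32,
                    validation_data=(x_val, y_val),
                    verbose=1)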
I use a CNN model for image classification. It gets great accuracy on test and validation (98% and 97%), but when I use my own image for prediction it always goes wrong. Here is my code:
BATCH_SIZE = 30
IMG_HEIGHT = 256
IMG_WIDTH = 256
STEPS_PER_EPOCH = np.ceil(image_count/BATCH_SIZE)
train_data_gen = image_generator.flow_from_directory(directory=str(data_dir),
                                                     batch_size=BATCH_SIZE,
                                                     shuffle=True,
                                                     target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                     classes=list(CLASS_NAMES))
here is the dataset preparation and data augmentation:
imgDataGen = ImageDataGenerator(
    validation_split=0.2,
    rescale=1/255,
    horizontal_flip=True,
    zoom_range=0.3,
    rotation_range=15.,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
prepare data:
train_dataset = imgDataGen.flow_from_directory(
    directory=str(data_dir),
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    classes=list(CLASS_NAMES),
    batch_size=BATCH_SIZE,
    subset='training'
)
val_dataset = imgDataGen.flow_from_directory(
    directory=str(data_dir),
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    classes=list(CLASS_NAMES),
    batch_size=BATCH_SIZE,
    subset='validation'
)
the model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same', input_shape=(256, 256, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(6, activation='sigmoid'))
compile:
model.compile(loss='binary_crossentropy',
              optimizer=keras.optimizers.SGD(learning_rate=0.001, momentum=0.9),
              metrics=['acc'])
train
history = model.fit_generator(
    train_dataset,
    validation_data=val_dataset,
    workers=10,
    epochs=20,
)
It gets pretty high accuracy: 98% on test and 97% on validation. But when I try to predict with my own code:
def prepare(filepath):
    IMG_SIZE = 256
    img_array = cv2.imread(filepath)
    new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
    return new_array.reshape(1, IMG_SIZE, IMG_SIZE, 3)
model=tf.keras.models.load_model('trained-model.h5',compile=False)
#np.set_printoptions(formatter={'float_kind':'{:f}'.format})
predict=model.predict([prepare('cat.jpg')])
pred_name = CATEGORIES[np.argmax(predict)]
print(pred_name)
it gets it wrong: with a cat image it predicts dog, and with a dog image it predicts cat. Sometimes it is right, but I think 98% should be more accurate than this; if I try 5 cat images it fails on 3 or 4 of them.
So is it because of the dataset or because of the code?
Please help, thanks.
So in your second code block you have this:
rescale=1/255
This normalizes your image into the range [0, 1], so every image gets rescaled before going through the network. But in your last code block, where you test on an image, you didn't add that normalization. Try adding it to your prepare function:
def prepare(filepath):
    IMG_SIZE = 256
    img_array = cv2.imread(filepath)
    # add this: scale pixel values to [0, 1] to match the training data
    img_array = img_array / 255
    new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
    return new_array.reshape(1, IMG_SIZE, IMG_SIZE, 3)
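With that change, the prediction code from the question can stay the same (a quick usage sketch; CATEGORIES and 'trained-model.h5' are as defined in the question):

model = tf.keras.models.load_model('trained-model.h5', compile=False)
predict = model.predict(prepare('cat.jpg'))  # input is now scaled like the training data
pred_name = CATEGORIES[np.argmax(predict)]
print(pred_name)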
I'm working on a binary classification problem. I was getting 69% accuracy at first, but I kept running out of memory, so I shrank certain parameters, and now the accuracy comes up as 0. Any idea what's going on?
model = Sequential()
from keras.layers import Dropout
model.add(Conv2D(96, kernel_size=11, padding="same", input_shape=(300, 300, 1), activation = 'relu'))
model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
model.add(Conv2D(128, kernel_size=3, padding="same", activation = 'relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Conv2D(128, kernel_size=3, padding="same", activation = 'relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
from keras.layers.core import Activation
model.add(Flatten())
# model.add(Dense(units=1000, activation='relu' ))
model.add(Dense(units= 300, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1))
model.add(Activation("softmax"))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
from keras.preprocessing.image import ImageDataGenerator
datagen = ImageDataGenerator(
    featurewise_center=True,
    rotation_range=90,
    fill_mode='nearest',
    validation_split=0.2
)
datagen.fit(train)
train_generator = datagen.flow(train, train_labels, batch_size=8)
# # fits the model on batches with real-time data augmentation:
history = model.fit_generator(generator=train_generator,
                              use_multiprocessing=True,
                              steps_per_epoch=len(train_generator) / 8,
                              epochs=5,
                              workers=20)
Softmax should only be used if you have a multiclass classification problem. You have a single output from your Dense layer, and softmax over a single unit always outputs 1.0, so the model can never predict the other class; use sigmoid instead.
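A minimal sketch of the fix, keeping the rest of the model as in the question:

model.add(Dense(1))
model.add(Activation("sigmoid"))  # sigmoid instead of softmax: softmax over one unit is always 1.0
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])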
I have been struggling for some days to create my own model for face recognition with Keras in Python. I will walk you through my setup and give you my code; I'm sure there is something I'm not doing right, but I can't tell what. My dataset is composed of 97 people with an average of 10 photos per person, 1106 photos in total. The dataset is in a folder "faces", and the pictures for each person are in subfolders named from 0 to 96. Here is the code I am using to process the data, split it into train, validation and test sets, and create my CNN with Keras. Any help or suggestions are much appreciated, thank you!
channels = 3
rows = 50
cols = 50
classes = 97
data = []
labels = []
images = sorted(list(paths.list_images("faces")))
for image in images:
    img = cv2.imread(image)
    img = cv2.resize(img, (rows, cols)).flatten()
    data.append(img)
    label = image.split(os.path.sep)[-2]
    labels.append(label)
data = np.array(data, dtype="float32") / 255.0
labels = [int(i) for i in labels]
labels = np.array(labels)
xtrain, testX, ytrain, testY = train_test_split(data, labels, test_size = 0.3, random_state = 13)
trainX, validX, trainY, validY = train_test_split(xtrain, ytrain, test_size = 0.2, random_state = 14)
lb = LabelBinarizer()
trainY = lb.fit_transform(trainY)
validY = lb.transform(validY)
testY = lb.transform(testY)
trainX = trainX.astype("float32")
validX = validX.astype("float32")
testX = testX.astype("float32")
trainY = trainY.astype("float32")
validY = validY.astype("float32")
testY = testY.astype("float32")
trainX = trainX.reshape([trainX.shape[0], cols, rows, channels])
validX = validX.reshape([validX.shape[0], cols, rows, channels])
testX = testX.reshape([testX.shape[0], cols, rows, channels])
weight_decay = 1e-4
model = Sequential()
model.add(Conv2D(32, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay), input_shape=trainX.shape[1:]))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(Conv2D(32, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.2))
model.add(Conv2D(64, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(Conv2D(64, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.3))
model.add(Conv2D(128, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(Conv2D(128, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay)))
model.add(Activation('elu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.4))
model.add(Flatten())
model.add(Dense(classes))
model.add(Activation('softmax'))
datagen = ImageDataGenerator(horizontal_flip=True,
                             featurewise_center=False,
                             samplewise_center=False,
                             featurewise_std_normalization=False,
                             samplewise_std_normalization=False,
                             zca_whitening=False,
                             rotation_range=20,
                             width_shift_range=0.2,
                             height_shift_range=0.2,
                             vertical_flip=False)
datagen.fit(trainX)
optim = RMSprop(lr=0.001, decay=1e-6)
#sgd = SGD(lr=0.01, momentum=0.9)
model.compile(loss='categorical_crossentropy', optimizer=optim, metrics=['accuracy'])
model.fit_generator(datagen.flow(trainX, trainY, batch_size=64), steps_per_epoch = trainX.shape[0], epochs = 50, validation_data=(validX, validY), verbose = 1)
score = model.evaluate(testX, testY, batch_size = 64, verbose = 1)
print("Test score: ", score[0])
print("Test accuracy: ", score[1])
model_json = model.to_json()
open('face_architecture.json', 'w').write(model_json)
model.save_weights('face_weights.h5', overwrite=True)
I want to implement a model in Keras for sentiment classification (anger or non-anger) based on spectrograms. I generated the spectrograms from the Friends audio dataset; each spectrogram covers 8 seconds of audio. In total, I have 9117 train samples, 1006 validation samples and 2402 test samples.
I use a relatively simple CNN architecture and I have tried different combinations of it + optimizer + learning rate + batch size, but none of the results seem to generalize well... The training loss decreases nicely up to a certain point, but the validation loss increases with each epoch.
This is the model I am using:
model = Sequential()
model.add(Convolution2D(filters=32, kernel_size=3, strides=1,input_shape=input_shape, activation='relu', padding="same"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
# model.add(ZeroPadding2D((1, 1)))
model.add(Convolution2D(filters=64, kernel_size=3, strides=1, activation='relu', padding="same"))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Convolution2D(filters=128, kernel_size=3, strides=1, activation='relu', padding="same"))
model.add(MaxPooling2D((2, 2), strides=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(classes, activation='sigmoid')) #output layer
This is how I load the images:
img_rows = 120
img_cols = 160
train_datagen = ImageDataGenerator(rescale=1./255)
validation_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(
    SPECTOGRAMS_DIRECTORY + TRAIN_SUBDIR,
    target_size=(img_cols, img_rows),
    batch_size=batch_size,
    class_mode='binary')
validation_generator = validation_datagen.flow_from_directory(
    SPECTOGRAMS_DIRECTORY + VALIDATION_SUBDIR,
    target_size=(img_cols, img_rows),
    batch_size=batch_size,
    class_mode='binary')
test_generator = test_datagen.flow_from_directory(
    SPECTOGRAMS_DIRECTORY + TEST_SUBDIR,
    target_size=(img_cols, img_rows),
    batch_size=1,
    class_mode='binary',
    shuffle=False)
input_shape=(img_cols, img_rows, channels)
opt = SGD(lr=0.001)
model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])
history = model.fit_generator(
    train_generator,
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples // batch_size,
    verbose=2)
##EVALUATE
print("EVALUATE THE MODEL...")
score = model.evaluate_generator(generator=validation_generator,
                                 steps=nb_validation_samples // batch_size)
The spectrograms look like this: [spectrogram image]
As I said, I tried different combinations of batch size (16, 32, 64), SGD with a 0.001 learning rate, and Adam with a 0.0001 learning rate, but in every case the training loss goes down while the validation loss goes up.
Your model seems to be over-fitting. You can try the approaches below to overcome this issue.
If possible, gather more data, or use data augmentation techniques to increase the number of samples (see the sketch after this list).
You can use dropout in Keras to reduce the over-fitting. (It looks like you have already added dropout; you can try tuning the rates.)
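As a rough sketch of the augmentation idea (the shift and zoom values are illustrative, and horizontal_flip is deliberately left out because time on a spectrogram usually has a direction):

from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(
    rescale=1./255,
    width_shift_range=0.1,  # approximates a small time shift
    zoom_range=0.1)
# keep the validation and test generators rescale-only, as before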
Thank you