How to plot the confusion matrix after fitting my model?
this is how my database is:
train_path = '/content/drive/MyDrive/TCC_FILES/New_Dataset/train'
test_path = '/content/drive/MyDrive/TCC_FILES/New_Dataset/test'
valid_path = '/content/drive/MyDrive/TCC_FILES/New_Dataset/valid'
#New code
train_batches = ImageDataGenerator(preprocessing_function=tf.keras.applications.resnet50.preprocess_input, horizontal_flip = True,
width_shift_range = 0.2,
height_shift_range = 0.2) \
.flow_from_directory(directory=train_path, target_size=(IMG_WIDTH,IMG_HEIGHT), classes=['0_NoCancer', '1_Cancer'], batch_size=BATCH_SIZE)
valid_batches = ImageDataGenerator(preprocessing_function=tf.keras.applications.resnet50.preprocess_input) \
.flow_from_directory(directory=valid_path, target_size=(IMG_WIDTH,IMG_HEIGHT), classes=['0_NoCancer', '1_Cancer'], batch_size=BATCH_SIZE)
test_batches = ImageDataGenerator(preprocessing_function=tf.keras.applications.resnet50.preprocess_input) \
.flow_from_directory(directory=test_path, target_size=(IMG_WIDTH,IMG_HEIGHT), classes=['0_NoCancer', '1_Cancer'], batch_size=BATCH_SIZE, shuffle=False)
model.fit(train_batches,
steps_per_epoch=np.ceil(float(18522) / float(BATCH_SIZE)),
epochs = 20,callbacks=[early_stop,reduce_lr,checkpoint_callback],
validation_steps=np.ceil(float(2058) / float(BATCH_SIZE)),
validation_data = valid_batches)```
hey my friend if you wand evaluated your model you should train it before
and after use this code in sklearn
confusion_matrix(predictions.argmax(axis=1), y_test.argmax(axis=1))
ites difrent in code
Related
I am trying to load multiple models using for loop on the Azure ml notebooks cluster. However, I see that I am receiving the following error:
Can you kindly let me know if there is a solution this?
The code below is used for running and saving the model
def train_model_naive_split():
all_history = {}
inp_train_gen = ImageDataGenerator(rescale = 1./255,
rotation_range = 260,
width_shift_range=0.4,
height_shift_range=0.4,
shear_range=0.2,
zoom_range=0.4,
horizontal_flip = True,
vertical_flip=True,
fill_mode="nearest")
train_data = pd.read_csv("glaucoma.csv")
train_data['Glaucoma'] = train_data['Glaucoma'].astype(str)
Y = train_data[['Glaucoma']]
skf = StratifiedKFold(n_splits = 5, random_state = 7, shuffle = True)
fold = 1
for train_index, val_index in skf.split(np.zeros(len(train_data)),Y):
training_data = train_data.iloc[train_index]
validation_data = train_data.iloc[val_index]
train_iterator = inp_train_gen.flow_from_dataframe(training_data,
x_col='Filename',
y_col='Glaucoma',
directory='ORIGA/ORIGA/Images',
target_size=(256,256),
batch_size=20,
class_mode='binary',
shuffle=True)
validation_iterator = inp_train_gen.flow_from_dataframe(validation_data,
x_col='Filename',
y_col='Glaucoma',
directory='ORIGA/ORIGA/Images',
target_size=(256,256),
batch_size=20,
class_mode='binary',
shuffle=True)
model = create_cnn_model()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model_name = f'outputs/best-model-kfold-{fold}.h5'
history = model.fit(train_iterator,
validation_data=validation_iterator,
epochs=5,
)
model.save(model_name)
all_history[f'history-fold-{fold}'] = history
fold += 1
return all_history
The code below is used for loading the models from the saved folder:
model_files = os.listdir('outputs/')
models = []
for model_file in model_files[2:]:
print('loading model /outputs/'+model_file)
models.append(keras.models.load_model('outputs/'+model_file))
I am building a multiclass classification model that would be able to recognize 4 different insects. I am using Resnet50 (weights = imagenet).
The dataset is small, average 100 photos per class (more than 400 in total)
Depends on model, I usually get val_accuracy more than 90% (epochs 200) and test accuracy around 80-85% but when I print confusion matrix or plot actual and predicted labels for given photos, results are terrible (usually around 25%).
I have tried different models (resnet18, resnet50v2, Xception) I was freezing model layers, tried different data augumentation parameters, different model parameters(such as: Dropout(0.5, 0.2), kernel_regularizer='l2' because I read that helps reducing overfitting).
I think problem is while generating images but I don't know what else to change there, I tried val_generator with shuffle=False/True, train_generator seed=1/off but final results are similar.
I am adding images of confusion matrix, accuracy and plotted photos.
I am using jupyter notebook.
Thank you!
directory_train = "keras_splited/train"
directory_test = "keras_splited/test"
directory_val = "keras_splited/val"
BATCH_SIZE = 32
IMG_SIZE = 224
def make_DataImageGenerator(validation_split=None):
image_generator = ImageDataGenerator(
rescale=(1.0/255),
rotation_range=40,
zoom_range=0.1,
horizontal_flip=True,
vertical_flip=True,
validation_split=validation_split
)
return image_generator
train_img_generator = make_DataImageGenerator(validation_split=None)
val_img_generator = make_DataImageGenerator(validation_split=None)
test_img_generator = make_DataImageGenerator(validation_split=None)
def get_generator(img_generator, directory, train_valid=None, seed=None, shuffle=True):
train_generator = img_generator.flow_from_directory(
directory,
batch_size=BATCH_SIZE,
target_size=(IMG_SIZE, IMG_SIZE),
subset=train_valid,
seed=seed,
shuffle=shuffle
)
return train_generator
train_generator = get_generator(train_img_generator, directory_train)
val_generator = get_generator(val_img_generator, directory_val)
test_generator = get_generator(test_img_generator, directory_test)
target_labels = next(os.walk(directory_train))[1]
target_labels.sort()
num_classes = len(target_labels)
model_feature_extraction = tf.keras.applications.ResNet50(weights="imagenet", include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))
x = model_feature_extraction.output
x = layers.GlobalAveragePooling2D()(x)
x = Dense(1024, activation="relu")(x)
myModelOut = Dense(4, activation="softmax")(x)
model = Model(inputs=model_feature_extraction.input, outputs=myModelOut)
optimizer = "adam"
loss = "categorical_crossentropy"
def freeze_pretrained_weights(model):
#model.layers[0].trainable=False #wanted to freeze the model but didn't work good
model.compile(
optimizer=optimizer,
loss=loss,
metrics=["accuracy"]
)
return model
frozen_new_model = freeze_pretrained_weights(model)
my_callbacks = [
tf.keras.callbacks.ModelCheckpoint("testno/best_model/", save_best_only=True, monitor="accuracy", save_weights_only=False, mode="max"),
tf.keras.callbacks.ReduceLROnPlateau(monitor="loss", factor=0.2, patience=25, min_lr=0.001)
]
def train_model(model, train_gen, valid_gen, epochs):
train_steps_per_epoch = train_gen.n // train_gen.batch_size
history = model.fit(
train_gen,
steps_per_epoch=train_steps_per_epoch,
epochs=epochs,
callbacks=my_callbacks,
validation_data=valid_gen,
)
return history
history_frozen_model = train_model(frozen_new_model, train_generator, val_generator, epochs=150)
plt.figure(figsize=(15,5))
plt.subplot(121)
plt.plot(history_frozen_model.history['accuracy'])
plt.plot(history_frozen_model.history['val_accuracy'])
plt.title('Accuracy vs. epochs')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Training', 'Validation'], loc='lower right')
plt.subplot(122)
plt.plot(history_frozen_model.history['loss'])
plt.plot(history_frozen_model.history['val_loss'])
plt.title('Loss vs. epochs')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Training', 'Validation'], loc='upper right')
plt.show()
test_steps = test_generator.n // test_generator.batch_size
test_generator.reset()
new_model_test_loss, new_model_test_acc = frozen_new_model.evaluate(test_generator)
print('\nTest dataset')
print(f"Loss: {new_model_test_loss}")
print(f"Accuracy: {new_model_test_acc}")
pred = frozen_new_model.predict(test_generator, steps=test_steps, verbose=1)
batch = next(test_generator)
batch_images = np.array(batch[0])
batch_labels = np.array(batch[1])
target_labels = np.asarray(target_labels)
print(target_labels)
plt.figure(figsize=(15,15))
for n, i in enumerate(np.arange(6)):
actual = target_labels[np.argmax(batch_labels[i])]
predicted = target_labels[np.argmax(pred[i])]
confidence = round(100*(np.max(pred[i])),2)
ax = plt.subplot(3,3,n+1)
plt.imshow(batch_images[i])
plt.title(f"Actual: {actual},\n Predicted: {predicted},\n Confidence: {confidence}")
plt.axis('off')
from sklearn.metrics import ConfusionMatrixDisplay
y_true_lista = []
y_pred_lista = []
for i, img in enumerate(batch_labels):
y_true = np.argmax(batch_labels[i]).reshape(-1)
for i in y_true:
y_true_lista.append(i)
y_pred = np.argmax(pred[i]).reshape(-1)
for i in y_pred:
y_pred_lista.append(i)
print("y_true: ", y_true_lista)
print("y_pred: ", y_pred_lista)
matrix = confusion_matrix(y_true, y_pred)
#print(matrix.shape)
labels = target_labels
cm = confusion_matrix(y_true_lista, y_pred_lista)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
disp.plot(cmap=plt.cm.Blues, xticks_rotation = 'vertical')
plt.show()
I don't know what to change to get right results when plotting and on the matrix
Can someone point me to the right direction? What did I do wrong with this model or is it something wrong with plotting?
Failed to find data adapter that can handle input: <class 'tensorflow.python.keras.preprocessing.image.ImageDataGenerator'>, <class 'NoneType'>
I am trying to implement deep learning model and I have run the code more than 10 times , some times in 41, 72,88,100 epoch I got this error, is there anybody to help me
def Tuning_Model():
for k in range(FOLDS):
timestamp = datetime.fromtimestamp(time()).strftime('%Y%m%d-%H%M%S')
output_directory = model_path + "\\" + timestamp
if not os.path.exists(output_directory):
os.makedirs(output_directory)
# Training image augmentation
train_data_generator =tf.keras.preprocessing.image.ImageDataGenerator(
rescale=1. / 255,
fill_mode="constant",
shear_range=0.2,
zoom_range=(0.5, 1),
horizontal_flip=True,
rotation_range=360,
channel_shift_range=25,
brightness_range=(0.75, 1.25))
# Validation image augmentation
val_data_generator =tf.keras.preprocessing.image.ImageDataGenerator(
rescale=1. / 255,
fill_mode="constant",
shear_range=0.2,
zoom_range=(0.5, 1),
horizontal_flip=True,
rotation_range=360,
channel_shift_range=25,
brightness_range=(0.75, 1.25))
test_data_generator =tf.keras.preprocessing.image.ImageDataGenerator(rescale=1. / 255)
train_data_generator = train_data_generator.flow_from_directory(
train_data_path,
target_size = RAW_IMG_SIZE,
batch_size = BATCH_SIZE,
class_mode ='categorical'
)
val_data_generator = val_data_generator.flow_from_directory(
val_data_path,
target_size = RAW_IMG_SIZE,
batch_size = BATCH_SIZE,
class_mode = 'categorical')
test_data = test_data_generator.flow_from_directory(
test_data_path,
target_size = IMG_SIZE,
batch_size = BATCH_SIZE,
class_mode = 'categorical',
shuffle = False)
train_data_generator = crop_generator(train_data_generator, IMG_SIZE)
val_data_generator = crop_generator(val_data_generator, IMG_SIZE)
model = Build_Model(model_name)
model_checkpoint = ModelCheckpoint(output_directory + "\\best_model.hdf5", verbose=1, save_best_only=True)
early_stopping = EarlyStopping(patience=STOPPING_PATIENCE, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau('val_loss', factor=0.5, patience=LR_PATIENCE, min_lr=0.000003125)
model.compile(loss='binary_crossentropy', optimizer=Adam(lr=INITIAL_LR), metrics=['categorical_accuracy'])
history = model.fit_generator(
generator = train_data_generator,
steps_per_epoch = train_image_count // BATCH_SIZE,
epochs = epochs,
validation_data = val_data_generator,
validation_steps= val_image_count // BATCH_SIZE,
callbacks = [model_checkpoint, early_stopping, reduce_lr],
shuffle = False)
# Load the last best model
model = load_model(
output_directory + "\\best_model.hdf5")
# Evaluate model on test set
predictions = model.predict_generator(test_data_generator, test_image_count // BATCH_SIZE + 1)
y_true = test_data_generator.classes
y_pred = np.argmax(predictions, axis=1)
print(classification_report(y_true, y_pred, labels=CLASSES, target_names=CLASS_NAMES))
report = classification_report(y_true, y_pred, labels=CLASSES, target_names=CLASS_NAMES, output_dict=True)
with open(output_directory + '\\classification_report.csv', 'w') as f:
for key in report.keys():
f.write("%s,%s\n" % (key, report[key]))
conf_arr = confusion_matrix(y_true, y_pred, labels=CLASSES)
print(conf_arr)
np.savetxt(output_directory + "\\confusion_matrix.csv", conf_arr, delimiter=",")
# Clear model from GPU after each iteration
print("Finished testing fold {}\n".format(k + 1))
K.clear_session()
k = k + 1
if __name__ == '__main__':
Tuning_Model()
ValueError: Failed to find data adapter that can handle input: <class 'tensorflow.python.keras.preprocessing.image.ImageDataGenerator'>, <class 'NoneType
Have you checked whether training/testing data are in the form of numpy arrays before fitting the model?
Also it might be the case where you are importing model from keras like keras.Sequential() but the generator from Tensorflow like tf.keras.preprocessing.image.ImageDataGenerator().
I'm trying to generate two parameters from ImageDataGenerator for input to my model.fit_generator() but that don't work, I don't now if is the best way to do that.
My structure is:
enter image description here
input_imgen1 = ImageDataGenerator(rescale = 1./255,
vertical_flip=True,
validation_split=0.2,
horizontal_flip = True)
input_imgen2 = ImageDataGenerator(rescale = 1./255,
shear_range = 0.2,
zoom_range = 0.2,
rotation_range=5.)
testgenerator = ImageDataGenerator(rescale = 1./255)
def generate_generator_multiple(generator1, generator2, train_data_dir, batch_size, img_height,
img_width):
genX1 = generator1.flow_from_directory(train_data_dir,
target_size = (img_height, img_width),
class_mode = 'categorical',
batch_size = batch_size,
shuffle=False,
seed=7)
genX2 = generator2.flow_from_directory(train_data_dir,
target_size = (img_height, img_width),
class_mode = 'categorical',
batch_size = batch_size,
shuffle=False,
seed=7)
while True:
X1i = genX1.next()
X2i = genX2.next()
yield [X1i[0], X2i[0]], X2i[1] #Yield both images and their mutual label
data_gen_train=generate_generator_multiple(generator1=input_imgen1,
generator2=input_imgen2,
train_data_dir=train_dir,
batch_size=batch_size,
img_height=IMG_HEIGHT,
img_width=IMG_WIDTH)
history = model.fit_generator(
data_gen_train,
epochs=epochs,
steps_per_epoch=25,
verbose=1,
validation_data=testgenerator,
validation_steps=25,
callbacks=[checkpoint, early_stop, tensor_board]
)
Error when I fit:
enter image description here
As evident from the logs your error is caused during validation data_gen_valid should be constructed the same way as data_gen_train .
So if your training data has been the concatenation of two generators so should be your validation data.
I am trying to learn how to use multi-models input with flow_from_directory, but there is something I can't figure out. thanks for your help.
The way I understand is that we supply the fit_generator with the two_image_generator and the and the fit method will infer the labels.... what I am missing...
def two_image_generator(generator,
directory,
batch_size,
shuffle = False,
img_size1 = (224,224),
img_size2 = (299,299)):
gen1 = generator.flow_from_directory(
# This is the target directory
directory,
# All images will be resized to target height and width.
target_size=img_size1,
batch_size=batch_size,
# Since we use categorical_crossentropy loss, we need categorical labels
class_mode='categorical',
shuffle = shuffle,
seed = 1)
gen2 = generator.flow_from_directory(
# This is the target directory
directory,
# All images will be resized to target height and width.
target_size=img_size2,
batch_size=batch_size,
# Since we use categorical_crossentropy loss, we need categorical labels
class_mode='categorical',
shuffle = shuffle,
seed = 1)
while True:
X1i = gen1.next()
X2i = gen2.next()
if y_col:
yield [X1i[0], X2i[0]], X1i[1] #X1i[1] is the label
else:
yield [X1i, X2i]
#add data_augmentation
train_aug_datagen = ImageDataGenerator(
rotation_range = 20,
shear_range = 0.1,
zoom_range = 0.2,
width_shift_range = 0.1,
height_shift_range = 0.1,
horizontal_flip = True
)
train_generator = two_image_generator(train_aug_datagen,
train_dir,
batch_size = batch_size,
shuffle = True)
validation_datagen = ImageDataGenerator()
validation_generator = two_image_generator(validation_datagen,
validation_dir,
batch_size = batch_size,
shuffle = True)
def create_base_model(MODEL, img_size, lambda_fun = None):
inp = Input(shape = (img_size[0], img_size[1], 3))
x = inp
if lambda_fun:
x = Lambda(lambda_fun)(x)
base_model = MODEL(input_tensor = x, weights = 'imagenet',
include_top = False, pooling = 'avg')
model = Model(inp, base_model.output)
return model
#define vgg + resnet50 + densenet
model1 = create_base_model(vgg16.VGG16, (224, 224), vgg16.preprocess_input)
model2 = create_base_model(resnet50.ResNet50, (224, 224), resnet50.preprocess_input)
model3 = create_base_model(inception_v3.InceptionV3, (299, 299), inception_v3.preprocess_input)
model1.trainable = False
model2.trainable = False
model3.trainable = False
inpA = Input(shape = (224, 224, 3))
inpB = Input(shape = (299, 299, 3))
out1 = model1(inpA)
out2 = model2(inpA)
out3 = model3(inpB)
x = Concatenate()([out1, out2, out3])
x = Dropout(0.2)(x)
x = Dense(2, activation='softmax')(x)
model = Model([inpA, inpB], x)
############################################################################
trained_models_path = './models/VggFace_best_model'
model_names = trained_models_path + '_epoch_{epoch:02d}_val_acc_{val_accuracy:.4f}.hdf5'
checkpoint = ModelCheckpoint(model_names, 'val_accuracy', verbose=1, save_best_only=True)
############################################################################
early = EarlyStopping(monitor='val_loss', min_delta=0, patience=3, verbose=1, mode='auto')
callbacks = [checkpoint,early]
history = model.fit_generator(train_generator,
steps_per_epoch= NUM_TRAIN //batch_size,
epochs=100,
validation_data=validation_generator,
validation_steps= NUM_TEST //batch_size,
verbose=1,
use_multiprocessing=True,
workers=14,
callbacks=callbacks )
NameError: name 'y_col' is not defined
#add data_augmentation
train_aug_datagen = ImageDataGenerator(
rescale = 1./255,
rotation_range = 20,
shear_range = 0.1,
zoom_range = 0.2,
width_shift_range = 0.1,
height_shift_range = 0.1,
horizontal_flip = True
)
validation_datagen = ImageDataGenerator(rescale = 1./255)
def two_image_generator(generator,
directory,
batch_size,
shuffle = False,
img_size1 = (224,224),
img_size2 = (299,299)):
gen1 = generator.flow_from_directory(
# This is the target directory
directory,
# All images will be resized to target height and width.
target_size=img_size1,
batch_size=batch_size,
# Since we use categorical_crossentropy loss, we need categorical labels
class_mode='categorical',
shuffle = shuffle,
seed = 7)
gen2 = generator.flow_from_directory(
# This is the target directory
directory,
# All images will be resized to target height and width.
target_size=img_size2,
batch_size=batch_size,
# Since we use categorical_crossentropy loss, we need categorical labels
class_mode='categorical',
shuffle = shuffle,
seed = 7)
while True:
X1i = gen1.next()
X2i = gen2.next()
yield [X1i[0], X2i[0]], X2i[1] #Yield both images and their mutual label
train_generator = two_image_generator(train_aug_datagen,
train_dir,
batch_size = batch_size,
shuffle = True)
validation_generator = two_image_generator(validation_datagen,
validation_dir,
batch_size = batch_size,
shuffle = True)
############################################################################
trained_models_path = './models/VggFace_best_model'
model_names = trained_models_path + '_epoch_{epoch:02d}_val_acc_{val_accuracy:.4f}.hdf5'
checkpoint = ModelCheckpoint(model_names, 'val_accuracy', verbose=1, save_best_only=True)
############################################################################
early = EarlyStopping(monitor='val_loss', min_delta=0, patience=3, verbose=1, mode='auto')
callbacks = [checkpoint,early]
history = model.fit_generator(train_generator,
steps_per_epoch= NUM_TRAIN //batch_size,
epochs=100,
validation_data=validation_generator,
validation_steps= NUM_TEST //batch_size,
verbose=1,
use_multiprocessing=True,
# workers=14,
callbacks=callbacks )
Epoch 1/100
Found 5000 images belonging to 2 classes.
Found 5000 images belonging to 2 classes.
Found 52700 images belonging to 2 classes.
Found 52700 images belonging to 2 classes.
340/625 [===============>..............] - ETA: 4:37 - loss: 7.7634 - acc: 0.4926
import pandas as pd