Related
I am currently replicating a machine learning architecture found in this paper:
https://downloads.hindawi.com/journals/acisc/2018/1439312.pdf
Specifically on page 4 of the paper:
Any ideas on how to implement this on TensorFlow? My current model looks like this:
BATCH_SIZE = 32
INPUT_SIZE1 = (15, 30, 3)
INPUT_SIZE2 = (30, 60, 3)
INPUT_SIZE3 = (40 ,80, 3)
LEARNING_RATE = 0.001
EPOCHS = 10
CNN_CR1_INPUT = keras.Input(shape = INPUT_SIZE1)
CNN_CR1 = Conv2D(64, (5, 5), strides=2, padding='same', activation='relu')(CNN_CR1_INPUT)
CNN_CR1 = MaxPooling2D(3,3)(CNN_CR1)
CNN_CR1 = Conv2D(64, (3,3), strides=1, padding='same', activation='relu')(CNN_CR1)
CNN_CR1 = Conv2D(64, (3,3), strides=2, padding='same', activation='relu')(CNN_CR1)
CNN_CR1 = Flatten()(CNN_CR1)
CNN_CR1_OUTPUT = Dense(1)(CNN_CR1)
CNN_CR2_INPUT = keras.Input(shape = INPUT_SIZE2)
CNN_CR2 = Conv2D(64, (5,5), strides=2, padding='same', activation='relu')(CNN_CR2_INPUT)
CNN_CR2 = MaxPooling2D(3, 3)(CNN_CR2)
CNN_CR2 = Conv2D(64, (3,3), strides=1, padding='same', activation='relu')(CNN_CR2)
CNN_CR2 = Conv2D(64, (3,3), strides=2, padding='same', activation='relu')(CNN_CR2)
CNN_CR2 = Flatten()(CNN_CR2)
CNN_CR2_OUTPUT = Dense(1)(CNN_CR2)
CNN_CR3_INPUT = keras.Input(shape = INPUT_SIZE3)
CNN_CR3 = Conv2D(64, (5,5), strides=2, padding='same', activation='relu')(CNN_CR3_INPUT)
CNN_CR3 = MaxPooling2D(3, 3)(CNN_CR3)
CNN_CR3 = Conv2D(64, (3,3), strides=1, padding='same', activation='relu')(CNN_CR3)
CNN_CR3 = Conv2D(64, (3,3), strides=2, padding='same', activation='relu')(CNN_CR3)
CNN_CR3 = Flatten()(CNN_CR3)
CNN_CR3_OUTPUT = Dense(1)(CNN_CR3)
# SUGGESTION: This kinda weird. If this works, we only need 1 valitadation datagen? Not sure how it all connect together.
CNN_MAX = Maximum()([CNN_CR1_OUTPUT, CNN_CR2_OUTPUT, CNN_CR3_OUTPUT])
CNN_MODEL = keras.Model(inputs=[CNN_CR1_INPUT, CNN_CR2_INPUT, CNN_CR3_INPUT], outputs=[CNN_MAX])
I am not sure if the model I make is correct or not and I need some assistance. Also, how do you create the input pipeline for a cascading neural network like this one? I already tried this:
TRAIN_DATAGEN1 = ImageDataGenerator(
# SUGGESTION: Not sure if this is needed??
rescale = 1/255.0
)
TRAIN_GENERATOR1 = TRAIN_DATAGEN1.flow_from_directory(
os.path.join(WORKING_DATASETS['GI4E']['train']['images'], '0'),
target_size = (15, 30),
class_mode ='binary',
batch_size = BATCH_SIZE
)
TEST_DATAGEN1 = ImageDataGenerator(
rescale = 1/255.0,
)
TEST_GENERATOR1 = TEST_DATAGEN1.flow_from_directory(
os.path.join(WORKING_DATASETS['GI4E']['test']['images'], '0'),
target_size = (15, 30),
class_mode ='binary',
batch_size = BATCH_SIZE
)
# CNN 2
TRAIN_DATAGEN2 = ImageDataGenerator(
rescale = 1/255.0
)
TRAIN_GENERATOR2 = TRAIN_DATAGEN2.flow_from_directory(
os.path.join(WORKING_DATASETS['GI4E']['train']['images'], '1'),
target_size = (30, 60),
class_mode ='binary',
batch_size = BATCH_SIZE
)
TEST_DATAGEN2 = ImageDataGenerator(
rescale = 1/255.0,
)
TEST_GENERATOR2 = TEST_DATAGEN2.flow_from_directory(
os.path.join(WORKING_DATASETS['GI4E']['test']['images'], '1'),
target_size = (30, 60),
class_mode ='binary',
batch_size = BATCH_SIZE
)
# CNN 3
TRAIN_DATAGEN3 = ImageDataGenerator(
rescale = 1/255.0
)
TRAIN_GENERATOR3 = TRAIN_DATAGEN3.flow_from_directory(
os.path.join(WORKING_DATASETS['GI4E']['train']['images'], '2'),
target_size = (40 ,80),
class_mode = 'binary',
batch_size = BATCH_SIZE
)
TEST_DATAGEN3 = ImageDataGenerator(
rescale = 1/255.0,
)
TEST_GENERATOR3 = TEST_DATAGEN3.flow_from_directory(
os.path.join(WORKING_DATASETS['GI4E']['test']['images'], '2'),
target_size = (40 ,80),
class_mode = 'binary',
batch_size = BATCH_SIZE
)
But it spits out an error when I try to fit it.
ValueError: Failed to find data adapter that can handle input: (<class 'list'> containing values of types {"<class 'keras.preprocessing.image.DirectoryIterator'>"}), <class 'NoneType'>
After digging through the documentation, I am aware that TensorFlow cannot handle a bunch of ImageDataGenerator together in a list. But I'm not sure how to feed images to the model without ImageDataGenerator.
So, to summarize:
How to recreate said model in TensorFlow? Is the model that I create correct already?
How to create the input pipeline for the model? Any alternative beside ImageDataGenerator?
You could try building the model with tf.data - load the datas with keras.utils , for data augmentation you can add layers on to model architecture (tf.keras.layers.Rescaling,tf.keras.layers.RandomRotation etc). There are other augmentation methods that are given in the docs below which are more efficient in utilizing the GPU that ImageDataGenerator
https://www.tensorflow.org/tutorials/images/data_augmentation#data_augmentation_2
https://www.tensorflow.org/api_docs/python/tf/keras/utils
https://www.tensorflow.org/guide/keras/preprocessing_layers
I want to work this project on github : https://github.com/kesaroid/Glaucoma-Detection
And here is a CNN.py file to create keras h5 file:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
from os import path, environ
from imgaug import augmenters as iaa
from keras import backend as K
from keras import optimizers
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras.layers import BatchNormalization, Activation
from keras.layers import Input, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
img_width, img_height = 256, 256
channels = 3
input_shape = channels, img_width, img_height if K.image_data_format() == 'channels_first' \
else img_width, img_height, channels
train_data_dir = path.join('data', 'train')
validation_data_dir = path.join('data', 'validation')
nb_train_samples = int(environ.get('TRAINING_SAMPLES', 20))
nb_validation_samples = int(environ.get('VALIDATION_SAMPLES', 20))
batch_size = 16
epochs = 100
input_tensor = Input(shape=input_shape)
block1 = BatchNormalization(name='norm_0')(input_tensor)
# Block 1
block1 = Conv2D(8, (3, 3), name='conv_11', activation='relu')(block1)
block1 = Conv2D(16, (3, 3), name='conv_12', activation='relu')(block1)
block1 = Conv2D(32, (3, 3), name='conv_13', activation='relu')(block1)
block1 = Conv2D(64, (3, 3), name='conv_14', activation='relu')(block1)
block1 = MaxPooling2D(pool_size=(2, 2))(block1)
block1 = BatchNormalization(name='norm_1')(block1)
block1 = Conv2D(16, 1)(block1)
# Block 2
block2 = Conv2D(32, (3, 3), name='conv_21', activation='relu')(block1)
block2 = Conv2D(64, (3, 3), name='conv_22', activation='relu')(block2)
block2 = Conv2D(64, (3, 3), name='conv_23', activation='relu')(block2)
block2 = Conv2D(128, (3, 3), name='conv_24', activation='relu')(block2)
block2 = MaxPooling2D(pool_size=(2, 2))(block2)
block2 = BatchNormalization(name='norm_2')(block2)
block2 = Conv2D(64, 1)(block2)
# Block 3
block3 = Conv2D(64, (3, 3), name='conv_31', activation='relu')(block2)
block3 = Conv2D(128, (3, 3), name='conv_32', activation='relu')(block3)
block3 = Conv2D(128, (3, 3), name='conv_33', activation='relu')(block3)
block3 = Conv2D(64, (3, 3), name='conv_34', activation='relu')(block3)
block3 = MaxPooling2D(pool_size=(2, 2))(block3)
block3 = BatchNormalization(name='norm_3')(block3)
# Block 4
block4 = Conv2D(64, (3, 3), name='conv_41', activation='relu')(block3)
block4 = Conv2D(32, (3, 3), name='conv_42', activation='relu')(block4)
block4 = Conv2D(16, (3, 3), name='conv_43', activation='relu')(block4)
block4 = Conv2D(8, (2, 2), name='conv_44', activation='relu')(block4)
block4 = MaxPooling2D(pool_size=(2, 2))(block4)
block4 = BatchNormalization(name='norm_4')(block4)
block4 = Conv2D(2, 1)(block4)
block5 = GlobalAveragePooling2D()(block4)
output = Activation('softmax')(block5)
model = Model(inputs=[input_tensor], outputs=[output])
model.summary()
model.compile(loss='categorical_crossentropy',
optimizer=optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False),
metrics=['accuracy'])
# Initiate the train and test generators with data Augmentation
sometimes = lambda aug: iaa.Sometimes(0.6, aug)
seq = iaa.Sequential([
iaa.GaussianBlur(sigma=(0, 1.0)),
iaa.Sharpen(alpha=1, lightness=0),
iaa.CoarseDropout(p=0.1, size_percent=0.15),
sometimes(iaa.Affine(
scale={'x': (0.8, 1.2), 'y': (0.8, 1.2)},
translate_percent={'x': (-0.2, 0.2), 'y': (-0.2, 0.2)},
rotate=(-30, 30),
shear=(-16, 16)))
])
train_datagen = ImageDataGenerator(
rescale=1. / 255,
preprocessing_function=seq.augment_image,
horizontal_flip=True,
vertical_flip=True)
test_datagen = ImageDataGenerator(
rescale=1. / 255,
horizontal_flip=True,
vertical_flip=True)
train_generator = train_datagen.flow_from_directory(
train_data_dir,
target_size=(img_height, img_width),
batch_size=batch_size,
class_mode='categorical')
validation_generator = test_datagen.flow_from_directory(
validation_data_dir,
target_size=(img_height, img_width),
class_mode='categorical')
checkpoint = ModelCheckpoint('f1.h5', monitor='acc', verbose=1, save_best_only=True, save_weights_only=False,
mode='auto', period=1)
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=2, verbose=0, mode='auto', cooldown=0, min_lr=0)
model.fit_generator(
train_generator,
steps_per_epoch=nb_train_samples // batch_size,
epochs=epochs,
validation_data=validation_generator,
validation_steps=nb_validation_samples // batch_size,
callbacks=[checkpoint, reduce_lr]
)
But i get the error message:
C:\anaconda3\envs\tf2.7\python.exe
C:/Users/yigit/Documents/GitHub/Glaucoma-Detection/CNN.py
Traceback (most recent call last):
File "C:\Users\yigit\Documents\GitHub\Glaucoma-Detection\CNN.py", line 38, in
block1 = MaxPooling2D(pool_size=(2, 2))(block1)
File "C:\anaconda3\envs\tf2.7\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
raise e.with_traceback(filtered_tb) from None
File "C:\anaconda3\envs\tf2.7\lib\site-packages\keras\engine\input_spec.py", line 213, in assert_input_compatibility
raise ValueError(f'Input {input_index} of layer "{layer_name}" '
ValueError: Input 0 of layer "max_pooling2d" is incompatible with the layer: expected ndim=4, found ndim=6. Full shape received: (None, 3, 256, 248, 248, 64)
Process finished with exit code 1
What should i do guys ?
tf.keras.layers.MaxPool2D expects input shape 4D tensor with shape (batch_size, rows, cols, channels). In this case you are adding extra dimension. Make sure your inputs are of 4D.
Working sample code
import tensorflow as tf
input_shape = (4, 28, 28, 3)
x = tf.random.normal(input_shape)
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.InputLayer(input_shape=input_shape[1:]))
model.add(tf.keras.layers.Conv2D(filters=32,
kernel_size=2,
activation='relu'))
model.add(tf.keras.layers.MaxPool2D(pool_size=2))
output = model(x)
Output
TensorShape([4, 13, 13, 32])
I am using a Unet model for satellite image segmentation with inputs 512x512x3. But on executing the model i am getting the following error:
ValueError: Cannot feed value of shape (3, 512, 512) for Tensor 'conv2d_19_target:0', which has shape '(?, ?, ?, ?)'. the code for the Unet model is :
from __future__ import print_function
import os
from skimage.transform import resize
from skimage.io import imsave
import numpy as np
from keras.models import Model
from keras.layers import Input, concatenate, Conv2D, MaxPooling2D, Conv2DTranspose
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint
from keras import backend as K
from data import load_train_data, load_test_data
K.set_image_data_format('channels_last') # TF dimension ordering in this code
img_rows = 512
img_cols = 512
image_channels=3
smooth = 1.
OUTPUT_MASK_CHANNELS = 1
def dice_coef(y_true, y_pred):
y_true_f = K.flatten(y_true)
y_pred_f = K.flatten(y_pred)
intersection = K.sum(y_true_f * y_pred_f)
return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)
def dice_coef_loss(y_true, y_pred):
return -dice_coef(y_true, y_pred)
def get_unet():
inputs = Input((img_rows, img_cols, 3))
conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv1)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(pool1)
conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv2)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool2)
conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv3)
pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(pool3)
conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv4)
pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)
conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(pool4)
conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(conv5)
up6 = concatenate([Conv2DTranspose(256, (2, 2), strides=(2, 2), padding='same')(conv5), conv4], axis=3)
conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(up6)
conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv6)
up7 = concatenate([Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(conv6), conv3], axis=3)
conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(up7)
conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv7)
up8 = concatenate([Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(conv7), conv2], axis=3)
conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(up8)
conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv8)
up9 = concatenate([Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(conv8), conv1], axis=3)
conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(up9)
conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv9)
conv_final = Conv2D(OUTPUT_MASK_CHANNELS, (1, 1),activation='sigmoid')(conv9)
#conv_final = Activation('sigmoid')(conv_final)
model = Model(inputs, conv_final, name="ZF_UNET_224")
#conv10 = Conv2D(1, (1, 1), activation='sigmoid')(conv9)
#model = Model(inputs=[inputs], outputs=[conv10])
model.compile(optimizer=Adam(lr=1e-5), loss=dice_coef_loss, metrics=[dice_coef])
return model
def preprocess(imgs):
imgs_p = np.ndarray((imgs.shape[0], img_rows, img_cols), dtype=np.uint8)
for i in range(imgs.shape[0]):
imgs_p[i] = resize(imgs[i], (img_cols, img_rows), preserve_range=True)
imgs_p = imgs_p[..., np.newaxis]
return imgs_p
def train_and_predict():
print('-'*30)
print('Loading and preprocessing train data...')
print('-'*30)
imgs_train, imgs_mask_train = load_train_data()
#imgs_train = preprocess(imgs_train)
#imgs_mask_train = preprocess(imgs_mask_train)
imgs_train = imgs_train.astype('float32')
mean = np.mean(imgs_train) # mean for data centering
std = np.std(imgs_train) # std for data normalization
imgs_train -= mean
imgs_train /= std
imgs_mask_train = imgs_mask_train.astype('float32')
imgs_mask_train /= 255. # scale masks to [0, 1]
print('-'*30)
print('Creating and compiling model...')
print('-'*30)
model = get_unet()
model_checkpoint = ModelCheckpoint('weights.h5', monitor='val_loss', save_best_only=True)
print('-'*30)
print('Fitting model...')
print('-'*30)
model.fit(imgs_train, imgs_mask_train, batch_size=3, epochs=20, verbose=2, shuffle=True,
validation_split=0.2,
callbacks=[model_checkpoint])
print('-'*30)
print('Loading and preprocessing test data...')
print('-'*30)
imgs_test, imgs_id_test = load_test_data()
imgs_test = preprocess(imgs_test)
imgs_test = imgs_test.astype('float32')
imgs_test -= mean
imgs_test /= std
print('-'*30)
print('Loading saved weights...')
print('-'*30)
model.load_weights('weights.h5')
print('-'*30)
print('Predicting masks on test data...')
print('-'*30)
imgs_mask_test = model.predict(imgs_test, verbose=1)
np.save('imgs_mask_test.npy', imgs_mask_test)
print('-' * 30)
print('Saving predicted masks to files...')
print('-' * 30)
pred_dir = 'preds'
if not os.path.exists(pred_dir):
os.mkdir(pred_dir)
for image, image_id in zip(imgs_mask_test, imgs_id_test):
image = (image[:, :, 0] * 255.).astype(np.uint8)
imsave(os.path.join(pred_dir, str(image_id) + '_pred.png'), image)
if __name__ == '__main__':
train_and_predict()
The error traceback is as follows:
File "/home/deeplearning/Downloads/Models/ultrasound-nerve-segmentation-master/train.py", line 158, in <module> train_and_predict()
File "/home/deeplearning/Downloads/Models/ultrasound-nerve-segmentation-master/train.py", line 124, in train_and_predict callbacks=[model_checkpoint])
File "/home/deeplearning/anaconda3/envs/myenv/lib/python3.6/site-packages/keras/engine/training.py", line 1037, in fit
validation_steps=validation_steps)
File "/home/deeplearning/anaconda3/envs/myenv/lib/python3.6/site-packages/keras/engine/training_arrays.py", line 199, in fit_loop
outs = f(ins_batch)
File "/home/deeplearning/anaconda3/envs/myenv/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2672, in __call__
return self._legacy_call(inputs)
File "/home/deeplearning/anaconda3/envs/myenv/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2654, in _legacy_call
**self.session_kwargs)
File "/home/deeplearning/anaconda3/envs/myenv/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 767, in run
run_metadata_ptr)
File "/home/deeplearning/anaconda3/envs/myenv/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 944, in _run
% (np_val.shape, subfeed_t.name, str(subfeed_t.get_shape())))
ValueError: Cannot feed value of shape (3, 512, 512) for Tensor 'conv2d_19_target:0', which has shape '(?, ?, ?, ?)'
Plz help me finding what wrong is going in it
You set K.set_image_data_format('channels_last'), but your input image (3 X 512 X 512) has channels first. Either change to K.set_image_data_format('channels_first')(which may not work for the UNET), or permute the dimensions of your input image with np.tranpose to have the input shape (512,512,3).
I'm currently working on an image generating Conv NN and an audio generating Recurrent NN. I built for both the generators but for some reason the build_audio_generator model has in its last layer a Tensor (Tensor("model_4/sequential_4/activation_4/Tanh:0") with shape (?, 1) instead of (?, 28, 28, 1) as needed. My question, how do I have to change the code of build_audio_generator so that it has the same shape(?, 28, 28, 1) as build_generator?
Code:
def build_generator(latent_dim, channels, num_classes):
model = Sequential()
model.add(Dense(128 * 7 * 7, activation="relu", input_dim=latent_dim))
model.add(Reshape((7, 7, 128)))
model.add(BatchNormalization(momentum=0.8))
model.add(UpSampling2D())
model.add(Conv2D(128, kernel_size=3, padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(momentum=0.8))
model.add(UpSampling2D())
model.add(Conv2D(64, kernel_size=3, padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(momentum=0.8))
model.add(Conv2D(channels, kernel_size=3, padding='same'))
model.add(Activation("tanh"))
model.summary()
noise = Input(shape=(latent_dim,))
label = Input(shape=(1,), dtype='int32')
label_embedding = Flatten()(Embedding(num_classes, 100)(label))
model_input = multiply([noise, label_embedding])
img = model(model_input)
return Model([noise, label], img)
def build_audio_generator(latent_dim, num_classes):
model = Sequential()
model.add(LSTM(512, input_dim=latent_dim, return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(512, return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(512))
model.add(Dense(256))
model.add(Dropout(0.3))
model.add(Dense(num_classes))
model.add(Activation('tanh'))
model.summary()
noise = Input(shape=(None, latent_dim,))
label = Input(shape=(1,), dtype='int32')
label_embedding = Flatten()(Embedding(num_classes, 100)(label))
model_input = multiply([noise, label_embedding])
sound = model(model_input)
return Model([noise, label], sound)
# Build the generator
generator = build_generator(100, 3, 1)
audio_generator = build_audio_generator(100, 1)
# The generator takes noise and the target label as input
# and generates the corresponding digit of that label
noise = Input(shape=(None, 100,))
label = Input(shape=(1,))
img = generator([noise, label])
audio = audio_generator([noise, label])
print('Audio: '+ str(audio))
print('Audio shape: ' + str(audio.shape))
print('IMG: '+str(img))
print('IMG shape: ' + str(img.shape))
Console output:
Audio: Tensor("model_4/sequential_4/activation_4/Tanh:0", shape=(?, 1), dtype=float32)
Audio shape: (?, 1)
IMG: Tensor("model_3/sequential_3/activation_3/Tanh:0", shape=(?, 28, 28, 1), dtype=float32)
IMG shape: (?, 28, 28, 1)
I think you would want 3D for audio instead, no?
Just keep return_sequences=True in all LSTMs.
I'm just getting started with Keras and with Deep learning, so the answer to my question could be obvious to some, but for me it isn't.
I made a model to colorize some black and white photos following the article on Floydhub (where I'm training it) and it works just fine when I train it with similar pictures (such as human faces) but as soon as I use a larger dataset as an input with different pictures, the loss just remains stable and doesn't get better.
I've tried different learning rates and optimizers but just cannot get a good result.
What could I change to get a better result?
This is the code (thanks to Emil Wallner for the article on Floydhub)
# Get images
X = []
for filename in os.listdir('/data/images/Train/'):
X.append(img_to_array(load_img('/data/images/Train/'+filename)))
X = np.array(X, dtype=float)
Xtrain = 1.0/255*X
#Load weights
inception = InceptionResNetV2(weights=None, include_top=True)
inception.load_weights('/data/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5')
inception.graph = tf.get_default_graph()
embed_input = Input(shape=(1000,))
#Encoder
encoder_input = Input(shape=(256, 256, 1,))
encoder_output = Conv2D(64, (3,3), activation='relu', padding='same', strides=2)(encoder_input)
encoder_output = Conv2D(128, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(128, (3,3), activation='relu', padding='same', strides=2)(encoder_output)
encoder_output = Conv2D(256, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(256, (3,3), activation='relu', padding='same', strides=2)(encoder_output)
encoder_output = Conv2D(512, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(512, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(256, (3,3), activation='relu', padding='same')(encoder_output)
#Fusion
fusion_output = RepeatVector(32 * 32)(embed_input)
fusion_output = Reshape(([32, 32, 1000]))(fusion_output)
fusion_output = concatenate([encoder_output, fusion_output], axis=3)
fusion_output = Conv2D(256, (1, 1), activation='relu', padding='same')(fusion_output)
#Decoder
decoder_output = Conv2D(128, (3,3), activation='relu', padding='same')(fusion_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)
decoder_output = Conv2D(64, (3,3), activation='relu', padding='same')(decoder_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)
decoder_output = Conv2D(32, (3,3), activation='relu', padding='same')(decoder_output)
decoder_output = Conv2D(16, (3,3), activation='relu', padding='same')(decoder_output)
decoder_output = Conv2D(2, (3, 3), activation='tanh', padding='same')(decoder_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)
model = Model(inputs=[encoder_input, embed_input], outputs=decoder_output)
#Create embedding
def create_inception_embedding(grayscaled_rgb):
grayscaled_rgb_resized = []
for i in grayscaled_rgb:
i = resize(i, (299, 299, 3), mode='constant')
grayscaled_rgb_resized.append(i)
grayscaled_rgb_resized = np.array(grayscaled_rgb_resized)
grayscaled_rgb_resized = preprocess_input(grayscaled_rgb_resized)
with inception.graph.as_default():
embed = inception.predict(grayscaled_rgb_resized)
return embed
# Image transformer
datagen = ImageDataGenerator(
shear_range=0.4,
zoom_range=0.4,
rotation_range=40,
horizontal_flip=True)
#Generate training data
batch_size = 20
def image_a_b_gen(batch_size):
for batch in datagen.flow(Xtrain, batch_size=batch_size):
grayscaled_rgb = gray2rgb(rgb2gray(batch))
embed = create_inception_embedding(grayscaled_rgb)
lab_batch = rgb2lab(batch)
X_batch = lab_batch[:,:,:,0]
X_batch = X_batch.reshape(X_batch.shape+(1,))
Y_batch = lab_batch[:,:,:,1:] / 128
yield ([X_batch, create_inception_embedding(grayscaled_rgb)], Y_batch)
#Train model
tensorboard = TensorBoard(log_dir="/output")
model.compile(optimizer='adam', loss='mse')
model.fit_generator(image_a_b_gen(batch_size), callbacks=[tensorboard], epochs=1000, steps_per_epoch=20)
#Make a prediction on the unseen images
color_me = []
for filename in os.listdir('../Test/'):
color_me.append(img_to_array(load_img('../Test/'+filename)))
color_me = np.array(color_me, dtype=float)
color_me = 1.0/255*color_me
color_me = gray2rgb(rgb2gray(color_me))
color_me_embed = create_inception_embedding(color_me)
color_me = rgb2lab(color_me)[:,:,:,0]
color_me = color_me.reshape(color_me.shape+(1,))
# Test model
output = model.predict([color_me, color_me_embed])
output = output * 128
# Output colorizations
for i in range(len(output)):
cur = np.zeros((256, 256, 3))
cur[:,:,0] = color_me[i][:,:,0]
cur[:,:,1:] = output[i]
imsave("result/img_"+str(i)+".png", lab2rgb(cur))