I am having problem in shape, how could i resolve this?What can I do ?
File "run_demo.py", line 116, in main
model = get_crfrnn_model_def()
File "./src/crfrnn_model.py", line 70, in get_crfrnn_model_def
score_fused = Add()([score2, score_pool4c])
ValueError: Operands could not be broadcast together with shapes (3, 34, 4) (3, 34, 3)
def get_crfrnn_model_def():
channels, height, weight = 3, 500, 500
# Input
input_shape = (height, weight, 3)
img_input = Input(shape=input_shape)
# Add plenty of zero padding
x = ZeroPadding2D(padding=(100, 100))(img_input)
# VGG-16 convolution block 1
x = Conv2D(64, (3, 3), activation='relu', padding='valid', name='conv1_1')(x)
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='conv1_2')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='pool1')(x)
# VGG-16 convolution block 2
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='conv2_1')(x)
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='conv2_2')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='pool2', padding='same')(x)
# VGG-16 convolution block 3
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_1')(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_2')(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_3')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='pool3', padding='same')(x)
pool3 = x
# VGG-16 convolution block 4
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_1')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_2')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_3')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='pool4', padding='same')(x)
pool4 = x
# VGG-16 convolution block 5
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_1')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_2')(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_3')(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='pool5', padding='same')(x)
# Fully-connected layers converted to convolution layers
x = Conv2D(4096, (7, 7), activation='relu', padding='valid', name='fc6')(x)
x = Dropout(0.5)(x)
x = Conv2D(4096, (1, 1), activation='relu', padding='valid', name='fc7')(x)
x = Dropout(0.5)(x)
x = Conv2D(3, (1, 1), padding='valid', name='score-fr')(x)
print(x)
# Deconvolution
score2 = Conv2DTranspose(3, (4, 4), strides=2, name='score2')(x)
print(score2)
# Skip connections from pool4
score_pool4 = Conv2D(3, (1, 1), name='score-pool4')(pool4)
score_pool4c = Cropping2D((5, 5),name='score-pool4c')(score_pool4)
print('asd')
print(score_pool4c)
score_fused = Add()([score2, score_pool4c])
score4 = Conv2DTranspose(3, (4, 4), strides=2, name='score4', use_bias=False)(score_fused)
# Skip connections from pool3
score_pool3 = Conv2D(3, (1, 1), name='score-pool3')(pool3)
score_pool3c = Cropping2D((9, 9))(score_pool3)
# Fuse things together
score_final = Add()([score4, score_pool3c])
# Final up-sampling and cropping
upsample = Conv2DTranspose(3, (16, 16), strides=8, name='upsample', use_bias=False)(score_final)
upscore = Cropping2D(((31, 37), (31, 37)))(upsample)
output = CrfRnnLayer(image_dims=(height, weight),
num_classes=3,
theta_alpha=160.,
theta_beta=3.,
theta_gamma=3.,
num_iterations=10,
name='crfrnn')([upscore, img_input])
model = Model(img_input, output, name='crfrnn_net')
return model
import numpy as np
import tensorflow as tf
from keras.engine.topology import Layer
import high_dim_filter_loader
custom_module = high_dim_filter_loader.custom_module
def _diagonal_initializer(shape):
return np.eye(shape[0], shape[1], dtype=np.float32)
def _potts_model_initializer(shape):
return -1 * _diagonal_initializer(shape)
class CrfRnnLayer(Layer):
def __init__(self, image_dims, num_classes,
theta_alpha, theta_beta, theta_gamma,
num_iterations, **kwargs):
self.image_dims = image_dims
self.num_classes = num_classes
self.theta_alpha = theta_alpha
self.theta_beta = theta_beta
self.theta_gamma = theta_gamma
self.num_iterations = num_iterations
self.spatial_ker_weights = None
self.bilateral_ker_weights = None
self.compatibility_matrix = None
super(CrfRnnLayer, self).__init__(**kwargs)
def build(self, input_shape):
# Weights of the spatial kernel
self.spatial_ker_weights = self.add_weight(name='spatial_ker_weights',
shape=(self.num_classes, self.num_classes),
initializer=_diagonal_initializer,
trainable=True)
# Weights of the bilateral kernel
self.bilateral_ker_weights = self.add_weight(name='bilateral_ker_weights',
shape=(self.num_classes, self.num_classes),
initializer=_diagonal_initializer,
trainable=True)
# Compatibility matrix
self.compatibility_matrix = self.add_weight(name='compatibility_matrix',
shape=(self.num_classes, self.num_classes),
initializer=_potts_model_initializer,
trainable=True)
super(CrfRnnLayer, self).build(input_shape)
def call(self, inputs):
unaries = tf.transpose(inputs[0][0, :, :, :], perm=(2, 0, 1))
rgb = tf.transpose(inputs[1][0, :, :, :], perm=(2, 0, 1))
c, h, w = self.num_classes, self.image_dims[0], self.image_dims[1]
all_ones = np.ones((c, h, w), dtype=np.float32)
# Prepare filter normalization coefficients
spatial_norm_vals = custom_module.high_dim_filter(all_ones, rgb, bilateral=False,
theta_gamma=self.theta_gamma)
bilateral_norm_vals = custom_module.high_dim_filter(all_ones, rgb, bilateral=True,
theta_alpha=self.theta_alpha,
theta_beta=self.theta_beta)
q_values = unaries
for i in range(self.num_iterations):
softmax_out = tf.nn.softmax(q_values, 0)
# Spatial filtering
spatial_out = custom_module.high_dim_filter(softmax_out, rgb, bilateral=False,
theta_gamma=self.theta_gamma)
spatial_out = spatial_out / spatial_norm_vals
# Bilateral filtering
bilateral_out = custom_module.high_dim_filter(softmax_out, rgb, bilateral=True,
theta_alpha=self.theta_alpha,
theta_beta=self.theta_beta)
bilateral_out = bilateral_out / bilateral_norm_vals
# Weighting filter outputs
message_passing = (tf.matmul(self.spatial_ker_weights,
tf.reshape(spatial_out, (c, -1))) +
tf.matmul(self.bilateral_ker_weights,
tf.reshape(bilateral_out, (c, -1))))
# Compatibility transform
pairwise = tf.matmul(self.compatibility_matrix, message_passing)
# Adding unary potentials
pairwise = tf.reshape(pairwise, (c, h, w))
q_values = unaries - pairwise
return tf.transpose(tf.reshape(q_values, (1, c, h, w)), perm=(0, 2, 3, 1))
def compute_output_shape(self, input_shape):
return input_shape
OK I think I found the problem.
From the Keras documentation:
data_format: A string, one of "channels_last" or "channels_first". The ordering of the dimensions in the inputs. "channels_last" corresponds to inputs with shape (batch, height, width, channels) while "channels_first" corresponds to inputs with shape (batch, channels, height, width). It defaults to the image_data_format value found in your Keras config file at ~/.keras/keras.json. If you never set it, then it will be "channels_last".
However, that is a dirty lie. Actually, the image_data_format can be configured from the backend. And two lines in your run_demo.py does exactly that:
from keras import backend as K
K.set_image_dim_ordering('th')
The best part? This seems to be some legacy API. When I googled the function, I can only find it in Keras 1.2.2 documentation (The current version is 2.2.4).
Consider if you really need those two lines. Actually, remove the second line anyway. If you really need it, you can add in K.set_image_data_format('channels_first'), and probably you also need to change input shape to match:
# input_shape = (height, weight, channels)
input_shape = (channels, height, weight)
I think you can try using keras' image_data_format function:
from keras import backend as K
img_width, img_height = 500, 500
if K.image_data_format() == 'channels_first':
input_shape = (3, img_width, img_height)
else:
input_shape = (img_width, img_height, 3)
Related
Problem
I am doing two classes image segmentation, and I want to use loss function of dice coefficient. However validation loss is not improved. How to Solve these problem?
what I did
Using the mothod of one-hot encoding, Processed label image and it has not include backgroung label.
Code
Shape of X is (num of data, 256, 256, 1) # graysacle
Shape of y is (num of data, 256, 256, 2) # two class and exclude background label
one_hot_y = np.zeros((len(y), image_height, image_width, 2))
for i in range(len(y)):
one_hot = to_categorical(y[i])
one_hot_y[i] = one_hot[:,:,1:]
one_hot_y.shape #-> (566, 256, 256, 2)
#### <-- Unet Model --> ####
from tensorflow import keras
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Concatenate, Conv2DTranspose
from keras import Model
def unet(image_height, image_width, num_classes):
# inputs = Input(input_size)
inputs = Input(shape=(image_height, image_width, 1),name='U-net')
conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv1)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(pool1)
conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv2)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool2)
conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv3)
pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(pool3)
conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv4)
pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)
conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(pool4)
conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(conv5)
up6 = Concatenate()([Conv2DTranspose(256, (2, 2), strides=(2, 2), padding='same')(conv5), conv4])
conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(up6)
conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv6)
up7 = Concatenate()([Conv2DTranspose(128, (2, 2), strides=(2, 2), padding='same')(conv6), conv3])
conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(up7)
conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv7)
up8 = Concatenate()([Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(conv7), conv2])
conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(up8)
conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv8)
up9 = Concatenate()([Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(conv8), conv1])
conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(up9)
conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv9)
outputs = Conv2D(num_classes, (1, 1), activation='softmax')(conv9)
return Model(inputs=[inputs], outputs=[outputs])```
#### <-- Dice Score --> ####
from tensorflow.keras import backend as K
def dice_coef(y_true, y_pred):
y_true_f = K.flatten(y_true)
y_pred_f = K.flatten(y_pred)
intersection = K.sum(y_true_f * y_pred_f)
return (2. * intersection + 0.0001) / (K.sum(y_true_f) + K.sum(y_pred_f) + 0.0001)
def dice_coef_loss(y_true, y_pred):
return 1 - dice_coef(y_true, y_pred)```
#### <-- Fit the Model --> ####
from tensorflow.keras import optimizers
adam = optimizers.Adam(learning_rate=0.0001)
unet_model.compile(optimizer=adam, loss=[dice_coef_loss],metrics=[dice_coef])
hist = unet_model.fit(X_train,y_train, epochs=epochs, batch_size=batch_size,validation_data=(X_val,y_val), callbacks=[checkpoint,earlystopping])
I tried to replicate your experience. I used the Oxford-IIIT Pets database whose label has three classes: 1: Foreground, 2: Background, 3: Not classified. If class 1 ("Foreground") is removed as you did, then the val_loss does not change during the iterations. On the other hand, if the "Not classified" class is removed, the optimization seems to work. The model fails to discriminate between "Background" and "Not classified", which is conceivable.
Besides, there is a small error in the calculation of the dice coefficient: In the denominator, you need to take the sum of the squares. It doesn't change anything for y_true but for y_pred it does.
I can't say why your code doesn't work, but I can tell you the way I do it. Differences are that I exclude the background and encode the target inside the dice coef calculation function.
Then I define my Dice coefficient as follows:
def dice_coef(y_true, y_pred, smooth=1):
# flatten
y_true_f = K.flatten(y_true)
y_pred_f = K.flatten(y_pred)
# one-hot encoding y with 3 labels : 0=background, 1=label1, 2=label2
y_true_f = K.one_hot(K.cast(y_true_f, np.uint8), 3)
y_pred_f = K.one_hot(K.cast(y_pred_f, np.uint8), 3)
# calculate intersection and union exluding background using y[:,1:]
intersection = K.sum(y_true_f[:,1:]* y_pred_f[:,1:], axis=[-1])
union = K.sum(y_true_f[:,1:], axis=[-1]) + K.sum(y_pred_f[:,1:], axis=[-1])
# apply dice formula
dice = K.mean((2. * intersection + smooth)/(union + smooth), axis=0)
return dice
def dice_loss(y_true, y_pred):
return 1-dice_coef
I was also confused about this problem until I understood the following code!!!!
import numpy as np
from PIL import Image
from keras import backend as K
def dice_loss(y_true, y_pred):
y_true_f = K.flatten(y_true)
y_pred_f = K.flatten(y_pred)
intersection = K.sum(y_true_f* y_pred_f)
val = (2. * intersection + K.epsilon()) / (K.sum(y_true_f * y_true_f) + K.sum(y_pred_f * y_pred_f) + K.epsilon())
return 1. - val
arr1 = np.array([[[9.6,0.6,0.3],
[0.3,0.5,0.5]],
[[0.5,0.5,0.5],
[0.5,0.5,0.5]],
[[0.5,0.5,0.5],
[0.5,0.5,0.5]],
[[0.5,0.5,0.5],
[0.5,0.5,0.5]]])
arr2= np.array([[[9.6,0.6,0.3],
[0.3,0.5,0.5]],
[[0.5,0.5,0.5],
[0.5,0.5,0.5]],
[[0.5,0.5,0.5],
[0.5,0.5,0.5]],
[[0.5,0.5,0.5],
[0.5,0.5,0.5]]])
loss = dice_loss(arr1,arr2)
print(loss)
Lets say I have a model defined like this:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (BatchNormalization, concatenate,
Conv2D, Conv2DTranspose, DepthwiseConv2D,
Dropout, Input, MaxPooling2D,
ReLU, ZeroPadding2D)
input_layer = Input((64, 64, 3))
conv1 = Conv2D(16, (3, 3), padding="same")(input_layer)
conv1 = BatchNormalization()(conv1)
conv1 = ReLU()(conv1)
pool1 = MaxPooling2D((2,2))(conv1)
conv2 = Conv2D(32, (3, 3), padding="same")(pool1)
conv2 = BatchNormalization()(conv2)
conv2 = ReLU()(conv2)
pool2 = MaxPooling2D((2,2))(conv2)
conv3 = Conv2D(64, (3, 3), padding="same")(pool2)
conv3 = BatchNormalization()(conv3)
conv3 = ReLU()(conv3)
pool3 = MaxPooling2D((2,2))(conv3)
mid = Conv2D(128, (3, 3), padding="same")(pool3)
mid = BatchNormalization()(mid)
mid = ReLU()(mid)
dconv3 = Conv2DTranspose(64, (3, 3), strides=(2, 2), padding="same")(mid)
cat3 = concatenate([dconv3, conv3])
dconv2 = Conv2DTranspose(32, (3, 3), strides=(2, 2), padding="same")(dconv3)
cat2 = concatenate([dconv2, conv2])
dconv1 = Conv2DTranspose(16, (3, 3), strides=(2, 2), padding="same")(dconv2)
cat1 = concatenate([dconv1, conv1])
output_layer = Conv2D(1, (1,1), padding="same", activation="sigmoid")(dconv1)
model = Model(input_layer, output_layer)
The model is a very simple UNET which requires that the down sample blocks be concatenated with the upsample blocks. Lets now imagine that I want to define this exact model but with some arbitrary depth aka 2, 3, 4, 5 etc.. downsample and upsample blocks. Instead of having to go in and manually modify the parameters, I would like to automate the model building.
I am very close to accomplishing this, but I fail during concatenation. See below.
class configurable_model():
def __init__(self, csize, channels, start_neurons, depth):
self.csize = csize
self.channels = channels
self.start_neurons = start_neurons
self.depth = depth
def _convblock(self, factor, name):
layer = Sequential(name=name)
layer.add(Conv2D(self.start_neurons * factor, (3, 3), padding="same"))
layer.add(BatchNormalization())
layer.add(ReLU())
return layer
def build_model(self):
model = Sequential()
model.add(Input((self.csize, self.csize, self.channels), name='input'))
factor = 1
for idx in range(self.depth):
model.add(self._convblock(factor, f'downblock{idx}'))
model.add(MaxPooling2D((2,2)))
factor *= 2
model.add(self._convblock(factor, name='middle'))
for idx in reversed(range(self.depth)):
factor //= 2
model.add(Conv2DTranspose(self.start_neurons * factor, (3, 3),
strides=(2, 2), padding="same",
name=f'upblock{idx}'))
#how do I do the concatenation??
model.add(concatenate([model.get_layer(f'upblock{idx}'),
model.get_layer(f'downblock{idx}')]))
model.add(Conv2D(1, (1,1), padding="same",
activation="sigmoid", name='output'))
return model
test = configurable_model(64, 3, 16, 3)
model = test.build_model()
I have tried converting to the functional API, but run into the problem of 'naming' the layers and keeping track of them in the for loops. I tried Concatenate instead of concatenate. I tried model.get_layer('layername').output and model.get_layer('layername').output() in the concatenate statement, etc... nothing is working. The code above gives the error: ValueError: A Concatenate layer should be called on a list of at least 2 inputs.
I was able to get the functional version working by storing the downblocks in a dictionary that I reference later during concatenation. See below:
class configurable_model():
def __init__(self, csize, channels, start_neurons, depth):
self.csize = csize
self.channels = channels
self.start_neurons = start_neurons
self.depth = depth
def _convblock(self, factor, name=None):
block = Sequential(name=name)
block.add(Conv2D(self.start_neurons * factor, (3, 3), padding="same"))
block.add(BatchNormalization())
block.add(ReLU())
block.add(Conv2D(self.start_neurons * factor, (3, 3), padding="same"))
block.add(BatchNormalization())
block.add(ReLU())
return block
def build_model(self):
input_layer = Input((self.csize, self.csize, self.channels), name='input')
x = input_layer
factor = 1
downblocks = {}
for idx in range(self.depth):
x = self._convblock(factor, f'downblock{idx}')(x)
downblocks[f'downblock{idx}'] = x
x = MaxPooling2D((2, 2), name=f'maxpool{idx}')(x)
factor *= 2
x = self._convblock(factor, 'Middle')(x)
for idx in reversed(range(self.depth)):
factor //= 2
x = Conv2DTranspose(self.start_neurons * factor, (3, 3),
strides=(2, 2), padding="same",
name=f'upsample{idx}')(x)
cat = concatenate([x, downblocks[f'downblock{idx}']])
x = self._convblock(factor, f'upblock{idx}')(cat)
output_layer = Conv2D(1, (1, 1), padding="same",
activation="sigmoid", name='output')(x)
return Model(input_layer, output_layer)
This question already has an answer here:
ValueError: A merge layer should be called on a list of inputs. Add()
(1 answer)
Closed 3 years ago.
# import the necessary packages
import keras
from keras.initializers import glorot_uniform
from keras.layers import AveragePooling2D, Input, Add
from keras.models import Model
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dropout
from keras.layers.core import Dense
class SmallerVGGNet:
#staticmethod
def build(width, height, depth, classes, finalact):
X1 = Input(shape=(height, width, depth))
# # CONV => RELU => POOL
X = Conv2D(16, (3, 3), padding="same", strides=(1, 1), name="con_layer1")(X1)
X = BatchNormalization(axis=3)(X)
X = Activation("relu")(X)
X = MaxPooling2D(pool_size=(3, 3), strides=(1, 1))(X)
X = Conv2D(32, (3, 3), padding="same", strides=(2, 2), name="con_layer2")(X)
X = BatchNormalization(axis=3)(X)
X = Activation("relu")(X)
X = Conv2D(32, (3, 3), padding="same", strides=(1, 1), name="con_layer3")(X)
X = Activation("relu")(X)
X = BatchNormalization(axis=3)(X)
X = MaxPooling2D(pool_size=(3, 3), strides=(1, 1))(X)
# First component
X0 = Conv2D(256, (5, 5), strides=(1, 1), padding='same', kernel_initializer=glorot_uniform(seed=0))(X)
X0 = BatchNormalization(axis=3)(X0)
X0 = Activation("relu")(X0)
# (CONV => RELU) * 2 => POOL
X = Conv2D(64, (3, 3), padding="same", strides=(2, 2), name="con_layer4")(X0)
X = BatchNormalization(axis=3)(X)
X = Activation("relu")(X)
X = Conv2D(64, (3, 3), padding="same", strides=(1, 1), name="con_layer5")(X)
X = BatchNormalization(axis=3)(X)
X = Activation("relu")(X)
X = AveragePooling2D(pool_size=(3, 3), strides=(1, 1))(X)
# Second Component
X0 = Conv2D(512, (5, 5), strides=(1, 1), padding='valid', kernel_initializer=glorot_uniform(seed=0))(X)
X0 = BatchNormalization(axis=3)(X0)
X0 = Activation("relu")(X0)
# (CONV => RELU) * 2 => POOL
X = Conv2D(128, (3, 3), padding="same", strides=(2, 2), name="con_layer6")(X0)
X = BatchNormalization(axis=3)(X)
X = Activation("relu")(X)
X = Conv2D(128, (3, 3), padding="same", strides=(1, 1), name="con_layer7")(X)
X = BatchNormalization(axis=3)(X)
X = Activation("relu")(X)
X = MaxPooling2D(pool_size=(3, 3), strides=(1, 1))(X)
# Third Component
X0 = Conv2D(1024, (7, 7), strides=(2, 2), padding='valid', kernel_initializer=glorot_uniform(seed=0))(X)
X0 = BatchNormalization(axis=3)(X0)
X0 = Dense(128, activation="relu")(X0)
X0 = Activation("relu")(X0)
X = Flatten()(X1)
X = BatchNormalization()(X)
X = Dropout(0.5)(X)
output = Dense(classes, activation=finalact)(X)
model = Model(inputs=[X1], outputs=output)
print(model.summary())
return model
In the residual networks it should linked the normal layers with the residual or convolutional blocks. According to my code "X" are the normal layers and "X0" are the residual blocks. At the end i want to add these layers together. How to add these two layers together including a a relu activation function.
X0 is your residual block and X is your normal layer. First import from keras import layers then do layers.add([X,X0])
I'm having trouble with this autoencoder I'm building using Keras. The input's shape is dependent on the screen size, and the output is going to be a prediction of the next screen size... However there seems to be an error that I cannot figure out... Please excuse my awful formatting on this website...
Code:
def model_build():
input_img = InputLayer(shape=(1, env_size()[1], env_size()[0]))
x = Conv2D(32, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)
x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(32, (3, 3), activation='relu')(x)
x = UpSampling2D((2, 2))(x)
decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
model = Model(input_img, decoded)
return model
if __name__ == '__main__':
model = model_build()
model.compile('adam', 'mean_squared_error')
y = np.array([env()])
print(y.shape)
print(y.ndim)
debug = model.fit(np.array([[env()]]), np.array([[env()]]))
Error:
Traceback (most recent call last):
File "/home/ai/Desktop/algernon-test/rewarders.py", line 46, in
debug = model.fit(np.array([[env()]]), np.array([[env()]]))
File "/home/ai/.local/lib/python3.6/site-packages/keras/engine/training.py", line 952, in fit
batch_size=batch_size)
File "/home/ai/.local/lib/python3.6/site-packages/keras/engine/training.py", line 789, in _standardize_user_data
exception_prefix='target')
File "/home/ai/.local/lib/python3.6/site-packages/keras/engine/training_utils.py", line 138, in standardize_input_data
str(data_shape))
ValueError: Error when checking target: expected conv2d_7 to have shape (4, 268, 1) but got array with shape (1, 270, 480)
EDIT:
Code for get_screen imported as env():
def get_screen():
img = screen.grab()
img = img.resize(screen_size())
img = img.convert('L')
img = np.array(img)
return img
You have three 2x downsampling steps, and three x2 upsampling steps. These steps have no knowledge of the original image size, so they will round out the size to the nearest multiple of 8 = 2^3.
cropX = 7 - ((size[0]+7) % 8)
cropY = 7 - ((size[1]+7) % 8)
cropX = 7 - ((npix+7) % 8)
cropY = 7 - ((nlin+7) % 8)
It ought to work if you add a new final layer...
decoded = layers.Cropping2D(((0,cropY),(0,cropX)))(x)
Looks like env_size() and env() mess image dimensions somehow. Consider this example:
image1 = np.random.rand(1, 1, 270, 480) #First dimension is batch size for test purpose
image2 = np.random.rand(1, 4, 268, 1) #Or any other arbitrary dimensions
input_img = layers.Input(shape=image1[0].shape)
x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(input_img)
x = layers.MaxPooling2D((2, 2), padding='same')(x)
x = layers.Conv2D(16, (3, 3), activation='relu', padding='same')(x)
x = layers.MaxPooling2D((2, 2), padding='same')(x)
x = layers.Conv2D(8, (3, 3), activation='relu', padding='same')(x)
encoded = layers.MaxPooling2D((2, 2), padding='same')(x)
x = layers.Conv2D(8, (3, 3), activation='relu', padding='same')(encoded)
x = layers.UpSampling2D((2, 2))(x)
x = layers.Conv2D(16, (3, 3), activation='relu', padding='same')(x)
x = layers.UpSampling2D((2, 2))(x)
x = layers.Conv2D(32, (3, 3), activation='relu')(x)
x = layers.UpSampling2D((2, 2))(x)
decoded = layers.Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
model = tf.keras.Model(input_img, decoded)
model.compile('adam', 'mean_squared_error')
model.summary()
This line will work:
model.fit(image1, nb_epoch=1, batch_size=1)
But this doesn't
model.fit(image2, nb_epoch=1, batch_size=1)
Edit:
In order to get output of the same size as input you need to calculate convolution kernel size carefully.
image1 = np.random.rand(1, 1920, 1080, 1)
input_img = layers.Input(shape=image1[0].shape)
x = layers.Conv2D(32, 3, activation='relu', padding='same')(input_img)
x = layers.MaxPooling2D((2, 2), padding='same')(x)
x = layers.Conv2D(16, 3, activation='relu', padding='same')(x)
x = layers.MaxPooling2D((2, 2), padding='same')(x)
x = layers.Conv2D(8, 3, activation='relu', padding='same')(x)
encoded = layers.MaxPooling2D((2, 2), padding='same')(x)
x = layers.Conv2D(8, 3, activation='relu', padding='same')(encoded)
x = layers.UpSampling2D((2, 2))(x)
x = layers.Conv2D(16, 3, activation='relu', padding='same')(x)
x = layers.UpSampling2D((2, 2))(x)
x = layers.Conv2D(32, 1, activation='relu')(x) # set kernel size to 1 for example
x = layers.UpSampling2D((2, 2))(x)
decoded = layers.Conv2D(1, 3, activation='sigmoid', padding='same')(x)
model = tf.keras.Model(input_img, decoded)
model.compile('adam', 'mean_squared_error')
model.summary()
This will output same dimensions.
As per this guide http://cs231n.github.io/convolutional-networks/
We can compute the spatial size of the output volume as a function of
the input volume size (W), the receptive field size of the Conv Layer
neurons (F), the stride with which they are applied (S), and the
amount of zero padding used (P) on the border. You can convince
yourself that the correct formula for calculating how many neurons
“fit” is given by (W−F+2P)/S+1. For example for a 7x7 input and a 3x3
filter with stride 1 and pad 0 we would get a 5x5 output. With stride
2 we would get a 3x3 output.
I set the convolutional layer and the pooling layer, then deconvolute and de-pool, the input shape of 256 * 256 * 3 images, but finally there is a shape error:
def build_auto_encode_model(shape=(256,256,3)):
input_img = Input(shape=shape)
x = Convolution2D(16, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Convolution2D(8, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Convolution2D(8, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)
x = Convolution2D(8, (3, 3), activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)
x = Convolution2D(8, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Convolution2D(16, (3, 3), activation='relu')(x)
x = UpSampling2D((2, 2))(x)
decoded = Convolution2D(3, (3, 3), activation='sigmoid', padding='same')(x)
encoder = Model(inputs=input_img, outputs=encoded)
autoencoder = Model(inputs=input_img, outputs=decoded)
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
return encoder, autoencoder
def train_auto_encode_model(encoder_model_path="./data/encoder.h5"):
X = np.load("data/train.npy")
X_train = X[int(round(X.shape[0] * 0.2)):, :]
X_test = X[0:int(round(X.shape[0] * 0.2)), :]
encoder, autoencoder = build_auto_encode_model()
autoencoder.fit(X_train, X_train, epochs=10, batch_size=64, shuffle=True, validation_data=(X_test, X_test))
encoder.save(encoder_model_path)
Here is the error I get:
Error when checking target: expected conv2d_7 to have shape (252, 252, 3) but got array with shape (256, 256, 3)
Error trace-back:
By using autoencoder.summary() you would see that the output shape of the last Conv2D layer is (None, 252, 252, 3); so the labels of shape (256,256,3) are not compatible. The cause of this problem is that you have forgotten to set the padding argument of previous Conv2D layer. By setting it to 'same' this problem would resolve:
x = Convolution2D(16, (3, 3), activation='relu', padding='same')(x)