I need to add SENet (squeeze-and-excitation) blocks at key points of my VGG16 CNN. Everything runs fine, but when I decode the prediction the result is very strange: I give it a picture of a panda and it tells me it is a "cloak". When I test with the official VGG16 I do not have this problem, and the wrong prediction happens whether or not the SENet block is included.
import tensorflow as tf
import numpy as np
import cv2
import matplotlib.pyplot as plt
%matplotlib inline

# Dense, ReLU, Flatten, Dropout and Layer are used by the model code below
from tensorflow.keras.layers import (Activation, Conv2D, Input, BatchNormalization, Reshape,
                                     GlobalAveragePooling2D, Dense, ReLU, Flatten, Dropout, Layer)
from tensorflow.keras.activations import sigmoid, softmax, relu, tanh
from tensorflow.keras import Sequential
from tensorflow.keras.preprocessing.image import load_img

import keras
from keras.preprocessing import image
from keras import backend as K
from keras.applications.vgg16 import VGG16, preprocess_input, decode_predictions

import vis
from vis.utils import utils

tf.compat.v1.disable_eager_execution()
class SENET_Attn(Layer):
    def __init__(self, out_dim, ratio, layer_name="SENET"):
        super(SENET_Attn, self).__init__()
        self.out_dim = out_dim
        self.ratio = ratio
        self.layer_name = layer_name

    def build(self, input_shape):  # Keras passes the input shape here
        self.Global_Average_Pooling = GlobalAveragePooling2D(keepdims=True)
        # integer number of units for the bottleneck
        self.Fully_connected_1_1 = Dense(units=self.out_dim // self.ratio,
                                         name=self.layer_name + '_fully_connected1')
        self.Relu = ReLU()
        self.Fully_connected_2 = Dense(units=self.out_dim,
                                       name=self.layer_name + '_fully_connected2',
                                       activation="tanh")
        self.Sigmoid = Activation("sigmoid")

    def call(self, inputs):
        inputs = tf.cast(inputs, dtype="float32")
        squeeze = self.Global_Average_Pooling(inputs)       # (batch, 1, 1, channels)
        excitation = self.Fully_connected_1_1(squeeze)      # bottleneck
        excitation = self.Relu(excitation)
        excitation = self.Fully_connected_2(excitation)
        excitation = self.Sigmoid(excitation)               # channel weights in (0, 1)
        excitation = tf.reshape(excitation, [-1, 1, 1, self.out_dim])
        scale = inputs * excitation                         # re-weight the input channels
        return scale
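A minimal way to sanity-check the block in isolation (a sketch with hypothetical dimensions; the block should only rescale the channels, not change the shape):
check_in = tf.keras.Input(shape=(56, 56, 64))      # hypothetical feature-map size
check_out = SENET_Attn(out_dim=64, ratio=8)(check_in)
tf.keras.Model(check_in, check_out).summary()      # expected output shape: (None, 56, 56, 64)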
Vgg = VGG16(weights='imagenet', include_top=True)
# SENET-RATIO
ratio = 8
input_layer = tf.keras.Input(shape=(224,224,3))
out = Vgg.layers[1](input_layer) # Block 1 of VGG16
out = Vgg.layers[2](out)
out = Vgg.layers[3](out)
#out = SENET_Attn(out.shape[-1], ratio, )(out) # SENET Attention
out = Vgg.layers[4](out) # Block 2 of VGG16
out = Vgg.layers[5](out)
out = Vgg.layers[6](out)
#out = SENET_Attn(out.shape[-1], ratio, )(out) # SENET Attention
out = Vgg.layers[7](out) # Block 3 of VGG16
out = Vgg.layers[8](out)
out = Vgg.layers[9](out)
out = Vgg.layers[10](out)
#out = SENET_Attn(out.shape[-1], ratio, )(out) # SENET Attention
out = Vgg.layers[11](out) # Block 4 of VGG16
out = Vgg.layers[12](out)
out = Vgg.layers[13](out)
out = Vgg.layers[14](out)
#out = SENET_Attn(out.shape[-1], ratio, )(out) # SENET Attention
out = Vgg.layers[15](out) #Block 5 of VGG16
out = Vgg.layers[16](out)
out = Vgg.layers[17](out)
out = Vgg.layers[18](out)
#out = SENET_Attn(out.shape[-1], ratio, )(out) # SENET Attention
flatten = Flatten()(out)
out = Dense(4096, activation='relu')(flatten)
out = Dropout(0.5)(out)
out = Dense(4096, activation='relu')(out)
out = Dropout(0.5)(out)
out = Dense(1000, activation='softmax')(out)
model = tf.keras.Model(inputs=input_layer, outputs= out)
model.compile('adam', loss ='mae', metrics=['accuracy'])
model.summary()
Model: "model_24"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_99 (InputLayer) [(None, 224, 224, 3)] 0
block1_conv1 (Conv2D) (None, 224, 224, 64) 1792
block1_conv2 (Conv2D) (None, 224, 224, 64) 36928
block1_pool (MaxPooling2D) (None, 112, 112, 64) 0
block2_conv1 (Conv2D) (None, 112, 112, 128) 73856
block2_conv2 (Conv2D) (None, 112, 112, 128) 147584
block2_pool (MaxPooling2D) (None, 56, 56, 128) 0
block3_conv1 (Conv2D) (None, 56, 56, 256) 295168
block3_conv2 (Conv2D) (None, 56, 56, 256) 590080
block3_conv3 (Conv2D) (None, 56, 56, 256) 590080
block3_pool (MaxPooling2D) (None, 28, 28, 256) 0
block4_conv1 (Conv2D) (None, 28, 28, 512) 1180160
block4_conv2 (Conv2D) (None, 28, 28, 512) 2359808
block4_conv3 (Conv2D) (None, 28, 28, 512) 2359808
block4_pool (MaxPooling2D) (None, 14, 14, 512) 0
block5_conv1 (Conv2D) (None, 14, 14, 512) 2359808
block5_conv2 (Conv2D) (None, 14, 14, 512) 2359808
block5_conv3 (Conv2D) (None, 14, 14, 512) 2359808
block5_pool (MaxPooling2D) (None, 7, 7, 512) 0
flatten_24 (Flatten) (None, 25088) 0
dense_72 (Dense) (None, 4096) 102764544
dropout_14 (Dropout) (None, 4096) 0
dense_73 (Dense) (None, 4096) 16781312
dropout_15 (Dropout) (None, 4096) 0
dense_74 (Dense) (None, 1000) 4097000
=================================================================
Total params: 138,357,544
Trainable params: 138,357,544
Non-trainable params: 0
_________________________________________________________________
img = load_img('Panda.jpg',target_size=(224,224))
x = tf.keras.utils.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)
print('The most accurate possibility is :', tf.keras.applications.vgg16.decode_predictions(model.predict(x), top=3)[0])
The most accurate possibility is : [('n03045698', 'cloak', 0.9999887), ('n01692333', 'Gila_monster', 5.1300117e-06), ('n02965783', 'car_mirror', 2.17886e-06)]
I imagine that one of the layers is missing, but the summary of my model shows the same layers and the same parameter counts. Does anyone have a solution? Thank you for your help.
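Edit: the summary only compares layer types, output shapes and parameter counts. A quick check I could also run, to compare the actual weights of my final classifier with the stock VGG16 top (a minimal sketch, using the standard layer name 'predictions' of the Keras VGG16):
# compare the kernel of my last Dense layer with the kernel of VGG16's 'predictions' layer
w_new = model.layers[-1].get_weights()[0]
w_vgg = Vgg.get_layer('predictions').get_weights()[0]
print('final classifier weights identical:', np.array_equal(w_new, w_vgg))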
Related
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
class KerasSupervisedModelWrapper(keras.Model):
    def __init__(self, batch_size, **kwargs):
        super().__init__()
        self.batch_size = batch_size

    def summary(self, input_shape):  # temporary fix for a bug
        x = layers.Input(shape=input_shape)
        model = keras.Model(inputs=[x], outputs=self.call(x))
        return model.summary()

class ExampleModel(KerasSupervisedModelWrapper):
    def __init__(self, batch_size):
        super().__init__(batch_size)
        self.conv1 = layers.Conv2D(32, kernel_size=(3, 3), activation='relu')

    def call(self, x):
        x = self.conv1(x)
        return x
model = ExampleModel(15)
model.summary([28, 28, 1])
output:
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) [(None, 28, 28, 1)] 0
conv2d_2 (Conv2D) (None, 26, 26, 32) 320
=================================================================
Total params: 320
Trainable params: 320
Non-trainable params: 0
_________________________________________________________________
I'm writing a wrapper for a Keras model to pre-define some useful methods and variables, as above.
I'd like to modify the wrapper so it can take a list of layers and compose a model the way keras.Sequential does.
Therefore, I added a Sequential method that assigns a new call method, as below.
class KerasSupervisedModelWrapper(keras.Model):
    ...(continue)...

    @staticmethod
    def Sequential(layers, **kwargs):
        model = KerasSupervisedModelWrapper(**kwargs)
        pipe = keras.Sequential(layers)

        def call(self, x):
            return pipe(x)

        model.call = call
        return model
However, it does not work as I intended. Instead, it shows the error message below.
model = KerasSupervisedModelWrapper.Sequential([
    layers.Conv2D(32, kernel_size=(3, 3), activation="relu")
], batch_size=15)
model.summary((28, 28, 1))
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/tmp/ipykernel_91471/2826773946.py in <module>
1 # model.build((None, 28, 28, 1))
2 # model.compile('adam', loss=keras.losses.SparseCategoricalCrossentropy(), metrics=['accuracy'])
----> 3 model.summary((28, 28, 1))
/tmp/ipykernel_91471/3696340317.py in summary(self, input_shape)
10 def summary(self, input_shape): # temporary fix for a bug
11 x = layers.Input(shape=input_shape)
---> 12 model = keras.Model(inputs=[x], outputs=self.call(x))
13 return model.summary()
14
TypeError: call() missing 1 required positional argument: 'x'
What can I do so the wrapper can build a keras.Sequential model while still using its other properties?
You could try something like this:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class KerasSupervisedModelWrapper(keras.Model):
    def __init__(self, batch_size, **kwargs):
        super().__init__()
        self.batch_size = batch_size

    def summary(self, input_shape):  # temporary fix for a bug
        x = layers.Input(shape=input_shape)
        model = keras.Model(inputs=[x], outputs=self.call(x))
        return model.summary()

    @staticmethod
    def Sequential(layers, **kwargs):
        model = KerasSupervisedModelWrapper(**kwargs)
        pipe = keras.Sequential(layers)
        model.call = pipe  # a Sequential model is itself callable
        return model

class ExampleModel(KerasSupervisedModelWrapper):
    def __init__(self, batch_size):
        super().__init__(batch_size)
        self.conv1 = layers.Conv2D(32, kernel_size=(3, 3), activation='relu')

    def call(self, x):
        x = self.conv1(x)
        return x

model = ExampleModel(15)
model.summary([28, 28, 1])

model = KerasSupervisedModelWrapper.Sequential([
    layers.Conv2D(32, kernel_size=(3, 3), activation="relu")
], batch_size=15)
model.summary((28, 28, 1))
print(model(tf.random.normal((1, 28, 28, 1))).shape)
Model: "model_9"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_14 (InputLayer) [(None, 28, 28, 1)] 0
conv2d_17 (Conv2D) (None, 26, 26, 32) 320
=================================================================
Total params: 320
Trainable params: 320
Non-trainable params: 0
_________________________________________________________________
Model: "model_10"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_15 (InputLayer) [(None, 28, 28, 1)] 0
sequential_8 (Sequential) (None, 26, 26, 32) 320
=================================================================
Total params: 320
Trainable params: 320
Non-trainable params: 0
_________________________________________________________________
(1, 26, 26, 32)
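This works because a keras.Sequential instance is itself callable with a single input, so assigning it to model.call gives summary exactly what it expects. The original version failed because a plain function assigned to an instance attribute is not bound: self.call(x) then passes x as self and leaves x missing. A minimal illustration of that difference, independent of Keras (hypothetical names):
class Demo:
    pass

def call(self, x):       # expects two positional arguments
    return x

obj = Demo()
obj.call = call          # assigned on the instance: not bound, no automatic self
# obj.call(1)            # TypeError: call() missing 1 required positional argument: 'x'
obj.call = lambda x: x   # a one-argument callable (like the Sequential pipe) works fine
print(obj.call(1))       # 1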
I've defined a complex deep learning model, but for the purpose of this question, I'll use a simple one.
Consider the following:
import tensorflow as tf
from tensorflow.keras import layers, models
def simpleMLP(in_size, hidden_sizes, num_classes, dropout_prob=0.5):
    in_x = layers.Input(shape=(in_size,))
    hidden_x = models.Sequential(name="hidden_layers")
    for i, num_h in enumerate(hidden_sizes):
        hidden_x.add(layers.Dense(num_h, input_shape=(in_size,) if i == 0 else []))
        hidden_x.add(layers.Activation('relu'))
        hidden_x.add(layers.Dropout(dropout_prob))
    out_x = layers.Dense(num_classes, activation='softmax', name='baseline')
    return models.Model(inputs=in_x, outputs=out_x(hidden_x(in_x)))
I will call the function in the following manner:
mdl = simpleMLP(28*28, [500, 300], 10)
Now when I do mdl.summary() I get the following:
Model: "functional_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 784)] 0
_________________________________________________________________
hidden_layers (Sequential) (None, 300) 542800
_________________________________________________________________
baseline (Dense) (None, 10) 3010
=================================================================
Total params: 545,810
Trainable params: 545,810
Non-trainable params: 0
_________________________________________________________________
The problem is that the Sequential block is condensed into a single row, showing only its final output shape and the total parameter count.
In my complex model I have multiple Sequential blocks whose layers are all hidden this way.
Is there a way to make the summary more verbose? Am I doing something wrong in the model definition?
Edit
When using PyTorch I don't see the same behaviour, given the following example (taken from here):
import torch
import torch.nn as nn
class MyCNNClassifier(nn.Module):
    def __init__(self, in_c, n_classes):
        super().__init__()
        self.conv_block1 = nn.Sequential(
            nn.Conv2d(in_c, 32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU()
        )
        self.conv_block2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(32 * 28 * 28, 1024),
            nn.Sigmoid(),
            nn.Linear(1024, n_classes)
        )

    def forward(self, x):
        x = self.conv_block1(x)
        x = self.conv_block2(x)
        x = x.view(x.size(0), -1)  # flatten
        x = self.decoder(x)
        return x
When printing it I get:
MyCNNClassifier(
  (conv_block1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (conv_block2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (decoder): Sequential(
    (0): Linear(in_features=25088, out_features=1024, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=1024, out_features=10, bias=True)
  )
)
There is nothing wrong with the model summary in TensorFlow 2.x.
import tensorflow as tf
from tensorflow.keras import layers, models
def simpleMLP(in_size, hidden_sizes, num_classes, dropout_prob=0.5):
    in_x = layers.Input(shape=(in_size,))
    hidden_x = models.Sequential(name="hidden_layers")
    for i, num_h in enumerate(hidden_sizes):
        hidden_x.add(layers.Dense(num_h, input_shape=(in_size,) if i == 0 else []))
        hidden_x.add(layers.Activation('relu'))
        hidden_x.add(layers.Dropout(dropout_prob))
    out_x = layers.Dense(num_classes, activation='softmax', name='baseline')
    return models.Model(inputs=in_x, outputs=out_x(hidden_x(in_x)))
mdl = simpleMLP(28*28, [500, 300], 10)
mdl.summary()
Output:
Model: "functional_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 784)] 0
_________________________________________________________________
hidden_layers (Sequential) (None, 300) 542800
_________________________________________________________________
baseline (Dense) (None, 10) 3010
=================================================================
Total params: 545,810
Trainable params: 545,810
Non-trainable params: 0
_________________________________________________________________
You can use get_layer to retrieve a layer by either its name or its index.
If both name and index are provided, index takes precedence.
Indices are based on the order of horizontal graph traversal (bottom-up).
To get the details of the Sequential layer (indexed at 1 in mdl), you can try:
mdl.get_layer(index=1).summary()
Output:
Model: "hidden_layers"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_2 (Dense) (None, 500) 392500
_________________________________________________________________
activation_2 (Activation) (None, 500) 0
_________________________________________________________________
dropout_2 (Dropout) (None, 500) 0
_________________________________________________________________
dense_3 (Dense) (None, 300) 150300
_________________________________________________________________
activation_3 (Activation) (None, 300) 0
_________________________________________________________________
dropout_3 (Dropout) (None, 300) 0
=================================================================
Total params: 542,800
Trainable params: 542,800
Non-trainable params: 0
_________________________________________________________________
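If the model contains several nested Sequential blocks, you can print all of them in one go; a small sketch (recent versions of tf.keras also accept an expand_nested=True argument to summary, if your version supports it):
from tensorflow.keras import models  # already imported above

# print the expanded summary of every nested Sequential block in the model
for layer in mdl.layers:
    if isinstance(layer, models.Sequential):
        layer.summary()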
I'm constructing an encoder-decoder using a BLSTM to model word inflection generation.
I'm not sure why I am getting the titular error message at the model.fit step. I am passing in a matrix of integer-encoded word vectors, but I was under the impression that my input would be converted to three dimensions when passed through the Embedding layer.
encoder_inputs = Input(shape=(enc_len,))
encoder_embedding = Embedding(vocab_size, 100, mask_zero=True)(encoder_inputs)
encoder_outputs = Bidirectional(LSTM(100))(encoder_embedding)
e = Dense(200)(encoder_outputs)
e = RepeatVector(35)(e)
decoder_inputs_lemm = Input(shape=(dec_len,))
decoder_inputs_infl = Input(shape=(dec_len,))
embedding_layer = Embedding(vocab_size, 100) # shared weights
decoder_embedding_lemm = embedding_layer(decoder_inputs_lemm)
decoder_embedding_infl = embedding_layer(decoder_inputs_infl)
concat = Concatenate()([decoder_embedding_lemm, decoder_embedding_infl, e])
decoder_outputs = LSTM(100, return_sequences=True)(concat)
decoder_outputs = TimeDistributed(Dense(dec_len, activation='softmax'))(decoder_outputs)
# prepare input data
enc_lemma = pad_sequences([x[0] for x in data['train']], enc_len, padding='pre')
dec_lemma = pad_sequences([x[0] for x in data['train']], dec_len, padding='post')
dec_infl_shifted = pad_sequences([x[1] for x in data['train']], enc_len, padding='post')
dec_infl_shifted = np.hstack((np.full((dec_infl_shifted.shape[0], 1), 2), dec_infl_shifted))
dec_infl_target = pad_sequences([x[1] for x in data['train']], enc_len, padding='post') # not shifted
dec_infl_target = np.hstack((dec_infl_target, np.full((dec_infl_target.shape[0], 1), 0)))
model = Model([encoder_inputs, decoder_inputs_lemm, decoder_inputs_infl], decoder_outputs)
model.compile(optimizer='adadelta', loss='categorical_crossentropy')
model.fit([enc_lemma, dec_lemma, dec_infl_shifted], dec_infl_target, epochs=30, verbose=1)
Here is the summary:
Model: "model_1"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) (None, 34) 0
__________________________________________________________________________________________________
embedding_1 (Embedding) (None, 34, 100) 6300 input_1[0][0]
__________________________________________________________________________________________________
bidirectional_1 (Bidirectional) (None, 200) 160800 embedding_1[0][0]
__________________________________________________________________________________________________
input_2 (InputLayer) (None, 35) 0
__________________________________________________________________________________________________
input_3 (InputLayer) (None, 35) 0
__________________________________________________________________________________________________
dense_1 (Dense) (None, 200) 40200 bidirectional_1[0][0]
__________________________________________________________________________________________________
embedding_2 (Embedding) (None, 35, 100) 6300 input_2[0][0]
input_3[0][0]
__________________________________________________________________________________________________
repeat_vector_1 (RepeatVector) (None, 35, 200) 0 dense_1[0][0]
__________________________________________________________________________________________________
concatenate_1 (Concatenate) (None, 35, 400) 0 embedding_2[0][0]
embedding_2[1][0]
repeat_vector_1[0][0]
__________________________________________________________________________________________________
lstm_2 (LSTM) (None, 35, 100) 200400 concatenate_1[0][0]
__________________________________________________________________________________________________
time_distributed_1 (TimeDistrib (None, 35, 35) 3535 lstm_2[0][0]
==================================================================================================
Total params: 417,535
Trainable params: 417,535
Non-trainable params: 0
__________________________________________________________________________________________________
None
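For reference, the impression about the Embedding layer is correct; a quick shape check (a sketch reusing the names above and assuming the standalone Keras imports) shows that its output is indeed three-dimensional:
from keras import backend as K  # assumed import, matching the Keras version used above

probe = Input(shape=(enc_len,))
emb_probe = Embedding(vocab_size, 100)(probe)
print(K.int_shape(emb_probe))  # (None, enc_len, 100): the Embedding output is 3-D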
I am currently looking into CycleGAN and I'm using simontomaskarlsson's GitHub repository as my baseline. My problem arises when training is done and I want to use the saved model to generate new samples: the architecture of the loaded model differs from that of the initialized generator. The direct link to the saveModel function is here.
When I initialize the generator that does the translation from domain A to B, the summary looks like the following (line in github). This is as expected, since my input image is (140,140,1) and I am expecting an output image of (140,140,1):
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_5 (InputLayer) (None, 140, 140, 1) 0
__________________________________________________________________________________________________
reflection_padding2d_1 (Reflect (None, 146, 146, 1) 0 input_5[0][0]
__________________________________________________________________________________________________
conv2d_9 (Conv2D) (None, 140, 140, 32) 1600 reflection_padding2d_1[0][0]
__________________________________________________________________________________________________
instance_normalization_5 (Insta (None, 140, 140, 32) 64 conv2d_9[0][0]
__________________________________________________________________________________________________
...
__________________________________________________________________________________________________
activation_12 (Activation) (None, 140, 140, 32) 0 instance_normalization_23[0][0]
__________________________________________________________________________________________________
reflection_padding2d_16 (Reflec (None, 146, 146, 32) 0 activation_12[0][0]
__________________________________________________________________________________________________
conv2d_26 (Conv2D) (None, 140, 140, 1) 1569 reflection_padding2d_16[0][0]
__________________________________________________________________________________________________
activation_13 (Activation) (None, 140, 140, 1) 0 conv2d_26[0][0]
==================================================================================================
Total params: 2,258,177
Trainable params: 2,258,177
Non-trainable params: 0
When the training is done I want to load the saved models to generate new samples (translation from domain A to domain B). In this case it does not matter if the model is successful at translating the images or not. I load the model with the following code:
# load json and create model
json_file = open('G_A2B_model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json, custom_objects={'ReflectionPadding2D': ReflectionPadding2D, 'InstanceNormalization': InstanceNormalization})
or with the following, which gives the same result:
loaded_model = load_model('G_A2B_model.h5', custom_objects={'ReflectionPadding2D': ReflectionPadding2D, 'InstanceNormalization': InstanceNormalization})
ReflectionPadding2D is initialized as follows (note that I have a separate file for loading the model than for training CycleGAN):
# reflection padding taken from
# https://github.com/fastai/courses/blob/master/deeplearning2/neural-style.ipynb
class ReflectionPadding2D(Layer):
def __init__(self, padding=(1, 1), **kwargs):
self.padding = tuple(padding)
self.input_spec = [InputSpec(ndim=4)]
super(ReflectionPadding2D, self).__init__(**kwargs)
def compute_output_shape(self, s):
return (s[0], s[1] + 2 * self.padding[0], s[2] + 2 * self.padding[1], s[3])
def call(self, x, mask=None):
w_pad, h_pad = self.padding
return tf.pad(x, [[0, 0], [h_pad, h_pad], [w_pad, w_pad], [0, 0]], 'REFLECT')
Now that my model is loaded, I want to translate images from domain A to domain B. Here I expected the output shape to be (140,140,1), but surprisingly it is (132,132,1). I checked the architecture summary for G_A2B_model, which clearly shows that the output is of shape (132,132,1):
Model: "G_A2B_model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_5 (InputLayer) (None, 140, 140, 1) 0
__________________________________________________________________________________________________
reflection_padding2d_1 (Reflect (None, 142, 142, 1) 0 input_5[0][0]
__________________________________________________________________________________________________
conv2d_9 (Conv2D) (None, 136, 136, 32) 1600 reflection_padding2d_1[0][0]
__________________________________________________________________________________________________
instance_normalization_5 (Insta (None, 136, 136, 32) 64 conv2d_9[0][0]
__________________________________________________________________________________________________
...
__________________________________________________________________________________________________
instance_normalization_23 (Inst (None, 136, 136, 32) 64 conv2d_transpose_2[0][0]
__________________________________________________________________________________________________
activation_12 (Activation) (None, 136, 136, 32) 0 instance_normalization_23[0][0]
__________________________________________________________________________________________________
reflection_padding2d_16 (Reflec (None, 138, 138, 32) 0 activation_12[0][0]
__________________________________________________________________________________________________
conv2d_26 (Conv2D) (None, 132, 132, 1) 1569 reflection_padding2d_16[0][0]
__________________________________________________________________________________________________
activation_13 (Activation) (None, 132, 132, 1) 0 conv2d_26[0][0]
==================================================================================================
Total params: 2,258,177
Trainable params: 2,258,177
Non-trainable params: 0
What I don't understand is why the output shape is (132,132,1). I can see that the problem arises in ReflectionPadding2D, where the output shape in the initialized generator is (146,146,1) while in the saved generator it is (142,142,1). But I have no idea why this is happening, because they should in theory be the same size.
When you persist your architecture using model.to_json, the method get_config is called so that the layer attributes are saved as well. As you are using a custom class without that method, the default value for padding is being used when you call model_from_json.
Using the following code for ReflectionPadding2D should solve your problem; just run the training step again and reload the model.
class ReflectionPadding2D(Layer):
    def __init__(self, padding=(1, 1), **kwargs):
        self.padding = tuple(padding)
        super(ReflectionPadding2D, self).__init__(**kwargs)

    def compute_output_shape(self, s):
        return (s[0], s[1] + 2 * self.padding[0], s[2] + 2 * self.padding[1], s[3])

    def call(self, x, mask=None):
        w_pad, h_pad = self.padding
        return tf.pad(x, [[0, 0], [h_pad, h_pad], [w_pad, w_pad], [0, 0]], 'REFLECT')

    # This is the relevant method that should be added
    def get_config(self):
        config = {
            'padding': self.padding
        }
        base_config = super(ReflectionPadding2D, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
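This also matches the numbers in the two summaries: with the default padding=(1, 1) the saved model pads 140 to 142, while the 146×146 output of the freshly built generator implies it was constructed with padding=(3, 3). Once get_config is in place, you can verify that the padding survives a serialization round trip; a small sketch (G_A2B here stands for the freshly built generator, adjust the name to your training script):
# round-trip check: the custom padding should now survive to_json / model_from_json
json_config = G_A2B.to_json()
restored = model_from_json(json_config,
                           custom_objects={'ReflectionPadding2D': ReflectionPadding2D,
                                           'InstanceNormalization': InstanceNormalization})
print(restored.layers[1].padding)  # expect (3, 3) instead of the default (1, 1)
print(restored.output_shape)       # expect (None, 140, 140, 1)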
I am trying to implement ADDA in Keras. Here is my code:
class ADDA_Images(object):
    def __init__(self, modelInput):
        self.img_rows = 28
        self.img_cols = 28
        self.channels = 3
        self.img_shape = (self.img_rows, self.img_cols, self.channels)
        optimizer = opt.Adam(0.001)

        self.source_generator = self.build_generator(modelInput)
        self.target_generator = self.build_generator(modelInput)

        outputFeatureExtraction = layers.Input(shape=self.target_generator.output_shape[1:])
        self.source_classificator = self.build_classifier(outputFeatureExtraction)
        self.discriminator_model = self.build_discriminator(outputFeatureExtraction)
        self.discriminator_model.compile(optimizer, loss='binary_crossentropy', metrics=['acc'])
        self.discriminator_model.name = 'disk'

        input = layers.Input(shape=self.img_shape)
        fe_rep = self.source_generator(input)
        cl = self.source_classificator(fe_rep)
        self.source_model = Model(input, cl)
        self.source_model.compile(optimizer, loss='categorical_crossentropy', metrics=['acc'])

        input = layers.Input(shape=self.img_shape)
        fe_rep = self.target_generator(input)
        cl = self.source_classificator(fe_rep)
        self.target_model = Model(input, cl)
        self.target_model.compile(optimizer, loss='categorical_crossentropy', metrics=['acc'])

        self.combined_model = Sequential()
        self.combined_model.add(self.target_generator)
        self.combined_model.add(self.discriminator_model)
        self.combined_model.get_layer('disk').trainable = False
        self.combined_model.compile(optimizer, loss='binary_crossentropy', metrics=['acc'])

        print('Source model')
        self.source_model.summary()
        print('Target model')
        self.target_model.summary()
        print('Discriminator')
        self.discriminator_model.summary()
        print('Combined model')
        self.combined_model.summary()

    def build_generator(self, modelInput):
        gen = layers.Conv2D(filters=20, kernel_size=5, padding='valid')(modelInput)
        gen = layers.MaxPooling2D(pool_size=2, strides=2)(gen)
        gen = layers.Conv2D(filters=50, kernel_size=5, padding='valid')(gen)
        gen = layers.MaxPooling2D(pool_size=2, strides=2)(gen)
        gen = layers.Flatten()(gen)
        model = Model(modelInput, gen)
        print('Generator summary')
        model.summary()
        return model

    def build_classifier(self, modelInput):
        cl = layers.Dense(3072, activation='relu')(modelInput)
        cl = layers.Dense(2048, activation='relu')(cl)
        cl = layers.Dense(10, activation='softmax')(cl)
        model = Model(modelInput, cl)
        print('Classifier summary')
        model.summary()
        return model

    def build_discriminator(self, modelInput):
        disc = layers.Dense(500, activation='relu')(modelInput)
        disc = layers.Dense(500, activation='relu')(disc)
        disc = layers.Dense(2, activation='softmax')(disc)
        model = Model(modelInput, disc)
        print('Discriminator summary')
        model.summary()
        return model
But it seems that target_generator is not connected to the target model. I loaded the target model from the pretrained source model and then trained the discriminator and the combined model in the ADDA way. But the target model does not change: it has the same predictions (accuracies and losses) as the source model all the time.
Here is summary of models :
Source model
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) (None, 28, 28, 3) 0
_________________________________________________________________
model_1 (Model) (None, 800) 26570
_________________________________________________________________
model_3 (Model) (None, 10) 8774666
=================================================================
Total params: 8,801,236
Trainable params: 8,801,236
Non-trainable params: 0
_________________________________________________________________
Target model
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_3 (InputLayer) (None, 28, 28, 3) 0
_________________________________________________________________
model_2 (Model) (None, 800) 26570
_________________________________________________________________
model_3 (Model) (None, 10) 8774666
=================================================================
Total params: 8,801,236
Trainable params: 8,801,236
Non-trainable params: 0
_________________________________________________________________
Discriminator
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) (None, 800) 0
_________________________________________________________________
dense_4 (Dense) (None, 500) 400500
_________________________________________________________________
dense_5 (Dense) (None, 500) 250500
_________________________________________________________________
dense_6 (Dense) (None, 2) 1002
=================================================================
Total params: 1,304,004
Trainable params: 652,002
Non-trainable params: 652,002
_________________________________________________________________
Combined model
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
model_2 (Model) (None, 800) 26570
_________________________________________________________________
disk (Model) (None, 2) 652002
=================================================================
Total params: 678,572
Trainable params: 26,570
Non-trainable params: 652,002
I validated the outputs of target_model's second layer (which should be target_generator by construction) and they are not the same as the outputs of target_generator on the same input. So it seems that those two models are not connected as reported in the summaries.
Can someone help me figure out what is wrong?
I am using Keras 2 with the TensorFlow backend.
The problem was in the training part: I loaded the pretrained source model into the target model with load_model, and that caused problems because it replaced the reference to the generator model. Instead of load_model, I should use load_weights.
So, loading the pretrained model in a way that works and does not break the references is:
source_model = load_model(modelName)
target_model.set_weights(source_model.get_weights())
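A quick way to confirm that the references stay intact with this approach (a small sketch using the same names as above): the shared generator should still be the exact same object inside the target model, and only its weight values change.
# set_weights copies values in place, so the wrapped generator object stays shared
print(target_model.layers[1] is target_generator)  # expect True: still the same model object
# load_model(modelName) instead would build a brand-new graph, leaving the original
# target_generator (and therefore the combined model) untouched during training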