I have a batch of images with the shape [None, 256, 256, 3] (the batch dimension is None so it can be set at run time).
I am trying to implement a layer that calculates the average of each image (frame) in the batch, so the result has the shape [None, 1] or [None, 1, 1, 1]. I looked at tf.keras.layers.Average, but it averages a list of same-shaped input tensors element-wise and returns a tensor of the same shape, which is not what I need.
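For reference, this is roughly what tf.keras.layers.Average does; it averages a list of same-shaped tensors element-wise rather than reducing within each image (a minimal sketch to illustrate the behaviour, not part of my model):

import tensorflow as tf

a = tf.ones((4, 256, 256, 3))
b = tf.zeros((4, 256, 256, 3))
# Average takes a list of tensors and returns their element-wise mean,
# so the shape (4, 256, 256, 3) is preserved instead of being reduced.
print(tf.keras.layers.Average()([a, b]).shape)  # (4, 256, 256, 3)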
So I tried implementing the following custom layer:
class ElementMean(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(ElementMean, self).__init__(**kwargs)

    def call(self, inputs):
        tensors = []
        for ii in range(inputs.shape[0] if inputs.shape[0] is not None else 1):
            tensors.append(inputs[ii, ...])
        return tf.keras.layers.Average()(tensors)
but when it is used:
import tensorflow as tf

x = tf.keras.Input([256, 256, 3], None)
y = ElementMean()(x)
model = tf.keras.Model(inputs=x, outputs=y)
model.compile()
model.summary()
tf.keras.utils.plot_model(
    model,
    show_shapes=True,
    show_dtype=True,
    show_layer_activations=True,
    show_layer_names=True
)
I get the result:
Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 256, 256, 3)] 0
element_mean (ElementMean) (256, 256, 3) 0
=================================================================
Total params: 0
Trainable params: 0
Non-trainable params: 0
_________________________________________________________________
Which makes it entirely wrong.
I also tried this change on the call:
def call(self, inputs):
tensors = []
for ii in range(inputs.shape[0] if inputs.shape[0] is not None else 1):
tensors.append(tf.reduce_mean(inputs[ii, ...]))
return tf.convert_to_tensor(tensors)
Which in turn results in:
Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 256, 256, 3)] 0
element_mean (ElementMean) (1,) 0
=================================================================
Total params: 0
Trainable params: 0
Non-trainable params: 0
_________________________________________________________________
Which is also wrong.
You can play around with the axes like this:
import tensorflow as tf

class ElementMean(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super(ElementMean, self).__init__(**kwargs)

    def call(self, inputs):
        return tf.reduce_mean(inputs, axis=(1, 2, 3), keepdims=True)

x = tf.keras.layers.Input([256, 256, 3], None)
em = ElementMean()
y = em(x)
model = tf.keras.Model(x, y)
model.summary()
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 256, 256, 3)] 0
element_mean_1 (ElementMean (None, 1, 1, 1) 0
)
=================================================================
Total params: 0
Trainable params: 0
Non-trainable params: 0
_________________________________________________________________
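As a quick sanity check, you can call the layer on concrete data (a small usage sketch, assuming the ElementMean defined above):

batch = tf.random.normal((4, 256, 256, 3))
out = em(batch)
print(out.shape)  # (4, 1, 1, 1) -- one mean value per image
# drop keepdims for shape (4,), or reshape to (4, 1) if you need [None, 1]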
There is another way, using segment means, which lets you segment by height, width, and channel while preserving their properties.
Sample: for a Width x Height x Channels image, the mean of each channel represents that channel's data as a single value, and you can summarize those means later.
import os
from os.path import exists
import tensorflow as tf
import tensorflow_io as tfio
import matplotlib.pyplot as plt
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
PATH = os.path.join('F:\\datasets\\downloads\\Actors\\train\\Pikaploy', '*.tif')
files = tf.data.Dataset.list_files(PATH)
list_file = []
for file in files.take(1):
    image = tf.io.read_file( file )
    image = tfio.experimental.image.decode_tiff(image, index=0)
    image = tf.image.resize(image, [28,32], method='nearest')
    list_file.append( image )
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Class / Definitions
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
class MyDenseLayer(tf.keras.layers.Layer):
    def __init__(self, num_outputs):
        super(MyDenseLayer, self).__init__()
        self.num_outputs = num_outputs

    def build(self, input_shape):
        self.kernel = self.add_weight("kernel",
                                      shape=[int(input_shape[-1]),
                                             self.num_outputs])

    def call(self, inputs):
        temp = tf.transpose( tf.constant(tf.cast(list_file, dtype=tf.int64), shape=(28, 32, 4), dtype=tf.int64) )
        temp = tf.transpose( temp )
        mean = tf.constant( tf.math.segment_mean( temp, tf.ones([28], dtype=tf.int64)).numpy() )
        temp = tf.image.rot90(temp)
        mean = tf.constant( tf.math.segment_mean( tf.constant(mean[1::], shape=(32, 4)), tf.ones([32], dtype=tf.int64)).numpy() )
        return mean[1::]
layer = MyDenseLayer(10)
sample = tf.transpose( tf.constant(tf.cast(list_file, dtype=tf.int64), shape=(28, 32, 4), dtype=tf.int64) )
data = layer(sample)
print( data )
Output: Rx Gx Bx Yx
tf.Tensor([[161 166 171 255]], shape=(1, 4), dtype=int64)
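For comparison, one mean value per channel of the same decoded image can also be obtained with a plain reduce_mean over the height and width axes (a minimal sketch, assuming list_file[0] is the (28, 32, 4) image loaded above):

per_channel_mean = tf.reduce_mean(tf.cast(list_file[0], tf.float32), axis=(0, 1))
print(per_channel_mean)  # shape (4,) -- one mean per channel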
I am trying to create a DenseUnet neural network using the TensorFlow subclassing API.
To implement a dense encoder, I have imported a dense network using:
densenet121 = tf.keras.applications.DenseNet121(include_top=False,weights="imagenet",input_shape = (HEIGHT,WIDTH,3))
I then trimmed the network to my needs and passed it to the model class.
densenet = models.Model(inputs=densenet121.input, outputs=densenet121.get_layer("pool4_relu").output)
model = DensePretrainedDepthEstimationModel(input_shape=(HEIGHT, WIDTH, 3), feature_extractor=densenet)
In the class, I implemented skip connections for the decoder and a custom training loop.
class DensePretrainedDepthEstimationModel(tf.keras.Model):
    def __init__(self, input_shape, feature_extractor, ssim_loss_weight=0.85,
                 l1_loss_weight=0.1, edge_loss_weight=0.9):
        super(DensePretrainedDepthEstimationModel, self).__init__(name="Test_Model")
        self.ssim_loss_weight = ssim_loss_weight
        self.l1_loss_weight = l1_loss_weight
        self.edge_loss_weight = edge_loss_weight
        self.loss_metric = tf.keras.metrics.Mean(name="loss")
        f = [16, 32, 64, 128, 256]

        self.densenet = feature_extractor
        self.densenet._name = 'dense_net'
        self.densenet.trainable = False

        self.s1 = self.densenet.layers[0].output                  ## 256
        self.s2 = self.densenet.get_layer("conv1/relu").output    ## 128
        self.s3 = self.densenet.get_layer("pool2_relu").output    ## 64
        self.s4 = self.densenet.get_layer("pool3_relu").output    ## 32

        self.upscale_blocks = [
            UpscaleBlock(f[3]),
            UpscaleBlock(f[2]),
            UpscaleBlock(f[1]),
            UpscaleBlock(f[0]),
        ]

        self.conv_layer = layers.Conv2D(1, (1, 1), padding="same", activation="tanh")
        self.conv_layer._name = 'out'

    def calculate_loss(self, target, pred):
        # Edges
        dy_true, dx_true = tf.image.image_gradients(target)
        dy_pred, dx_pred = tf.image.image_gradients(pred)
        weights_x = tf.exp(tf.reduce_mean(tf.abs(dx_true)))
        weights_y = tf.exp(tf.reduce_mean(tf.abs(dy_true)))

        # Depth smoothness
        smoothness_x = dx_pred * weights_x
        smoothness_y = dy_pred * weights_y
        depth_smoothness_loss = tf.reduce_mean(abs(smoothness_x)) + tf.reduce_mean(
            abs(smoothness_y)
        )

        # Structural similarity (SSIM) index
        ssim_loss = tf.reduce_mean(
            1
            - tf.image.ssim(
                target, pred, max_val=WIDTH, filter_size=7, k1=0.01 ** 2, k2=0.03 ** 2
            )
        )

        # Point-wise depth
        l1_loss = tf.reduce_mean(tf.abs(target - pred))

        loss = (
            (self.ssim_loss_weight * ssim_loss)
            + (self.l1_loss_weight * l1_loss)
            + (self.edge_loss_weight * depth_smoothness_loss)
        )
        return loss, l1_loss

    @property
    def metrics(self):
        return [self.loss_metric]

    @tf.function
    def train_step(self, batch_data):
        input, target = batch_data
        with tf.GradientTape() as tape:
            tape.watch(input)
            pred = self(input, training=True)
            loss, l1_loss = self.calculate_loss(target, pred)
        gradients = tape.gradient(loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_weights))
        self.loss_metric.update_state(loss)
        return {
            "loss": self.loss_metric.result(),
            "Point-wise depth": l1_loss,
        }

    def test_step(self, batch_data):
        input, target = batch_data
        pred = self(input, training=False)
        loss, l1_loss = self.calculate_loss(target, pred)
        self.loss_metric.update_state(loss)
        return {
            "loss": self.loss_metric.result(),
            "Point-wise depth": l1_loss,
        }

    def call(self, x):
        d1 = self.densenet(x)
        u1 = self.upscale_blocks[0](d1, self.s4)
        u2 = self.upscale_blocks[1](u1, self.s3)
        u3 = self.upscale_blocks[2](u2, self.s2)
        u4 = self.upscale_blocks[3](u3, self.s1)
        return self.conv_layer(u4)
Everything works fine when I use custom layers or standard layers as an encoder, but when I try to use a pre-trained model, as I did in this case, I get an error when calculating gradients:
AttributeError: in user code:
File "/usr/local/lib/python3.7/dist-packages/keras/engine/training.py", line 1021, in train_function *
return step_function(self, iterator)
File "<ipython-input-14-23ed6f014a3d>", line 80, in train_step *
gradients = tape.gradient(loss, self.trainable_weights)
AttributeError: 'KerasTensor' object has no attribute '_id'
I think I did something wrong when getting the output for skip connections. I would appreciate any help you can provide.
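For what it's worth, a common pattern for this kind of setup (a hedged sketch, not necessarily the fix for this exact model) is to expose the skip-connection layers as additional outputs of a single functional feature extractor and call that model inside call(), instead of storing the symbolic layer .output tensors (KerasTensors) as attributes:

densenet121 = tf.keras.applications.DenseNet121(include_top=False,
                                                weights="imagenet",
                                                input_shape=(HEIGHT, WIDTH, 3))
# one functional model returning the bottleneck plus the skip features
feature_extractor = models.Model(
    inputs=densenet121.input,
    outputs=[
        densenet121.get_layer("pool4_relu").output,   # bottleneck (d1)
        densenet121.get_layer("pool3_relu").output,   # skip s4
        densenet121.get_layer("pool2_relu").output,   # skip s3
        densenet121.get_layer("conv1/relu").output,   # skip s2
    ],
)
# then, in call(): d1, s4, s3, s2 = self.feature_extractor(x)  (and s1 can simply be x)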
The compile and build stages of the network are performed here:
optimizer = tf.keras.optimizers.Adam(
    learning_rate=LR,
    amsgrad=False,
)

train_loader = DataGenerator(
    data=df[:260].reset_index(drop=True), batch_size=BATCH_SIZE, dim=(HEIGHT, WIDTH)
)
validation_loader = DataGenerator(
    data=df[260:].reset_index(drop=True), batch_size=BATCH_SIZE, dim=(HEIGHT, WIDTH)
)

# Define the loss function
cross_entropy = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction="none"
)

densenet121 = tf.keras.applications.DenseNet121(include_top=False,
                                                weights="imagenet",
                                                input_shape=(HEIGHT, WIDTH, 3))

densenet = models.Model(inputs=densenet121.input,
                        outputs=densenet121.get_layer("pool4_relu").output)

model = DensePretrainedDepthEstimationModel(input_shape=(HEIGHT, WIDTH, 3),
                                            feature_extractor=densenet)
model.build(input_shape=(None, HEIGHT, WIDTH, 3))
model.summary()

# Compile the model
model.compile(optimizer, loss=cross_entropy)

history = model.fit(
    train_loader,
    epochs=EPOCHS,
    validation_data=validation_loader,
)
Model: "Test_Model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_net (Functional) (None, 32, 32, 1024) 4322880
upscale_block_12 (UpscaleBl (None, 64, 64, 128) 1918208
ock)
upscale_block_13 (UpscaleBl (None, 128, 128, 64) 258688
ock)
upscale_block_14 (UpscaleBl (None, 256, 256, 32) 46400
ock)
upscale_block_15 (UpscaleBl (None, 512, 512, 16) 7504
ock)
out (Conv2D) (None, 512, 512, 1) 17
=================================================================
Total params: 6,553,699
Trainable params: 6,499,297
Non-trainable params: 54,402
_________________________________________________________________
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
class KerasSupervisedModelWrapper(keras.Model):
    def __init__(self, batch_size, **kwargs):
        super().__init__()
        self.batch_size = batch_size

    def summary(self, input_shape): # temporary fix for a bug
        x = layers.Input(shape=input_shape)
        model = keras.Model(inputs=[x], outputs=self.call(x))
        return model.summary()

class ExampleModel(KerasSupervisedModelWrapper):
    def __init__(self, batch_size):
        super().__init__(batch_size)
        self.conv1 = layers.Conv2D(32, kernel_size=(3, 3), activation='relu')

    def call(self, x):
        x = self.conv1(x)
        return x

model = ExampleModel(15)
model.summary([28, 28, 1])
output:
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) [(None, 28, 28, 1)] 0
conv2d_2 (Conv2D) (None, 26, 26, 32) 320
=================================================================
Total params: 320
Trainable params: 320
Non-trainable params: 0
_________________________________________________________________
I'm writing a wrapper for a Keras model to pre-define some useful methods and variables, as above.
I'd like to modify the wrapper so it can take a list of layers and compose a model from them, the way keras.Sequential does.
Therefore, I added a Sequential method that assigns a new call method, as below.
class KerasSupervisedModelWrapper(keras.Model):
    ...(continue)...

    @staticmethod
    def Sequential(layers, **kwargs):
        model = KerasSupervisedModelWrapper(**kwargs)
        pipe = keras.Sequential(layers)

        def call(self, x):
            return pipe(x)

        model.call = call
        return model
However, it does not work as I intended. Instead, it shows the error message below.
model = KerasSupervisedModelWrapper.Sequential([
    layers.Conv2D(32, kernel_size=(3, 3), activation="relu")
], batch_size=15)
model.summary((28, 28, 1))
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/tmp/ipykernel_91471/2826773946.py in <module>
1 # model.build((None, 28, 28, 1))
2 # model.compile('adam', loss=keras.losses.SparseCategoricalCrossentropy(), metrics=['accuracy'])
----> 3 model.summary((28, 28, 1))
/tmp/ipykernel_91471/3696340317.py in summary(self, input_shape)
10 def summary(self, input_shape): # temporary fix for a bug
11 x = layers.Input(shape=input_shape)
---> 12 model = keras.Model(inputs=[x], outputs=self.call(x))
13 return model.summary()
14
TypeError: call() missing 1 required positional argument: 'x'
What can I do so the wrapper can build a keras.Sequential-style model while still using the other properties?
You could try something like this:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class KerasSupervisedModelWrapper(keras.Model):
    def __init__(self, batch_size, **kwargs):
        super().__init__()
        self.batch_size = batch_size

    def summary(self, input_shape): # temporary fix for a bug
        x = layers.Input(shape=input_shape)
        model = keras.Model(inputs=[x], outputs=self.call(x))
        return model.summary()

    @staticmethod
    def Sequential(layers, **kwargs):
        model = KerasSupervisedModelWrapper(**kwargs)
        pipe = keras.Sequential(layers)
        model.call = pipe
        return model

class ExampleModel(KerasSupervisedModelWrapper):
    def __init__(self, batch_size):
        super().__init__(batch_size)
        self.conv1 = layers.Conv2D(32, kernel_size=(3, 3), activation='relu')

    def call(self, x):
        x = self.conv1(x)
        return x

model = ExampleModel(15)
model.summary([28, 28, 1])

model = KerasSupervisedModelWrapper.Sequential([
    layers.Conv2D(32, kernel_size=(3, 3), activation="relu")
], batch_size=15)
model.summary((28, 28, 1))
print(model(tf.random.normal((1, 28, 28, 1))).shape)
Model: "model_9"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_14 (InputLayer) [(None, 28, 28, 1)] 0
conv2d_17 (Conv2D) (None, 26, 26, 32) 320
=================================================================
Total params: 320
Trainable params: 320
Non-trainable params: 0
_________________________________________________________________
Model: "model_10"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_15 (InputLayer) [(None, 28, 28, 1)] 0
sequential_8 (Sequential) (None, 26, 26, 32) 320
=================================================================
Total params: 320
Trainable params: 320
Non-trainable params: 0
_________________________________________________________________
(1, 26, 26, 32)
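A brief note on why this works: model.call = pipe assigns a callable object (the Sequential model itself), so Python never has to bind self. If you prefer to keep a plain call(self, x) function, it would need to be bound to the instance explicitly, e.g. with types.MethodType (a hedged alternative sketch, not part of the answer above):

import types

class KerasSupervisedModelWrapper(keras.Model):
    ...(continue)...

    @staticmethod
    def Sequential(layers, **kwargs):
        model = KerasSupervisedModelWrapper(**kwargs)
        pipe = keras.Sequential(layers)

        def call(self, x):
            return pipe(x)

        # bind the function to this instance so self is passed automatically
        model.call = types.MethodType(call, model)
        return model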
class MyModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.dense = tf.keras.layers.Dense(1)
        self.build(input_shape=[None, 1])

    def call(self, inputs, **kwargs):
        return self.dense(inputs)

MyModel().summary()
Plotting the model does not work either:
tf.keras.utils.plot_model(model, to_file='model_1.png', show_shapes=True)
I tried this code on several TensorFlow versions (2.3.0, 2.3.1, and 2.4.1), and every time the output shape shows as "multiple"! Is it a bug? Any fix?
It's not a bug. In general, Keras can't assume anything about the structure of a subclassed Model, which is why .summary() can't show output shapes for the Subclassing API the way it can for the Functional or Sequential API.
But here is a workaround: you can achieve this with the following method.
import tensorflow as tf

class MyModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.dense = tf.keras.layers.Dense(1)
        self.build(input_shape=[None, 1])

    def call(self, inputs, **kwargs):
        return self.dense(inputs)

    def build_graph(self):
        x = tf.keras.layers.Input(shape=(1,))
        return tf.keras.Model(inputs=[x], outputs=self.call(x))

MyModel().build_graph().summary()
Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) [(None, 1)] 0
_________________________________________________________________
dense_3 (Dense) (None, 1) 2
=================================================================
Total params: 2
Trainable params: 2
Non-trainable params: 0
_________________________________________________________________
The same approach works for plotting the model.
tf.keras.utils.plot_model(
    MyModel().build_graph()
)
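Passing show_shapes=True also gets the per-layer shapes into the plot (a small usage sketch):

tf.keras.utils.plot_model(
    MyModel().build_graph(), show_shapes=True
)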
After much effort, I managed to build a TensorFlow 2 implementation of an existing PyTorch style-transfer project. Then I wanted to get all the nice extra features that are available through standard Keras training, e.g. model.fit().
But the same model fails when learning through model.fit(). The model seems to learn the content features, but is unable to learn the style features. Here is the model in question:
def vgg_layers19(content_layers, style_layers, input_shape=(256,256,3)):
    """ creates a VGG model that returns output values for the given layers
    see: https://keras.io/applications/#extract-features-from-an-arbitrary-intermediate-layer-with-vgg19

    Returns:
        function(x, preprocess=True):
            Args:
                x: image tuple/ndarray h,w,c(RGB), domain=(0.,255.)
            Returns:
                a tuple of lists, ([content_features], [style_features])

    usage:
        (content_features, style_features) = vgg_layers16(content_layers, style_layers)(x_train)
    """
    preprocessingFn = tf.keras.applications.vgg19.preprocess_input
    base_model = tf.keras.applications.VGG19(include_top=False, weights='imagenet', input_shape=input_shape)
    base_model.trainable = False

    content_features = [base_model.get_layer(name).output for name in content_layers]
    style_features = [base_model.get_layer(name).output for name in style_layers]
    output_features = content_features + style_features

    model = Model( inputs=base_model.input, outputs=output_features, name="vgg_layers")
    model.trainable = False

    def _get_features(x, preprocess=True):
        """
        Args:
            x: expecting tensor, domain=255. hwcRGB
        """
        if preprocess and callable(preprocessingFn):
            x = preprocessingFn(x)
        output = model(x) # call as tf.keras.Layer()
        return ( output[:len(content_layers)], output[len(content_layers):] )

    return _get_features
class VGG_Features():
    """ get content and style features from VGG model """
    def __init__(self, loss_model, style_image=None, target_style_gram=None):
        self.loss_model = loss_model

        if style_image is not None:
            assert style_image.shape == (256,256,3), "ERROR: loss_model expecting input_shape=(256,256,3), got {}".format(style_image.shape)
            self.style_image = style_image
            self.target_style_gram = VGG_Features.get_style_gram(self.loss_model, self.style_image)

        if target_style_gram is not None:
            self.target_style_gram = target_style_gram

    @staticmethod
    def get_style_gram(vgg_features_model, style_image):
        style_batch = tf.repeat( style_image[tf.newaxis,...], repeats=_batch_size, axis=0)
        # show([style_image], w=128, domain=(0.,255.) )
        # B, H, W, C = style_batch.shape
        (_, style_features) = vgg_features_model( style_batch , preprocess=True ) # hwcRGB
        target_style_gram = [ fnstf_utils.gram(value) for value in style_features ] # list
        return target_style_gram

    def __call__(self, input_batch):
        content_features, style_features = self.loss_model( input_batch, preprocess=True )
        style_gram = tuple(fnstf_utils.gram(value) for value in style_features) # tuple(<generator>)
        return (content_features[0],) + style_gram # tuple = tuple + tuple
class TransformerNetwork_VGG(tf.keras.Model):
    def __init__(self, transformer=transformer, vgg_features=vgg_features):
        super(TransformerNetwork_VGG, self).__init__()
        self.transformer = transformer
        # type: tf.keras.models.Model
        # input_shapes: (None, 256,256,3)
        # output_shapes: (None, 256,256,3)

        style_model = {
            'content_layers': ['block5_conv2'],
            'style_layers': ['block1_conv1',
                             'block2_conv1',
                             'block3_conv1',
                             'block4_conv1',
                             'block5_conv1']
        }
        vgg_model = vgg_layers19( style_model['content_layers'], style_model['style_layers'] )
        self.vgg_features = VGG_Features(vgg_model, style_image=style_image)
        # input_shapes: (None, 256,256,3)
        # output_shapes: [(None, 16, 16, 512), (None, 64, 64), (None, 128, 128), (None, 256, 256), (None, 512, 512), (None, 512, 512)]
        #                [ content_loss, style_loss_1, style_loss_2, style_loss_3, style_loss_4, style_loss_5 ]

    def call(self, inputs):
        x = inputs # shape=(None, 256,256,3)

        # shape=(None, 256,256,3)
        generated_image = self.transformer(x)

        # shape=[(None, 16, 16, 512), (None, 64, 64), (None, 128, 128), (None, 256, 256), (None, 512, 512), (None, 512, 512)]
        vgg_feature_losses = self.vgg_features(generated_image)

        return vgg_feature_losses # tuple(content1, style1, style2, style3, style4, style5)
Style Image
FEATURE_WEIGHTS= [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
GradientTape learning
With the tf.GradientTape() loop, I'm manually handling the multiple outputs, e.g. tuple of 6 tensors, from TransformerNetwork_VGG(x_train). This method learns correctly.
@tf.function()
def train_step(x_train, y_true, loss_weights=None, log_freq=10):
    with tf.GradientTape() as tape:
        y_pred = TransformerNetwork_VGG(x_train)
        generated_content_features = y_pred[:1]
        generated_style_gram = y_pred[1:]

        y_true = TransformerNetwork_VGG.vgg_features(x_train)
        target_content_features = y_true[:1]
        target_style_gram = TransformerNetwork_VGG.vgg_features.target_style_gram

        content_loss = get_MEAN_mse_loss(target_content_features, generated_content_features, loss_weights)
        style_loss = tuple(get_MEAN_mse_loss(x,y)*w for x,y,w in zip(target_style_gram, generated_style_gram, loss_weights))
        total_loss = content_loss + tf.reduce_sum(style_loss)

    TransformerNetwork = TransformerNetwork_VGG.transformer
    grads = tape.gradient(total_loss, TransformerNetwork.trainable_weights)
    optimizer.apply_gradients(zip(grads, TransformerNetwork.trainable_weights))
# GradientTape epoch=5:
# losses: [ 6078.71 70.23 4495.13 13817.65 88217.99 48.36]
model.fit() learning
With tf.keras.models.Model.fit(), the multiple outputs, e.g. the tuple of 6 tensors, are fed to the loss function individually as loss(y_pred, y_true) and then multiplied by the corresponding weight on reduction. This method does learn to approximate the content image, but does not learn to minimize the style losses! I cannot figure out why.
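For reference, this weighting is what compile() applies when it is given one loss per output plus loss_weights; the model is presumably compiled along these lines (a hedged sketch, the exact compile call is not shown here):

TransformerNetwork_VGG.compile(
    optimizer=optimizer,                              # same optimizer as in the GradientTape loop
    loss=[tf.keras.losses.MeanSquaredError()] * 6,    # one loss per output (content + 5 style grams)
    loss_weights=FEATURE_WEIGHTS,                     # per-output weights
)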
history = TransformerNetwork_VGG.fit(
    x=train_dataset.repeat(NUM_EPOCHS),
    epochs=NUM_EPOCHS,
    steps_per_epoch=NUM_BATCHES,
    callbacks=callbacks,
)
# model.fit() epoch=5:
# losses: [ 4661.08 219.95 6959.01 4897.39 209201.16 84.68]]
50 epochs, with boosted style_weights,
FEATURE_WEIGHTS= [ 0.1854, 1605.23, 25.08, 8.16, 1.28, 2330.79] # boost style loss x100
step=50, losses=[269899.45 337.5 69617.7 38424.96 9192.36 85903.44 66423.51]
check mse losses * weights
I tested my model with losses and weights fixed as follows
* FEATURE_WEIGHTS = SEQ = [1.,2.,3.,4.,5.,6.,]
* MSELoss(y_true, y_pred) == tf.ones() of equal shape
and confirmed that model.fit() is handling the multiple output losses * weights correctly (a sketch of such a check follows).
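One way to write such a check is with a dummy loss that ignores its inputs and always contributes 1, so each reported per-output loss should come out equal to its weight (a hypothetical sketch of the test described above; constant_ones_loss is not a name from the notebook):

def constant_ones_loss(y_true, y_pred):
    # ignore the data: every element contributes a loss of exactly 1,
    # so after reduction each output's weighted loss equals its loss_weights entry
    return tf.ones_like(y_pred)

TransformerNetwork_VGG.compile(optimizer=optimizer,
                               loss=[constant_ones_loss] * 6,
                               loss_weights=[1., 2., 3., 4., 5., 6.])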
I've checked everything I can think of, but I cannot figure out how to make the model learn correctly with model.fit(). What am I missing??
The full notebook is available here: https://colab.research.google.com/github/mixuala/fast_neural_style_pytorch/blob/master/notebook/%5BSO%5D_FastStyleTransfer.ipynb
I am trying to implement ADDA in Keras. Here is my code:
class ADDA_Images(object):
    def __init__(self, modelInput):
        self.img_rows = 28
        self.img_cols = 28
        self.channels = 3
        self.img_shape = (self.img_rows, self.img_cols, self.channels)
        optimizer = opt.Adam(0.001)

        self.source_generator = self.build_generator(modelInput)
        self.target_generator = self.build_generator(modelInput)

        outputFeatureExtraction = layers.Input(shape=self.target_generator.output_shape[1:])

        self.source_classificator = self.build_classifier(outputFeatureExtraction)

        self.discriminator_model = self.build_discriminator(outputFeatureExtraction)
        self.discriminator_model.compile(optimizer, loss='binary_crossentropy', metrics=['acc'])
        self.discriminator_model.name = 'disk'

        input = layers.Input(shape=self.img_shape)
        fe_rep = self.source_generator(input)
        cl = self.source_classificator(fe_rep)
        self.source_model = Model(input, cl)
        self.source_model.compile(optimizer, loss='categorical_crossentropy', metrics=['acc'])

        input = layers.Input(shape=self.img_shape)
        fe_rep = self.target_generator(input)
        cl = self.source_classificator(fe_rep)
        self.target_model = Model(input, cl)
        self.target_model.compile(optimizer, loss='categorical_crossentropy', metrics=['acc'])

        self.combined_model = Sequential()
        self.combined_model.add(self.target_generator)
        self.combined_model.add(self.discriminator_model)
        self.combined_model.get_layer('disk').trainable = False
        self.combined_model.compile(optimizer, loss='binary_crossentropy', metrics=['acc'])

        print('Source model')
        self.source_model.summary()

        print('Target model')
        self.target_model.summary()

        print('Discriminator')
        self.discriminator_model.summary()

        print('Combined model')
        self.combined_model.summary()

    def build_generator(self, modelInput):
        gen = layers.Conv2D(filters=20, kernel_size=5, padding='valid')(modelInput)
        gen = layers.MaxPooling2D(pool_size=2, strides=2)(gen)
        gen = layers.Conv2D(filters=50, kernel_size=5, padding='valid')(gen)
        gen = layers.MaxPooling2D(pool_size=2, strides=2)(gen)
        gen = layers.Flatten()(gen)

        model = Model(modelInput, gen)
        print('Generator summary')
        model.summary()
        return model

    def build_classifier(self, modelInput):
        cl = layers.Dense(3072, activation='relu')(modelInput)
        cl = layers.Dense(2048, activation='relu')(cl)
        cl = layers.Dense(10, activation='softmax')(cl)

        model = Model(modelInput, cl)
        print('Classificatior summary')
        model.summary()
        return model

    def build_discriminator(self, modelInput):
        disc = layers.Dense(500, activation='relu')(modelInput)
        disc = layers.Dense(500, activation='relu')(disc)
        disc = layers.Dense(2, activation='softmax')(disc)

        model = Model(modelInput, disc)
        print('Discriminator summary')
        model.summary()
        return model
But it seems that target_generator is not connected to the target model. I loaded the target model from the pretrained source model and then trained the discriminator and the combined model in the ADDA way. But the target model does not change: it has the same predictions (accuracies and losses) as the source model all the time.
Here is the summary of the models:
Source model
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) (None, 28, 28, 3) 0
_________________________________________________________________
model_1 (Model) (None, 800) 26570
_________________________________________________________________
model_3 (Model) (None, 10) 8774666
=================================================================
Total params: 8,801,236
Trainable params: 8,801,236
Non-trainable params: 0
_________________________________________________________________
Target model
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_3 (InputLayer) (None, 28, 28, 3) 0
_________________________________________________________________
model_2 (Model) (None, 800) 26570
_________________________________________________________________
model_3 (Model) (None, 10) 8774666
=================================================================
Total params: 8,801,236
Trainable params: 8,801,236
Non-trainable params: 0
_________________________________________________________________
Discriminator
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) (None, 800) 0
_________________________________________________________________
dense_4 (Dense) (None, 500) 400500
_________________________________________________________________
dense_5 (Dense) (None, 500) 250500
_________________________________________________________________
dense_6 (Dense) (None, 2) 1002
=================================================================
Total params: 1,304,004
Trainable params: 652,002
Non-trainable params: 652,002
_________________________________________________________________
Combined model
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
model_2 (Model) (None, 800) 26570
_________________________________________________________________
disk (Model) (None, 2) 652002
=================================================================
Total params: 678,572
Trainable params: 26,570
Non-trainable params: 652,002
I validated the outputs from target_model's second layer (it should be target_generator by construction) and they are not the same as the output of target_generator (on the same input). So it seems that those two models are not connected as reported in the summaries.
Can someone help me to figure out what is wrong?
I am using Keras 2, Tensorflow backend.
The problem was in the training part: I loaded the pretrained source model into the target model with load_model, and that caused problems because it changed the reference to the generator model. Instead of load_model, I should have used load_weights.
So, loading the pretrained weights in a way that works and does not break the references is:
source_model = load_model(modelName)
target_model.set_weights(source_model.get_weights())
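A quick way to confirm that the shared generator is still connected after loading the weights this way (a hedged sketch, assuming target_model and target_generator refer to the objects built in the class above):

import numpy as np

x = np.random.rand(1, 28, 28, 3).astype('float32')
# set_weights keeps the nested generator object intact, so the generator layer
# inside target_model is literally the same model as target_generator
print(target_model.layers[1] is target_generator)                 # expect True
print(np.allclose(target_model.layers[1].predict(x),
                  target_generator.predict(x)))                   # expect True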