I would like to create a custom preprocessing layer using the tf.keras.layers.experimental.preprocessing.PreprocessingLayer layer.
In this custom layer, placed after the input layer, I would like to normalize my image using tf.cast(img, tf.float32) / 255.
I tried to find some code or example showing how to create this preprocessing layer, but I couldn't find.
Please, can someone provide a full example creating and using the PreprocessingLayer layer ?
If you want to have a custom preprocessing layer, actually you don't need to use PreprocessingLayer. You can simply subclass Layer
Take the simplest preprocessing layer Rescaling as an example, it is under the tf.keras.layers.experimental.preprocessing.Rescaling namespace. However, if you check the actual implementation, it is just subclass Layer class Source Code Link Here but has #keras_export('keras.layers.experimental.preprocessing.Rescaling')
#keras_export('keras.layers.experimental.preprocessing.Rescaling')
class Rescaling(Layer):
"""Multiply inputs by `scale` and adds `offset`.
For instance:
1. To rescale an input in the `[0, 255]` range
to be in the `[0, 1]` range, you would pass `scale=1./255`.
2. To rescale an input in the `[0, 255]` range to be in the `[-1, 1]` range,
you would pass `scale=1./127.5, offset=-1`.
The rescaling is applied both during training and inference.
Input shape:
Arbitrary.
Output shape:
Same as input.
Arguments:
scale: Float, the scale to apply to the inputs.
offset: Float, the offset to apply to the inputs.
name: A string, the name of the layer.
"""
def __init__(self, scale, offset=0., name=None, **kwargs):
self.scale = scale
self.offset = offset
super(Rescaling, self).__init__(name=name, **kwargs)
def call(self, inputs):
dtype = self._compute_dtype
scale = math_ops.cast(self.scale, dtype)
offset = math_ops.cast(self.offset, dtype)
return math_ops.cast(inputs, dtype) * scale + offset
def compute_output_shape(self, input_shape):
return input_shape
def get_config(self):
config = {
'scale': self.scale,
'offset': self.offset,
}
base_config = super(Rescaling, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
So it proves that Rescaling preprocessing is just another normal layer.
The main part is the def call(self, inputs) function. You can create whatever complicated logic to preprocess your inputs and then return.
A easier documentation about custom layer can be find here
In a nutshell, you can do the preprocessing by layer, either by Lambda which for simple operation or by subclassing Layer to achieve your goal.
I think that the best and cleaner solution to do this is using a simple Lambda layer where you can wrap your pre-processing function
this is a dummy working example
import numpy as np
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
X = np.random.randint(0,256, (200,32,32,3))
y = np.random.randint(0,3, 200)
inp = Input((32,32,3))
x = Lambda(lambda x: x/255)(inp)
x = Conv2D(8, 3, activation='relu')(x)
x = Flatten()(x)
out = Dense(3, activation='softmax')(x)
m = Model(inp, out)
m.compile(loss='sparse_categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
history = m.fit(X, y, epochs=10)
Related
I am currently working on medical image segmentation project and I am using U-Net architecture to train my dataset and perform segmentation. I wanted to view how my coded architecture looks like and i tried many ways to achive the same.I was not able to plot the architecture of my code on U-Net.
This is the code. It is the code for the U-net architecture
from torchvision.transforms import CenterCrop
from torch.nn import functional as F
import torch
class Block(Module):
def __init__(self, inChannels, outChannels):
super().__init__()
# store the convolution and RELU layers
self.conv1 = Conv2d(inChannels, outChannels, 3)
self.relu = ReLU()
self.conv2 = Conv2d(outChannels, outChannels, 3)
def forward(self, x):
# apply CONV => RELU => CONV block to the inputs and return it
return self.conv2(self.relu(self.conv1(x)))
class Encoder(Module):
def __init__(self, channels=(3, 16, 32, 64)):
super().__init__()
# store the encoder blocks and maxpooling layer
self.encBlocks = ModuleList(
[Block(channels[i], channels[i + 1])
for i in range(len(channels) - 1)])
self.pool = MaxPool2d(2)
def forward(self, x):
# initialize an empty list to store the intermediate outputs
blockOutputs = []
# loop through the encoder blocks
for block in self.encBlocks:
# pass the inputs through the current encoder block, store
# the outputs, and then apply maxpooling on the output
x = block(x)
blockOutputs.append(x)
x = self.pool(x)
# return the list containing the intermediate outputs
return blockOutputs
class Decoder(Module):
def __init__(self, channels=(64, 32, 16)):
super().__init__()
# initialize the number of channels, upsampler blocks, and
# decoder blocks
self.channels = channels
self.upconvs = ModuleList(
[ConvTranspose2d(channels[i], channels[i + 1], 2, 2)
for i in range(len(channels) - 1)])
self.dec_blocks = ModuleList(
[Block(channels[i], channels[i + 1])
for i in range(len(channels) - 1)])
#deco = self.upconvs + self.dec_blocks
#3return deco
def forward(self, x, encFeatures):
# loop through the number of channels
for i in range(len(self.channels) - 1):
# pass the inputs through the upsampler blocks
x = self.upconvs[i](x)
# crop the current features from the encoder blocks,
# concatenate them with the current upsampled features,
# and pass the concatenated output through the current
# decoder block
encFeat = self.crop(encFeatures[i], x)
x = torch.cat([x, encFeat], dim=1)
x = self.dec_blocks[i](x)
# return the final decoder output
return x
def crop(self, encFeatures, x):
# grab the dimensions of the inputs, and crop the encoder
# features to match the dimensions
(_, _, H, W) = x.shape
encFeatures = CenterCrop([H, W])(encFeatures)
# return the cropped features
return encFeatures
class UNet(Module):
def __init__(self, encChannels=(3, 16, 32, 64),
decChannels=(64, 32, 16),
nbClasses=1, retainDim=True,
outSize=(config.INPUT_IMAGE_HEIGHT, config.INPUT_IMAGE_WIDTH)):
super().__init__()
# initialize the encoder and decoder
self.encoder = Encoder(encChannels)
self.decoder = Decoder(decChannels)
# initialize the regression head and store the class variables
self.head = Conv2d(decChannels[-1], nbClasses, 1)
self.retainDim = retainDim
self.outSize = outSize
def forward(self, x):
# grab the features from the encoder
encFeatures = self.encoder(x)
# pass the encoder features through decoder making sure that
# their dimensions are suited for concatenation
decFeatures = self.decoder(encFeatures[::-1][0],
encFeatures[::-1][1:])
# pass the decoder features through the regression head to
# obtain the segmentation mask
map = self.head(decFeatures)
# check to see if we are retaining the original output
# dimensions and if so, then resize the output to match them
if self.retainDim:
map = F.interpolate(map, self.outSize)
# return the segmentation map
return map
I used plot_model function but i am not getting any result.
I used a code similar to this to visualize the U-Net architecture.
from keras.utils import plot_model
# Build your model
model = UNet
# Visualize the model
plot_model(model, to_file='model2.png')
Please help me plot a network architecture based on my U-Net code.
I have a model
class NewModel(nn.Module):
def __init__(self,output_layer,*args):
self.output_layer = output_layer
super().__init__(*args)
self.output_layer = output_layer
self.selected_out = None
#PRETRAINED MODEL
self.pretrained = models.resnet18(pretrained=True)
#TAKING OUTPUT FROM AN INTERMEDIATE LAYER
#self._layers = []
for l in list(self.pretrained._modules.keys()):
#self._layers.append(l)
if l == self.output_layer:
handle = getattr(self.pretrained,l).register_forward_hook(self.hook)
def hook(self,module, input,output):
self.selected_out = output
def forward(self, x):
return x = self.pretrained(x)
I have two target outputs, one which is same as any label of an image and the second one is the same dimensions as the output obtained from self.output_layer, called target_feature
out = model(img)
layerout = model.selected_out
Now, if I want to calculate the loss of layerout with the target feature map, can it be done like the line written below?
loss = criterion(y_true, out) + feature_criterion(layerout, target_feature)
Or do I need to add backward_hooks?
In this Kaggle notebook
https://www.kaggle.com/sironghuang/understanding-pytorch-hooks
it is written that loss.backward() cannot be used when using backward_hooks.
Quoting the author
# backprop once to get the backward hook results
out.backward(torch.tensor([1,1],dtype=torch.float),retain_graph=True)
#! loss.backward(retain_graph=True) # doesn't work with backward hooks,
#! since it's not a network layer but an aggregated result from the outputs of last layer vs target
Then how can be gradient be calculated based on the loss function?
If I understand you correctly, you want to get two outputs from your model, calculate two losses, then combine them and backpropagate. I imagine you come from Tensorflow & Keras from the way you tried implementing it. In Pytorch, it's actually fairly straight foward, you can do this very easily because of its purely functional aspect.
This is just an example:
class NewModel(nn.Module):
def __init__(self, output_layer, *args):
super(MyModel, self).__init__()
self.pretrained = models.resnet18(pretrained=True)
self.output_layer = output_layer
def forward(self, x):
out = self.pretrained(x)
features = self.output_layer(out)
return out, features
On inference, you will get two results per call:
>>> m = NewModel(nn.Linear(1000, 10))
>>> x = torch.rand(16, 3, 224, 224)
>>> y_pred, y_feature = m(x)
Call you loss functions:
>>> loss = criterion(y_pred, y_true) + feature_criterion(y_feature, target_feature)
Then, backpropagate with loss.backward().
So no need for hooks, nor complicated gradient on your .backward call!
Edit - If you wish to extract an intermediate layer output, keep the hook, that's good. And just modify the forward definition.
def forward(self, x):
out = self.pretrained(x)
return out, self.selected_out
For example:
>>> m = NewModel(output_layer='layer1')
>>> x = torch.rand(16, 3, 224, 224)
>>> y_pred, y_feature = m(x)
>>> y_pred.shape, y_feature.shape
(torch.Size([16, 1000]), torch.Size([16, 64, 56, 56]))
Also, what I said above about the loss stills stands. Compute your loss, then call loss.backward().
Typically a forward function strings together a bunch of layers and returns the output of the last one. Can I do some additional processing after that last layer before returning? For example, some scalar multiplication and reshaping via .view?
I know that the autograd somehow figures out gradients. So I don’t know if my additional processing will somehow screw that up. Thanks.
pytorch tracks the gradients via the computational graph of the tensors, not through the functions. As long as your tensors has requires_grad=True property and their grad is not None you can do (almost) whatever you like and still be able to backprop.
As long as you are using pytorch's operations (e.g., those listed in here and here) you should be okay.
For more info see this.
For example (taken from torchvision's VGG implementation):
class VGG(nn.Module):
def __init__(self, features, num_classes=1000, init_weights=True):
super(VGG, self).__init__()
# ...
def forward(self, x):
x = self.features(x)
x = self.avgpool(x)
x = torch.flatten(x, 1) # <-- what you were asking about
x = self.classifier(x)
return x
A more complex example can be seen in torchvision's implementation of ResNet:
class Bottleneck(nn.Module):
def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
base_width=64, dilation=1, norm_layer=None):
super(Bottleneck, self).__init__()
# ...
def forward(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None: # <-- conditional execution!
identity = self.downsample(x)
out += identity # <-- inplace operations
out = self.relu(out)
return out
I am trying to swap between multiple different "expert" layers based on the output of a "gating" layer (as a mixture of experts).
I created a custom layer that takes in the outputs of the expert and gating layers, but this ends up throwing away some outputs rather than not computing them in the first place.
How can I make the model "short circuit" to only evaluate the gating layer and the selected expert layer(s) to save computation time?
I am using tensorflow 2.0 gpu and the keras functional api
Keras models can be implemented fully dynamically, to support the efficient routing that you mentioned. The following example shows one way in which this can be done. The example is written with the following premises:
It assumes there are two experts (LayerA and LayerB)
It assumes that a mix-of-experts model (MixOfExpertsModel) switches dynamically between the two expert layer classes depending on the per-example output of a Keras Dense layer
It satisfies the need to run training on the model in a batch fashion.
Pay attention to the comments in the code to see how the switching is done.
import numpy as np
import tensorflow as tf
# This is your Expert A class.
class LayerA(tf.keras.layers.Layer):
def build(self, input_shape):
self.weight = self.add_weight("weight_a", shape=input_shape[1:])
#tf.function
def call(self, x):
return x + self.weight
# This is your Expert B class.
class LayerB(tf.keras.layers.Layer):
def build(self, input_shape):
self.weight = self.add_weight("weight_b", shape=input_shape[1:])
#tf.function
def call(self, x):
return x * self.weight
class MixOfExpertsModel(tf.keras.models.Model):
def __init__(self):
super(MixOfExpertsModel, self).__init__()
self._expert_a = LayerA()
self._expert_b = LayerB()
self._gating_layer = tf.keras.layers.Dense(1, activation="sigmoid")
#tf.function
def call(self, x):
z = self._gating_layer(x)
# The switching logic:
# - examples with gating output <= 0.5 are routed to expert A
# - examples with gating output > 0.5 are routed to expert B.
mask_a = tf.squeeze(tf.less_equal(z, 0.5), axis=-1)
mask_b = tf.squeeze(tf.greater(z, 0.5), axis=-1)
# `input_a` is a subset of slices of the original input (`x`).
# So is `input_b`. As such, no compute is wasted.
input_a = tf.boolean_mask(x, mask_a, axis=0)
input_b = tf.boolean_mask(x, mask_b, axis=0)
if tf.size(input_a) > 0:
output_a = self._expert_a(input_a)
else:
output_a = tf.zeros_like(input_a)
if tf.size(input_b) > 0:
output_b = self._expert_b(input_b)
else:
output_b = tf.zeros_like(input_b)
# Return `mask_a`, and `mask_b`, so that the caller can know
# which example is routed to which expert and whether its output
# appears in `output_a` or `output_b`. # This is necessary
# for writing a (custom) loss function for this class.
return output_a, output_b, mask_a, mask_b
# Create an intance of the mix-of-experts model.
mix_of_experts_model = MixOfExpertsModel()
# Generate some dummy data.
num_examples = 32
xs = np.random.random([num_examples, 8]).astype(np.float32)
# Call the model.
print(mix_of_experts_model(xs))
I didn't write a custom loss function that would support the training of this class. But that's doable by using the return values of MixOfExpertsModel.call(), namely the outputs and masks.
I would like to extract and store the dropout mask [array of 1/0s] from a dropout layer in a Sequential Keras model at each batch while training. I was wondering if there was a straight forward way way to do this within Keras or if I would need to switch over to tensorflow (How to get the dropout mask in Tensorflow).
Would appreciate any help! I'm quite new to TensorFlow and Keras.
There are a couple of functions (dropout_layer.get_output_mask(), dropout_layer.get_input_mask()) for the dropout layer that I tried using but got None after calling on the previous layer.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten(name="flat", input_shape=(28, 28, 1)))
model.add(tf.keras.layers.Dense(
512,
activation='relu',
name = 'dense_1',
kernel_initializer=tf.keras.initializers.GlorotUniform(seed=123),
bias_initializer='zeros'))
dropout = tf.keras.layers.Dropout(0.2, name = 'dropout') #want this layer's mask
model.add(dropout)
x = dropout.output_mask
y = dropout.input_mask
model.add(tf.keras.layers.Dense(
10,
activation='softmax',
name='dense_2',
kernel_initializer=tf.keras.initializers.GlorotUniform(seed=123),
bias_initializer='zeros'))
model.compile(...)
model.fit(...)
It's not easily exposed in Keras. It goes deep until it calls the Tensorflow dropout.
So, although you're using Keras, it's will also be a tensor in the graph that can be gotten by name (finding it's name: In Tensorflow, get the names of all the Tensors in a graph).
This option, of course will lack some keras information, you should probably have to do that inside a Lambda layer so Keras adds certain information to the tensor. And you must take extra care because the tensor will exist even when not training (where the mask is skipped)
Now, you can also use a less hacky way, that may consume a little processing:
def getMask(x):
boolMask = tf.not_equal(x, 0)
floatMask = tf.cast(boolMask, tf.float32) #or tf.float64
return floatMask
Use a Lambda(getMasc)(output_of_dropout_layer)
But instead of using a Sequential model, you will need a functional API Model.
inputs = tf.keras.layers.Input((28, 28, 1))
outputs = tf.keras.layers.Flatten(name="flat")(inputs)
outputs = tf.keras.layers.Dense(
512,
# activation='relu', #relu will be a problem here
name = 'dense_1',
kernel_initializer=tf.keras.initializers.GlorotUniform(seed=123),
bias_initializer='zeros')(outputs)
outputs = tf.keras.layers.Dropout(0.2, name = 'dropout')(outputs)
mask = Lambda(getMask)(outputs)
#there isn't "input_mask"
#add the missing relu:
outputs = tf.keras.layers.Activation('relu')(outputs)
outputs = tf.keras.layers.Dense(
10,
activation='softmax',
name='dense_2',
kernel_initializer=tf.keras.initializers.GlorotUniform(seed=123),
bias_initializer='zeros')(outputs)
model = Model(inputs, outputs)
model.compile(...)
model.fit(...)
Training and predicting
Since you can't train the masks (it doesn't make any sense), it should not be an output of the model for training.
Now, we could try this:
trainingModel = Model(inputs, outputs)
predictingModel = Model(inputs, [output, mask])
But masks don't exist in prediction, because dropout is only applied in training. So this doesn't bring us anything good in the end.
The only way for training is then using a dummy loss and dummy targets:
def dummyLoss(y_true, y_pred):
return y_true #but this might evoke a "None" gradient problem since it's not trainable, there is no connection to any weights, etc.
model.compile(loss=[loss_for_main_output, dummyLoss], ....)
model.fit(x_train, [y_train, np.zeros((len(y_Train),) + mask_shape), ...)
It's not guaranteed that these will work.
I found a very hacky way to do this by trivially extending the provided dropout layer. (Almost all code from TF.)
class MyDR(tf.keras.layers.Layer):
def __init__(self,rate,**kwargs):
super(MyDR, self).__init__(**kwargs)
self.noise_shape = None
self.rate = rate
def _get_noise_shape(self,x, noise_shape=None):
# If noise_shape is none return immediately.
if noise_shape is None:
return array_ops.shape(x)
try:
# Best effort to figure out the intended shape.
# If not possible, let the op to handle it.
# In eager mode exception will show up.
noise_shape_ = tensor_shape.as_shape(noise_shape)
except (TypeError, ValueError):
return noise_shape
if x.shape.dims is not None and len(x.shape.dims) == len(noise_shape_.dims):
new_dims = []
for i, dim in enumerate(x.shape.dims):
if noise_shape_.dims[i].value is None and dim.value is not None:
new_dims.append(dim.value)
else:
new_dims.append(noise_shape_.dims[i].value)
return tensor_shape.TensorShape(new_dims)
return noise_shape
def build(self, input_shape):
self.noise_shape = input_shape
print(self.noise_shape)
super(MyDR,self).build(input_shape)
#tf.function
def call(self,input):
self.noise_shape = self._get_noise_shape(input)
random_tensor = tf.random.uniform(self.noise_shape, seed=1235, dtype=input.dtype)
keep_prob = 1 - self.rate
scale = 1 / keep_prob
# NOTE: if (1.0 + rate) - 1 is equal to rate, then we want to consider that
# float to be selected, hence we use a >= comparison.
self.keep_mask = random_tensor >= self.rate
#NOTE: here is where I save the binary masks.
#the file grows quite big!
tf.print(self.keep_mask,output_stream="file://temp/droput_mask.txt")
ret = input * scale * math_ops.cast(self.keep_mask, input.dtype)
return ret