Why does my Keras custom layer only get called once? - python

I have to work with tensorflow 1.15 and need a custom layer. A very simplistic layer can look like this:
class Dummy(keras.layers.Layer):
    """Pass-through layer that counts, in plain Python, how often `call` runs."""

    def __init__(self, units=32, input_dim=32):
        # `units` and `input_dim` are accepted but not used by this layer.
        super(Dummy, self).__init__()
        self.cnt = 1  # ordinary Python int, incremented inside `call`

    def call(self, inputs):
        # NOTE(review): under TF 1.x graph mode this Python body presumably
        # runs only while the graph is being built, not once per batch --
        # confirm against the Keras version in use.
        self.cnt += 1
        return inputs
If I use this Dummy layer in any architecture, the variable cnt only ever reaches two. What am I missing?
Here is a very simplistic dummy script to showcase my issue:
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Conv2D, Activation
from tensorflow import set_random_seed
from numpy.random import seed
# Fix the NumPy and TensorFlow seeds so runs are repeatable.
seed(312991)
set_random_seed(3121991)


class Dummy(keras.layers.Layer):
    """Pass-through layer that counts, in plain Python, how often `call` runs."""

    def __init__(self, units=32, input_dim=32):
        # `units` and `input_dim` are accepted but not used by this layer.
        super(Dummy, self).__init__()
        self.cnt = 1

    def call(self, inputs):
        # NOTE(review): under TF 1.x graph mode this Python body presumably
        # runs only while the graph is being built, not once per batch.
        self.cnt += 1
        return inputs


# creating the input image
input_img = np.ones(shape=(8, 8, 3))
# adjust range
input_img_adjusted = input_img / 255
# The target is the first two channels of the scaled image.
target = input_img_adjusted[:, :, 0:2]

model = Sequential()
model.add(Conv2D(2, (3, 3), input_shape=input_img.shape, padding='same'))
model.add(Dummy())
model.add(Activation('sigmoid'))

opt = keras.optimizers.Adam(0.001)
model.compile(optimizer=opt, loss="mean_absolute_error")

# 2048 identical samples, 100 epochs, batches of 32.
hist = model.fit(
    np.array(2048 * [input_img_adjusted]),
    np.array(2048 * [target]),
    epochs=100,
    batch_size=32,
)
# layers[-2] is the Dummy layer (Conv2D, Dummy, Activation).
print("called the Dummy Layer:", model.layers[-2].cnt)
My assumption would have been that it is something like 32,32*100 or something similar.

You have to use a tf.Variable for the counter and assign_add to increment it:
class Dummy(keras.layers.Layer):
    """Pass-through layer whose call counter is a non-trainable tf.Variable.

    Requires ``import tensorflow as tf`` in the enclosing module.
    """

    def __init__(self, units=32, input_dim=32):
        # `units` and `input_dim` are accepted but not used by this layer.
        super(Dummy, self).__init__()
        self.cnt = tf.Variable(1, trainable=False)

    def call(self, inputs):
        # After this line `self.cnt` refers to the result of assign_add,
        # no longer to the variable created in __init__.
        # NOTE(review): in graph mode the increment only happens when this op
        # is actually executed; since `inputs` is returned unchanged, it may
        # need to be registered (e.g. via add_update) -- confirm.
        self.cnt = self.cnt.assign_add(1)
        return inputs

Related

PyTorch: How to create a Parameter without specifying the dimension

Say I want to define a module. In this module, the __init__() function will create a Parameter called self.weight without knowing the input_dim of the module. My question is: how can I expand self.weight and initialize it when forward() is called for the first time?
For example, I want my module looks like this:
class MyModel(torch.nn.Module):
    """Single sigmoid layer whose weight is meant to be sized on first use.

    This is the question's attempt: the input dimension is unknown in
    ``__init__`` and is taken from the first batch passed to ``forward``.
    """

    def __init__(self, out_dim):
        super(MyModel, self).__init__()
        # I don't know the input_dim yet
        # NOTE(review): passing None as a tensor size is not expected to be
        # accepted by torch.FloatTensor -- this line is the open question.
        self.weight = torch.nn.Parameter(torch.FloatTensor(None, out_dim))
        self.init_weight = False

    def init_parameters(self, in_dim):
        # what should I do in this function?
        # Is this correct?
        # NOTE(review): `expand` returns a plain tensor, not a Parameter.
        self.weight = self.weight.expand(in_dim, -1)
        torch.nn.init.xavier_normal_(self.weight)
        self.init_weight = True

    def forward(self, X):
        if not self.init_weight:
            # first call, so now I can initialize the weight since I know the input_dim
            self.init_parameters(X.shape[1])
        # do some forward ops
        return torch.sigmoid(torch.matmul(X, self.weight))
And my training code looks like this (The parameter self.weight is passed to the optimizer after I create the model):
def train(X_train, y_train):
    """Fit a ``MyModel`` to ``(X_train, y_train)`` with Adam and MSE loss.

    Runs 10000 full-batch steps. The model's output size is taken from
    ``y_train.shape[1]``.
    """
    model = MyModel(y_train.shape[1])
    # MyModel sizes its weight on the first forward call, so run one forward
    # pass before building the optimizer; otherwise the optimizer would be
    # handed the parameter that existed before initialization.
    with torch.no_grad():
        model(X_train)
    optimize = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_fn = torch.nn.MSELoss()
    for epoch in range(10000):
        optimize.zero_grad()
        prediction = model(X_train)
        loss = loss_fn(prediction, y_train)
        loss.backward()
        optimize.step()
After all, it works for me using the way I explained in the comments - to allocate the weights parameter right in the init_parameters function.
import torch
class MyModel(torch.nn.Module):
    """Sigmoid layer whose weight matrix is allocated on the first forward call.

    ``__init__`` registers a one-element placeholder parameter; the first call
    to ``forward`` replaces it with an ``(in_dim, out_dim)`` parameter. An
    optimizer created before that first call holds the placeholder, not the
    real weight.
    """

    def __init__(self, out_dim):
        super(MyModel, self).__init__()
        # Placeholder so that `weight` is registered as a parameter.
        self.weight = torch.nn.Parameter(torch.FloatTensor([0.0]))
        self.out_dim = out_dim
        self.init_weight = False

    def init_parameters(self, in_dim, device=None, dtype=None):
        """Replace the placeholder with a Xavier-initialized weight.

        Args:
            in_dim: number of input features (rows of the weight matrix).
            device: device to allocate on; ``None`` uses torch's default.
            dtype: dtype of the weight; ``None`` uses torch's default.
        """
        self.weight = torch.nn.Parameter(
            torch.empty(in_dim, self.out_dim, device=device, dtype=dtype),
            requires_grad=True,
        )
        torch.nn.init.xavier_normal_(self.weight)
        self.init_weight = True

    def forward(self, X):
        if not self.init_weight:
            # first call, so now I can initialize the weight since I know the input_dim
            # Follow the input's device and floating dtype so matmul works
            # for GPU or float64 inputs too.
            dtype = X.dtype if X.is_floating_point() else None
            self.init_parameters(X.shape[1], device=X.device, dtype=dtype)
        # do some forward ops
        result = torch.sigmoid(torch.matmul(X, self.weight))
        print(X.shape, result.shape)
        return result
def train(X_train, y_train, epochs=10000):
    """Train a two-layer ``MyModel`` stack with Adam and MSE loss.

    Args:
        X_train: input batch; its second dimension sets the first layer's
            input size.
        y_train: targets compared against the second layer's 20 outputs.
        epochs: number of full-batch optimization steps.

    Returns:
        The trained ``torch.nn.Sequential`` model.
    """
    model = torch.nn.Sequential(MyModel(out_dim=100), MyModel(out_dim=20))
    # MyModel replaces its placeholder weight on the first forward call.
    # Run that call before building the optimizer; otherwise the optimizer
    # keeps the placeholders and the real weights are never updated.
    with torch.no_grad():
        model(X_train)
    optimize = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_fn = torch.nn.MSELoss()
    for epoch in range(epochs):
        optimize.zero_grad()
        prediction = model(X_train)
        loss = loss_fn(prediction, y_train)
        loss.backward()
        optimize.step()
    return model
# Random toy problem: 100 samples, 5 input features, 20 targets per sample.
batch_size = 100
in_dim = 5
out_dim = 20
X_train = torch.randn((batch_size, in_dim))
y_train = torch.randn((batch_size, out_dim))
train(X_train, y_train)

tf.function causes out of scope error when inputs passed to add_loss

Can loss terms be manually added with add_loss inside a tensorflow graph? The below example for using add_loss is largely copied from
https://www.tensorflow.org/guide/keras/custom_layers_and_models#the_add_loss_method
but with @tf.function added to the layer's call method.
import tensorflow as tf
from tensorflow import keras
def main():
    """Run the layer once under a GradientTape and print loss and gradients."""
    layer = ActivityRegularizationLayer()
    inputs = tf.constant(5.)
    with tf.GradientTape() as tape:
        y = layer(inputs)
        loss = tf.reduce_mean(y)
        # Add the regularization terms the layer registered via add_loss.
        loss += sum(layer.losses)
    grad = tape.gradient(loss, layer.trainable_weights)
    print(f"loss={float(loss)}, grad={grad}")
class ActivityRegularizationLayer(keras.layers.Layer):
    """Identity layer that adds ``rate * sum(inputs)`` as an extra loss term.

    ``call`` is decorated with ``tf.function`` here; this is the variant that
    triggers the "out of scope" error discussed in the question.
    """

    def __init__(self, rate=1e-2):
        super().__init__()
        self.rate = rate

    @tf.function
    def call(self, inputs):
        self.add_loss(self.rate * tf.reduce_sum(inputs))
        return inputs
Running the above leads to the error
The tensor <tf.Tensor 'mul:0' shape=() dtype=float32> cannot be accessed from here, because it was defined in FuncGraph(name=call, id=46917885252656), which is out of scope.
Removing the decorator makes things run successfully
loss=5.050000190734863, grad=[]
as does removing the line adding sum(layer.losses) to the total loss
loss=5.0, grad=[]
Additional details
python 3.9.12
tensorflow 2.8.0
This is addressed here:
https://github.com/tensorflow/tensorflow/issues/32058#issuecomment-592664998
In summary, this is a known behavior and the solution is to "wrap your whole training step or training loop in a tf.function"
def main():
    """Run one training step on a constant input and print loss and gradients."""
    model = MyModel()
    inputs = tf.constant(5.)
    loss, grad = model.train_step(inputs)
    print(f"loss={float(loss)}, grad={grad}")
class MyModel(keras.models.Model):
    """Model wrapping one ActivityRegularizationLayer.

    The whole training step, not the layer's ``call``, is compiled with
    ``tf.function``.
    """

    def __init__(self):
        super().__init__()
        self.reg = ActivityRegularizationLayer()

    def call(self, inputs):
        return self.reg(inputs)

    @tf.function
    def train_step(self, data):
        """Return ``(loss, gradients)`` for one forward pass over ``data``."""
        with tf.GradientTape() as tape:
            y = self(data)
            loss = tf.reduce_mean(y)
            # Losses registered through add_loss are collected in the same
            # traced function that created them.
            loss += sum(self.losses)
        grad = tape.gradient(loss, self.trainable_weights)
        return loss, grad
class ActivityRegularizationLayer(keras.layers.Layer):
    """Identity layer that adds ``rate * sum(inputs)`` as an extra loss term."""

    def __init__(self, rate=1e-2):
        super().__init__()
        self.rate = rate

    def call(self, inputs):
        self.add_loss(self.rate * tf.reduce_sum(inputs))
        return inputs

separately save the model weight in pytorch

I am using PyTorch to train a deep learning model. I wonder if it is possible for me to separately save the model weight. For example:
class my_model(nn.Module):
    """Pretrained transformer from ``BERT_PATH`` with a 768 -> 1 linear head."""

    def __init__(self):
        super(my_model, self).__init__()
        self.bert = transformers.AutoModel.from_pretrained(BERT_PATH)
        self.out = nn.Linear(768, 1)

    def forward(self, ids, mask, token_type):
        # Element [1] of the transformer's output is fed to the linear head.
        x = self.bert(ids, mask, token_type)[1]
        x = self.out(x)
        return x
I have the BERT model as the base model and an additional linear layer on the top. After I train this model, can I save the weight for the BERT model and this linear layer separately?
As an alternative to the previous answer, you can create two separate nn.Module classes: one for the BERT model and another for the linear layer:
class bert_model(nn.Module):
    """Wrapper around the pretrained transformer loaded from ``BERT_PATH``."""

    def __init__(self):
        super(bert_model, self).__init__()
        self.bert = transformers.AutoModel.from_pretrained(BERT_PATH)

    def forward(self, ids, mask, token_type):
        # Return element [1] of the transformer's output.
        x = self.bert(ids, mask, token_type)[1]
        return x
class linear_layer(nn.Module):
    """Stand-alone 768 -> 1 linear head."""

    def __init__(self):
        super(linear_layer, self).__init__()
        self.out = nn.Linear(768, 1)

    def forward(self, x):
        x = self.out(x)
        return x
Then you can save the two parts of the model separately with:
# Use instance names that do not shadow the bert_model / linear_layer classes.
bert = bert_model()
head = linear_layer()
#train
# Write each state dict to its own file; using one path for both would let
# the second save overwrite the first.
torch.save(bert.state_dict(), 'bert.pth')
torch.save(head.state_dict(), 'linear.pth')
You can:
model = my_model()
# train ...
# Store the state dicts of the two sub-modules under separate keys
# ('bert' and 'out') in a single checkpoint file.
torch.save({'bert': model.bert.state_dict(), 'out': model.out.state_dict()}, 'checkpoint.pth')

using ModuleList, still getting ValueError: optimizer got an empty parameter list

With Pytorch I am attempting to use ModuleList to ensure model parameters are detected, and can be optimized. When calling the SGD optimizer I get the following error:
ValueError: optimizer got an empty parameter list
Can you please review the code below and advise?
class LR(nn.Module):
    """Question's reproduction of "optimizer got an empty parameter list".

    The method below is spelled ``___init___`` (three underscores on each
    side), so it is an ordinary method that construction never calls, and no
    sub-module gets registered.
    """

    def ___init___(self):
        super(LR, self).___init___()
        self.linear = nn.ModuleList()
        self.linear.append(nn.Linear(in_features=28*28, out_features=128, bias=True))

    def forward(self, x):
        y_p = torch.sigmoid(self.linear(x))
        return y_p
LR_model = LR()
# `learn_rate` is expected to be defined elsewhere; this is the call that
# raises "optimizer got an empty parameter list" in the question.
optimizer = torch.optim.SGD(params = LR_model.parameters(), lr=learn_rate)
This seems to be a copy-paste issue: your __init__ has 3 underscores instead of 2, both at __init__(self) and super(LR, self).__init__(). Thus the init itself failed. Delete the extra underscores and try again or try the below code:
class LR(nn.Module):
    """Sigmoid over a stack of linear layers held in an ``nn.ModuleList``.

    Currently the list contains a single 784 -> 128 linear layer.
    """

    def __init__(self):
        super(LR, self).__init__()
        self.linear = nn.ModuleList()
        self.linear.append(nn.Linear(in_features=28*28,
                                     out_features=128,
                                     bias=True))

    def forward(self, x):
        # An nn.ModuleList is a container and cannot be called like a layer;
        # apply each contained module in order instead.
        for layer in self.linear:
            x = layer(x)
        y_p = torch.sigmoid(x)
        return y_p
LR_model = LR()
# `learn_rate` is expected to be defined elsewhere. The parameters are
# materialized into a list before being handed to the optimizer.
optimizer = torch.optim.SGD(params = list(LR_model.parameters()),
lr=learn_rate)

MultiLayer Neural Network

Below is the code (taken from somewhere on the internet) that I am trying to use for Multilayer neural network.
import math
import random
# Constant input that NeuralNetwork.update multiplies with each neuron's last
# (bias) weight.
BIAS = -1
"""
To view the structure of the Neural Network, type
print(network_name)
"""
class Neuron:
    """A neuron holding ``n_inputs`` input weights plus one trailing bias weight."""

    def __init__(self, n_inputs):
        self.n_inputs = n_inputs
        # +1 for bias weight; all weights start uniformly random in [0, 1].
        self.set_weights([random.uniform(0, 1) for _ in range(n_inputs + 1)])

    def sum(self, inputs):
        """Return the weighted sum of ``inputs``; the bias is not included."""
        return sum(val * self.weights[i] for i, val in enumerate(inputs))

    def set_weights(self, weights):
        """Replace the weight list; the last element is the bias weight."""
        self.weights = weights

    def __str__(self):
        return 'Weights: %s, Bias: %s' % (str(self.weights[:-1]), str(self.weights[-1]))
class NeuronLayer:
    """A layer of ``n_neurons`` neurons that each take ``n_inputs`` inputs."""

    def __init__(self, n_neurons, n_inputs):
        self.n_neurons = n_neurons
        self.neurons = [Neuron(n_inputs) for _ in range(self.n_neurons)]

    def __str__(self):
        return 'Layer:\n\t' + '\n\t'.join([str(neuron) for neuron in self.neurons]) + ''
class NeuralNetwork:
    """Fully connected feed-forward network with sigmoid activations.

    Args:
        n_inputs: number of input values.
        n_outputs: number of output values.
        n_neurons_to_hl: number of neurons in each hidden layer.
        n_hidden_layers: number of hidden layers; 0 connects the inputs
            directly to the output layer.
    """

    def __init__(self, n_inputs, n_outputs, n_neurons_to_hl, n_hidden_layers):
        self.n_inputs = n_inputs
        self.n_outputs = n_outputs
        self.n_hidden_layers = n_hidden_layers
        self.n_neurons_to_hl = n_neurons_to_hl

        # Do not touch
        self._create_network()
        self._n_weights = None  # cache filled lazily by `n_weights`
        # end

    def _create_network(self):
        """Build ``self.layers`` from the configured sizes."""
        if self.n_hidden_layers > 0:
            # first hidden layer, fed by the network inputs
            self.layers = [NeuronLayer(self.n_neurons_to_hl, self.n_inputs)]
            # remaining hidden layers: the first one is already built, so only
            # n_hidden_layers - 1 more are needed to match the parameter.
            self.layers += [
                NeuronLayer(self.n_neurons_to_hl, self.n_neurons_to_hl)
                for _ in range(self.n_hidden_layers - 1)
            ]
            # hidden-to-output layer
            self.layers += [NeuronLayer(self.n_outputs, self.n_neurons_to_hl)]
        else:
            # If we don't require hidden layers
            self.layers = [NeuronLayer(self.n_outputs, self.n_inputs)]

    def get_weights(self):
        """Return all weights as one flat list, layer by layer, neuron by neuron."""
        weights = []
        for layer in self.layers:
            for neuron in layer.neurons:
                weights.extend(neuron.weights)
        return weights

    @property
    def n_weights(self):
        """Total number of weights (including bias weights), computed once."""
        if self._n_weights is None:
            self._n_weights = sum(
                neuron.n_inputs + 1  # +1 for bias weight
                for layer in self.layers
                for neuron in layer.neurons
            )
        return self._n_weights

    def set_weights(self, weights):
        """Distribute a flat weight list over the neurons; returns ``self``.

        The order matches ``get_weights``.
        """
        assert len(weights) == self.n_weights, "Incorrect amount of weights."
        stop = 0
        for layer in self.layers:
            for neuron in layer.neurons:
                start, stop = stop, stop + (neuron.n_inputs + 1)
                neuron.set_weights(weights[start:stop])
        return self

    def update(self, inputs):
        """Run a forward pass and return the output layer's activations."""
        assert len(inputs) == self.n_inputs, "Incorrect amount of inputs."
        for layer in self.layers:
            outputs = []
            for neuron in layer.neurons:
                tot = neuron.sum(inputs) + neuron.weights[-1] * BIAS
                outputs.append(self.sigmoid(tot))
            # each layer's outputs are the next layer's inputs
            inputs = outputs
        return outputs

    def sigmoid(self, activation, response=1):
        """Logistic activation ``1 / (1 + e**(-activation / response))``.

        Evaluated in a form that never overflows: very negative activations
        give 0.0 and very positive ones give 1.0.
        """
        x = activation / response
        if x >= 0:
            return 1 / (1 + math.exp(-x))
        # For negative x use the equivalent e**x / (1 + e**x), whose
        # exponential can only underflow to 0.0.
        e = math.exp(x)
        return e / (1 + e)

    def __str__(self):
        return '\n'.join([str(i + 1) + ' ' + str(layer) for i, layer in enumerate(self.layers)])
My input is a text file containing multiple rows. Each row contains a data-point of dimension 16 (the first 16 elements of the row) and 17th element is the class to which the data point belongs.
Input.txt
1,3,2,2,1,10,2,2,1,9,2,9,1,6,2,8,2
2,1,3,2,2,8,7,7,5,7,6,8,2,8,3,8,4
4,6,6,5,5,8,8,3,5,7,8,7,5,10,4,6,2
6,9,6,11,6,7,7,8,5,9,8,8,4,9,7,9,5
1,1,2,2,1,7,7,8,5,7,6,7,2,8,7,9,1
3,8,5,6,4,10,6,3,6,11,4,7,3,8,2,9,3
4,7,5,5,5,8,6,6,7,6,6,6,2,8,7,10,1
6,10,9,8,8,11,6,2,4,9,4,6,9,6,2,8,11
The class is always in the range [1,11]
After learning the input, the expected output for each data-point should be its corresponding class (it may not be right always).
I am creating an object of NeuralNetwork class and then trying to use the update method.
But, I am not clear how to use it properly. Please guide me how to use it for the above input.
This is my open source code
How to
You have to initialize a new network object as: network = NeuralNetwork(n_inputs, n_outputs, n_neurons_to_hl, n_hidden_layers).
n_inputs: the number of input values (in your case: 16)
n_outputs: the number of output values (in your case: 11)
n_neurons_to_hl: the number of neurons in each hidden layer
n_hidden_layers: the number of hidden layers
The update( input_signals ) method is a forward calculation on the network - that is: this is the method you wish to use to classify an input instance.

Categories

Resources