Say I want to define a module. In this module, the __init__() function will create a Parameter called self.weight without knowing the input_dim of the module. My question is: how can I expand self.weight and initialize it when I first call the forward() function?
For example, I want my module to look like this:
class MyModel(torch.nn.Module):
    """Module whose weight is meant to be sized on the first forward() call.

    Only the output width is given at construction time; the input width is
    read from the second dimension of the first batch passed to forward().
    """

    def __init__(self, out_dim):
        super(MyModel, self).__init__()
        # I don't know the input_dim yet
        # NOTE(review): None stands in for the unknown input size here;
        # confirm whether torch.FloatTensor accepts it as a dimension.
        self.weight = torch.nn.Parameter(torch.FloatTensor(None, out_dim))
        # Checked by forward() to decide whether init_parameters() must run.
        self.init_weight = False

    def init_parameters(self, in_dim):
        """Resize and initialize self.weight once in_dim is known."""
        # what should I do in this function?
        # Is this correct?
        # NOTE(review): expand() returns a tensor, and that result is assigned
        # back to the attribute that currently holds a Parameter -- verify
        # that torch.nn.Module allows this assignment.
        self.weight = self.weight.expand(in_dim, -1)
        # NOTE(review): "xvaier_normal_" looks like a misspelling of
        # "xavier_normal_" -- confirm.
        torch.nn.init.xvaier_normal_(self.weight)
        self.init_weight = True

    def forward(self, X):
        """Return sigmoid(X @ weight), initializing the weight on first use."""
        if not self.init_weight:
            # first call, so now I can initialize the weight since I know the input_dim
            self.init_parameters(X.shape[1])
        # do some forward ops
        return torch.sigmoid(torch.matmul(X, self.weight))
And my training code looks like this (The parameter self.weight is passed to the optimizer after I create the model):
def train(X_train, y_train):
    """Fit a MyModel to (X_train, y_train) with Adam and an MSE loss.

    The model's output width is taken from y_train.shape[1].
    """
    model = MyModel(y_train.shape[1])
    # The optimizer collects model.parameters() here, i.e. before the first
    # forward() call has happened.
    optimize = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_fn = torch.nn.MSELoss()
    for epoch in range(10000):
        optimize.zero_grad()
        prediction = model(X_train)
        loss = loss_fn(prediction, y_train)
        loss.backward()
        optimize.step()
In the end, it worked for me using the approach I described in the comments: allocating the weight parameter directly in the init_parameters function.
import torch
class MyModel(torch.nn.Module):
    """Sigmoid-activated linear map whose input width is inferred lazily.

    Only ``out_dim`` is known at construction time. The weight matrix of
    shape ``(in_dim, out_dim)`` is allocated and Xavier-initialized on the
    first call to ``forward``, using the last dimension of the input.

    Args:
        out_dim: number of output features.
    """

    def __init__(self, out_dim):
        super(MyModel, self).__init__()
        # Empty placeholder so the parameter is registered with the module
        # right away; init_parameters() gives it its real storage later.
        self.weight = torch.nn.Parameter(torch.empty(0))
        self.out_dim = out_dim
        self.init_weight = False

    def init_parameters(self, in_dim):
        """Allocate and initialize the weight for inputs of width ``in_dim``."""
        # Swap the storage through ``.data`` instead of rebinding the
        # attribute: the Parameter object keeps its identity, so an optimizer
        # built from model.parameters() before the first forward pass keeps
        # updating the real weight.
        self.weight.data = torch.empty(
            in_dim,
            self.out_dim,
            dtype=self.weight.dtype,
            device=self.weight.device,
        )
        torch.nn.init.xavier_normal_(self.weight)
        self.init_weight = True

    def forward(self, X):
        """Return ``sigmoid(X @ weight)`` for ``X`` of shape ``(..., in_dim)``."""
        if not self.init_weight:
            # First call: the input width is finally known.
            self.init_parameters(X.shape[-1])
        return torch.sigmoid(torch.matmul(X, self.weight))
def train(X_train, y_train, epochs=10000, lr=1e-3):
    """Train a two-layer MyModel stack on (X_train, y_train) with Adam/MSE.

    Args:
        X_train: input tensor of shape (batch, in_dim).
        y_train: target tensor of shape (batch, out_dim).
        epochs: number of full-batch optimization steps.
        lr: Adam learning rate.

    Returns:
        The trained ``torch.nn.Sequential`` model.
    """
    model = torch.nn.Sequential(
        MyModel(out_dim=100),
        MyModel(out_dim=y_train.shape[1]),
    )
    # Dry run: MyModel allocates its weight on the first forward call, so the
    # parameters must exist before the optimizer collects them.
    with torch.no_grad():
        model(X_train)
    optimize = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = torch.nn.MSELoss()
    for _ in range(epochs):
        optimize.zero_grad()
        prediction = model(X_train)
        loss = loss_fn(prediction, y_train)
        loss.backward()
        optimize.step()
    return model
# Synthetic regression data: 100 samples, 5 input features, 20 targets.
batch_size = 100
in_dim = 5
out_dim = 20
X_train = torch.randn(batch_size, in_dim)
y_train = torch.randn(batch_size, out_dim)
train(X_train, y_train)
Can loss terms be manually added with add_loss inside a TensorFlow graph? The example below for using add_loss is largely copied from
https://www.tensorflow.org/guide/keras/custom_layers_and_models#the_add_loss_method
but with @tf.function added to the layer's call method.
import tensorflow as tf
from tensorflow import keras
def main():
    """Run one forward pass under a GradientTape and print loss and gradients."""
    layer = ActivityRegularizationLayer()
    inputs = tf.constant(5.)
    with tf.GradientTape() as tape:
        y = layer(inputs)
        loss = tf.reduce_mean(y)
        # Add the extra loss terms collected on the layer.
        loss += sum(layer.losses)
    grad = tape.gradient(loss, layer.trainable_weights)
    print(f"loss={float(loss)}, grad={grad}")
class ActivityRegularizationLayer(keras.layers.Layer):
    """Pass-through layer that registers a loss of rate * sum(inputs)."""

    def __init__(self, rate=1e-2):
        super().__init__()
        # Multiplier applied to the summed inputs when the loss is registered.
        self.rate = rate

    # NOTE(review): the next line presumably read "@tf.function" (a decorator
    # on call) before text extraction turned "@" into "#" -- confirm against
    # the surrounding description.
    #tf.function
    def call(self, inputs):
        """Register the activity loss and return the inputs unchanged."""
        self.add_loss(self.rate * tf.reduce_sum(inputs))
        return inputs
Running the above leads to the error
The tensor <tf.Tensor 'mul:0' shape=() dtype=float32> cannot be accessed from here, because it was defined in FuncGraph(name=call, id=46917885252656), which is out of scope.
Removing the decorator makes things run successfully
loss=5.050000190734863, grad=[]
as does removing the line adding sum(layer.losses) to the total loss
loss=5.0, grad=[]
Additional details
python 3.9.12
tensorflow 2.8.0
This is addressed here:
https://github.com/tensorflow/tensorflow/issues/32058#issuecomment-592664998
In summary, this is a known behavior and the solution is to "wrap your whole training step or training loop in a tf.function"
def main():
    """Build the model, run a single training step on a scalar, and report it."""
    inputs = tf.constant(5.)
    model = MyModel()
    step_result = model.train_step(inputs)
    loss, grad = step_result
    print(f"loss={float(loss)}, grad={grad}")
class MyModel(keras.models.Model):
    """Keras model wrapping a single ActivityRegularizationLayer."""

    def __init__(self):
        super().__init__()
        self.reg = ActivityRegularizationLayer()

    def call(self, inputs):
        """Forward the inputs through the regularization layer."""
        return self.reg(inputs)

    # The whole training step is traced as one graph, so the tensors that
    # add_loss() creates during the forward pass live in the same graph in
    # which self.losses is read.
    @tf.function
    def train_step(self, data):
        """Compute the regularized loss and its gradients for ``data``.

        Returns:
            A ``(loss, grad)`` tuple, where ``grad`` is the list of gradients
            of the loss with respect to ``self.trainable_weights``.
        """
        with tf.GradientTape() as tape:
            y = self(data)
            loss = tf.reduce_mean(y)
            # Include the loss terms registered via add_loss().
            loss += sum(self.losses)
        grad = tape.gradient(loss, self.trainable_weights)
        return loss, grad
class ActivityRegularizationLayer(keras.layers.Layer):
    """Identity layer that adds ``rate * sum(inputs)`` to the layer losses."""

    def __init__(self, rate=1e-2):
        super().__init__()
        # Scale factor of the activity penalty.
        self.rate = rate

    def call(self, inputs):
        """Record the activity penalty, then hand the inputs back untouched."""
        activity_penalty = self.rate * tf.reduce_sum(inputs)
        self.add_loss(activity_penalty)
        return inputs
I am using PyTorch to train a deep learning model. I wonder whether it is possible to save parts of the model's weights separately. For example:
class my_model(nn.Module):
    """Pretrained BERT encoder followed by a single-output linear head."""

    def __init__(self):
        super(my_model, self).__init__()
        self.bert = transformers.AutoModel.from_pretrained(BERT_PATH)
        # Size the head from the loaded checkpoint's config rather than
        # assuming a 768-wide encoder, so wider/narrower checkpoints work too.
        self.out = nn.Linear(self.bert.config.hidden_size, 1)

    def forward(self, ids, mask, token_type):
        """Return the head's output for a batch of tokenized inputs.

        Args:
            ids: token id tensor.
            mask: attention mask tensor.
            token_type: token type (segment) id tensor.
        """
        # Keyword arguments avoid depending on the encoder's positional
        # parameter order. Index 1 of the output is the pooled representation
        # for BERT models.
        x = self.bert(
            input_ids=ids,
            attention_mask=mask,
            token_type_ids=token_type,
        )[1]
        return self.out(x)
I have the BERT model as the base model and an additional linear layer on the top. After I train this model, can I save the weight for the BERT model and this linear layer separately?
As an alternative to the previous answer, you can create two separate nn.Module classes: one for the BERT model and another for the linear layer:
class bert_model(nn.Module):
    """Thin wrapper that exposes element 1 of the pretrained encoder's output."""

    def __init__(self):
        super().__init__()
        self.bert = transformers.AutoModel.from_pretrained(BERT_PATH)

    def forward(self, ids, mask, token_type):
        """Run the encoder and return the second element of its output."""
        encoder_outputs = self.bert(ids, mask, token_type)
        return encoder_outputs[1]
class linear_layer(nn.Module):
    """Single fully connected layer used as the head on top of the encoder.

    Args:
        in_features: width of the incoming features (768 by default).
        out_features: number of outputs (1 by default).
    """

    def __init__(self, in_features=768, out_features=1):
        super(linear_layer, self).__init__()
        self.out = nn.Linear(in_features, out_features)

    def forward(self, x):
        """Apply the linear layer to ``x`` of shape ``(..., in_features)``."""
        return self.out(x)
Then you can save the two parts of the model separately with:
# One destination file per sub-model, so neither save clobbers the other.
BERT_WEIGHTS_PATH = "bert_model.pth"
LINEAR_WEIGHTS_PATH = "linear_layer.pth"

# NOTE: these assignments rebind the class names to the instances.
bert_model = bert_model()
linear_layer = linear_layer()
#train
torch.save(bert_model.state_dict(), BERT_WEIGHTS_PATH)
torch.save(linear_layer.state_dict(), LINEAR_WEIGHTS_PATH)
You can:
model = my_model()
# train ...
# Bundle both sub-modules' weights into one checkpoint, keyed by part name.
checkpoint = {
    'bert': model.bert.state_dict(),
    'out': model.out.state_dict(),
}
torch.save(checkpoint, 'checkpoint.pth')
With Pytorch I am attempting to use ModuleList to ensure model parameters are detected, and can be optimized. When calling the SGD optimizer I get the following error:
ValueError: optimizer got an empty parameter list
Can you please review the code below and advise?
class LR(nn.Module):
    """Single linear layer followed by a sigmoid, held in an nn.ModuleList."""

    # NOTE(review): this method is spelled with three underscores on each
    # side, so it is an ordinary method rather than the __init__ constructor
    # hook; nothing in this snippet calls it.
    def ___init___(self):
        super(LR, self).___init___()
        self.linear = nn.ModuleList()
        self.linear.append(nn.Linear(in_features=28*28, out_features=128, bias=True))

    def forward(self, x):
        # NOTE(review): self.linear is the ModuleList itself, not one of its
        # layers -- confirm that calling it directly is intended.
        y_p = torch.sigmoid(self.linear(x))
        return y_p

LR_model = LR()
# learn_rate is not defined in this snippet; presumably set elsewhere.
optimizer = torch.optim.SGD(params = LR_model.parameters(), lr=learn_rate)
This seems to be a copy-paste issue: your ___init___ has 3 underscores instead of 2, both at ___init___(self) and super(LR, self).___init___(). Thus your initializer was never run and no layers were registered. Delete the extra underscores and try again, or try the code below:
class LR(nn.Module):
    """Linear layer(s) held in an nn.ModuleList, followed by a sigmoid.

    Maps inputs of width 28*28 to 128 sigmoid-activated outputs.
    """

    def __init__(self):
        super(LR, self).__init__()
        self.linear = nn.ModuleList()
        self.linear.append(nn.Linear(in_features=28*28,
                                     out_features=128,
                                     bias=True))

    def forward(self, x):
        """Apply the stored layers in order, then the sigmoid."""
        # nn.ModuleList is only a container and cannot be called itself, so
        # each registered layer is applied in turn.
        for layer in self.linear:
            x = layer(x)
        y_p = torch.sigmoid(x)
        return y_p
LR_model = LR()
# Materialize the parameter iterator into a list before handing it to SGD.
trainable_params = list(LR_model.parameters())
optimizer = torch.optim.SGD(trainable_params, lr=learn_rate)
Below is the code (taken from somewhere on the internet) that I am trying to use for a multilayer neural network.
import math
import random
BIAS = -1
"""
To view the structure of the Neural Network, type
print(network_name)
"""
class Neuron:
    """One unit holding a weight per input plus a trailing bias weight."""

    def __init__(self, n_inputs):
        self.n_inputs = n_inputs
        # One random weight in [0, 1] per input, plus one more for the bias.
        initial_weights = [random.uniform(0, 1) for _ in range(n_inputs + 1)]
        self.set_weights(initial_weights)

    def sum(self, inputs):
        """Return the weighted sum of ``inputs`` (bias weight not included)."""
        total = 0
        for index, value in enumerate(inputs):
            total += value * self.weights[index]
        return total

    def set_weights(self, weights):
        """Replace this neuron's weight list (bias weight last)."""
        self.weights = weights

    def __str__(self):
        input_weights = self.weights[:-1]
        bias_weight = self.weights[-1]
        return 'Weights: %s, Bias: %s' % (input_weights, bias_weight)
class NeuronLayer:
    """A group of ``n_neurons`` Neuron objects that share the same input width."""

    def __init__(self, n_neurons, n_inputs):
        self.n_neurons = n_neurons
        self.neurons = []
        for _ in range(self.n_neurons):
            self.neurons.append(Neuron(n_inputs))

    def __str__(self):
        neuron_lines = '\n\t'.join(map(str, self.neurons))
        return 'Layer:\n\t' + neuron_lines
class NeuralNetwork:
    """Fully connected feed-forward network built from NeuronLayer objects.

    Args:
        n_inputs: number of input values per sample.
        n_outputs: number of neurons in the output layer.
        n_neurons_to_hl: number of neurons in each non-output layer.
        n_hidden_layers: when 0, the inputs feed the output layer directly;
            otherwise one input-facing layer plus ``n_hidden_layers`` further
            layers of width ``n_neurons_to_hl`` precede the output layer.
    """

    def __init__(self, n_inputs, n_outputs, n_neurons_to_hl, n_hidden_layers):
        self.n_inputs = n_inputs
        self.n_outputs = n_outputs
        self.n_hidden_layers = n_hidden_layers
        self.n_neurons_to_hl = n_neurons_to_hl

        # Do not touch
        self._create_network()
        self._n_weights = None  # cache filled lazily by the n_weights property
        # end

    def _create_network(self):
        """Populate ``self.layers`` according to the constructor arguments."""
        if self.n_hidden_layers > 0:
            # create the first layer
            self.layers = [NeuronLayer(self.n_neurons_to_hl, self.n_inputs)]

            # create hidden layers
            self.layers += [
                NeuronLayer(self.n_neurons_to_hl, self.n_neurons_to_hl)
                for _ in range(self.n_hidden_layers)
            ]

            # hidden-to-output layer
            self.layers += [NeuronLayer(self.n_outputs, self.n_neurons_to_hl)]
        else:
            # If we don't require hidden layers
            self.layers = [NeuronLayer(self.n_outputs, self.n_inputs)]

    def get_weights(self):
        """Return all weights as one flat list, layer by layer, neuron by neuron."""
        weights = []
        for layer in self.layers:
            for neuron in layer.neurons:
                weights.extend(neuron.weights)
        return weights

    @property
    def n_weights(self):
        """Total number of weights in the network, including bias weights."""
        if self._n_weights is None:
            self._n_weights = sum(
                neuron.n_inputs + 1  # +1 for bias weight
                for layer in self.layers
                for neuron in layer.neurons
            )
        return self._n_weights

    def set_weights(self, weights):
        """Distribute a flat weight list over the neurons; return ``self``.

        The list is consumed in the same order that ``get_weights`` emits.

        Raises:
            AssertionError: if ``len(weights)`` differs from ``n_weights``.
        """
        assert len(weights) == self.n_weights, "Incorrect amount of weights."

        stop = 0
        for layer in self.layers:
            for neuron in layer.neurons:
                start, stop = stop, stop + (neuron.n_inputs + 1)
                neuron.set_weights(weights[start:stop])
        return self

    def update(self, inputs):
        """Forward-propagate ``inputs`` and return the output layer's activations.

        Raises:
            AssertionError: if ``len(inputs)`` differs from ``n_inputs``.
        """
        assert len(inputs) == self.n_inputs, "Incorrect amount of inputs."

        for layer in self.layers:
            outputs = []
            for neuron in layer.neurons:
                # The last weight of each neuron multiplies the constant BIAS input.
                tot = neuron.sum(inputs) + neuron.weights[-1] * BIAS
                outputs.append(self.sigmoid(tot))
            # This layer's activations feed the next layer.
            inputs = outputs
        return outputs

    def sigmoid(self, activation, response=1):
        """Logistic activation ``1 / (1 + e**(-activation / response))``."""
        try:
            return 1 / (1 + math.e ** (-activation / response))
        except OverflowError:
            # The power only overflows for a huge positive exponent, i.e. a
            # very negative activation/response, where the logistic function
            # tends to 0.
            return 0.0

    def __str__(self):
        return '\n'.join(
            str(number) + ' ' + str(layer)
            for number, layer in enumerate(self.layers, start=1)
        )
My input is a text file containing multiple rows. Each row contains a data point of dimension 16 (the first 16 elements of the row), and the 17th element is the class to which the data point belongs.
Input.txt
1,3,2,2,1,10,2,2,1,9,2,9,1,6,2,8,2
2,1,3,2,2,8,7,7,5,7,6,8,2,8,3,8,4
4,6,6,5,5,8,8,3,5,7,8,7,5,10,4,6,2
6,9,6,11,6,7,7,8,5,9,8,8,4,9,7,9,5
1,1,2,2,1,7,7,8,5,7,6,7,2,8,7,9,1
3,8,5,6,4,10,6,3,6,11,4,7,3,8,2,9,3
4,7,5,5,5,8,6,6,7,6,6,6,2,8,7,10,1
6,10,9,8,8,11,6,2,4,9,4,6,9,6,2,8,11
The class is always in the range [1,11]
After learning from the input, the expected output for each data point should be its corresponding class (it may not always be right).
I am creating an object of the NeuralNetwork class and then trying to use the update method.
However, I am not clear on how to use it properly. Please guide me on how to use it for the above input.
This is my open source code
How to
You have to initialize a new network object as: network = NeuralNetwork(n_inputs, n_outputs, n_neurons_to_hl, n_hidden_layers).
n_inputs: the number of input values (in your case: 16)
n_outputs: the number of output values (in your case: 11)
n_neurons_to_hl: the number of neurons in each hidden layer
n_hidden_layers: the number of hidden layers
The update( input_signals ) method is a forward calculation on the network - that is: this is the method you wish to use to classify an input instance.