Say I want to define a module whose __init__() creates a Parameter called self.weight without knowing the module's input_dim. My question is: how can I expand self.weight and initialize it the first time forward() is called?
For example, I want my module to look like this:
class MyModel(torch.nn.Module):
    def __init__(self, out_dim):
        super(MyModel, self).__init__()
        # I don't know the input_dim yet
        self.weight = torch.nn.Parameter(torch.FloatTensor(None, out_dim))
        self.init_weight = False

    def init_parameters(self, in_dim):
        # what should I do in this function?
        # Is this correct?
        self.weight = self.weight.expand(in_dim, -1)
        torch.nn.init.xavier_normal_(self.weight)
        self.init_weight = True

    def forward(self, X):
        if not self.init_weight:
            # first call, so now I can initialize the weight since I know the input_dim
            self.init_parameters(X.shape[1])
        # do some forward ops
        return torch.sigmoid(torch.matmul(X, self.weight))
And my training code looks like this (the parameter self.weight is passed to the optimizer after I create the model):
def train(X_train, y_train):
    model = MyModel(y_train.shape[1])
    optimize = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_fn = torch.nn.MSELoss()
    for epoch in range(10000):
        optimize.zero_grad()
        prediction = model(X_train)
        loss = loss_fn(prediction, y_train)
        loss.backward()
        optimize.step()
In the end, it works for me the way I described in the comments: allocating the weight Parameter right in the init_parameters function.
import torch

class MyModel(torch.nn.Module):
    def __init__(self, out_dim):
        super(MyModel, self).__init__()
        self.weight = torch.nn.Parameter(torch.FloatTensor([0.0]))
        self.out_dim = out_dim
        self.init_weight = False

    def init_parameters(self, in_dim):
        self.weight = torch.nn.Parameter(torch.FloatTensor(in_dim, self.out_dim), requires_grad=True)
        torch.nn.init.xavier_normal_(self.weight)
        self.init_weight = True

    def forward(self, X):
        if not self.init_weight:
            # first call, so now I can initialize the weight since I know the input_dim
            self.init_parameters(X.shape[1])
        # do some forward ops
        result = torch.sigmoid(torch.matmul(X, self.weight))
        print(X.shape, result.shape)
        return result
def train(X_train, y_train):
    model = torch.nn.Sequential(MyModel(out_dim=100), MyModel(out_dim=20))
    optimize = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_fn = torch.nn.MSELoss()
    for epoch in range(10000):
        #print('.', end='')
        optimize.zero_grad()
        prediction = model(X_train)
        loss = loss_fn(prediction, y_train)
        loss.backward()
        optimize.step()
batch_size, in_dim, out_dim = 100, 5, 20
X_train=torch.randn((batch_size, in_dim))
y_train=torch.randn((batch_size, out_dim))
train(X_train, y_train)
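As a side note, recent PyTorch releases (1.8 and later) ship lazy modules such as torch.nn.LazyLinear, which defer creating the weight until the first forward pass; the usual recommendation is to run one dry forward pass before building the optimizer so the materialized parameters are registered with it. A minimal sketch of a similar two-layer setup using them (the layer sizes are just placeholders):

import torch

# LazyLinear infers in_features from the first batch it sees,
# so the input dimension never has to be hard-coded.
model = torch.nn.Sequential(
    torch.nn.LazyLinear(out_features=100),
    torch.nn.Sigmoid(),
    torch.nn.LazyLinear(out_features=20),
    torch.nn.Sigmoid(),
)

X_train = torch.randn(100, 5)
model(X_train)  # dry run materializes the uninitialized parameters

# build the optimizer only after the parameters exist
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)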
Can loss terms be manually added with add_loss inside a TensorFlow graph? The example below for using add_loss is largely copied from
https://www.tensorflow.org/guide/keras/custom_layers_and_models#the_add_loss_method
but with @tf.function added to the layer's call method.
import tensorflow as tf
from tensorflow import keras

def main():
    layer = ActivityRegularizationLayer()
    inputs = tf.constant(5.)
    with tf.GradientTape() as tape:
        y = layer(inputs)
        loss = tf.reduce_mean(y)
        loss += sum(layer.losses)
    grad = tape.gradient(loss, layer.trainable_weights)
    print(f"loss={float(loss)}, grad={grad}")

class ActivityRegularizationLayer(keras.layers.Layer):
    def __init__(self, rate=1e-2):
        super().__init__()
        self.rate = rate

    @tf.function
    def call(self, inputs):
        self.add_loss(self.rate * tf.reduce_sum(inputs))
        return inputs
Running the above leads to the error
The tensor <tf.Tensor 'mul:0' shape=() dtype=float32> cannot be accessed from here, because it was defined in FuncGraph(name=call, id=46917885252656), which is out of scope.
Removing the decorator makes things run successfully
loss=5.050000190734863, grad=[]
as does removing the line adding sum(layer.losses) to the total loss
loss=5.0, grad=[]
Additional details
python 3.9.12
tensorflow 2.8.0
This is addressed here:
https://github.com/tensorflow/tensorflow/issues/32058#issuecomment-592664998
In summary, this is known behavior, and the solution is to "wrap your whole training step or training loop in a tf.function":
def main():
    model = MyModel()
    inputs = tf.constant(5.)
    loss, grad = model.train_step(inputs)
    print(f"loss={float(loss)}, grad={grad}")

class MyModel(keras.models.Model):
    def __init__(self):
        super().__init__()
        self.reg = ActivityRegularizationLayer()

    def call(self, inputs):
        return self.reg(inputs)

    @tf.function
    def train_step(self, data):
        with tf.GradientTape() as tape:
            y = self(data)
            loss = tf.reduce_mean(y)
            loss += sum(self.losses)
        grad = tape.gradient(loss, self.trainable_weights)
        return loss, grad

class ActivityRegularizationLayer(keras.layers.Layer):
    def __init__(self, rate=1e-2):
        super().__init__()
        self.rate = rate

    def call(self, inputs):
        self.add_loss(self.rate * tf.reduce_sum(inputs))
        return inputs
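The quoted advice also allows wrapping the whole training computation, rather than a train_step method. A minimal sketch of that variant, reusing the undecorated ActivityRegularizationLayer from the original snippet (the function name train_loop is just for illustration):

@tf.function
def train_loop(layer, inputs):
    # the decorator sits on the outer function, so layer.losses is read
    # inside the same graph in which add_loss created it
    with tf.GradientTape() as tape:
        y = layer(inputs)
        loss = tf.reduce_mean(y) + sum(layer.losses)
    return loss, tape.gradient(loss, layer.trainable_weights)

layer = ActivityRegularizationLayer()
loss, grad = train_loop(layer, tf.constant(5.))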
I am using PyTorch to train a deep learning model. I wonder if it is possible to save the model weights separately. For example:
class my_model(nn.Module):
    def __init__(self):
        super(my_model, self).__init__()
        self.bert = transformers.AutoModel.from_pretrained(BERT_PATH)
        self.out = nn.Linear(768, 1)

    def forward(self, ids, mask, token_type):
        x = self.bert(ids, mask, token_type)[1]
        x = self.out(x)
        return x
I have the BERT model as the base and an additional linear layer on top. After I train this model, can I save the weights for the BERT model and this linear layer separately?
As an alternative to the previous answer, you can create two separate nn.Module classes: one for the BERT model and another for the linear layer:
class bert_model(nn.Module):
    def __init__(self):
        super(bert_model, self).__init__()
        self.bert = transformers.AutoModel.from_pretrained(BERT_PATH)

    def forward(self, ids, mask, token_type):
        x = self.bert(ids, mask, token_type)[1]
        return x

class linear_layer(nn.Module):
    def __init__(self):
        super(linear_layer, self).__init__()
        self.out = nn.Linear(768, 1)

    def forward(self, x):
        x = self.out(x)
        return x
Then you can save the two parts of the model separately with:
bert_model = bert_model()
linear_layer = linear_layer()
# train ...
torch.save(bert_model.state_dict(), BERT_SAVE_PATH)      # use a separate file for each part
torch.save(linear_layer.state_dict(), LINEAR_SAVE_PATH)
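To restore them later (for example in a fresh script where both classes are defined), rebuild each module and load the matching state dict back in. A minimal sketch, reusing the placeholder paths from above:

bert_part = bert_model()
linear_part = linear_layer()
bert_part.load_state_dict(torch.load(BERT_SAVE_PATH))
linear_part.load_state_dict(torch.load(LINEAR_SAVE_PATH))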
You can also keep the single my_model from the question and save each submodule's state_dict into one checkpoint:
model = my_model()
# train ...
torch.save({'bert': model.bert.state_dict(), 'out': model.out.state_dict()}, 'checkpoint.pth')
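Loading is then the mirror image, assuming my_model is defined as in the question:

model = my_model()
checkpoint = torch.load('checkpoint.pth')
model.bert.load_state_dict(checkpoint['bert'])
model.out.load_state_dict(checkpoint['out'])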
Here is a simple example:
import torch
import torch.nn as nn
from torch.autograd import Variable

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        A = Variable(torch.randn((1)), requires_grad=True)
        self.A = nn.Parameter(A)
        self.B = nn.Linear(2, 2)
        self.register_parameter("Ablah", self.A)

    def forward(self, x):
        self.B.weight.data = self.B.weight * self.A
        return self.B(x)

net = Net()
input = torch.ones(2, 2)
input.requires_grad_(True)
output = net(input)
optim = torch.optim.Adam(net.parameters(), lr=0.01)
loss = torch.norm(output)
print(net.A)
loss.backward()
optim.step()
print(net.A)
I am trying to modify the PyTorch model's parameter self.B with another parameter self.A using self.B.weight.data = self.B.weight * self.A, but it seems that A gets no grad and cannot be updated.
This has confused me for a long time. Is there anything wrong?
Please give me some advice. Thank you!
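For reference, assignments through .data happen outside of autograd, so the multiplication by self.A never becomes part of the graph and self.A receives no gradient. A minimal sketch of one way to keep self.A in the graph is to scale the weight inside forward with the functional API instead of overwriting it; this is an illustrative rewrite, not the poster's original code:

import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.A = nn.Parameter(torch.randn(1))
        self.B = nn.Linear(2, 2)

    def forward(self, x):
        # scale the weight inside the graph instead of writing to .data,
        # so gradients flow into both self.B.weight and self.A
        return F.linear(x, self.B.weight * self.A, self.B.bias)

net = Net()
loss = torch.norm(net(torch.ones(2, 2)))
loss.backward()
print(net.A.grad)  # no longer None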
With PyTorch I am attempting to use ModuleList to ensure model parameters are detected and can be optimized. When calling the SGD optimizer I get the following error:
ValueError: optimizer got an empty parameter list
Can you please review the code below and advise?
class LR(nn.Module):
    def ___init___(self):
        super(LR, self).___init___()
        self.linear = nn.ModuleList()
        self.linear.append(nn.Linear(in_features=28*28, out_features=128, bias=True))

    def forward(self, x):
        y_p = torch.sigmoid(self.linear(x))
        return y_p

LR_model = LR()
optimizer = torch.optim.SGD(params=LR_model.parameters(), lr=learn_rate)
This seems to be a copy-paste issue: your ___init___ has three underscores on each side instead of two, both in def ___init___(self) and in super(LR, self).___init___(). Because of that, the real __init__ never runs, so self.linear is never created and model.parameters() is empty. Delete the extra underscores and try again, or use the code below:
class LR(nn.Module):
    def __init__(self):
        super(LR, self).__init__()
        self.linear = nn.ModuleList()
        self.linear.append(nn.Linear(in_features=28*28,
                                     out_features=128,
                                     bias=True))

    def forward(self, x):
        # note: an nn.ModuleList is not callable itself; apply the layer it holds
        y_p = torch.sigmoid(self.linear[0](x))
        return y_p

LR_model = LR()
optimizer = torch.optim.SGD(params=list(LR_model.parameters()),
                            lr=learn_rate)
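If more layers get appended to the ModuleList later, the usual pattern is to iterate over it in forward. A short sketch (the class name and layer sizes are placeholders):

import torch
import torch.nn as nn

class StackedLR(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.ModuleList([
            nn.Linear(28 * 28, 128),
            nn.Linear(128, 10),
        ])

    def forward(self, x):
        # apply every layer registered in the ModuleList, in order
        for layer in self.linear:
            x = layer(x)
        return torch.sigmoid(x)

model = StackedLR()
print(sum(p.numel() for p in model.parameters()))  # non-empty parameter list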
I'm attempting to reassign the weights of a neuron to the weights it had in a previous epoch. In order to do this, I'm trying to save the old weights, but I'm having trouble making copies of the matrices.
I don't know how to copy an eager tensor within a custom layer. I tried tf.identity and the copy library, but both gave me errors (though it's quite possible I didn't use them correctly). Any advice would be appreciated. I attached one of the errors below; it says this should work when eager execution is enabled, which confused me, since I'm using TensorFlow 2 and eager execution should be on by default.
class RevertWeightMatrixDenseLayer(keras.layers.Layer):
    def __init__(self, units, prob, **kwargs):
        super(RevertWeightMatrixDenseLayer, self).__init__(**kwargs)
        self.units = units
        self.prob = prob

    def build(self, input_shape):
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,), initializer="random_normal", trainable=True
        )
        self.last_weight_1 = self.w
        self.last_weight_2 = self.w

    def call(self, inputs, training=False):
        current_weights = self.w
        if training:
            if self.prob > random.random():
                self.w.assign(self.last_weight_2)  # assign preserves the tf.Variable
                # deep copy all the weights here before the assignment
                self.last_weight_2 = self.last_weight_1
                self.last_weight_1 = current_weights
            else:
                pass  # could think about multiplying all weights by a constant here
        return tf.nn.relu(tf.matmul(inputs, self.w) + self.b)
model = make_base_model()  # sets up a sequential model with some conv layers
model.add(RevertWeightMatrixDenseLayer(units=dense_units, prob=0.1))  # custom layer
model.add(RevertWeightMatrixDenseLayer(units=dense_units, prob=0.1))  # custom layer
model.add(layers.Dense(classes, activation='softmax'))

model.compile(loss='CategoricalCrossentropy',
              optimizer='adam',
              metrics=['accuracy'])
history = model.fit(train_dataset, validation_data=validation_dataset, epochs=epochs)
plot(history)
Attempting to deepcopy where I commented leads to the following error: NotImplementedError: __deepcopy__() is only available when eager execution is enabled.
You want to keep state in your layer: that's exactly what tf.Variables are for. (See the guide: Introduction to Variables.)
Set your last_weights as non-trainable tf.Variables, and use assign to copy the values around.
class RevertWeightMatrixDenseLayer(keras.layers.Layer):
    def __init__(self, units, prob, **kwargs):
        super(RevertWeightMatrixDenseLayer, self).__init__(**kwargs)
        self.units = units
        self.prob = prob

    def build(self, input_shape):
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,), initializer="random_normal", trainable=True
        )
        self.last_weight_1 = tf.Variable(self.w, trainable=False)
        self.last_weight_2 = tf.Variable(self.w, trainable=False)
        # we need an extra Variable to store the original value of w
        # when shuffling around
        self.tmp = tf.Variable(self.w, trainable=False)

    def call(self, inputs, training=False):
        self.tmp.assign(self.w)
        if training:
            if self.prob > random.random():
                self.w.assign(self.last_weight_2)  # assign preserves the tf.Variable
                self.last_weight_2.assign(self.last_weight_1)
                self.last_weight_1.assign(self.tmp)
            else:
                pass  # could think about multiplying all weights by a constant here
        return tf.nn.relu(tf.matmul(inputs, self.w) + self.b)
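A quick way to sanity-check the layer on its own, outside the question's make_base_model pipeline (the shapes and the prob value below are just for illustration, and the snippet assumes the class above is in scope):

import random
import tensorflow as tf

layer = RevertWeightMatrixDenseLayer(units=4, prob=1.0)  # prob=1.0 forces a revert
x = tf.random.normal((2, 3))

_ = layer(x)                                 # first call builds w, b and the stored copies
before = layer.w.numpy()                     # snapshot of the initial weights
layer.w.assign_add(tf.ones_like(layer.w))    # pretend a training step changed w
_ = layer(x, training=True)                  # prob=1.0, so w is reverted to the stored copy
print((layer.w.numpy() == before).all())     # True: w was rolled back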