Pytorch: model weights cannot be Updated” - python

Here is a simple example:
class Net(nn.Module):
def __init__(self):
super(Net,self).__init__()
A = Variable(torch.randn((1)), requires_grad=True)
self.A = nn.Parameter(A)
self.B = nn.Linear(2,2)
self.register_parameter("Ablah",self.A)
def forward(self,x):
self.B.weight.data = self.B.weight * self.A
return self.B(x)
net = Net()
input = torch.ones(2,2)
input.requires_grad_(True)
output = net(input)
optim = torch.optim.Adam(net.parameters(), lr = 0.01)
loss = torch.norm(output)
print(net.A)
loss.backward()
optim.step()
print(net.A)
I am trying to modify a PyTorch model's parameter self.B with another parameter self.A by using the code self.B.weight.data = self.B.weight * self.A, but it seems that A has no grad and it can not be updated.
It has confused me for a long time. Is there anything wrong?
Please give me some advice. Thank you!

Related

PyTorch: How to create a Parameter without specifying the dimension

Say I want to defined a module. In this module, the __init__() function will create a Parameter called self.weight without known the input_dim of the module. My question is, how can I expand the self.weight and initialize it when I first call the forward() function?
For example, I want my module looks like this:
class MyModel(torch.nn.Module):
def __init__(self, out_dim):
super(MyModel, self).__init__()
# I don't know the input_dim yet
self.weight = torch.nn.Parameter(torch.FloatTensor(None, out_dim))
self.init_weight = False
def init_parameters(self, in_dim):
# what should I do in this function?
# Is this correct?
self.weight = self.weight.expand(in_dim, -1)
torch.nn.init.xvaier_normal_(self.weight)
self.init_weight = True
def forward(self, X):
if not self.init_weight:
# first call, so now I can initialize the weight since I know the input_dim
self.init_parameters(X.shape[1])
# do some forward ops
return torch.sigmoid(torch.matmul(X, self.weight))
And my training code looks like this (The parameter self.weight is passed to the optimizer after I create the model):
def train(X_train, y_train):
model = MyModel(y_train.shape[1])
optimize = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = torch.nn.MSELoss()
for epoch in range(10000):
optimize.zero_grad()
prediction = model(X_train)
loss = loss_fn(prediction, y_train)
loss.backward()
optimize.step()
After all, it works for me using the way I explained in the comments - to allocate the weights parameter right in the init_parameters function.
import torch
class MyModel(torch.nn.Module):
def __init__(self, out_dim):
super(MyModel, self).__init__()
self.weight = torch.nn.Parameter(torch.FloatTensor([0.0]))
self.out_dim = out_dim
self.init_weight = False
def init_parameters(self, in_dim):
self.weight = torch.nn.Parameter(torch.FloatTensor(in_dim, self.out_dim), requires_grad=True)
torch.nn.init.xavier_normal_(self.weight)
self.init_weight = True
def forward(self, X):
if not self.init_weight:
# first call, so now I can initialize the weight since I know the input_dim
self.init_parameters(X.shape[1])
# do some forward ops
result = torch.sigmoid(torch.matmul(X, self.weight))
print(X.shape, result.shape)
return result
def train(X_train, y_train):
model = torch.nn.Sequential(MyModel(out_dim=100), MyModel(out_dim=20))
optimize = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = torch.nn.MSELoss()
for epoch in range(10000):
#print('.', end='')
optimize.zero_grad()
prediction = model(X_train)
loss = loss_fn(prediction, y_train)
loss.backward()
optimize.step()
batch_size, in_dim, out_dim = 100, 5, 20
X_train=torch.randn((batch_size, in_dim))
y_train=torch.randn((batch_size, out_dim))
train(X_train, y_train)

AttributeError: 'float' object has no attribute 'dot' : return X.dot(self.W) + self.b

I have a problem that I can not get to solve.
I have two classes, class 1 deals with the view, class 2 with the calculation.
Class 1 accesses class 2. In class 1 is an instance of class 2 to access the predict method. The method predict has one argument. I set this argument over the input field in class 1. Then this error always occurs :
AttributeError: 'float' object has no attribute 'dot' : return X.dot(self.W) + self.b
Error when i push the button
Class 1:
class Example1:
self.linearregression = LinearRegression(self,None)
test = tk.DoubleVar(self)
test.set("1.0")
self.predictbutton = ttk.Button(self.plotframe, text = "predict", command = lambda:self.linearregression.predict(test.get()))
self.predictbutton.grid(row=1, column=0)
Class 2:
class LinearRegression():
def __init__(self, learning_rate, iterations):
self.learning_rate = learning_rate
self.iterations = iterations
# Function for model training
def fit(self, X, Y):
# no_of_training_examples, no_of_features
self.m, self.n = X.shape
# weight initialization
self.W = np.zeros(self.n)
self.b = 0
self.X = X
self.Y = Y
# gradient descent learning
for i in range(self.iterations):
self.update_weights()
return self
# Helper function to update weights in gradient descent
def update_weights(self):
Y_pred = self.predict(self.X)
# calculate gradients
dW = - (2 * (self.X.T).dot(self.Y - Y_pred)) / self.m
db = - 2 * np.sum(self.Y - Y_pred) / self.m
# update weights
self.W = self.W - self.learning_rate * dW
self.b = self.b - self.learning_rate * db
return self
# Hypothetical function h( x )
def predict(self, X):
return X.dot(self.W) + self.b

Simple VAE fails to reproduce long chains of repetitive inputs

I've been coding a VAE to process one-hot encoded strings (as 248x46 arrays, where 248 = the length of a padded string and 46 = the number of possible characters). I've been trying to regenerate input strings using the following model architecture. It works well enough on strings with a variety of characters, but on strings that are just repeating characters (e.g. "ccc...ccc" or "ababab...ababab") it does really poorly, and will just output random characters. This only happens once the repeating string gets to be around 80+ characters.
I'm using BCE loss and have been testing using a single string as input with 200 epochs worth of training. So I would think that the model should be overfitting, but I will consistently get random outputs e.g. "ccc...ccc" will become something like "#I4ccci4#c44Ic45I..."
Has anyone encountered this issue before, where repeating inputs can't seem to be learned properly by a VAE? What are some possible explanations for this?
class Encoder(nn.Module):
def __init__(self, latent_dim):
super(Encoder, self).__init__()
self.latent_dim = latent_dim
self.linear = nn.Linear(in_features=46, out_features=435)
self.linear_mu = nn.Linear(in_features=435, out_features=self.latent_dim)
self.linear_var = nn.Linear(in_features=435, out_features=self.latent_dim)
def forward(self, x):
x = self.linear(x)
mu = self.linear_mu(x)
log_var = self.linear_var(x)
return mu, log_var
class Decoder(nn.Module):
def __init__(self, latent_dim):
super(Decoder, self).__init__()
self.latent_dim = latent_dim
self.linear_1 = nn.Linear(in_features=self.latent_dim, out_features=435)
self.linear_2 = nn.Linear(in_features=435, out_features=46)
def forward(self, z):
x = self.linear_1(z)
x = self.linear_2(x)
out = F.softmax(x.view(-1, 248, 46), dim=1)
return out
class VAE(nn.Module):
def __init__(self, latent_dim=100):
super(VAE, self).__init__()
self.encoder = Encoder(latent_dim)
self.decoder = Decoder(latent_dim)
def sample(self, mu, log_var):
std = torch.exp(0.5 * log_var)
eps = torch.randn_like(std)
return eps * std + mu
def forward(self, x):
mu, log_var = self.encoder(x)
z = self.sample(mu, log_var)
out = self.decoder(z)
return out, mu, log_var

tf.function causes out of scope error when inputs passed to add_loss

Can loss terms be manually added with add_loss inside a tensorflow graph? The below example for using add_loss is largely copied from
https://www.tensorflow.org/guide/keras/custom_layers_and_models#the_add_loss_method
but with #tf.function added to the layer's call method.
import tensorflow as tf
from tensorflow import keras
def main():
layer = ActivityRegularizationLayer()
inputs = tf.constant(5.)
with tf.GradientTape() as tape:
y = layer(inputs)
loss = tf.reduce_mean(y)
loss += sum(layer.losses)
grad = tape.gradient(loss, layer.trainable_weights)
print(f"loss={float(loss)}, grad={grad}")
class ActivityRegularizationLayer(keras.layers.Layer):
def __init__(self, rate=1e-2):
super().__init__()
self.rate = rate
#tf.function
def call(self, inputs):
self.add_loss(self.rate * tf.reduce_sum(inputs))
return inputs
Running the above leads to the error
The tensor <tf.Tensor 'mul:0' shape=() dtype=float32> cannot be accessed from here, because it was defined in FuncGraph(name=call, id=46917885252656), which is out of scope.
Removing the decorator makes things run successfully
loss=5.050000190734863, grad=[]
as does removing the line adding sum(layer.losses) to the total loss
loss=5.0, grad=[]
Additional details
python 3.9.12
tensorflow 2.8.0
This is addressed here:
https://github.com/tensorflow/tensorflow/issues/32058#issuecomment-592664998
In summary, this is a known behavior and the solution is to "wrap your whole training step or training loop in a tf.function"
def main():
model = MyModel()
inputs = tf.constant(5.)
loss, grad = model.train_step(inputs)
print(f"loss={float(loss)}, grad={grad}")
class MyModel(keras.models.Model):
def __init__(self):
super().__init__()
self.reg = ActivityRegularizationLayer()
def call(self, inputs):
return self.reg(inputs)
#tf.function
def train_step(self, data):
with tf.GradientTape() as tape:
y = self(data)
loss = tf.reduce_mean(y)
loss += sum(self.losses)
grad = tape.gradient(loss, self.trainable_weights)
return loss, grad
class ActivityRegularizationLayer(keras.layers.Layer):
def __init__(self, rate=1e-2):
super().__init__()
self.rate = rate
def call(self, inputs):
self.add_loss(self.rate * tf.reduce_sum(inputs))
return inputs

using ModuleList, still getting ValueError: optimizer got an empty parameter list

With Pytorch I am attempting to use ModuleList to ensure model parameters are detected, and can be optimized. When calling the SGD optimizer I get the following error:
ValueError: optimizer got an empty parameter list
Can you please review the code below and advise?
class LR(nn.Module):
def ___init___(self):
super(LR, self).___init___()
self.linear = nn.ModuleList()
self.linear.append(nn.Linear(in_features=28*28, out_features=128, bias=True))
def forward(self, x):
y_p = torch.sigmoid(self.linear(x))
return y_p
LR_model = LR()
optimizer = torch.optim.SGD(params = LR_model.parameters(), lr=learn_rate)
This seems to be a copy-paste issue: your __init__ has 3 underscores instead of 2, both at __init__(self) and super(LR, self).__init__(). Thus the init itself failed. Delete the extra underscores and try again or try the below code:
class LR(nn.Module):
def __init__(self):
super(LR, self).__init__()
self.linear = nn.ModuleList()
self.linear.append(nn.Linear(in_features=28*28,
out_features=128,
bias=True))
def forward(self, x):
y_p = torch.sigmoid(self.linear(x))
return y_p
LR_model = LR()
optimizer = torch.optim.SGD(params = list(LR_model.parameters()),
lr=learn_rate)

Categories

Resources