import torch
import torch.nn as nn

class Parent(nn.Module):
    def __init__(self, in_features, z_dim, img_dim):
        super().__init__()
        self.my_child1 = Child1(z_dim, img_dim)
        self.my_child2 = Child2(in_features)

    def forward(self, input):
        input = self.my_child1(input)
        input = self.my_child2(input)
        return input

    def forward1(self, input):
        input = self.my_child1(input)
        return input

    def forward2(self, input):
        input = self.my_child2(input)
        return input
class Child2(nn.Module):
    def __init__(self, in_features):
        super().__init__()
        self.child2 = nn.Sequential(
            nn.Linear(in_features, 128),
            nn.LeakyReLU(0.01),
            nn.Linear(128, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        return self.child2(x)
class Child1(nn.Module):
    def __init__(self, z_dim, img_dim):
        super().__init__()
        self.child1 = nn.Sequential(
            nn.Linear(z_dim, 256),
            nn.LeakyReLU(0.01),
            nn.Linear(256, img_dim),
            nn.Tanh(),
        )

    def forward(self, x):
        return self.child1(x)
criterion = nn.BCELoss()
noise = torch.randn(batch_size, z_dim).to(device)
model = Parent(in_features, z_dim, img_dim)
output1 = model(noise)
loss1 = criterion(output1, torch.ones_like(output1))
loss2 = criterion(output1, torch.zeros_like(output1))
loss3 = (loss1 + loss2) / 2
model.zero_grad()
loss3.backward(retain_graph=True)
print(loss3.grad)
I have not used an optimizer here because the parameter update is done with a separate formula that I will apply only after I have the gradients. The formula requires the gradients to be stored in a matrix. However, the gradient always prints "None".
You can get the computed gradient for every parameter in your model with:
gradient = [el.grad for el in model.parameters()]
print(gradient)
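Note that loss3.grad is None because loss3 is not a leaf tensor; after backward() the gradients live on the leaf parameters, not on the loss. If the update formula needs them gathered into a single matrix or vector, a minimal sketch could look like the following (the flattening layout and the 0.01 step size are illustrative assumptions, not part of the question):

named_grads = {name: p.grad.clone() for name, p in model.named_parameters()}  # per-parameter gradients
flat_grad = torch.cat([p.grad.reshape(-1) for p in model.parameters()])       # all gradients as one flat vector
print(flat_grad.shape)

with torch.no_grad():                  # manual parameter update, no optimizer
    for p in model.parameters():
        p -= 0.01 * p.grad             # placeholder: substitute your own update formula here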
I am trying to train a genetic algorithm, but for some reason it does not work when it is stored inside a class. I have two equivalent pieces of code, but the one stored inside a class fails. It returns this:
    raise ValueError("The fitness function must accept 2 parameters:\n1) A solution to calculate its fitness value.\n2) The solution's index within the population.\n\nThe passed fitness function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=fitness_func.__code__.co_name, argcount=fitness_func.__code__.co_argcount))

ValueError: The fitness function must accept 2 parameters:
1) A solution to calculate its fitness value.
2) The solution's index within the population.

The passed fitness function named 'fitness_func' accepts 3 parameter(s).
Here is the simplified version of the one that doesn't work:
import torch
import torch.nn as nn
import pygad.torchga
import pygad

class NN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3 = nn.Linear(hidden_size, hidden_size)
        self.linear4 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.linear1(x)
        x = self.linear2(x)
        x = self.linear3(x)
        x = self.linear4(x)
        return x
class Coin:
    def __init__(self):
        self.NeuralNet = NN(1440, 1440, 3)

    def fitness_func(self, solution, solution_idx):
        return 0

    def trainModel(self):
        torch_ga = pygad.torchga.TorchGA(model=self.NeuralNet, num_solutions=10)
        ga_instance = pygad.GA(num_generations=10,
                               num_parents_mating=2,
                               initial_population=torch_ga.population_weights,
                               fitness_func=self.fitness_func)
        ga_instance.run()

if __name__ == "__main__":
    coin = Coin()
    coin.trainModel()
Here is the simplified version of the one that does work:
import torch
import torch.nn as nn
import pygad.torchga
import pygad

class NN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3 = nn.Linear(hidden_size, hidden_size)
        self.linear4 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.linear1(x)
        x = self.linear2(x)
        x = self.linear3(x)
        x = self.linear4(x)
        return x
def fitness_func(solution, solution_idx):
    return 0

def trainModel():
    NeuralNet = NN(1440, 1440, 3)
    torch_ga = pygad.torchga.TorchGA(model=NeuralNet, num_solutions=10)
    ga_instance = pygad.GA(num_generations=10,
                           num_parents_mating=2,
                           initial_population=torch_ga.population_weights,
                           fitness_func=fitness_func)
    ga_instance.run()

if __name__ == "__main__":
    trainModel()
Both of these should behave the same, but they don't.
When you look at the pygad code, you can see it explicitly checks that the fitness function accepts exactly two parameters:
# Check if the fitness function accepts 2 paramaters.
if (fitness_func.__code__.co_argcount == 2):
    self.fitness_func = fitness_func
else:
    self.valid_parameters = False
    raise ValueError("The fitness function must accept 2 parameters:\n1) A solution to calculate its fitness value.\n2) The solution's index within the population.\n\nThe passed fitness function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=fitness_func.__code__.co_name, argcount=fitness_func.__code__.co_argcount))
So if you want to use it in a class, you'll need to make it a static method so you aren't required to pass in self:

@staticmethod
def fitness_func(solution, solution_idx):
    return 0
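Dropped into the original Coin class, a minimal sketch might look like this (same class and method names as in the question; the fitness value is still just the placeholder 0):

class Coin:
    def __init__(self):
        self.NeuralNet = NN(1440, 1440, 3)

    @staticmethod
    def fitness_func(solution, solution_idx):
        # no `self` parameter, so pygad sees exactly 2 arguments
        return 0

    def trainModel(self):
        torch_ga = pygad.torchga.TorchGA(model=self.NeuralNet, num_solutions=10)
        ga_instance = pygad.GA(num_generations=10,
                               num_parents_mating=2,
                               initial_population=torch_ga.population_weights,
                               fitness_func=self.fitness_func)
        ga_instance.run()

Because the method is static, self.fitness_func is a plain function with __code__.co_argcount == 2, which satisfies pygad's check. It also means the fitness function cannot use self, so any real fitness computation has to reach the network some other way.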
I have a simple model:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(3, 10)
        self.fc2 = nn.Linear(10, 30)
        self.fc3 = nn.Linear(30, 2)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        return x

net = Model()
How can I keep the weights within a certain range (e.g. -1, 1) at all times?
I tried the following:
self.fc1 = torch.tanh(nn.Linear(3, 10))
I'm not entirely sure this would always keep them in that range anyway (even if the gradient update tries to push them farther). But I got the following error:
TypeError: tanh(): argument 'input' (position 1) must be Tensor, not Linear
According to discuss.pytorch, you can create an extra class to clip weights to a given range. Link to the discussion.
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(3, 10)
        self.fc2 = nn.Linear(10, 30)
        self.fc3 = nn.Linear(30, 2)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        return x
You should add a weight clipper:
class WeightClipper(object):
    def __call__(self, module):
        # filter the variables to get the ones you want
        if hasattr(module, 'weight'):
            w = module.weight.data
            w = w.clamp(-1, 1)
            module.weight.data = w

model = Model()
clipper = WeightClipper()
model.apply(clipper)
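A single model.apply(clipper) clips only once; to keep the weights in [-1, 1] throughout training, you would normally re-apply the clipper after each optimizer step. A minimal sketch of that loop, with a dummy loss, optimizer, and data that are assumptions rather than part of the question:

import torch.optim as optim

optimizer = optim.SGD(model.parameters(), lr=0.01)
criterion = nn.MSELoss()

inputs = torch.randn(32, 3)    # dummy batch matching fc1's in_features
targets = torch.randn(32, 2)   # dummy targets matching fc3's out_features

for epoch in range(100):
    optimizer.zero_grad()
    loss = criterion(model(inputs), targets)
    loss.backward()
    optimizer.step()
    model.apply(clipper)       # clamp the weights back into [-1, 1] after every update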
Is there any way to use a custom torch.autograd.Function in an nn.Sequential object, or do I have to use an nn.Module object with an explicit forward method? Specifically, I am trying to implement a sparse autoencoder and I need to add the L1 distance of the code (hidden representation) to the loss.
I have defined the custom torch.autograd.Function L1Penalty below and then tried to use it inside an nn.Sequential object as shown. However, when I run it I get the error TypeError: __main__.L1Penalty is not a Module subclass. How can I solve this issue?
class L1Penalty(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input, l1weight=0.1):
        ctx.save_for_backward(input)
        ctx.l1weight = l1weight
        return input, None

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_variables
        grad_input = input.clone().sign().mul(ctx.l1weight)
        grad_input += grad_output
        return grad_input
model = nn.Sequential(
    nn.Linear(10, 10),
    nn.ReLU(),
    nn.Linear(10, 6),
    nn.ReLU(),
    # sparsity
    L1Penalty(),
    nn.Linear(6, 10),
    nn.ReLU(),
    nn.Linear(10, 10),
    nn.ReLU()
).to(device)
The right way to do that would be this:

import torch, torch.nn as nn

class L1Penalty(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input, l1weight=0.1):
        ctx.save_for_backward(input)
        ctx.l1weight = l1weight
        return input

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        grad_input = input.clone().sign().mul(ctx.l1weight)
        grad_input += grad_output
        return grad_input, None  # one gradient per forward input; None for l1weight
Creating a Lambda class that acts as a wrapper
class Lambda(nn.Module):
    """
    Input: A Function
    Returns: A Module that can be used
        inside nn.Sequential
    """
    def __init__(self, func):
        super().__init__()
        self.func = func

    def forward(self, x):
        return self.func(x)
TA-DA!
model = nn.Sequential(
    nn.Linear(10, 10),
    nn.ReLU(),
    nn.Linear(10, 6),
    nn.ReLU(),
    # sparsity
    Lambda(L1Penalty.apply),
    nn.Linear(6, 10),
    nn.ReLU(),
    nn.Linear(10, 10),
    nn.ReLU())

a = torch.rand(50, 10)
b = model(a)
print(b.shape)
The nn.Module API seems to work fine but you should not return None in your L1Penalty forward method.
import torch, torch.nn as nn

class L1Penalty(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input, l1weight=0.1):
        ctx.save_for_backward(input)
        ctx.l1weight = l1weight
        return input

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        grad_input = input.clone().sign().mul(ctx.l1weight)
        grad_input += grad_output
        return grad_input, None  # one gradient per forward input; None for l1weight
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(10, 10)
        self.fc2 = nn.Linear(10, 6)
        self.fc3 = nn.Linear(6, 10)
        self.fc4 = nn.Linear(10, 10)
        self.relu = nn.ReLU(inplace=True)
        self.penalty = L1Penalty()

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.penalty.apply(x)
        x = self.fc3(x)
        x = self.relu(x)
        x = self.fc4(x)
        x = self.relu(x)
        return x

model = Model()
a = torch.rand(50, 10)
b = model(a)
print(b.shape)
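For completeness: since the stated goal is only to add the L1 norm of the code to the loss, a common alternative to a custom backward is to expose the code and add the penalty to the loss directly. This is a different technique from the answers above, and the encoder/decoder split and l1weight value below are illustrative assumptions:

import torch, torch.nn as nn
import torch.nn.functional as F

encoder = nn.Sequential(nn.Linear(10, 10), nn.ReLU(), nn.Linear(10, 6), nn.ReLU())
decoder = nn.Sequential(nn.Linear(6, 10), nn.ReLU(), nn.Linear(10, 10), nn.ReLU())

x = torch.rand(50, 10)
code = encoder(x)            # hidden representation
recon = decoder(code)

l1weight = 0.1
loss = F.mse_loss(recon, x) + l1weight * code.abs().mean()  # reconstruction loss + L1 sparsity term
loss.backward()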
First, using nn.Parameter:
class ModelOne(nn.Module):
    def __init__(self):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(300, 10))
        self.bias = nn.Parameter(torch.zeros(10))

    def forward(self, x):
        return x @ self.weights + self.bias
When I do
mo = ModelOne()
[len(param) for param in mo.parameters()]
it gives
[300, 10]
Second, using nn.Linear:
class ModelTwo(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(300, 10)

    def forward(self, x):
        return self.linear(x)
The same thing here gives
[10, 10]
The difference lies in how nn.Linear initializes weights and bias:
class Linear(Module):
    def __init__(self, in_features, out_features, bias=True):
        super(Linear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.Tensor(out_features, in_features))
        if bias:
            self.bias = Parameter(torch.Tensor(out_features))
        ...
So, when you write nn.Linear(300, 10), the weight has shape (10, 300) and the bias has shape (10). In ModelOne, however, weights has shape (300, 10).
You can confirm it using
for name, param in mo.named_parameters():
    print(name, param.shape)
The output in ModelOne:
weights torch.Size([300, 10])
bias torch.Size([10])
In ModelTwo:
linear.weight torch.Size([10, 300])
linear.bias torch.Size([10])
Now, the reason you're getting [300, 10] in the first case and [10, 10] in the second is that calling len() on a 2D tensor only returns its first dimension, i.e.
a = torch.Tensor(10, 300)
b = torch.Tensor(10)
print(len(a), len(b))
10 10
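If you want the two models to compute the same thing, keep in mind the layouts are transposed: nn.Linear stores weight as (out_features, in_features) and computes x @ weight.t() + bias, while ModelOne stores (in_features, out_features) and computes x @ weights + bias. A small sketch showing the equivalence (the tensor names here are just for illustration):

import torch
import torch.nn.functional as F

x = torch.randn(4, 300)
linear = torch.nn.Linear(300, 10)

out1 = linear(x)                                  # uses weight of shape (10, 300)
out2 = x @ linear.weight.t() + linear.bias        # same computation written ModelOne-style, after transposing
out3 = F.linear(x, linear.weight, linear.bias)    # functional form with the (out, in) layout

print(torch.allclose(out1, out2), torch.allclose(out1, out3))  # True True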
I have Python code with the following two classes.
import torch
import torch.nn as nn
import torch.nn.functional as F

class QNet_baseline(nn.Module):
    """
    An MLP with 2 hidden layers

    observation_dim (int): number of observation features
    action_dim (int): Dimension of each action
    seed (int): Random seed
    """
    def __init__(self, observation_dim, action_dim, seed):
        super(QNet_baseline, self).__init__()
        self.seed = torch.manual_seed(seed)
        self.fc1 = nn.Linear(observation_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, action_dim)

    def forward(self, observations):
        """
        Forward propagation of neural network
        """
        x = F.relu(self.fc1(observations))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
class QNet_3hidden(nn.Module):
    """
    An MLP with 3 hidden layers

    observation_dim (int): number of observation features
    action_dim (int): Dimension of each action
    seed (int): Random seed
    """
    def __init__(self, observation_dim, action_dim, seed):
        super(QNet_3hidden, self).__init__()
        self.seed = torch.manual_seed(seed)
        self.fc1 = nn.Linear(observation_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 64)
        self.fc4 = nn.Linear(64, action_dim)

    def forward(self, observations):
        """
        Forward propagation of neural network
        """
        x = F.relu(self.fc1(observations))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x
I used the same code to instantiate both classes. QNet_baseline works fine, but I got the following error for QNet_3hidden. Why would QNet_baseline work but QNet_3hidden raise an error? What did I miss here? Thanks!
/home/workspace/QNetworks.py in __init__(self, observation_dim, action_dim, seed)
     44
     45     def __init__(self, observation_dim, action_dim, seed):
---> 46         super(QNet_3hidden, self).__init__()
     47         self.seed = torch.manual_seed(seed)
     48         self.fc1 = nn.Linear(observation_dim, 128)

TypeError: super(type, obj): obj must be an instance or subtype of type
Also, below is how the two classes are instantiated:
class DDQN_Agent():
    """Interacts with and learns from the environment.

    Attributes:
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        seed (int): random seed
    """
    def __init__(self, state_size, action_size, seed, qnet="baseline", filename=None):
        """Initialize an Agent object.

        Args:
            filename: path of .pth file with trained weights
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        # Q-Network
        if qnet == "3hidden":
            self.qnetwork_local = QNet_3hidden(state_size, action_size, seed).to(device)
            self.qnetwork_target = QNet_3hidden(state_size, action_size, seed).to(device)
            self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)
        else:
            self.qnetwork_local = QNet_baseline(state_size, action_size, seed).to(device)
            self.qnetwork_target = QNet_baseline(state_size, action_size, seed).to(device)
            self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        if filename:
            weights = torch.load(filename)
            self.qnetwork_local.load_state_dict(weights)
            self.qnetwork_target.load_state_dict(weights)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
I encountered a similar problem, and completely restarting the kernel helped.
As suggested in this Comment by keitakurita:
Are you running the code in a Jupyter notebook and have not restarted the kernel? If so, there's a chance that your kernel is referencing the wrong class.
I suspect this might be the reason because I encountered the error after rewriting the class.
This would also explain why this is a hard-to-reproduce error. Following is a list of similar questions, to help keep track of them (a minimal sketch of the underlying mechanism comes after the list):
Gitmemory
From July '20
From Feb '18
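As a plain-Python illustration (not from the original post) of why a stale class reference triggers this exact TypeError, here is a minimal sketch of what re-running a class definition in a notebook effectively does:

class QNet_3hidden:          # first execution of the cell
    pass

OldQNet = QNet_3hidden       # something in the kernel still holds the old class object

class QNet_3hidden:          # cell re-run after editing: a brand-new class object is created
    pass

obj = QNet_3hidden()         # instance of the *new* class
super(OldQNet, obj)          # TypeError: super(type, obj): obj must be an instance or subtype of type

Because the instance belongs to the newly created class while super() is handed the old class object, the two no longer match, which is exactly what restarting the kernel fixes.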