I have a simple model:
class Model(nn.Module):
def __init__(self):
super(Model, self).__init__()
self.fc1 = nn.Linear(3, 10)
self.fc2 = nn.Linear(10, 30)
self.fc3 = nn.Linear(30, 2)
def forward(self, x):
x = torch.relu(self.fc1(x))
x = torch.relu(self.fc2(x))
x = torch.tanh(self.fc3(x))
return x
net = Model()
How can I keep the weights always within a certain range (e.g. -1 to 1)?
I tried the following:
self.fc1 = torch.tanh(nn.Linear(3, 10))
I'm not entirely sure that would always keep them in that range anyway (even if the gradient update tries to push them further).
But I got the following error:
TypeError: tanh(): argument 'input' (position 1) must be Tensor, not Linear
According to discuss.pytorch.org you can create an extra class to clip weights to a given range. Link to the discussion.
class Model(nn.Module):
def __init__(self):
super(Model, self).__init__()
self.fc1 = nn.Linear(3, 10)
self.fc2 = nn.Linear(10, 30)
self.fc3 = nn.Linear(30, 2)
def forward(self, x):
x = torch.relu(self.fc1(x))
x = torch.relu(self.fc2(x))
x = torch.tanh(self.fc3(x))
return x
You should add a weight clipper:
class WeightClipper(object):
def __call__(self, module):
# filter the variables to get the ones you want
if hasattr(module, 'weight'):
w = module.weight.data
w = w.clamp(-1,1)
module.weight.data = w
model = Model()
clipper = WeightClipper()
model.apply(clipper)
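Note that model.apply(clipper) clips the weights only once. To keep them inside the range throughout training, re-apply the clipper after every optimizer step. A minimal sketch (the optimizer, loss, and loader below are placeholders, not part of the original code):

optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
criterion = nn.MSELoss()

for inputs, targets in loader:        # any DataLoader yielding (N, 3) inputs
    optimizer.zero_grad()
    loss = criterion(model(inputs), targets)
    loss.backward()
    optimizer.step()
    model.apply(clipper)              # clamp all weights back into [-1, 1]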
I am trying to train a genetic algorithm, but for some reason it does not work when it's stored inside a class. I have two equivalent pieces of code, but the one stored inside a class fails with the following error:
ValueError: The fitness function must accept 2 parameters:
1) A solution to calculate its fitness value.
2) The solution's index within the population.
The passed fitness function named 'fitness_func' accepts 3 parameter(s).
Here is the simplified version of the one that doesn't work.
import torch
import torch.nn as nn
import pygad.torchga
import pygad
class NN(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super().__init__()
self.linear1 = nn.Linear(input_size, hidden_size)
self.linear2 = nn.Linear(hidden_size, hidden_size)
self.linear3 = nn.Linear(hidden_size, hidden_size)
self.linear4 = nn.Linear(hidden_size, output_size)
def forward(self, x):
x = self.linear1(x)
x = self.linear2(x)
x = self.linear3(x)
x = self.linear4(x)
return x
class Coin:
def __init__(self):
self.NeuralNet = NN(1440, 1440, 3)
def fitness_func(self, solution, solution_idx):
return 0
def trainModel(self):
torch_ga = pygad.torchga.TorchGA(model=self.NeuralNet, num_solutions=10)
ga_instance = pygad.GA(num_generations=10,
num_parents_mating=2,
initial_population=torch_ga.population_weights,
fitness_func=self.fitness_func)
ga_instance.run()
if __name__ == "__main__":
coin = Coin()
coin.trainModel()
Here is the simplified version of the one that does work.
import torch
import torch.nn as nn
import pygad.torchga
import pygad
class NN(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super().__init__()
self.linear1 = nn.Linear(input_size, hidden_size)
self.linear2 = nn.Linear(hidden_size, hidden_size)
self.linear3 = nn.Linear(hidden_size, hidden_size)
self.linear4 = nn.Linear(hidden_size, output_size)
def forward(self, x):
x = self.linear1(x)
x = self.linear2(x)
x = self.linear3(x)
x = self.linear4(x)
return x
def fitness_func(solution, solution_idx):
return 0
def trainModel():
NeuralNet = NN(1440, 1440, 3)
torch_ga = pygad.torchga.TorchGA(model=NeuralNet, num_solutions=10)
ga_instance = pygad.GA(num_generations=10,
num_parents_mating=2,
initial_population=torch_ga.population_weights,
fitness_func=fitness_func)
ga_instance.run()
if __name__ == "__main__":
trainModel()
Both of these should work the same, but they don't.
When you look at the pygad code you can see it's explicitly checking that the fitness function has exactly two parameters:
# Check if the fitness function accepts 2 paramaters.
if (fitness_func.__code__.co_argcount == 2):
self.fitness_func = fitness_func
else:
self.valid_parameters = False
raise ValueError("The fitness function must accept 2 parameters:\n1) A solution to calculate its fitness value.\n2) The solution's index within the population.\n\nThe passed fitness function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=fitness_func.__code__.co_name, argcount=fitness_func.__code__.co_argcount))
So if you want to use it in a class you'll need to make it a static method so you aren't required to pass in self:
@staticmethod
def fitness_func(solution, solution_idx):
return 0
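Putting it together, a minimal sketch of the class-based version with that fix applied (everything else is unchanged from the question's code):

class Coin:
    def __init__(self):
        self.NeuralNet = NN(1440, 1440, 3)

    @staticmethod
    def fitness_func(solution, solution_idx):
        # no self parameter, so co_argcount == 2 and pygad's check passes
        return 0

    def trainModel(self):
        torch_ga = pygad.torchga.TorchGA(model=self.NeuralNet, num_solutions=10)
        ga_instance = pygad.GA(num_generations=10,
                               num_parents_mating=2,
                               initial_population=torch_ga.population_weights,
                               fitness_func=self.fitness_func)
        ga_instance.run()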
Is there any way to use a pre-trained model as a layer in a custom net?
Pseudocode:
pretrained_model = torch.load('model')
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.model_layer = pretrained_model # ?
self.fc1 = nn.Linear(num_classes_of_model_layer, 320)
self.fc2 = nn.Linear(320, 160)
self.fc3 = nn.Linear(160, num_classes)
def forward(self, x):
x = pretrained_model. # ?
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
Yes, you can absolutely use another model as part of your Module, since the other model is itself an nn.Module.
Do:
self.model_layer = pretrained_model
and run inference as usual with x = self.model_layer(x)
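For example, here is a minimal sketch of the Net from the question; num_classes_of_model_layer and num_classes are placeholders for the pretrained model's output size and your task's number of classes, and 'model' is the question's placeholder path:

import torch
import torch.nn as nn
import torch.nn.functional as F

pretrained_model = torch.load('model')

num_classes_of_model_layer = 1000   # placeholder: output size of the pretrained model
num_classes = 10                    # placeholder: number of classes for your task

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.model_layer = pretrained_model   # the pretrained nn.Module becomes a submodule
        self.fc1 = nn.Linear(num_classes_of_model_layer, 320)
        self.fc2 = nn.Linear(320, 160)
        self.fc3 = nn.Linear(160, num_classes)
        # optionally freeze the pretrained weights:
        # for p in self.model_layer.parameters():
        #     p.requires_grad = False

    def forward(self, x):
        x = self.model_layer(x)               # run the pretrained model like any other layer
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x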
I want to implement the learned step size quantization algorithm, and I created a quantized Linear layer:
class QLinear(nn.Module):
def __init__(self, input_dim, out_dim, bits=8):
super(QLinear, self).__init__()
# create a tensor requires_grad=True
self.up = 2 ** bits - 1
self.down = 0
self.fc = nn.Linear(input_dim, out_dim)
weight = self.fc.weight.data
self.scale = nn.Parameter(torch.Tensor((torch.max(weight) - torch.min(weight)) / (self.up - self.down)), requires_grad=True)
self.zero_point = nn.Parameter(torch.Tensor(self.down - (torch.min(weight) / self.scale).round()), requires_grad=True)
def forward(self, x):
weight = self.fc.weight
quant_weight = (round_ste(weight / self.scale) + self.zero_point)
quant_weight = torch.clamp(quant_weight, self.down, self.up)
dequant_weight = ((quant_weight - self.zero_point) * self.scale)
self.fc.weight.data = dequant_weight
return self.fc(x)
class QNet(nn.Module):
def __init__(self):
super(QNet, self).__init__()
self.fc1 = QLinear(28 * 28, 100)
self.fc2 = QLinear(100, 10)
def forward(self, x):
x = x.view(-1, 28 * 28)
x = F.relu(self.fc1(x))
x = self.fc2(x)
x = F.softmax(x)
return x
When I train this network, scale's grad is always None. Why does this happen and how can I solve it?
The issue is that you are assigning dequant_weight through the .data attribute of your parameter, which bypasses autograd, so the operation never gets recorded in the graph. A simple alternative is to hold the weight as an nn.Parameter and apply the linear operation manually in the forward definition, using the computed dequant_weight directly.
Here is a minimal example that should work:
import torch
import torch.nn as nn
import torch.nn.functional as F

class QLinear(nn.Module):
    def __init__(self, input_dim, out_dim, bits=8):
        super().__init__()
        self.up = 2 ** bits - 1
        self.down = 0
        self.weight = nn.Parameter(torch.rand(out_dim, input_dim))
        # initialize scale and zero_point from the initial weight statistics;
        # detach so the new Parameters are created from plain leaf tensors
        self.scale = nn.Parameter(
            (self.weight.detach().max() - self.weight.detach().min()) / (self.up - self.down))
        self.zero_point = nn.Parameter(
            self.down - (self.weight.detach().min() / self.scale.detach()).round())

    def forward(self, x):
        # fake-quantize the weight; every operation stays in the autograd graph
        quant_weight = torch.round(self.weight / self.scale) + self.zero_point
        quant_weight = torch.clamp(quant_weight, self.down, self.up)
        dequant_weight = (quant_weight - self.zero_point) * self.scale
        return F.linear(x, dequant_weight)
Side notes:
nn.Parameter requires gradient computation by default (no need to pass requires_grad=True).
Additionally you can reformat QNet by inheriting from nn.Sequential to avoid boilerplate code:
class QNet(nn.Sequential):
    def __init__(self):
        super().__init__(nn.Flatten(),
                         QLinear(28 * 28, 100),
                         nn.ReLU(),
                         QLinear(100, 10),
                         nn.Softmax(dim=1))
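With this version the whole quantization path stays in the autograd graph, so scale and zero_point receive gradients. A quick sanity check with random data (shapes chosen to match QNet):

model = QNet()
x = torch.rand(4, 28 * 28)
model(x).sum().backward()

# both should now be tensors instead of None
print(model[1].scale.grad)
print(model[1].zero_point.grad)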
Is there any way to use a custom torch.autograd.Function in an nn.Sequential object, or do I have to explicitly use an nn.Module object with a forward function? Specifically, I am trying to implement a sparse autoencoder and I need to add the L1 distance of the code (hidden representation) to the loss.
I have defined the custom torch.autograd.Function L1Penalty below, and then tried to use it inside an nn.Sequential object as shown. However, when I run it I get the error TypeError: __main__.L1Penalty is not a Module subclass. How can I solve this issue?
class L1Penalty(torch.autograd.Function):
@staticmethod
def forward(ctx, input, l1weight = 0.1):
ctx.save_for_backward(input)
ctx.l1weight = l1weight
return input, None
@staticmethod
def backward(ctx, grad_output):
input, = ctx.saved_variables
grad_input = input.clone().sign().mul(ctx.l1weight)
grad_input+=grad_output
return grad_input
model = nn.Sequential(
nn.Linear(10, 10),
nn.ReLU(),
nn.Linear(10, 6),
nn.ReLU(),
# sparsity
L1Penalty(),
nn.Linear(6, 10),
nn.ReLU(),
nn.Linear(10, 10),
nn.ReLU()
).to(device)
The right way to do that would be this:
import torch, torch.nn as nn
class L1Penalty(torch.autograd.Function):
@staticmethod
def forward(ctx, input, l1weight = 0.1):
ctx.save_for_backward(input)
ctx.l1weight = l1weight
return input
@staticmethod
def backward(ctx, grad_output):
input, = ctx.saved_variables
grad_input = input.clone().sign().mul(ctx.l1weight)
grad_input+=grad_output
return grad_input
Then create a Lambda class that acts as a wrapper:
class Lambda(nn.Module):
"""
Input: A Function
Returns : A Module that can be used
inside nn.Sequential
"""
def __init__(self, func):
super().__init__()
self.func = func
def forward(self, x): return self.func(x)
TA-DA!
model = nn.Sequential(
nn.Linear(10, 10),
nn.ReLU(),
nn.Linear(10, 6),
nn.ReLU(),
# sparsity
Lambda(L1Penalty.apply),
nn.Linear(6, 10),
nn.ReLU(),
nn.Linear(10, 10),
nn.ReLU())
a = torch.rand(50,10)
b = model(a)
print(b.shape)
The nn.Module API seems to work fine too, but you should not return None from your L1Penalty forward method.
import torch, torch.nn as nn
class L1Penalty(torch.autograd.Function):
@staticmethod
def forward(ctx, input, l1weight = 0.1):
ctx.save_for_backward(input)
ctx.l1weight = l1weight
return input
@staticmethod
def backward(ctx, grad_output):
input, = ctx.saved_variables
grad_input = input.clone().sign().mul(ctx.l1weight)
grad_input+=grad_output
return grad_input
class Model(nn.Module):
def __init__(self):
super().__init__()
self.fc1 = nn.Linear(10,10)
self.fc2 = nn.Linear(10,6)
self.fc3 = nn.Linear(6,10)
self.fc4 = nn.Linear(10,10)
self.relu = nn.ReLU(inplace=True)
self.penalty = L1Penalty()
def forward(self, x):
x = self.fc1(x)
x = self.relu(x)
x = self.fc2(x)
x = self.relu(x)
x = self.penalty.apply(x)
x = self.fc3(x)
x = self.relu(x)
x = self.fc4(x)
x = self.relu(x)
return x
model = Model()
a = torch.rand(50,10)
b = model(a)
print(b.shape)
I want to build a stacked autoencoder or a recursive network. These are necessary to build a dynamic neural network, which can change its structure in each iteration.
For example, I first train
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.fc1 = nn.Linear(784,500)
self.fc2 = nn.Linear(500,784)
def forward(self, x):
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
return x
Next, I want to train using the previous fc1 and fc2:
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.fc1 = nn.Linear(784,500)
self.fc3 = nn.Linear(500,10)
self.fc4 = nn.Linear(10,500)
self.fc2 = nn.Linear(500,784)
def forward(self, x):
x = F.relu(self.fc1(x))
x = F.relu(self.fc3(x))
x = F.relu(self.fc4(x))
x = F.relu(self.fc2(x))
return x
How can I build these networks in a single model?
You can simply add an argument to your forward function, which can switch between the two possibilities you want:
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.fc1 = nn.Linear(784,500)
self.fc3 = nn.Linear(500,10)
self.fc4 = nn.Linear(10,500)
self.fc2 = nn.Linear(500,784)
def forward(self, x, n_layers=2):
if 2 == n_layers:
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
return x
elif 4 == n_layers:
x = F.relu(self.fc1(x))
x = F.relu(self.fc3(x))
x = F.relu(self.fc4(x))
x = F.relu(self.fc2(x))
return x
else:
raise Exception("Not implemented")
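For example, you could train the outer autoencoder first with n_layers=2 and then continue on the same instance with n_layers=4, so fc1 and fc2 keep their learned weights (the optimizer, loss, and loader below are placeholders):

net = Net()
optimizer = torch.optim.Adam(net.parameters())
criterion = nn.MSELoss()

# phase 1: train the outer autoencoder (fc1 -> fc2)
for x in loader:                  # batches of shape (N, 784)
    optimizer.zero_grad()
    loss = criterion(net(x, n_layers=2), x)
    loss.backward()
    optimizer.step()

# phase 2: reuse the trained fc1/fc2 and train the inner layers as well
for x in loader:
    optimizer.zero_grad()
    loss = criterion(net(x, n_layers=4), x)
    loss.backward()
    optimizer.step()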