torch Parameter grad returns None - Python

I want to implement the learned step size quantization algorithm, so I created a quantized Linear layer:
class QLinear(nn.Module):
    def __init__(self, input_dim, out_dim, bits=8):
        super(QLinear, self).__init__()
        # create a tensor requires_grad=True
        self.up = 2 ** bits - 1
        self.down = 0
        self.fc = nn.Linear(input_dim, out_dim)
        weight = self.fc.weight.data
        self.scale = nn.Parameter(torch.Tensor((torch.max(weight) - torch.min(weight)) / (self.up - self.down)), requires_grad=True)
        self.zero_point = nn.Parameter(torch.Tensor(self.down - (torch.min(weight) / self.scale).round()), requires_grad=True)

    def forward(self, x):
        weight = self.fc.weight
        quant_weight = round_ste(weight / self.scale) + self.zero_point
        quant_weight = torch.clamp(quant_weight, self.down, self.up)
        dequant_weight = (quant_weight - self.zero_point) * self.scale
        self.fc.weight.data = dequant_weight
        return self.fc(x)
class QNet(nn.Module):
    def __init__(self):
        super(QNet, self).__init__()
        self.fc1 = QLinear(28 * 28, 100)
        self.fc2 = QLinear(100, 10)

    def forward(self, x):
        x = x.view(-1, 28 * 28)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = F.softmax(x)
        return x
When I train this network, scale's grad is always None. Why does this happen and how can I solve it?

The issue is that you are assigning dequant_weight through the data attribute of your parameter, which is not tracked by autograd. A simple alternative is to keep the weight as an nn.Parameter and apply the linear operator manually in the forward definition, directly with the computed dequant_weight.
Here is a minimal example that should work:
class QLinear(nn.Module):
    def __init__(self, input_dim, out_dim, bits=8):
        super().__init__()
        self.up = 2 ** bits - 1
        self.down = 0
        self.weight = nn.Parameter(torch.rand(out_dim, input_dim))
        self.scale = nn.Parameter(
            torch.Tensor((self.weight.max() - self.weight.min()) / (self.up - self.down)))
        self.zero_point = nn.Parameter(
            torch.Tensor(self.down - (self.weight.min() / self.scale).round()))

    def forward(self, x):
        quant_weight = torch.round(self.weight / self.scale) + self.zero_point
        quant_weight = torch.clamp(quant_weight, self.down, self.up)
        dequant_weight = (quant_weight - self.zero_point) * self.scale
        return F.linear(x, dequant_weight)
Side notes:
nn.Parameter requires gradient computation by default (no need to pass requires_grad=True).
Additionally, you can reformulate QNet by inheriting from nn.Sequential to avoid boilerplate code:
class QNet(nn.Sequential):
    def __init__(self):
        super().__init__(nn.Flatten(),
                         QLinear(28 * 28, 100),
                         nn.ReLU(),
                         QLinear(100, 10),
                         nn.Softmax(dim=1))
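As a quick sanity check (a sketch, assuming the QLinear above and the usual torch imports), scale.grad should now be populated after a backward pass:

layer = QLinear(16, 4)              # hypothetical small layer, not from the original post
out = layer(torch.rand(2, 16))      # random input batch
out.sum().backward()
print(layer.scale.grad)             # a tensor now, instead of None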

Related

Why does PyGad fitness_function not work when inside of a class?

I am trying to train a genetic algorithm, but for some reason it does not work when it's stored inside of a class. I have two equivalent pieces of code, but the one stored inside of a class fails with this error:
ValueError: The fitness function must accept 2 parameters:
    1) A solution to calculate its fitness value.
    2) The solution's index within the population.
The passed fitness function named 'fitness_func' accepts 3 parameter(s).
Here is the simplified version of the one that doesn't work:
import torch
import torch.nn as nn
import pygad.torchga
import pygad

class NN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3 = nn.Linear(hidden_size, hidden_size)
        self.linear4 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.linear1(x)
        x = self.linear2(x)
        x = self.linear3(x)
        x = self.linear4(x)
        return x

class Coin:
    def __init__(self):
        self.NeuralNet = NN(1440, 1440, 3)

    def fitness_func(self, solution, solution_idx):
        return 0

    def trainModel(self):
        torch_ga = pygad.torchga.TorchGA(model=self.NeuralNet, num_solutions=10)
        ga_instance = pygad.GA(num_generations=10,
                               num_parents_mating=2,
                               initial_population=torch_ga.population_weights,
                               fitness_func=self.fitness_func)
        ga_instance.run()

if __name__ == "__main__":
    coin = Coin()
    coin.trainModel()
Here is the simplified version of the one that does work:
import torch
import torch.nn as nn
import pygad.torchga
import pygad

class NN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3 = nn.Linear(hidden_size, hidden_size)
        self.linear4 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.linear1(x)
        x = self.linear2(x)
        x = self.linear3(x)
        x = self.linear4(x)
        return x

def fitness_func(solution, solution_idx):
    return 0

def trainModel():
    NeuralNet = NN(1440, 1440, 3)
    torch_ga = pygad.torchga.TorchGA(model=NeuralNet, num_solutions=10)
    ga_instance = pygad.GA(num_generations=10,
                           num_parents_mating=2,
                           initial_population=torch_ga.population_weights,
                           fitness_func=fitness_func)
    ga_instance.run()

if __name__ == "__main__":
    trainModel()
Both of these should work the same, but they don't.
When you look at the pygad code you can see it's explicitly checking that the fitness function has exactly two parameters:
# Check if the fitness function accepts 2 paramaters.
if (fitness_func.__code__.co_argcount == 2):
    self.fitness_func = fitness_func
else:
    self.valid_parameters = False
    raise ValueError("The fitness function must accept 2 parameters:\n1) A solution to calculate its fitness value.\n2) The solution's index within the population.\n\nThe passed fitness function named '{funcname}' accepts {argcount} parameter(s).".format(funcname=fitness_func.__code__.co_name, argcount=fitness_func.__code__.co_argcount))
So if you want to use it in a class you'll need to make it a static method so you aren't required to pass in self:
@staticmethod
def fitness_func(solution, solution_idx):
    return 0
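Applied to the class from the question, that looks roughly like this (a sketch; the zero-returning fitness function is kept as a placeholder):

class Coin:
    def __init__(self):
        self.NeuralNet = NN(1440, 1440, 3)

    @staticmethod
    def fitness_func(solution, solution_idx):   # no self, so co_argcount == 2
        return 0

    def trainModel(self):
        torch_ga = pygad.torchga.TorchGA(model=self.NeuralNet, num_solutions=10)
        ga_instance = pygad.GA(num_generations=10,
                               num_parents_mating=2,
                               initial_population=torch_ga.population_weights,
                               fitness_func=self.fitness_func)
        ga_instance.run()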

AttributeError: 'float' object has no attribute 'dot' : return X.dot(self.W) + self.b

I have a problem that I cannot solve.
I have two classes: class 1 deals with the view, class 2 with the calculation.
Class 1 accesses class 2. In class 1 there is an instance of class 2 used to access the predict method. The predict method has one argument, which I set from the input field in class 1. Then this error always occurs:
AttributeError: 'float' object has no attribute 'dot' : return X.dot(self.W) + self.b
The error occurs when I press the button.
Class 1:
class Example1:
    self.linearregression = LinearRegression(self, None)
    test = tk.DoubleVar(self)
    test.set("1.0")
    self.predictbutton = ttk.Button(self.plotframe, text="predict",
                                    command=lambda: self.linearregression.predict(test.get()))
    self.predictbutton.grid(row=1, column=0)
Class 2:
class LinearRegression():
    def __init__(self, learning_rate, iterations):
        self.learning_rate = learning_rate
        self.iterations = iterations

    # Function for model training
    def fit(self, X, Y):
        # no_of_training_examples, no_of_features
        self.m, self.n = X.shape
        # weight initialization
        self.W = np.zeros(self.n)
        self.b = 0
        self.X = X
        self.Y = Y
        # gradient descent learning
        for i in range(self.iterations):
            self.update_weights()
        return self

    # Helper function to update weights in gradient descent
    def update_weights(self):
        Y_pred = self.predict(self.X)
        # calculate gradients
        dW = - (2 * (self.X.T).dot(self.Y - Y_pred)) / self.m
        db = - 2 * np.sum(self.Y - Y_pred) / self.m
        # update weights
        self.W = self.W - self.learning_rate * dW
        self.b = self.b - self.learning_rate * db
        return self

    # Hypothetical function h( x )
    def predict(self, X):
        return X.dot(self.W) + self.b
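A minimal sketch of what triggers this error, assuming NumPy: tk.DoubleVar.get() returns a plain Python float, which has no .dot method, while predict expects an array-like input such as a NumPy array.

import numpy as np

W = np.zeros(1)
b = 0.0

x = 1.0                      # a plain float, as returned by test.get()
# x.dot(W) + b               # AttributeError: 'float' object has no attribute 'dot'

x = np.array([1.0])          # wrapping the value in an array works
print(x.dot(W) + b)          # 0.0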

Forcing NN weights to always be in a certain range

I have a simple model:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(3, 10)
        self.fc2 = nn.Linear(10, 30)
        self.fc3 = nn.Linear(30, 2)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        return x

net = Model()
How can I keep the weights always between certain values (e.g. -1 and 1)?
I tried the following:
self.fc1 = torch.tanh(nn.Linear(3, 10))
which I'm not entirely sure would always keep them in that range anyway (even if the gradient update tries to push them farther), but I got the following error:
TypeError: tanh(): argument 'input' (position 1) must be Tensor, not Linear
According to discuss.pytorch.org, you can create an extra class to clip the weights to a given range. Link to the discussion.
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(3, 10)
        self.fc2 = nn.Linear(10, 30)
        self.fc3 = nn.Linear(30, 2)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        return x
You should add a weight clipper:
class WeightClipper(object):
    def __call__(self, module):
        # filter the variables to get the ones you want
        if hasattr(module, 'weight'):
            w = module.weight.data
            w = w.clamp(-1, 1)
            module.weight.data = w

model = Model()
clipper = WeightClipper()
model.apply(clipper)
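Note that model.apply(clipper) clips the weights only at the moment it is called, so to keep them in range during training you would re-apply it after every optimizer step. A sketch, assuming a standard loss/optimizer setup and a hypothetical DataLoader named loader:

import torch

optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
criterion = torch.nn.MSELoss()

for x, y in loader:               # hypothetical (input, target) batches
    optimizer.zero_grad()
    loss = criterion(model(x), y)
    loss.backward()
    optimizer.step()
    model.apply(clipper)          # clamp weights back into [-1, 1]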

While performing contrastive divergence for the Boltzmann machine, I am getting the following error (I am using the latest version of PyTorch)

class RBM():
    def __init__(self, nv, nh):
        self.W = torch.randn(nh, nv)
        self.a = torch.randn(1, nh)
        self.b = torch.randn(1, nv)

    def sample_h(self, x):
        wx = torch.mm(x, self.W.t())
        activation = wx + self.a.expand_as(wx)
        p_h_given_v = torch.sigmoid(activation)
        return p_h_given_v, torch.bernoulli(p_h_given_v)

    def sample_v(self, y):
        wy = torch.mm(y, self.W)
        activation = wy + self.b.expand_as(wy)
        p_v_given_h = torch.sigmoid(activation)
        return p_v_given_h, torch.bernoulli(p_v_given_h)

    def train(self, v0, vk, ph0, phk):
        self.W += torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)
        self.b += torch.sum((v0 - vk), 0)
        self.a += torch.sum((ph0 - phk), 0)
Error:
in train(self, v0, vk, ph0, phk)
     19         return p_v_given_h, torch.bernoulli(p_v_given_h)
     20     def train(self, v0, vk, ph0, phk):
---> 21         self.W += torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)
     22         self.b += torch.sum((v0 - vk), 0)
     23         self.a += torch.sum((ph0 - phk), 0)
RuntimeError: The expanded size of the tensor (1682) must match the existing size (100) at non-singleton dimension 1
print(rbm.W.size()) will show you torch.Size([100, 1682])
print((torch.mm(v0.t(), ph0)-torch.mm(vk.t(), phk)).size()) will show you torch.Size([1682, 100])
so it looks like it should be
self.W += (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)).t()
instead of
self.W += torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)
Change this line:
self.W += torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)
To this:
self.W += (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)).t()
Try this:
def __init__(self, nv, nh):
    self.W = torch.randn(nv, nh)      # was torch.randn(nh, nv)
    self.a = torch.randn(1, nh)
    self.b = torch.randn(1, nv)

def sample_h(self, x):
    wx = torch.mm(x, self.W)          # was torch.mm(x, self.W.t())
    activation = wx + self.a.expand_as(wx)
    p_h_given_v = torch.sigmoid(activation)
    return p_h_given_v, torch.bernoulli(p_h_given_v)

def sample_v(self, y):
    wy = torch.mm(y, self.W.t())      # was torch.mm(y, self.W)
    activation = wy + self.b.expand_as(wy)
    p_v_given_h = torch.sigmoid(activation)
    return p_v_given_h, torch.bernoulli(p_v_given_h)
Whenever you have such a dimension error, try writing the dimensions of each variable next to it. As mentioned above, you have to take the transpose in the first line of the train function.
class RBM():
    def __init__(self, nv, nh):
        self.W = torch.randn(nh, nv)   # 100*1682
        self.a = torch.randn(1, nh)    # 1*100
        self.b = torch.randn(1, nv)    # 1*1682

    def sample_h(self, x):
        wx = torch.mm(x, self.W.t())   # 100*1682 * 1682*100 = 100*100
        activation = wx + self.a.expand_as(wx)
        p_h_given_v = torch.sigmoid(activation)   # 100*100
        return p_h_given_v, torch.bernoulli(p_h_given_v)

    def sample_v(self, y):
        wy = torch.mm(y, self.W)       # 100*100 * 100*1682 = 100*1682
        activation = wy + self.b.expand_as(wy)
        p_v_given_h = torch.sigmoid(activation)   # 100*1682
        return p_v_given_h, torch.bernoulli(p_v_given_h)

    def train(self, v0, vk, ph0, phk):
        # 1682*100 * 100*100 - 1682*100 * 100*100 = 1682*100, so transpose to match W (100*1682)
        self.W += (torch.mm(v0.t(), ph0) - torch.mm(vk.t(), phk)).t()   # 100*1682
        self.b += torch.sum((v0 - vk), 0)
        self.a += torch.sum((ph0 - phk), 0)
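For completeness, a minimal sketch of how this RBM is typically driven with k-step contrastive divergence. The names batches and data_batch are placeholders (not from the original post), assumed to yield binary tensors of shape batch_size x nv:

rbm = RBM(nv=1682, nh=100)
k = 10

for data_batch in batches:            # hypothetical iterable of (batch_size, nv) tensors
    v0 = data_batch
    vk = data_batch.clone()
    ph0, _ = rbm.sample_h(v0)         # hidden probabilities, positive phase
    for _ in range(k):                # k steps of Gibbs sampling
        _, hk = rbm.sample_h(vk)
        _, vk = rbm.sample_v(hk)
    phk, _ = rbm.sample_h(vk)         # hidden probabilities, negative phase
    rbm.train(v0, vk, ph0, phk)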

How to change the structure of a model in PyTorch

I want to build a stacked auto-encoder or a recursive network. These require a dynamic neural network that can change its structure in each iteration.
For example, I first train:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 500)
        self.fc2 = nn.Linear(500, 784)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x
Next, I want to train using the previous fc1 and fc2:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 500)
        self.fc3 = nn.Linear(500, 10)
        self.fc4 = nn.Linear(10, 500)
        self.fc2 = nn.Linear(500, 784)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = F.relu(self.fc2(x))
        return x
How can I build these networks in a single model?
You can simply add an argument to your forward function, which can switch between the two possibilities you want:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 500)
        self.fc3 = nn.Linear(500, 10)
        self.fc4 = nn.Linear(10, 500)
        self.fc2 = nn.Linear(500, 784)

    def forward(self, x, n_layers=2):
        if n_layers == 2:
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            return x
        elif n_layers == 4:
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc3(x))
            x = F.relu(self.fc4(x))
            x = F.relu(self.fc2(x))
            return x
        else:
            raise Exception("Not implemented")
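Used this way, both training phases share the same fc1/fc2 parameters. A sketch of staged auto-encoder training on a hypothetical random batch (the optimizer and loss are assumptions, not from the original question):

import torch
import torch.nn.functional as F

net = Net()
optimizer = torch.optim.Adam(net.parameters())
x = torch.rand(32, 784)                       # hypothetical batch of flattened images

# stage 1: train the shallow fc1 -> fc2 path
loss = F.mse_loss(net(x, n_layers=2), x)
optimizer.zero_grad()
loss.backward()
optimizer.step()

# stage 2: the same fc1/fc2 weights are reused, now with fc3/fc4 in between
loss = F.mse_loss(net(x, n_layers=4), x)
optimizer.zero_grad()
loss.backward()
optimizer.step()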
