What is the difference between these two neural network structures?

First, using nn.Parameter:
class ModelOne(nn.Module):
    def __init__(self):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(300, 10))
        self.bias = nn.Parameter(torch.zeros(10))

    def forward(self, x):
        return x @ self.weights + self.bias
When I do
mo = ModelOne()
[len(param) for param in mo.parameters()]
it gives
[300, 10]
Second, using nn.Linear:
class ModelTwo(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(300, 10)

    def forward(self, x):
        return self.linear(x)
The same thing here gives
[10, 10]

The difference lies in how nn.Linear initializes weights and bias:
class Linear(Module):
    def __init__(self, in_features, out_features, bias=True):
        super(Linear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.Tensor(out_features, in_features))
        if bias:
            self.bias = Parameter(torch.Tensor(out_features))
        ...
So, when you write nn.Linear(300, 10), the weight has shape (10, 300) and the bias has shape (10). In ModelOne, by contrast, weights has shape (300, 10).
You can confirm it using
for name, param in mo.named_parameters():
    print(name, param.shape)
The output in ModelOne:
weights torch.Size([300, 10])
bias torch.Size([10])
In ModelTwo:
linear.weight torch.Size([10, 300])
linear.bias torch.Size([10])
Now, the reason you get [300, 10] in the first case and [10, 10] in the second is that calling len on a 2-D tensor only returns the size of its first dimension:
a = torch.Tensor(10, 300)
b = torch.Tensor(10)
print(len(a), len(b))
10 10
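If what you actually want is the number of elements in each parameter rather than len (which only looks at the first dimension), here is a small sketch using numel on the ModelOne instance mo from above:
for p in mo.parameters():
    print(p.shape, p.numel())  # torch.Size([300, 10]) 3000, then torch.Size([10]) 10
total = sum(p.numel() for p in mo.parameters())  # 3010 trainable scalars in total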


torch Parameter grad returns None

I want to implement a learned step size quantization algorithm, so I created a quantized Linear layer:
class QLinear(nn.Module):
    def __init__(self, input_dim, out_dim, bits=8):
        super(QLinear, self).__init__()
        # create tensors with requires_grad=True
        self.up = 2 ** bits - 1
        self.down = 0
        self.fc = nn.Linear(input_dim, out_dim)
        weight = self.fc.weight.data
        self.scale = nn.Parameter(torch.Tensor((torch.max(weight) - torch.min(weight)) / (self.up - self.down)), requires_grad=True)
        self.zero_point = nn.Parameter(torch.Tensor(self.down - (torch.min(weight) / self.scale).round()), requires_grad=True)

    def forward(self, x):
        weight = self.fc.weight
        # round_ste: straight-through-estimator round, defined elsewhere
        quant_weight = round_ste(weight / self.scale) + self.zero_point
        quant_weight = torch.clamp(quant_weight, self.down, self.up)
        dequant_weight = (quant_weight - self.zero_point) * self.scale
        self.fc.weight.data = dequant_weight
        return self.fc(x)

class QNet(nn.Module):
    def __init__(self):
        super(QNet, self).__init__()
        self.fc1 = QLinear(28 * 28, 100)
        self.fc2 = QLinear(100, 10)

    def forward(self, x):
        x = x.view(-1, 28 * 28)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = F.softmax(x, dim=1)
        return x
When I train this network, scale's grad is always None. Why does this happen, and how can I solve it?
The issue is that you are writing dequant_weight into the .data attribute of your parameter, which is not tracked by autograd, so no gradient ever flows back to scale or zero_point. A simple alternative is to store the weight as an nn.Parameter and apply the linear operation manually in forward, directly with the computed dequant_weight.
Here is a minimal example that should work:
class QLinear(nn.Module):
    def __init__(self, input_dim, out_dim, bits=8):
        super().__init__()
        self.up = 2 ** bits - 1
        self.down = 0
        self.weight = nn.Parameter(torch.rand(out_dim, input_dim))
        # initial scale/zero-point from the weight statistics; detach() so
        # each nn.Parameter is built from a proper leaf tensor
        self.scale = nn.Parameter(
            ((self.weight.max() - self.weight.min()) / (self.up - self.down)).detach())
        self.zero_point = nn.Parameter(
            (self.down - (self.weight.min() / self.scale).round()).detach())

    def forward(self, x):
        quant_weight = torch.round(self.weight / self.scale) + self.zero_point
        quant_weight = torch.clamp(quant_weight, self.down, self.up)
        dequant_weight = (quant_weight - self.zero_point) * self.scale
        return F.linear(x, dequant_weight)
Side notes:
nn.Parameter requires gradient computation by default, so there is no need to pass requires_grad=True.
Additionally, you can reformulate QNet by inheriting from nn.Sequential to avoid boilerplate code:
class QNet(nn.Sequential):
    def __init__(self):
        super().__init__(
            nn.Flatten(),
            QLinear(28 * 28, 100),
            nn.ReLU(),
            QLinear(100, 10),
            nn.Softmax(dim=1))
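A quick way to check the fix, as a sketch with made-up sizes (using the QLinear above): after a backward pass, scale.grad is now a tensor instead of None.
layer = QLinear(8, 4)
out = layer(torch.randn(2, 8))
out.sum().backward()
print(layer.scale.grad)       # a tensor, no longer None
print(layer.zero_point.grad)  # likewise populated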

PyTorch computes gradients as None

class Parent(nn.Module):
    def __init__(self, in_features, z_dim, img_dim):
        super().__init__()
        self.my_child1 = Child1(z_dim, img_dim)
        self.my_child2 = Child2(in_features)

    def forward(self, input):
        input = self.my_child1(input)
        input = self.my_child2(input)
        return input

    def forward1(self, input):
        input = self.my_child1(input)
        return input

    def forward2(self, input):
        input = self.my_child2(input)
        return input

class Child2(nn.Module):
    def __init__(self, in_features):
        super().__init__()
        self.child2 = nn.Sequential(
            nn.Linear(in_features, 128),
            nn.LeakyReLU(0.01),
            nn.Linear(128, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        return self.child2(x)

class Child1(nn.Module):
    def __init__(self, z_dim, img_dim):
        super().__init__()
        self.child1 = nn.Sequential(
            nn.Linear(z_dim, 256),
            nn.LeakyReLU(0.01),
            nn.Linear(256, img_dim),
            nn.Tanh(),
        )

    def forward(self, x):
        return self.child1(x)
criterion = nn.BCELoss()
noise = torch.randn(batch_size, z_dim).to(device)
model = Parent(in_features, z_dim, img_dim)
output1 = model(noise)
loss1 = criterion(output1, torch.ones_like(output1))
loss2 = criterion(output1, torch.zeros_like(output1))
loss3 = (loss1 + loss2) / 2
model.zero_grad()
loss3.backward(retain_graph=True)
print(loss3.grad)
I have not used an optimizer here because the parameters are updated using a separate formula, which I will apply only after I get the gradients. The formula requires the gradients to be stored in a matrix. However, the gradient always prints "None".
loss3.grad is None because .grad is only populated on leaf tensors that require gradients (such as your model's parameters), not on intermediate results like the loss. You can get the computed gradient for every parameter in your model with:
gradient = [el.grad for el in model.parameters()]
print(gradient)
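If your update formula needs all gradients in a single matrix or vector, here is a sketch (assuming backward has already been called) that flattens and concatenates them:
grads = torch.cat([p.grad.flatten() for p in model.parameters()])
print(grads.shape)  # one long vector holding every parameter gradient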

Forcing NN weights to always be in a certain range

I have a simple model:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(3, 10)
        self.fc2 = nn.Linear(10, 30)
        self.fc3 = nn.Linear(30, 2)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        return x

net = Model()
How can I keep the weights between certain values (e.g. -1 and 1)?
I tried the following:
self.fc1 = torch.tanh(nn.Linear(3, 10))
I'm not entirely sure this would keep them in that range even if a gradient update tried to push them out of it, but in any case I got the following error:
TypeError: tanh(): argument 'input' (position 1) must be Tensor, not Linear
According to this discuss.pytorch.org thread, you can create an extra class that clips the weights to a given range. Link to the discussion.
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(3, 10)
        self.fc2 = nn.Linear(10, 30)
        self.fc3 = nn.Linear(30, 2)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        return x
You should add a weight clipper:
class WeightClipper(object):
    def __call__(self, module):
        # filter the modules to get the ones you want
        if hasattr(module, 'weight'):
            w = module.weight.data
            w = w.clamp(-1, 1)
            module.weight.data = w

model = Model()
clipper = WeightClipper()
model.apply(clipper)
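Note that model.apply(clipper) clamps the weights once; to enforce the range throughout training, re-apply it after every optimizer step. A sketch, with the optimizer, criterion, and data loader assumed:
for x, y in loader:
    optimizer.zero_grad()
    loss = criterion(model(x), y)
    loss.backward()
    optimizer.step()
    model.apply(clipper)  # re-clamp the weights after each update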

Inconsistent error: TypeError: super(type, obj): obj must be an instance or subtype of type

I have Python code with the following two classes.
import torch
import torch.nn as nn
import torch.nn.functional as F
class QNet_baseline(nn.Module):
    """
    An MLP with 2 hidden layers.

    observation_dim (int): number of observation features
    action_dim (int): dimension of each action
    seed (int): random seed
    """
    def __init__(self, observation_dim, action_dim, seed):
        super(QNet_baseline, self).__init__()
        self.seed = torch.manual_seed(seed)
        self.fc1 = nn.Linear(observation_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, action_dim)

    def forward(self, observations):
        """Forward propagation of the neural network"""
        x = F.relu(self.fc1(observations))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

class QNet_3hidden(nn.Module):
    """
    An MLP with 3 hidden layers.

    observation_dim (int): number of observation features
    action_dim (int): dimension of each action
    seed (int): random seed
    """
    def __init__(self, observation_dim, action_dim, seed):
        super(QNet_3hidden, self).__init__()
        self.seed = torch.manual_seed(seed)
        self.fc1 = nn.Linear(observation_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 64)
        self.fc4 = nn.Linear(64, action_dim)

    def forward(self, observations):
        """Forward propagation of the neural network"""
        x = F.relu(self.fc1(observations))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x
I used the same code to instantiate both classes. QNet_baseline works fine, but I get the following error for QNet_3hidden. Why would QNet_baseline work while QNet_3hidden raises an error? What did I miss here? Thanks!
/home/workspace/QNetworks.py in __init__(self, observation_dim, action_dim, seed)
     44
     45     def __init__(self, observation_dim, action_dim, seed):
---> 46         super(QNet_3hidden, self).__init__()
     47         self.seed = torch.manual_seed(seed)
     48         self.fc1 = nn.Linear(observation_dim, 128)

TypeError: super(type, obj): obj must be an instance or subtype of type
Also, below is how the two classes are instantiated:
class DDQN_Agent():
    """Interacts with and learns from the environment.

    Attributes:
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        seed (int): random seed
    """
    def __init__(self, state_size, action_size, seed, qnet="baseline", filename=None):
        """Initialize an Agent object.

        Args:
            filename: path of .pth file with trained weights
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        # Q-Network
        if qnet == "3hidden":
            self.qnetwork_local = QNet_3hidden(state_size, action_size, seed).to(device)
            self.qnetwork_target = QNet_3hidden(state_size, action_size, seed).to(device)
            self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)
        else:
            self.qnetwork_local = QNet_baseline(state_size, action_size, seed).to(device)
            self.qnetwork_target = QNet_baseline(state_size, action_size, seed).to(device)
            self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        if filename:
            weights = torch.load(filename)
            self.qnetwork_local.load_state_dict(weights)
            self.qnetwork_target.load_state_dict(weights)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
I encountered a similar problem, and completely restarting the kernel helped.
As suggested in this comment by keitakurita:
Are you running the code in a Jupyter notebook and have not restarted the kernel? If so, there's a chance that your kernel is referencing the wrong class.
I suspect this is the reason, because I hit the error right after rewriting the class. It would also explain why the error is hard to reproduce. The following is a list of similar questions, to help keep track of the issue:
Gitmemory
From July '20
From Feb '18
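For intuition, here is a minimal reproduction of the mechanism outside a notebook (hypothetical class names): super(QNet, self) looks up the name QNet at call time, so if the class has since been re-defined (as happens when a notebook cell is re-run), self is still an instance of the old class object and the check fails.
class QNet:
    def reset(self):
        # resolves the *current* global QNet at call time
        super(QNet, self).__init__()

obj = QNet()   # instance of the original class object

class QNet:    # re-defined, e.g. by re-running a notebook cell
    pass

obj.reset()    # TypeError: super(type, obj): obj must be an instance or subtype of type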

How to change the structure of a model in PyTorch

I want to build a stacked autoencoder or a recursive network. These require a dynamic neural network that can change its structure at each iteration.
For example, I first train
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 500)
        self.fc2 = nn.Linear(500, 784)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x
Next, I want to train using the previous fc1 and fc2:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 500)
        self.fc3 = nn.Linear(500, 10)
        self.fc4 = nn.Linear(10, 500)
        self.fc2 = nn.Linear(500, 784)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = F.relu(self.fc2(x))
        return x
How can I build these networks in a single model?
You can simply add an argument to your forward function, which can switch between the two possibilities you want:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 500)
        self.fc3 = nn.Linear(500, 10)
        self.fc4 = nn.Linear(10, 500)
        self.fc2 = nn.Linear(500, 784)

    def forward(self, x, n_layers=2):
        if n_layers == 2:
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            return x
        elif n_layers == 4:
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc3(x))
            x = F.relu(self.fc4(x))
            x = F.relu(self.fc2(x))
            return x
        else:
            raise NotImplementedError(f"n_layers={n_layers} not supported")
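A usage sketch with a made-up batch, switching between the two depths while sharing fc1 and fc2:
net = Net()
x = torch.randn(8, 784)
shallow = net(x, n_layers=2)  # original two-layer autoencoder path
deep = net(x, n_layers=4)     # stacked path through fc3/fc4, reusing fc1 and fc2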
