Is there any way to use a "pre-trained model as a layer" in a custom net?
Pseudocode:

pretrained_model = torch.load('model')

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.model_layer = pretrained_model  # ?
        self.fc1 = nn.Linear(num_classes_of_model_layer, 320)
        self.fc2 = nn.Linear(320, 160)
        self.fc3 = nn.Linear(160, num_classes)

    def forward(self, x):
        x = pretrained_model.  # ?
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
Yes, you can absolutely use another model as part of your Module, since the other model is itself a Module.

Do:

self.model_layer = pretrained_model

and run inference as usual with x = self.model_layer(x).
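Putting it together, a minimal sketch of the custom net could look like the following. Here num_classes_of_model_layer and num_classes are the placeholders from the question, the loaded model is assumed to output a flat feature/logit vector, and freezing its weights is optional:

import torch
import torch.nn as nn
import torch.nn.functional as F

pretrained_model = torch.load('model')  # assumed to be a saved nn.Module

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.model_layer = pretrained_model  # registered as a submodule
        self.fc1 = nn.Linear(num_classes_of_model_layer, 320)
        self.fc2 = nn.Linear(320, 160)
        self.fc3 = nn.Linear(160, num_classes)

        # Optional: freeze the pretrained part so only the new layers are trained
        for p in self.model_layer.parameters():
            p.requires_grad = False

    def forward(self, x):
        x = self.model_layer(x)  # forward pass through the pretrained model
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)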
I am using PyTorch to train a deep learning model. I wonder if it is possible to save parts of the model weights separately. For example:
class my_model(nn.Module):
    def __init__(self):
        super(my_model, self).__init__()
        self.bert = transformers.AutoModel.from_pretrained(BERT_PATH)
        self.out = nn.Linear(768, 1)

    def forward(self, ids, mask, token_type):
        x = self.bert(ids, mask, token_type)[1]
        x = self.out(x)
        return x
I have the BERT model as the base model and an additional linear layer on top. After I train this model, can I save the weights of the BERT model and of this linear layer separately?
As an alternative to the previous answer, you can create two separate nn.Module classes, one for the BERT model and another for the linear layer:
class bert_model(nn.Module):
    def __init__(self):
        super(bert_model, self).__init__()
        self.bert = transformers.AutoModel.from_pretrained(BERT_PATH)

    def forward(self, ids, mask, token_type):
        x = self.bert(ids, mask, token_type)[1]
        return x

class linear_layer(nn.Module):
    def __init__(self):
        super(linear_layer, self).__init__()
        self.out = nn.Linear(768, 1)

    def forward(self, x):
        x = self.out(x)
        return x
Then you can save the two parts of the model separately with:
bert_model = bert_model()
linear_layer = linear_layer()

# train ...

# use two different file paths, otherwise the second save overwrites the first
torch.save(bert_model.state_dict(), BERT_SAVE_PATH)
torch.save(linear_layer.state_dict(), LINEAR_SAVE_PATH)
Alternatively, keeping the single my_model class, you can save both submodules' state dicts in one checkpoint:
model = my_model()
# train ...
torch.save({'bert': model.bert.state_dict(), 'out': model.out.state_dict()}, 'checkpoint.pth')
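To restore the weights later, a minimal sketch (assuming the same 'checkpoint.pth' file and the my_model class above) could be:

model = my_model()
checkpoint = torch.load('checkpoint.pth')

# load each part back into its own submodule
model.bert.load_state_dict(checkpoint['bert'])
model.out.load_state_dict(checkpoint['out'])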
I have a simple model:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(3, 10)
        self.fc2 = nn.Linear(10, 30)
        self.fc3 = nn.Linear(30, 2)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        return x

net = Model()
How can I keep the weights always within a certain range (e.g. -1 to 1)?
I tried the following:
self.fc1 = torch.tanh(nn.Linear(3, 10))
which I'm not entirely sure would always keep them in that range anyway (even if a gradient update tries to push them outside it).
But I got the following error:
TypeError: tanh(): argument 'input' (position 1) must be Tensor, not Linear
According to this thread on discuss.pytorch.org, you can create an extra class that clamps the weights to a given range. Link to the discussion.
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(3, 10)
        self.fc2 = nn.Linear(10, 30)
        self.fc3 = nn.Linear(30, 2)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        return x
You should add a weight clipper:
class WeightClipper(object):
    def __call__(self, module):
        # filter the variables to get the ones you want
        if hasattr(module, 'weight'):
            w = module.weight.data
            w = w.clamp(-1, 1)
            module.weight.data = w

model = Model()
clipper = WeightClipper()
model.apply(clipper)
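Note that a single model.apply(clipper) only clamps the weights once; to enforce the constraint throughout training, you would re-apply it after every optimizer step. A minimal sketch, where the optimizer, loss, and the loader/inputs/targets are hypothetical placeholders:

model = Model()
clipper = WeightClipper()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
criterion = nn.MSELoss()

for inputs, targets in loader:  # 'loader' is a hypothetical data loader
    optimizer.zero_grad()
    loss = criterion(model(inputs), targets)
    loss.backward()
    optimizer.step()
    model.apply(clipper)  # clamp all weights back into [-1, 1] after each update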
Is there any way I can use a custom torch.autograd.Function in an nn.Sequential object, or do I have to use an nn.Module object with an explicit forward function? Specifically, I am trying to implement a sparse autoencoder, and I need to add the L1 distance of the code (hidden representation) to the loss.

I have defined the custom torch.autograd.Function L1Penalty below, then tried to use it inside an nn.Sequential object as shown. However, when I run it I get the error TypeError: __main__.L1Penalty is not a Module subclass. How can I solve this issue?
class L1Penalty(torch.autograd.Function):

    @staticmethod
    def forward(ctx, input, l1weight=0.1):
        ctx.save_for_backward(input)
        ctx.l1weight = l1weight
        return input, None

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_variables
        grad_input = input.clone().sign().mul(ctx.l1weight)
        grad_input += grad_output
        return grad_input
model = nn.Sequential(
    nn.Linear(10, 10),
    nn.ReLU(),
    nn.Linear(10, 6),
    nn.ReLU(),
    # sparsity
    L1Penalty(),
    nn.Linear(6, 10),
    nn.ReLU(),
    nn.Linear(10, 10),
    nn.ReLU()
).to(device)
The right way to do this would be:
import torch, torch.nn as nn

class L1Penalty(torch.autograd.Function):

    @staticmethod
    def forward(ctx, input, l1weight=0.1):
        ctx.save_for_backward(input)
        ctx.l1weight = l1weight
        return input

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors  # saved_variables is deprecated
        grad_input = input.clone().sign().mul(ctx.l1weight)
        grad_input += grad_output
        return grad_input
Then create a Lambda class that acts as a wrapper:
class Lambda(nn.Module):
    """
    Input: a function
    Returns: a Module that can be used inside nn.Sequential
    """
    def __init__(self, func):
        super().__init__()
        self.func = func

    def forward(self, x):
        return self.func(x)
TA-DA!
model = nn.Sequential(
    nn.Linear(10, 10),
    nn.ReLU(),
    nn.Linear(10, 6),
    nn.ReLU(),
    # sparsity
    Lambda(L1Penalty.apply),
    nn.Linear(6, 10),
    nn.ReLU(),
    nn.Linear(10, 10),
    nn.ReLU())

a = torch.rand(50, 10)
b = model(a)
print(b.shape)
The nn.Module API seems to work fine, but you should not return None from your L1Penalty forward method.
import torch, torch.nn as nn

class L1Penalty(torch.autograd.Function):

    @staticmethod
    def forward(ctx, input, l1weight=0.1):
        ctx.save_for_backward(input)
        ctx.l1weight = l1weight
        return input

    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors  # saved_variables is deprecated
        grad_input = input.clone().sign().mul(ctx.l1weight)
        grad_input += grad_output
        return grad_input
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(10, 10)
        self.fc2 = nn.Linear(10, 6)
        self.fc3 = nn.Linear(6, 10)
        self.fc4 = nn.Linear(10, 10)
        self.relu = nn.ReLU(inplace=True)
        self.penalty = L1Penalty()

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.penalty.apply(x)
        x = self.fc3(x)
        x = self.relu(x)
        x = self.fc4(x)
        x = self.relu(x)
        return x

model = Model()
a = torch.rand(50, 10)
b = model(a)
print(b.shape)
I have Python code with the following two classes.
import torch
import torch.nn as nn
import torch.nn.functional as F

class QNet_baseline(nn.Module):
    """
    An MLP with 2 hidden layers

    observation_dim (int): number of observation features
    action_dim (int): dimension of each action
    seed (int): random seed
    """
    def __init__(self, observation_dim, action_dim, seed):
        super(QNet_baseline, self).__init__()
        self.seed = torch.manual_seed(seed)
        self.fc1 = nn.Linear(observation_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, action_dim)

    def forward(self, observations):
        """
        Forward propagation of the neural network
        """
        x = F.relu(self.fc1(observations))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

class QNet_3hidden(nn.Module):
    """
    An MLP with 3 hidden layers

    observation_dim (int): number of observation features
    action_dim (int): dimension of each action
    seed (int): random seed
    """
    def __init__(self, observation_dim, action_dim, seed):
        super(QNet_3hidden, self).__init__()
        self.seed = torch.manual_seed(seed)
        self.fc1 = nn.Linear(observation_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 64)
        self.fc4 = nn.Linear(64, action_dim)

    def forward(self, observations):
        """
        Forward propagation of the neural network
        """
        x = F.relu(self.fc1(observations))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x
I used the same code to instantiate both classes. QNet_baseline works fine, but I got the following error for QNet_3hidden. Why does QNet_baseline work while QNet_3hidden raises an error? What did I miss here? Thanks!
/home/workspace/QNetworks.py in __init__(self, observation_dim, action_dim, seed)
     44
     45     def __init__(self, observation_dim, action_dim, seed):
---> 46         super(QNet_3hidden, self).__init__()
     47         self.seed = torch.manual_seed(seed)
     48         self.fc1 = nn.Linear(observation_dim, 128)

TypeError: super(type, obj): obj must be an instance or subtype of type
Also, below is how the two classes are instantiated:
class DDQN_Agent():
    """Interacts with and learns from the environment.

    Attributes:
        state_size (int): dimension of each state
        action_size (int): dimension of each action
        seed (int): random seed
    """
    def __init__(self, state_size, action_size, seed, qnet="baseline", filename=None):
        """Initialize an Agent object.

        Args:
            filename: path of .pth file with trained weights
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        # Q-Network
        if qnet == "3hidden":
            self.qnetwork_local = QNet_3hidden(state_size, action_size, seed).to(device)
            self.qnetwork_target = QNet_3hidden(state_size, action_size, seed).to(device)
            self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)
        else:
            self.qnetwork_local = QNet_baseline(state_size, action_size, seed).to(device)
            self.qnetwork_target = QNet_baseline(state_size, action_size, seed).to(device)
            self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        if filename:
            weights = torch.load(filename)
            self.qnetwork_local.load_state_dict(weights)
            self.qnetwork_target.load_state_dict(weights)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)

        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0
I encountered a similar problem, and completely restarting the kernel helped.
As suggested in this Comment by keitakurita:
Are you running the code in a Jupyter notebook and have not restarted the kernel? If so, there's a chance that your kernel is referencing the wrong class.
I suspect this might be the reason, because I encountered the error after rewriting the class. It would also explain why this is a hard-to-reproduce error. Following is a list of similar questions, to help keep track of the same:
Gitmemory
From July '20
From Feb '18
I want to build a stacked autoencoder or a recursive network. These require a dynamic neural network, one that can change its structure in each iteration.
For example, I first train:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 500)
        self.fc2 = nn.Linear(500, 784)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x
Next, I want to train using the previous fc1 and fc2:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 500)
        self.fc3 = nn.Linear(500, 10)
        self.fc4 = nn.Linear(10, 500)
        self.fc2 = nn.Linear(500, 784)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = F.relu(self.fc2(x))
        return x
How can I build these networks in a single model?
You can simply add an argument to your forward function, which can switch between the two possibilities you want:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 500)
        self.fc3 = nn.Linear(500, 10)
        self.fc4 = nn.Linear(10, 500)
        self.fc2 = nn.Linear(500, 784)

    def forward(self, x, n_layers=2):
        if n_layers == 2:
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            return x
        elif n_layers == 4:
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc3(x))
            x = F.relu(self.fc4(x))
            x = F.relu(self.fc2(x))
            return x
        else:
            raise Exception("Not implemented")
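For the two-phase training described in the question, a minimal sketch of how this single model might be used. The optimizers, loss, and the batch variable are hypothetical placeholders; since the second phase reuses the same fc1 and fc2 instances, the weights learned in phase 1 carry over automatically:

import torch
import torch.nn.functional as F

net = Net()

# Phase 1: train only the 2-layer autoencoder (fc1 and fc2)
opt1 = torch.optim.Adam([*net.fc1.parameters(), *net.fc2.parameters()])
opt1.zero_grad()
out = net(batch, n_layers=2)   # 'batch' is a hypothetical input tensor of shape (N, 784)
loss = F.mse_loss(out, batch)
loss.backward()
opt1.step()

# Phase 2: keep the trained fc1/fc2 and also train fc3 and fc4
opt2 = torch.optim.Adam(net.parameters())
opt2.zero_grad()
out = net(batch, n_layers=4)
loss = F.mse_loss(out, batch)
loss.backward()
opt2.step()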