Separately save the model weights in PyTorch - python

I am using PyTorch to train a deep learning model. I am wondering whether it is possible to save the model weights separately. For example:
class my_model(nn.Module):
    def __init__(self):
        super(my_model, self).__init__()
        self.bert = transformers.AutoModel.from_pretrained(BERT_PATH)
        self.out = nn.Linear(768, 1)

    def forward(self, ids, mask, token_type):
        x = self.bert(ids, mask, token_type)[1]
        x = self.out(x)
        return x
I have the BERT model as the base model and an additional linear layer on top. After I train this model, can I save the weights for the BERT model and for the linear layer separately?

As an alternative to the other answer, you can create two separate nn.Module classes: one for the BERT model and another for the linear layer:
class bert_model(nn.Module):
    def __init__(self):
        super(bert_model, self).__init__()
        self.bert = transformers.AutoModel.from_pretrained(BERT_PATH)

    def forward(self, ids, mask, token_type):
        x = self.bert(ids, mask, token_type)[1]
        return x


class linear_layer(nn.Module):
    def __init__(self):
        super(linear_layer, self).__init__()
        self.out = nn.Linear(768, 1)

    def forward(self, x):
        x = self.out(x)
        return x
Then you can save the two parts of the model separately with:
bert = bert_model()
linear = linear_layer()

# ... train ...

# save each part to its own file so one does not overwrite the other
torch.save(bert.state_dict(), BERT_WEIGHTS_PATH)
torch.save(linear.state_dict(), LINEAR_WEIGHTS_PATH)
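To use them again later, each state dict can be loaded back into a freshly constructed module; a minimal sketch, assuming the two placeholder paths used above:

bert = bert_model()
linear = linear_layer()
bert.load_state_dict(torch.load(BERT_WEIGHTS_PATH))
linear.load_state_dict(torch.load(LINEAR_WEIGHTS_PATH))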

Alternatively, you can keep the original my_model and save each sub-module's state dict into a single checkpoint file:
model = my_model()
# train ...
torch.save({'bert': model.bert.state_dict(), 'out': model.out.state_dict()}, 'checkpoint.pth')
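Loading works the same way in reverse; a short sketch assuming the checkpoint file saved above:

model = my_model()
checkpoint = torch.load('checkpoint.pth')
model.bert.load_state_dict(checkpoint['bert'])
model.out.load_state_dict(checkpoint['out'])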

Related

PyTorch: How to create a Parameter without specifying the dimension

Say I want to define a module. In this module, the __init__() function will create a Parameter called self.weight without knowing the input_dim of the module. My question is: how can I expand self.weight and initialize it when I first call the forward() function?
For example, I want my module to look like this:
class MyModel(torch.nn.Module):
    def __init__(self, out_dim):
        super(MyModel, self).__init__()
        # I don't know the input_dim yet
        self.weight = torch.nn.Parameter(torch.FloatTensor(None, out_dim))
        self.init_weight = False

    def init_parameters(self, in_dim):
        # what should I do in this function?
        # Is this correct?
        self.weight = self.weight.expand(in_dim, -1)
        torch.nn.init.xavier_normal_(self.weight)
        self.init_weight = True

    def forward(self, X):
        if not self.init_weight:
            # first call, so now I can initialize the weight since I know the input_dim
            self.init_parameters(X.shape[1])
        # do some forward ops
        return torch.sigmoid(torch.matmul(X, self.weight))
And my training code looks like this (the parameter self.weight is passed to the optimizer right after I create the model):
def train(X_train, y_train):
    model = MyModel(y_train.shape[1])
    optimize = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_fn = torch.nn.MSELoss()
    for epoch in range(10000):
        optimize.zero_grad()
        prediction = model(X_train)
        loss = loss_fn(prediction, y_train)
        loss.backward()
        optimize.step()
In the end, it works for me the way I explained in the comments: allocate the weight parameter right inside the init_parameters function.
import torch


class MyModel(torch.nn.Module):
    def __init__(self, out_dim):
        super(MyModel, self).__init__()
        self.weight = torch.nn.Parameter(torch.FloatTensor([0.0]))
        self.out_dim = out_dim
        self.init_weight = False

    def init_parameters(self, in_dim):
        self.weight = torch.nn.Parameter(torch.FloatTensor(in_dim, self.out_dim), requires_grad=True)
        torch.nn.init.xavier_normal_(self.weight)
        self.init_weight = True

    def forward(self, X):
        if not self.init_weight:
            # first call, so now I can initialize the weight since I know the input_dim
            self.init_parameters(X.shape[1])
        # do some forward ops
        result = torch.sigmoid(torch.matmul(X, self.weight))
        print(X.shape, result.shape)
        return result


def train(X_train, y_train):
    model = torch.nn.Sequential(MyModel(out_dim=100), MyModel(out_dim=20))
    optimize = torch.optim.Adam(model.parameters(), lr=1e-3)
    loss_fn = torch.nn.MSELoss()
    for epoch in range(10000):
        # print('.', end='')
        optimize.zero_grad()
        prediction = model(X_train)
        loss = loss_fn(prediction, y_train)
        loss.backward()
        optimize.step()


batch_size, in_dim, out_dim = 100, 5, 20
X_train = torch.randn((batch_size, in_dim))
y_train = torch.randn((batch_size, out_dim))
train(X_train, y_train)
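As a side note (not part of the original answer): recent PyTorch versions (1.8+) ship lazy modules such as torch.nn.LazyLinear, which defer weight allocation until the first forward pass. A minimal sketch of the same shape-inference idea; the dry-run forward materializes the parameters before the optimizer is built:

import torch
import torch.nn as nn

model = nn.Sequential(nn.LazyLinear(100), nn.Sigmoid(), nn.LazyLinear(20), nn.Sigmoid())
X_train = torch.randn(100, 5)
_ = model(X_train)  # dry run: in_features is inferred here and the weights are created
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)  # now sees the real parameters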

Why does my Keras custom layer only get called once?

I have to work with tensorflow 1.15 and need a custom layer. A very simplistic layer can look like this:
class Dummy(keras.layers.Layer):
    def __init__(self, units=32, input_dim=32):
        super(Dummy, self).__init__()
        self.cnt = 1

    def call(self, inputs):
        self.cnt += 1
        return inputs
If I use this Dummy layer in any architecture, the variable cnt only ever gets set to two. What am I missing?
Here is a very simplistic dummy script to showcase my issue:
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Conv2D, Activation
from tensorflow import set_random_seed
from numpy.random import seed

seed(312991)
set_random_seed(3121991)


class Dummy(keras.layers.Layer):
    def __init__(self, units=32, input_dim=32):
        super(Dummy, self).__init__()
        self.cnt = 1

    def call(self, inputs):
        self.cnt += 1
        return inputs


# creating the input image
input_img = np.ones(shape=(8, 8, 3))
# adjust range
input_img_adjusted = input_img / 255
target = input_img_adjusted[:, :, 0:2]

model = Sequential()
model.add(Conv2D(2, (3, 3), input_shape=input_img.shape, padding='same'))
model.add(Dummy())
model.add(Activation('sigmoid'))

opt = keras.optimizers.Adam(0.001)
model.compile(optimizer=opt,
              loss="mean_absolute_error")
hist = model.fit(np.array(2048*[input_img_adjusted]), np.array(2048*[target]), epochs=100, batch_size=32)
print("called the Dummy Layer:", model.layers[-2].cnt)
My assumption would have been that it ends up at something like 32, or 32*100, or a similarly large count of calls.
In graph mode (TF 1.x), call() only runs while Keras traces the layer to build the graph, so a plain Python attribute is incremented just once. To count the actual executions you have to use a tf.Variable for the counter and assign_add for the increment, so that the update becomes part of the graph:
import tensorflow as tf


class Dummy(keras.layers.Layer):
    def __init__(self, units=32, input_dim=32):
        super(Dummy, self).__init__()
        self.cnt = tf.Variable(1, trainable=False)

    def call(self, inputs):
        self.cnt = self.cnt.assign_add(1)
        return inputs
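To read the counter after training, the variable's value can be fetched through the Keras backend; a small sketch, assuming the model from the question above and that the assign_add op is actually executed as part of the graph in TF 1.15:

from keras import backend as K

# after model.fit(...)
dummy_layer = model.layers[-2]  # the Dummy layer sits before the final activation
print("called the Dummy Layer:", K.get_value(dummy_layer.cnt))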

using ModuleList, still getting ValueError: optimizer got an empty parameter list

With PyTorch I am attempting to use ModuleList to ensure the model parameters are detected and can be optimized. When calling the SGD optimizer I get the following error:
ValueError: optimizer got an empty parameter list
Can you please review the code below and advise?
class LR(nn.Module):
    def ___init___(self):
        super(LR, self).___init___()
        self.linear = nn.ModuleList()
        self.linear.append(nn.Linear(in_features=28*28, out_features=128, bias=True))

    def forward(self, x):
        y_p = torch.sigmoid(self.linear(x))
        return y_p

LR_model = LR()
optimizer = torch.optim.SGD(params=LR_model.parameters(), lr=learn_rate)
This seems to be a copy-paste issue: your __init__ has 3 underscores instead of 2, both at __init__(self) and at super(LR, self).__init__(). Because of that, your init never actually runs, so the ModuleList (and its parameters) is never created and the parameter list is empty. Delete the extra underscores and try again, or try the code below. (Note also that an nn.ModuleList is not callable, so forward has to index into it or iterate over it.)
class LR(nn.Module):
    def __init__(self):
        super(LR, self).__init__()
        self.linear = nn.ModuleList()
        self.linear.append(nn.Linear(in_features=28*28,
                                     out_features=128,
                                     bias=True))

    def forward(self, x):
        # nn.ModuleList is not callable, so apply its entries explicitly
        for layer in self.linear:
            x = layer(x)
        y_p = torch.sigmoid(x)
        return y_p

LR_model = LR()
optimizer = torch.optim.SGD(params=list(LR_model.parameters()),
                            lr=learn_rate)
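A quick way to confirm that the parameters are now registered before building the optimizer; a short check assuming the fixed class above:

LR_model = LR()
n_params = sum(p.numel() for p in LR_model.parameters())
print(n_params)  # 28*28*128 + 128 = 100480, so the parameter list is no longer empty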

Why weight constraints and dropout cannot be used in custom layers in Keras

I would like to implement dropout and a weight constraint in a custom layer. The program does not report an error, but neither of these two features has any effect.
The custom layer is:
class Softmax_Decode(Layer):
    """ A layer which uses a key to decode a sparse representation into a softmax.

    Makes it easier to train spiking classifiers by allowing the use of
    softmax and categorical-crossentropy loss. Allows for encodings that are
    n-hot where 'n' is the number of outputs assigned to each class. Allows
    encodings to overlap, where a given output neuron can contribute
    to the probability of more than one class.

    # Arguments
        key: A numpy array (num_classes, input_dims) with an input_dim-sized
            {0,1}-vector representative for each class.
        size: A tuple (num_classes, input_dim). If ``key`` is not specified, then
            size must be specified. In which case, a key will automatically be generated.
    """
    def __init__(self, key=None, size=None, kernel_regularizer=None, kernel_constraint=None, dropout=0.0, **kwargs):
        super(Softmax_Decode, self).__init__(**kwargs)
        self.key = _key_check(key, size)
        if type(self.key) is dict and 'value' in self.key.keys():
            self.key = np.array(self.key['value'], dtype=np.float32)
        elif type(self.key) is list:
            self.key = np.array(self.key, dtype=np.float32)
        #self._rescaled_key = K.variable(np.transpose(2*self.key-1))
        self._rescaled_key = K.variable(2*np.transpose(self.key)-1)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.dropout = dropout

    def build(self, input_shape):
        self.kernel = self.add_weight(name='kernel',
                                      #shape=(input_shape[1], self.key.shape[1]),
                                      initializer='uniform',
                                      regularizer=self.kernel_regularizer,
                                      constraint=self.kernel_constraint)
        super(Softmax_Decode, self).build(input_shape)

    def call(self, inputs):
        #return K.softmax(K.dot(2*(1-inputs),self._rescaled_key))
        return K.softmax(K.dot(2*inputs-1, self._rescaled_key))

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.key.shape[0])

    def get_config(self):
        config = {
            'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
            'kernel_constraint': constraints.serialize(self.kernel_constraint)
        }
        base_config = super(Softmax_Decode, self).get_config()
        return dict(list(base_config.items()) + [('key', self.key)])
main code:
model.add(Softmax_Decode(key,kernel_constraint=MinMaxNorm(min_value=0, max_value=1.0, rate=1.0, axis=0),dropout=0.3))
Would you help me? Thank you so much for your time.
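For reference, this is roughly how a custom Keras layer usually wires in a kernel constraint and dropout. This is a generic sketch, not a fix for the layer above: the class name and rates are illustrative, the constraint only acts on weights created through add_weight (with an explicit shape) that are actually used in call(), and dropout is applied in call() via the backend's training-phase switch:

from keras import backend as K
from keras import constraints
from keras.layers import Layer


class ConstrainedDense(Layer):
    def __init__(self, units, kernel_constraint=None, dropout=0.0, **kwargs):
        super(ConstrainedDense, self).__init__(**kwargs)
        self.units = units
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.dropout = dropout

    def build(self, input_shape):
        # the constraint is enforced on this weight because it is created
        # through add_weight with a shape and used in call()
        self.kernel = self.add_weight(name='kernel',
                                      shape=(input_shape[1], self.units),
                                      initializer='uniform',
                                      constraint=self.kernel_constraint)
        super(ConstrainedDense, self).build(input_shape)

    def call(self, inputs, training=None):
        x = K.dot(inputs, self.kernel)
        if 0.0 < self.dropout < 1.0:
            # drop activations only during the training phase
            x = K.in_train_phase(K.dropout(x, self.dropout), x, training=training)
        return x

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.units)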

MultiLayer Neural Network

Below is the code (taken from somewhere on the internet) that I am trying to use for a multilayer neural network.
import math
import random

BIAS = -1

"""
To view the structure of the Neural Network, type
print network_name
"""


class Neuron:
    def __init__(self, n_inputs):
        self.n_inputs = n_inputs
        self.set_weights([random.uniform(0, 1) for x in range(0, n_inputs + 1)])  # +1 for bias weight

    def sum(self, inputs):
        # Does not include the bias
        return sum(val * self.weights[i] for i, val in enumerate(inputs))

    def set_weights(self, weights):
        self.weights = weights

    def __str__(self):
        return 'Weights: %s, Bias: %s' % (str(self.weights[:-1]), str(self.weights[-1]))


class NeuronLayer:
    def __init__(self, n_neurons, n_inputs):
        self.n_neurons = n_neurons
        self.neurons = [Neuron(n_inputs) for _ in range(0, self.n_neurons)]

    def __str__(self):
        return 'Layer:\n\t' + '\n\t'.join([str(neuron) for neuron in self.neurons]) + ''


class NeuralNetwork:
    def __init__(self, n_inputs, n_outputs, n_neurons_to_hl, n_hidden_layers):
        self.n_inputs = n_inputs
        self.n_outputs = n_outputs
        self.n_hidden_layers = n_hidden_layers
        self.n_neurons_to_hl = n_neurons_to_hl
        # Do not touch
        self._create_network()
        self._n_weights = None
        # end

    def _create_network(self):
        if self.n_hidden_layers > 0:
            # create the first layer
            self.layers = [NeuronLayer(self.n_neurons_to_hl, self.n_inputs)]
            # create hidden layers
            self.layers += [NeuronLayer(self.n_neurons_to_hl, self.n_neurons_to_hl) for _ in range(0, self.n_hidden_layers)]
            # hidden-to-output layer
            self.layers += [NeuronLayer(self.n_outputs, self.n_neurons_to_hl)]
        else:
            # If we don't require hidden layers
            self.layers = [NeuronLayer(self.n_outputs, self.n_inputs)]

    def get_weights(self):
        weights = []
        for layer in self.layers:
            for neuron in layer.neurons:
                weights += neuron.weights
        return weights

    @property
    def n_weights(self):
        if not self._n_weights:
            self._n_weights = 0
            for layer in self.layers:
                for neuron in layer.neurons:
                    self._n_weights += neuron.n_inputs + 1  # +1 for bias weight
        return self._n_weights

    def set_weights(self, weights):
        assert len(weights) == self.n_weights, "Incorrect amount of weights."
        stop = 0
        for layer in self.layers:
            for neuron in layer.neurons:
                start, stop = stop, stop + (neuron.n_inputs + 1)
                neuron.set_weights(weights[start:stop])
        return self

    def update(self, inputs):
        assert len(inputs) == self.n_inputs, "Incorrect amount of inputs."
        for layer in self.layers:
            outputs = []
            for neuron in layer.neurons:
                tot = neuron.sum(inputs) + neuron.weights[-1] * BIAS
                outputs.append(self.sigmoid(tot))
            inputs = outputs
        return outputs

    def sigmoid(self, activation, response=1):
        # the activation function
        try:
            return 1 / (1 + math.e ** (-activation / response))
        except OverflowError:
            return float("inf")

    def __str__(self):
        return '\n'.join([str(i + 1) + ' ' + str(layer) for i, layer in enumerate(self.layers)])
My input is a text file containing multiple rows. Each row contains a data point of dimension 16 (the first 16 elements of the row), and the 17th element is the class to which the data point belongs.
Input.txt
1,3,2,2,1,10,2,2,1,9,2,9,1,6,2,8,2
2,1,3,2,2,8,7,7,5,7,6,8,2,8,3,8,4
4,6,6,5,5,8,8,3,5,7,8,7,5,10,4,6,2
6,9,6,11,6,7,7,8,5,9,8,8,4,9,7,9,5
1,1,2,2,1,7,7,8,5,7,6,7,2,8,7,9,1
3,8,5,6,4,10,6,3,6,11,4,7,3,8,2,9,3
4,7,5,5,5,8,6,6,7,6,6,6,2,8,7,10,1
6,10,9,8,8,11,6,2,4,9,4,6,9,6,2,8,11
The class is always in the range [1,11]
After learning the input, the expected output for each data point should be its corresponding class (it may not always be right).
I am creating an object of the NeuralNetwork class and then trying to use the update method, but I am not clear on how to use it properly. Please guide me on how to use it for the above input.
This is my open source code.
How to use it:
You have to initialize a new network object as: network = NeuralNetwork(inputs, n_outputs, n_neurons_to_hl, n_hidden_layers).
inputs: the number of input values (in your case: 16)
n_outputs: the number of output values (in your case: 11)
n_neurons_to_hl: the number of neurons in each hidden layer
n_hidden_layers: the number of hidden layers
The update(input_signals) method performs a forward pass through the network - that is, it is the method you want to use to classify an input instance.
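A short usage sketch for the input format described above; the file name, the hidden-layer size, and reading the prediction as the arg-max output are assumptions, and the weights here are still the random initial ones (the class has no training routine of its own), so the outputs are not meaningful until set_weights is driven by some optimizer:

network = NeuralNetwork(16, 11, 10, 1)  # 16 inputs, 11 classes, 10 hidden neurons, 1 hidden layer

with open('Input.txt') as f:
    for line in f:
        values = [int(v) for v in line.strip().split(',')]
        features, label = values[:16], values[16]
        outputs = network.update(features)  # 11 sigmoid outputs, one per class
        predicted_class = outputs.index(max(outputs)) + 1  # classes are in the range [1, 11]
        print(predicted_class, label)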
