Below is the code (taken from somewhere on the internet) that I am trying to use for Multilayer neural network.
import math
import random
BIAS = -1
"""
To view the structure of the Neural Network, type
print network_name
"""
class Neuron:
def __init__(self, n_inputs ):
self.n_inputs = n_inputs
self.set_weights( [random.uniform(0,1) for x in range(0,n_inputs+1)] ) # +1 for bias weight
def sum(self, inputs ):
# Does not include the bias
return sum(val*self.weights[i] for i,val in enumerate(inputs))
def set_weights(self, weights ):
self.weights = weights
def __str__(self):
return 'Weights: %s, Bias: %s' % ( str(self.weights[:-1]),str(self.weights[-1]) )
class NeuronLayer:
def __init__(self, n_neurons, n_inputs):
self.n_neurons = n_neurons
self.neurons = [Neuron( n_inputs ) for _ in range(0,self.n_neurons)]
def __str__(self):
return 'Layer:\n\t'+'\n\t'.join([str(neuron) for neuron in self.neurons])+''
class NeuralNetwork:
def __init__(self, n_inputs, n_outputs, n_neurons_to_hl, n_hidden_layers):
self.n_inputs = n_inputs
self.n_outputs = n_outputs
self.n_hidden_layers = n_hidden_layers
self.n_neurons_to_hl = n_neurons_to_hl
# Do not touch
self._create_network()
self._n_weights = None
# end
def _create_network(self):
if self.n_hidden_layers>0:
# create the first layer
self.layers = [NeuronLayer( self.n_neurons_to_hl,self.n_inputs )]
# create hidden layers
self.layers += [NeuronLayer( self.n_neurons_to_hl,self.n_neurons_to_hl ) for _ in range(0,self.n_hidden_layers)]
# hidden-to-output layer
self.layers += [NeuronLayer( self.n_outputs,self.n_neurons_to_hl )]
else:
# If we don't require hidden layers
self.layers = [NeuronLayer( self.n_outputs,self.n_inputs )]
def get_weights(self):
weights = []
for layer in self.layers:
for neuron in layer.neurons:
weights += neuron.weights
return weights
#property
def n_weights(self):
if not self._n_weights:
self._n_weights = 0
for layer in self.layers:
for neuron in layer.neurons:
self._n_weights += neuron.n_inputs+1 # +1 for bias weight
return self._n_weights
def set_weights(self, weights ):
assert len(weights)==self.n_weights, "Incorrect amount of weights."
stop = 0
for layer in self.layers:
for neuron in layer.neurons:
start, stop = stop, stop+(neuron.n_inputs+1)
neuron.set_weights( weights[start:stop] )
return self
def update(self, inputs ):
assert len(inputs)==self.n_inputs, "Incorrect amount of inputs."
for layer in self.layers:
outputs = []
for neuron in layer.neurons:
tot = neuron.sum(inputs) + neuron.weights[-1]*BIAS
outputs.append( self.sigmoid(tot) )
inputs = outputs
return outputs
def sigmoid(self, activation,response=1 ):
# the activation function
try:
return 1/(1+math.e**(-activation/response))
except OverflowError:
return float("inf")
def __str__(self):
return '\n'.join([str(i+1)+' '+str(layer) for i,layer in enumerate(self.layers)])
My input is a text file containing multiple rows. Each row contains a data-point of dimension 16 (the first 16 elements of the row) and 17th element is the class to which the data point belongs.
Input.txt
1,3,2,2,1,10,2,2,1,9,2,9,1,6,2,8,2
2,1,3,2,2,8,7,7,5,7,6,8,2,8,3,8,4
4,6,6,5,5,8,8,3,5,7,8,7,5,10,4,6,2
6,9,6,11,6,7,7,8,5,9,8,8,4,9,7,9,5
1,1,2,2,1,7,7,8,5,7,6,7,2,8,7,9,1
3,8,5,6,4,10,6,3,6,11,4,7,3,8,2,9,3
4,7,5,5,5,8,6,6,7,6,6,6,2,8,7,10,1
6,10,9,8,8,11,6,2,4,9,4,6,9,6,2,8,11
The class is always in the range [1,11]
After learning the input, the expected output for each data-point should be its corresponding class (it may not be right always).
I am creating an object of NeuralNetwork class and then trying to use the update method.
But, I am not clear how to use it properly. Please guide me how to use it for the above input.
This is my open source code
How to
You have to initialize a new network object as: network = NeuralNetwork(inputs, n_outputs, n_neurons_to_hl, n_hidden_layers).
inputs: the number of input values (in your case: 16)
n_outputs: the number of output values (in your case: 11)
n_neurons_to_hl: the number of neurons in each hidden layer
n_hidden_layers: the number of hidden layers
The update( input_signals ) method is a forward calculation on the network - that is: this is the method you wish to use to classify an input instance.
Related
I am using PyTorch to train a deep learning model. I wonder if it is possible for me to separately save the model weight. For example:
class my_model(nn.Module):
def __init__(self):
super(my_model, self).__init__()
self.bert = transformers.AutoModel.from_pretrained(BERT_PATH)
self.out = nn.Linear(768,1)
def forward(self, ids, mask, token_type):
x = self.bert(ids, mask, token_type)[1]
x = self.out(x)
return x
I have the BERT model as the base model and an additional linear layer on the top. After I train this model, can I save the weight for the BERT model and this linear layer separately?
Alternatively to the previous answer, You can create two separated class of nn.module. One for the BERT model and another one for the linear layer:
class bert_model(nn.Module):
def __init__(self):
super(bert_model, self).__init__()
self.bert = transformers.AutoModel.from_pretrained(BERT_PATH)
def forward(self, ids, mask, token_type):
x = self.bert(ids, mask, token_type)[1]
return x
class linear_layer(nn.Module):
def __init__(self):
super(linear_layer, self).__init__()
self.out = nn.Linear(768,1)
def forward(self, x):
x = self.out(x)
return x
Then you can save the two part of the model separately with:
bert_model = bert_model()
linear_layer = linear_layer()
#train
torch.save(bert_model.state_dict(), PATH)
torch.save(linear_layer.state_dict(), PATH)
You can:
model = my_model()
# train ...
torch.save({'bert': model.bert.state_dict(), 'out': model.out.state_dict()}, 'checkpoint.pth')
While autograd's hvp tool seems to work very well for functions, once a model becomes involved, Hessian-vector products seem to go to 0. Some code.
First, I define the world's simplest model:
class SimpleMLP(nn.Module):
def __init__(self, in_dim, out_dim):
super().__init__()
self.layers = nn.Sequential(
nn.Linear(in_dim, out_dim),
)
def forward(self, x):
'''Forward pass'''
return self.layers(x)
Then, a loss function:
def objective(x):
return torch.sum(0.25 * torch.sum(x)**4)
We instantiate it:
Arows = 2
Acols = 2
mlp = SimpleMLP(Arows, Acols)
Finally, I'm going to define a "forward" function (distinct from the model's forward function) that will serve as the the full model+loss that we want to analyze:
def forward(*params_list):
for param_val, model_param in zip(params_list, mlp.parameters()):
model_param.data = param_val
x = torch.ones((Arows,))
return objective(mlp(x))
This passes a ones vector into the single-layer "mlp," and passes it into our quadratic loss.
Now, I attempt to compute:
v = torch.ones((6,))
v_tensors = []
idx = 0
#this code "reshapes" the v vector as needed
for i, param in enumerate(mlp.parameters()):
numel = param.numel()
v_tensors.append(torch.reshape(torch.tensor(v[idx:idx+numel]), param.shape))
idx += numel
And finally:
param_tensors = tuple(mlp.parameters())
reshaped_v = tuple(v_tensors)
soln = torch.autograd.functional.hvp(forward, param_tensors, v=reshaped_v)
But, alas, the Hessian-Vector Product in soln is all 0's. What is happening?
What's happening is that strict is False by default in the hvp() function and a tensor of 0's is returned as the Hessian Vector Product instead of an error (source).
If you try with strict=True, an error RuntimeError: The output of the user-provided function is independent of input 0. This is not allowed in strict mode. is returned instead. And when I looked at the full error, I suspect that this error comes from _check_requires_grad(jac, "jacobian", strict=strict) which indicates that the jacobian jac is None.
Update:
Following is a full working example:
import torch
from torch import nn
# your loss function
def objective(x):
return torch.sum(0.25 * torch.sum(x)**4)
# Following are utilities to make nn.Module functional
# borrowed from the link I posted in comment
def del_attr(obj, names):
if len(names) == 1:
delattr(obj, names[0])
else:
del_attr(getattr(obj, names[0]), names[1:])
def set_attr(obj, names, val):
if len(names) == 1:
setattr(obj, names[0], val)
else:
set_attr(getattr(obj, names[0]), names[1:], val)
def make_functional(mod):
orig_params = tuple(mod.parameters())
# Remove all the parameters in the model
names = []
for name, p in list(mod.named_parameters()):
del_attr(mod, name.split("."))
names.append(name)
return orig_params, names
def load_weights(mod, names, params):
for name, p in zip(names, params):
set_attr(mod, name.split("."), p)
# your forward function with update
def forward(*new_params):
# this line replace your for loop
load_weights(mlp, names, new_params)
x = torch.ones((Arows,))
out = mlp(x)
loss = objective(out)
return loss
# your simple MLP model
class SimpleMLP(nn.Module):
def __init__(self, in_dim, out_dim):
super().__init__()
self.layers = nn.Sequential(
nn.Linear(in_dim, out_dim),
)
def forward(self, x):
'''Forward pass'''
return self.layers(x)
if __name__ == '__main__':
# your model instantiation
Arows = 2
Acols = 2
mlp = SimpleMLP(Arows, Acols)
# your vector computation
v = torch.ones((6,))
v_tensors = []
idx = 0
#this code "reshapes" the v vector as needed
for i, param in enumerate(mlp.parameters()):
numel = param.numel()
v_tensors.append(torch.reshape(torch.tensor(v[idx:idx+numel]), param.shape))
idx += numel
reshaped_v = tuple(v_tensors)
#make model's parameters functional
params, names = make_functional(mlp)
params = tuple(p.detach().requires_grad_() for p in params)
#compute hvp
soln = torch.autograd.functional.vhp(forward, params, reshaped_v, strict=True)
print(soln)
Did you try it with doubles instead of floats? I did some tests on my own that showed fairly large error when backproping with 32 bit float (on the order of 1e-5) compared to doubles.
I would like to implement dropout and weight constraints in the custom layer. The program did not report an error, but neither did these two functions
The custom layer is:
'''
class Softmax_Decode(Layer):
""" A layer which uses a key to decode a sparse representation into a softmax.
Makes it easier to train spiking classifiers by allowing the use of
softmax and catagorical-crossentropy loss. Allows for encodings that are
n-hot where 'n' is the number of outputs assigned to each class. Allows
encodings to overlap, where a given output neuron can contribute
to the probability of more than one class.
# Arguments
key: A numpy array (num_classes, input_dims) with an input_dim-sized
{0,1}-vector representative for each class.
size: A tuple (num_classes, input_dim). If ``key`` is not specified, then
size must be specified. In which case, a key will automatically be generated.
"""
def __init__(self, key=None,size=None,kernel_regularizer=None,kernel_constraint=None,dropout=0.0,**kwargs):
super(Softmax_Decode, self).__init__(**kwargs)
self.key = _key_check(key, size)
if type(self.key) is dict and 'value' in self.key.keys():
self.key = np.array(self.key['value'], dtype=np.float32)
elif type(self.key) is list:
self.key = np.array(self.key, dtype=np.float32)
#self._rescaled_key = K.variable(np.transpose(2*self.key-1))
self._rescaled_key = K.variable(2*np.transpose(self.key)-1)
self.kernel_regularizer = regularizers.get(kernel_regularizer)
self.kernel_constraint = constraints.get(kernel_constraint)
self.dropout = dropout
def build(self, input_shape):
self.kernel = self.add_weight(name='kernel',
#shape=(input_shape[1], self.key.shape[1]),
initializer='uniform',
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint)
super(Softmax_Decode, self).build(input_shape)
def call(self, inputs):
#return K.softmax(K.dot(2*(1-inputs),self._rescaled_key))
return K.softmax(K.dot(2*inputs-1, self._rescaled_key))
def compute_output_shape(self, input_shape):
return (input_shape[0],self.key.shape[0])
def get_config(self):
config={
'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
'kernel_constraint': constraints.serialize(self.kernel_constraint)
}
base_config = super(Softmax_Decode, self).get_config()
return dict(list(base_config.items()) + [('key', self.key)])
'''
main code:
model.add(Softmax_Decode(key,kernel_constraint=MinMaxNorm(min_value=0, max_value=1.0, rate=1.0, axis=0),dropout=0.3))
Would you help me? Thank you so much for your time.
I have a python code with the following two classes.
import torch
import torch.nn as nn
import torch.nn.functional as F
class QNet_baseline(nn.Module):
"""
A MLP with 2 hidden layer
observation_dim (int): number of observation features
action_dim (int): Dimension of each action
seed (int): Random seed
"""
def __init__(self, observation_dim, action_dim, seed):
super(QNet_baseline, self).__init__()
self.seed = torch.manual_seed(seed)
self.fc1 = nn.Linear(observation_dim, 128)
self.fc2 = nn.Linear(128, 64)
self.fc3 = nn.Linear(64, action_dim)
def forward(self, observations):
"""
Forward propagation of neural network
"""
x = F.relu(self.fc1(observations))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return x
class QNet_3hidden(nn.Module):
"""
A MLP with 3 hidden layer
observation_dim (int): number of observation features
action_dim (int): Dimension of each action
seed (int): Random seed
"""
def __init__(self, observation_dim, action_dim, seed):
super(QNet_3hidden, self).__init__()
self.seed = torch.manual_seed(seed)
self.fc1 = nn.Linear(observation_dim, 128)
self.fc2 = nn.Linear(128, 64)
self.fc3 = nn.Linear(64, 64)
self.fc4 = nn.Linear(64, action_dim)
def forward(self, observations):
"""
Forward propagation of neural network
"""
x = F.relu(self.fc1(observations))
x = F.relu(self.fc2(x))
x = F.relu(self.fc3(x))
x = self.fc4(x)
return x
I used the same code to instantiate both classes. QNet_baseline works fine, but I got the following error for QNet_3hidden. Why would QNet_baseline works but QNet_3hidden has an error? What did I miss here? Thanks!
/home/workspace/QNetworks.py in __init__(self, observation_dim, action_dim, seed)
44
45 def __init__(self, observation_dim, action_dim, seed):
---> 46 super(QNet_3hidden, self).__init__()
47 self.seed = torch.manual_seed(seed)
48 self.fc1 = nn.Linear(observation_dim, 128)
TypeError: super(type, obj): obj must be an instance or subtype of type
Also, below is how the two classes instantiated:
class DDQN_Agent():
"""Interacts with and learns from the environment.
Attributes:
state_size (int): dimension of each state
action_size (int): dimension of each action
seed (int): random seed
"""
def __init__(self, state_size, action_size, seed, qnet="baseline", filename=None):
"""Initialize an Agent object.
Args:
filename: path of .pth file with trained weights
"""
self.state_size = state_size
self.action_size = action_size
self.seed = random.seed(seed)
# Q-Network
if qnet=="3hidden":
self.qnetwork_local = QNet_3hidden(state_size, action_size, seed).to(device)
self.qnetwork_target = QNet_3hidden(state_size, action_size, seed).to(device)
self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)
else:
self.qnetwork_local = QNet_baseline(state_size, action_size, seed).to(device)
self.qnetwork_target = QNet_baseline(state_size, action_size, seed).to(device)
self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)
if filename:
weights = torch.load(filename)
self.qnetwork_local.load_state_dict(weights)
self.qnetwork_target.load_state_dict(weights)
# Replay memory
self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
# Initialize time step (for updating every UPDATE_EVERY steps)
self.t_step = 0
I encountered a similar problem, and completely restarting the kernel helped.
As suggested in this Comment by keitakurita:
Are you running the code in a Jupyter notebook and have not restarted the kernel? If so, there's a chance that your kernel is referencing the wrong class.
I suspect this might be the reason because I encountered the error after rewriting the class.
This would also explain why this is a hard-to-reproduce error. Followings a list of similar questions asked to help keep a track of the same:
Gitmemory
From July '20
From Feb '18
I have some code, and when I run it, I get the following error: Expected object of type torch.cuda.FloatTensor but found type torch.FloatTensor for argument #2 'other'
From this error message, I assume there a problem with pushing my models to the GPU. However, I am not sure precisely where the problem lies.
I will place the code wherein I think the problem may lie at the end of this question. Could someone please describe what the error exactly means and how to fix it? Any help is much appreciated.
class VGG(nn.Module):
'''
VGG model
'''
def __init__(self, features): # features represents the layers array
super(VGG, self).__init__()
self.features = features
self.classifier = nn.Sequential(
nn.Dropout(),
nn.Linear(512,512),
nn.ReLU(True),
nn.Dropout(),
nn.Linear(512, 512),
nn.ReLU(True),
nn.Linear(512, 10),
)
# Initialize weights
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
m.bias.data.zero_()
def forward(self, x): # x is the image, we run x through the layers
print("Running through features")
x = self.features(x) # runs through all features, where each feature is a function
print("Finsihed features, going to classifier")
x = x.view(x.size(0), -1)
# after running through features, does sequential steps to finally classify
x = self.classifier(x)
return x
def make_layers(cfg, batch_norm=False):
# print("Making layers!")
layers = []
in_channels = 3
for v in cfg:
if v == 'M':
layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
else:
conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
if batch_norm:
layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
else:
layers += [conv2d, nn.ReLU(inplace=True)]
in_channels = v
rlstm =RLSTM(v)
rlstm.input_to_state = torch.nn.DataParallel(rlstm.input_to_state)
rlstm.state_to_state = torch.nn.DataParallel(rlstm.state_to_state)
rlstm=rlstm.cuda()
layers+=[rlstm]
return nn.Sequential(*layers)
class RLSTM(nn.Module):
def __init__(self,ch):
# torch.set_default_tensor_type('torch.cuda.FloatTensor')
super(RLSTM,self).__init__()
self.ch=ch
self.input_to_state = torch.nn.Conv2d(self.ch,4*self.ch,kernel_size=(1,3),padding=(0,1))
self.state_to_state = torch.nn.Conv2d(self.ch,4*self.ch,kernel_size=(1,3),padding=(0,1)) # error is here: hidPrev is an array - not a valid number of input channel
# self.input_to_state = self.input_to_state.cuda()
#self.state_to_state = self.state_to_state.cuda()
def forward(self, image):
# print("going in row forward")
global current
global _layer
global isgates
size = image.size()
print("size: "+str(size))
b = size[0]
indvs = list(image.split(1,0)) # split up the batch into individual images
#print(indvs[0].size())
tensor_array = []
for i in range(b):
current = 0
_layer = []
isgates = []
print(len(tensor_array))
tensor_array.append(self.RowLSTM(indvs[i]))
seq=tuple(tensor_array)
trans = torch.cat(seq,0)
print(trans.size())
return trans.cuda() # trying to make floattensor error go away
def RowLSTM(self, image):
# print("going in rowlstm")
global current
global _layer
global isgates
# input-to-state (K_is * x_i) : 3x1 convolution. generate 4h x n x n tensor. 4hxnxn tensor contains all i -> s info
# the input to state convolution should only be computed one time
if current==0:
n = image.size()[2]
ch=image.size()[1]
# input_to_state = torch.nn.Conv2d(ch,4*ch,kernel_size=(1,3),padding=(0,1))
# print("about to do convolution")
isgates = self.splitIS(self.input_to_state(image)) # convolve, then split into gates (4 per row)
cell=RowLSTMCell(0,torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1),torch.randn(ch,n,1))
# now have dummy, learnable variables for first row
_layer.append(cell)
print("layeres: "+str(len(_layer)))
else:
Cell_prev = _layer[current-1] # access previous row
hidPrev = Cell_prev.getHiddenState()
ch = image.size()[1]
# print("about to apply conv1d")
# state_to_state = torch.nn.Conv2d(ch,4*ch,kernel_size=(1,3),padding=(0,1)) # error is here: hidPrev is an array - not a valid number of input channel
# print("applied conv1d")
prevHid=Cell_prev.getHiddenState()
ssgates = self.splitSS(self.state_to_state(prevHid.unsqueeze(0))) #need to unsqueeze (Ex: currently 16x5, need to make 1x16x5)
gates = self.addGates(isgates,ssgates,current)
# split gates
ig, og, fg, gg = gates[0], gates[1], gates[2], gates[3] # into four, ADD SIGMOID!
cell = RowLSTMCell(Cell_prev,ig,og,fg,gg,0,0)
cell.compute()
_layer.append(cell)
# attempting to eliminate requirement of getting size
#print(current)
try:
print("adding one to current")
current+=1
y=(isgates[0][0][1][current])
return self.RowLSTM(image) #expecting floattensor, but gets cuda floattensor
except Exception as error:
print(error)
concats=[]
print(len(_layer))
for cell in _layer:
tensor=torch.unsqueeze(cell.h,0)
concats.append(tensor)
seq=tuple(concats)
print("non catted tensor: "+str(tensor.size()))
tense=torch.cat(seq,3)
print("catted lstm tensor "+str(tense.size()))
return tensor
The code runs, but when trying to go through the try/except block, the error is thrown. I am guessing the mistake lies somewhere here?
Edit: Using print statements to see where the program exactly terminates, it seems that there is a mistake in code that I have note posted yet! I will post that now, it looks like the error is in the compute() function, since the statement "finished computing" never gets printed.
class RowLSTMCell(): #inherit torch.nn.LSTM?
def __init__(self,prev_row, i, o, f, g, c, h):
#super(RowLSTMCell,self).__init__()
self.c=c
#self.c = self.c.cuda()
self.h=h
# self.h = self.h.cuda()
self.i=i
self.i = self.i.cuda()
self.o=o
self.o = self.o.cuda()
self.g=g
self.g = self.g.cuda()
self.f=f
self.f = self.f.cuda()
self.prev_row=prev_row
def getStateSize(self):
return self._state_size
def getOutputSize(self):
return self._output_size
def compute(self):
print("computing")
c_prev = self.prev_row.getCellState()
h_prev = self.prev_row.getHiddenState()
self.c = self.f * c_prev + self.i * self.g
self.h = torch.tanh(self.c) * self.o
print("finished computing")
def getHiddenState(self):
return self.h
def getCellState(self):
return self.c
self.c and self.h were not cuda! I guess you really have to make sure that each tensor is using cuda. I just put .cuda() at the end of self.c and self.h's computation in the compute() method.