I have some code, and when I run it, I get the following error: Expected object of type torch.cuda.FloatTensor but found type torch.FloatTensor for argument #2 'other'
From this error message, I assume there is a problem with pushing my models to the GPU. However, I am not sure precisely where the problem lies.
I will place the code where I think the problem may lie at the end of this question. Could someone please explain what the error means exactly and how to fix it? Any help is much appreciated.
class VGG(nn.Module):
    '''
    VGG model
    '''
    def __init__(self, features):  # features represents the layers array
        super(VGG, self).__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(512, 512),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(512, 512),
            nn.ReLU(True),
            nn.Linear(512, 10),
        )
        # Initialize weights
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                m.bias.data.zero_()

    def forward(self, x):  # x is the image; we run x through the layers
        print("Running through features")
        x = self.features(x)  # runs through all features, where each feature is a function
        print("Finished features, going to classifier")
        x = x.view(x.size(0), -1)
        # after running through features, runs sequential steps to finally classify
        x = self.classifier(x)
        return x
def make_layers(cfg, batch_norm=False):
    # print("Making layers!")
    layers = []
    in_channels = 3
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
            rlstm = RLSTM(v)
            rlstm.input_to_state = torch.nn.DataParallel(rlstm.input_to_state)
            rlstm.state_to_state = torch.nn.DataParallel(rlstm.state_to_state)
            rlstm = rlstm.cuda()
            layers += [rlstm]
    return nn.Sequential(*layers)
class RLSTM(nn.Module):
    def __init__(self, ch):
        # torch.set_default_tensor_type('torch.cuda.FloatTensor')
        super(RLSTM, self).__init__()
        self.ch = ch
        self.input_to_state = torch.nn.Conv2d(self.ch, 4*self.ch, kernel_size=(1, 3), padding=(0, 1))
        self.state_to_state = torch.nn.Conv2d(self.ch, 4*self.ch, kernel_size=(1, 3), padding=(0, 1))
        # self.input_to_state = self.input_to_state.cuda()
        # self.state_to_state = self.state_to_state.cuda()

    def forward(self, image):
        # print("going in row forward")
        global current
        global _layer
        global isgates
        size = image.size()
        print("size: " + str(size))
        b = size[0]
        indvs = list(image.split(1, 0))  # split up the batch into individual images
        # print(indvs[0].size())
        tensor_array = []
        for i in range(b):
            current = 0
            _layer = []
            isgates = []
            print(len(tensor_array))
            tensor_array.append(self.RowLSTM(indvs[i]))
        seq = tuple(tensor_array)
        trans = torch.cat(seq, 0)
        print(trans.size())
        return trans.cuda()  # trying to make the FloatTensor error go away
    def RowLSTM(self, image):
        # print("going in rowlstm")
        global current
        global _layer
        global isgates

        # input-to-state (K_is * x_i): 3x1 convolution. Generates a 4h x n x n tensor,
        # which contains all input-to-state info.
        # The input-to-state convolution should only be computed one time.
        if current == 0:
            n = image.size()[2]
            ch = image.size()[1]
            # input_to_state = torch.nn.Conv2d(ch, 4*ch, kernel_size=(1, 3), padding=(0, 1))
            # print("about to do convolution")
            isgates = self.splitIS(self.input_to_state(image))  # convolve, then split into gates (4 per row)
            cell = RowLSTMCell(0, torch.randn(ch, n, 1), torch.randn(ch, n, 1), torch.randn(ch, n, 1),
                               torch.randn(ch, n, 1), torch.randn(ch, n, 1), torch.randn(ch, n, 1))
            # now have dummy, learnable variables for first row
            _layer.append(cell)
            print("layers: " + str(len(_layer)))
        else:
            Cell_prev = _layer[current-1]  # access previous row
            hidPrev = Cell_prev.getHiddenState()
            ch = image.size()[1]
            # print("about to apply conv1d")
            # state_to_state = torch.nn.Conv2d(ch, 4*ch, kernel_size=(1, 3), padding=(0, 1))  # error is here: hidPrev is an array - not a valid number of input channels
            # print("applied conv1d")
            prevHid = Cell_prev.getHiddenState()
            ssgates = self.splitSS(self.state_to_state(prevHid.unsqueeze(0)))  # need to unsqueeze (e.g. currently 16x5, need to make 1x16x5)
            gates = self.addGates(isgates, ssgates, current)
            # split gates
            ig, og, fg, gg = gates[0], gates[1], gates[2], gates[3]  # into four, ADD SIGMOID!
            cell = RowLSTMCell(Cell_prev, ig, og, fg, gg, 0, 0)
            cell.compute()
            _layer.append(cell)

        # attempting to eliminate requirement of getting size
        # print(current)
        try:
            print("adding one to current")
            current += 1
            y = (isgates[0][0][1][current])
            return self.RowLSTM(image)  # expecting FloatTensor, but gets cuda FloatTensor
        except Exception as error:
            print(error)
            concats = []
            print(len(_layer))
            for cell in _layer:
                tensor = torch.unsqueeze(cell.h, 0)
                concats.append(tensor)
            seq = tuple(concats)
            print("non catted tensor: " + str(tensor.size()))
            tense = torch.cat(seq, 3)
            print("catted lstm tensor " + str(tense.size()))
            return tensor
The code runs, but the error is thrown when going through the try/except block. I am guessing the mistake lies somewhere here?
Edit: Using print statements to see where exactly the program terminates, it seems the mistake is in code that I have not posted yet! I will post that now. It looks like the error is in the compute() function, since the statement "finished computing" never gets printed.
class RowLSTMCell():  # inherit torch.nn.LSTM?
    def __init__(self, prev_row, i, o, f, g, c, h):
        # super(RowLSTMCell, self).__init__()
        self.c = c
        # self.c = self.c.cuda()
        self.h = h
        # self.h = self.h.cuda()
        self.i = i
        self.i = self.i.cuda()
        self.o = o
        self.o = self.o.cuda()
        self.g = g
        self.g = self.g.cuda()
        self.f = f
        self.f = self.f.cuda()
        self.prev_row = prev_row

    def getStateSize(self):
        return self._state_size

    def getOutputSize(self):
        return self._output_size

    def compute(self):
        print("computing")
        c_prev = self.prev_row.getCellState()
        h_prev = self.prev_row.getHiddenState()
        self.c = self.f * c_prev + self.i * self.g
        self.h = torch.tanh(self.c) * self.o
        print("finished computing")

    def getHiddenState(self):
        return self.h

    def getCellState(self):
        return self.c
self.c and self.h were not on CUDA! I guess you really have to make sure that every tensor is using CUDA. I fixed it by putting .cuda() at the end of the computations of self.c and self.h in the compute() method.
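For reference, here is a minimal sketch of the fixed compute() method (only the two .cuda() calls are new relative to the code above):

def compute(self):
    print("computing")
    c_prev = self.prev_row.getCellState()
    h_prev = self.prev_row.getHiddenState()
    # move both results to the GPU so later ops don't mix CPU and CUDA tensors
    self.c = (self.f * c_prev + self.i * self.g).cuda()
    self.h = (torch.tanh(self.c) * self.o).cuda()
    print("finished computing")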
I want to implement a learned step size quantization algorithm, so I created a quantized Linear layer:
class QLinear(nn.Module):
    def __init__(self, input_dim, out_dim, bits=8):
        super(QLinear, self).__init__()
        # create a tensor with requires_grad=True
        self.up = 2 ** bits - 1
        self.down = 0
        self.fc = nn.Linear(input_dim, out_dim)
        weight = self.fc.weight.data
        self.scale = nn.Parameter(torch.Tensor((torch.max(weight) - torch.min(weight)) / (self.up - self.down)), requires_grad=True)
        self.zero_point = nn.Parameter(torch.Tensor(self.down - (torch.min(weight) / self.scale).round()), requires_grad=True)

    def forward(self, x):
        weight = self.fc.weight
        quant_weight = (round_ste(weight / self.scale) + self.zero_point)
        quant_weight = torch.clamp(quant_weight, self.down, self.up)
        dequant_weight = ((quant_weight - self.zero_point) * self.scale)
        self.fc.weight.data = dequant_weight
        return self.fc(x)
class QNet(nn.Module):
    def __init__(self):
        super(QNet, self).__init__()
        self.fc1 = QLinear(28 * 28, 100)
        self.fc2 = QLinear(100, 10)

    def forward(self, x):
        x = x.view(-1, 28 * 28)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = F.softmax(x)
        return x
When I train this network, scale's grad always returns None. Why does this happen, and how can I solve it?
The issue is that you are assigning dequant_weight through the data attribute of your parameter, which ends up not being registered by autograd. A simple alternative is to handle the weight as a nn.Parameter and apply the linear operation manually in the forward definition, directly with the computed dequant_weight.
Here is a minimal example that should work:
import torch
import torch.nn.functional as F
from torch import nn

class QLinear(nn.Module):
    def __init__(self, input_dim, out_dim, bits=8):
        super().__init__()
        self.up = 2 ** bits - 1
        self.down = 0
        self.weight = nn.Parameter(torch.rand(out_dim, input_dim))
        self.scale = nn.Parameter(
            torch.Tensor((self.weight.max() - self.weight.min()) / (self.up - self.down)))
        self.zero_point = nn.Parameter(
            torch.Tensor(self.down - (self.weight.min() / self.scale).round()))

    def forward(self, x):
        quant_weight = (torch.round(self.weight / self.scale) + self.zero_point)
        quant_weight = torch.clamp(quant_weight, self.down, self.up)
        dequant_weight = ((quant_weight - self.zero_point) * self.scale)
        return F.linear(x, dequant_weight)
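As a quick sanity check (the shapes here are arbitrary), you can verify that scale now receives a gradient:

layer = QLinear(4, 2)
out = layer(torch.rand(3, 4)).sum()
out.backward()
print(layer.scale.grad)  # now a tensor instead of None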
Side notes:
nn.Parameter requires gradient computation by default (there is no need to pass requires_grad=True).
Additionally, you can rewrite QNet by inheriting from nn.Sequential to avoid boilerplate code:
class QNet(nn.Sequential):
    def __init__(self):
        super().__init__(nn.Flatten(),
                         QLinear(28 * 28, 100),
                         nn.ReLU(),
                         QLinear(100, 10),
                         nn.Softmax())
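A quick usage sketch (the batch shape is just an example); note that recent PyTorch versions expect an explicit dimension, i.e. nn.Softmax(dim=1), to avoid a warning:

model = QNet()
probs = model(torch.rand(32, 1, 28, 28))  # nn.Flatten() reshapes to (32, 784)
print(probs.shape)                        # torch.Size([32, 10])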
I have a simple model:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(3, 10)
        self.fc2 = nn.Linear(10, 30)
        self.fc3 = nn.Linear(30, 2)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        return x

net = Model()
How can I keep the weights always within a certain range (e.g. [-1, 1])?
I tried the following:
self.fc1 = torch.tanh(nn.Linear(3, 10))
I'm not entirely sure this would always keep them in that range anyway (even if the gradient update is trying to push them farther out).
But got the following error:
TypeError: tanh(): argument 'input' (position 1) must be Tensor, not Linear
According to discuss.pytorch.org, you can create an extra class to clip weights to a given range. Link to the discussion.
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(3, 10)
        self.fc2 = nn.Linear(10, 30)
        self.fc3 = nn.Linear(30, 2)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.tanh(self.fc3(x))
        return x
You should add a weight clipper:
class WeightClipper(object):
    def __call__(self, module):
        # filter the variables to get the ones you want
        if hasattr(module, 'weight'):
            w = module.weight.data
            w = w.clamp(-1, 1)
            module.weight.data = w

model = Model()
clipper = WeightClipper()
model.apply(clipper)
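If you want the constraint enforced throughout training, apply the clipper after each optimizer step. A minimal sketch (optimizer, criterion, and the data loader are assumed to already exist):

for inputs, targets in loader:
    optimizer.zero_grad()
    loss = criterion(model(inputs), targets)
    loss.backward()
    optimizer.step()
    model.apply(clipper)  # re-clamp weights to [-1, 1] after the update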
While autograd's hvp tool seems to work very well for functions, once a model becomes involved, Hessian-vector products seem to go to 0. Some code.
First, I define the world's simplest model:
class SimpleMLP(nn.Module):
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(in_dim, out_dim),
        )

    def forward(self, x):
        '''Forward pass'''
        return self.layers(x)
Then, a loss function:
def objective(x):
    return torch.sum(0.25 * torch.sum(x)**4)
We instantiate it:
Arows = 2
Acols = 2
mlp = SimpleMLP(Arows, Acols)
Finally, I'm going to define a "forward" function (distinct from the model's forward function) that will serve as the full model+loss that we want to analyze:
def forward(*params_list):
    for param_val, model_param in zip(params_list, mlp.parameters()):
        model_param.data = param_val
    x = torch.ones((Arows,))
    return objective(mlp(x))
This passes a ones vector into the single-layer "mlp" and feeds the result into our quartic loss.
Now, I attempt to compute:
v = torch.ones((6,))
v_tensors = []
idx = 0
# this code "reshapes" the v vector as needed
for i, param in enumerate(mlp.parameters()):
    numel = param.numel()
    v_tensors.append(torch.reshape(torch.tensor(v[idx:idx+numel]), param.shape))
    idx += numel
And finally:
param_tensors = tuple(mlp.parameters())
reshaped_v = tuple(v_tensors)
soln = torch.autograd.functional.hvp(forward, param_tensors, v=reshaped_v)
But, alas, the Hessian-Vector Product in soln is all 0's. What is happening?
What's happening is that strict is False by default in the hvp() function, so a tensor of 0's is returned as the Hessian-vector product instead of an error (source).
If you try with strict=True, you instead get RuntimeError: The output of the user-provided function is independent of input 0. This is not allowed in strict mode. Looking at the full traceback, I suspect this error comes from _check_requires_grad(jac, "jacobian", strict=strict), which indicates that the jacobian jac is None.
Update:
Following is a full working example:
import torch
from torch import nn

# your loss function
def objective(x):
    return torch.sum(0.25 * torch.sum(x)**4)

# Following are utilities to make nn.Module functional
# borrowed from the link I posted in comment
def del_attr(obj, names):
    if len(names) == 1:
        delattr(obj, names[0])
    else:
        del_attr(getattr(obj, names[0]), names[1:])

def set_attr(obj, names, val):
    if len(names) == 1:
        setattr(obj, names[0], val)
    else:
        set_attr(getattr(obj, names[0]), names[1:], val)

def make_functional(mod):
    orig_params = tuple(mod.parameters())
    # Remove all the parameters in the model
    names = []
    for name, p in list(mod.named_parameters()):
        del_attr(mod, name.split("."))
        names.append(name)
    return orig_params, names

def load_weights(mod, names, params):
    for name, p in zip(names, params):
        set_attr(mod, name.split("."), p)

# your forward function with update
def forward(*new_params):
    # this line replaces your for loop
    load_weights(mlp, names, new_params)
    x = torch.ones((Arows,))
    out = mlp(x)
    loss = objective(out)
    return loss

# your simple MLP model
class SimpleMLP(nn.Module):
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(in_dim, out_dim),
        )

    def forward(self, x):
        '''Forward pass'''
        return self.layers(x)

if __name__ == '__main__':
    # your model instantiation
    Arows = 2
    Acols = 2
    mlp = SimpleMLP(Arows, Acols)

    # your vector computation
    v = torch.ones((6,))
    v_tensors = []
    idx = 0
    # this code "reshapes" the v vector as needed
    for i, param in enumerate(mlp.parameters()):
        numel = param.numel()
        v_tensors.append(torch.reshape(torch.tensor(v[idx:idx+numel]), param.shape))
        idx += numel
    reshaped_v = tuple(v_tensors)

    # make the model's parameters functional
    params, names = make_functional(mlp)
    params = tuple(p.detach().requires_grad_() for p in params)

    # compute the hvp
    soln = torch.autograd.functional.vhp(forward, params, reshaped_v, strict=True)
    print(soln)
Did you try it with doubles instead of floats? I did some tests of my own that showed fairly large error (on the order of 1e-5) when backpropagating with 32-bit floats, compared to doubles.
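For what it's worth, a minimal sketch of switching the example above to double precision (only these lines change):

mlp = SimpleMLP(Arows, Acols).double()       # cast all parameters to float64
v = torch.ones((6,), dtype=torch.float64)    # keep the v vector in float64 too

# inside forward(), the input must match as well:
x = torch.ones((Arows,), dtype=torch.float64)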
I am trying to get into neural networks and wanted to test some code from a video I am watching, but I keep getting the error "'Neural_Network' object has no attribute 'w1'", and I can't seem to figure it out. I went through many related Stack Overflow questions, but they don't seem to answer this one, and since I haven't done any object-oriented programming in Python before, I do not understand what is going on.
When I looked through the code, I thought that self.w1 was being set as a local variable, so I tried to instantiate it and set it as a global variable above the constructor declaration, but that didn't work.
import numpy as np

class Neural_Network(object):
    def _init_(self):
        self.inputLayerSize = 2
        self.outputLayerSize = 1
        self.hiddenLayerSize = 3
        self.w1 = np.random.randn(self.inputLayerSize, self.hiddenLayerSize)
        self.w2 = np.random.randn(self.hiddenLayerSize, self.outputLayerSize)

    def forward(self, x):
        self.z2 = np.dot(x, self.w1)
        self.a2 = self.sigmoid(self.layer1)
        self.z3 = np.dot(self.a2, self.w2)
        yhat = self.sigmoid(self.z3)
        return yhat

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

x = np.array(([3,5],[5,1],[10,2]), dtype=float)
y = np.array(([75],[82],[93]), dtype=float)

n1 = Neural_Network()
yhat = n1.forward(x)
print(yhat)
The code should produce a matrix of possible grades given a specified input 'x'.
Something like: x = [[2,3],[5,2]]
output: [[82],[93],[100]]
__init__ should have two underscores on each side, like:
def __init__(self):
Without two underscores on each side, Python does not call the function when the object is instantiated. In general, all such special functions and attributes have a pair of double underscores.
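To illustrate with a hypothetical minimal class: a method named _init_ with single underscores is just an ordinary method, so Python never runs it on instantiation, and the attributes it would create never exist:

class A:
    def _init_(self):  # single underscores: an ordinary method, never called automatically
        self.x = 1

a = A()      # _init_ is not invoked here
print(a.x)   # AttributeError: 'A' object has no attribute 'x'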
These beginner errors kill me too; I also didn't spot them instantly:
_init_ with a single underscore on each side does not count as __init__, so when Neural_Network() was called, no initialization was executed.
Inheriting from object is implicit in Python 3; you don't need to write it.
Then you'll notice that the hidden layer self.layer1 was never defined.
Also, don't give variables one-letter-plus-digit names like w1; it is easy to confuse with wl and the like.
One more important point: you cannot get outputs like [75],[82],[93], because sigmoid, as you remember, has range 0 to 1, and the same goes for the input. Normalize the inputs and choose clear targets, meaning only 0 or 1.
import numpy as np

class Neural_Network:
    def __init__(self):
        self.inputLayerSize = 2
        self.outputLayerSize = 1
        self.hiddenLayerSize = 3
        self.w_h = np.random.randn(self.inputLayerSize, self.hiddenLayerSize)
        self.w_o = np.random.randn(self.hiddenLayerSize, self.outputLayerSize)
        self.layer_hidden = np.random.randn(self.hiddenLayerSize)

    def forward(self, x):
        self.z2 = np.dot(x, self.w_h)
        self.a2 = self.sigmoid(self.layer_hidden)
        self.z3 = np.dot(self.a2, self.w_o)
        yhat = self.sigmoid(self.z3)
        return yhat

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

x = np.array(([3,5],[5,1],[10,2]), dtype=float)
y = np.array(([75],[82],[93]), dtype=float)

n1 = Neural_Network()
yhat = n1.forward(x)
print(yhat)
Below is the code (taken from somewhere on the internet) that I am trying to use for a multilayer neural network.
import math
import random
BIAS = -1
"""
To view the structure of the Neural Network, type
print network_name
"""
class Neuron:
    def __init__(self, n_inputs):
        self.n_inputs = n_inputs
        self.set_weights([random.uniform(0, 1) for x in range(0, n_inputs + 1)])  # +1 for bias weight

    def sum(self, inputs):
        # Does not include the bias
        return sum(val * self.weights[i] for i, val in enumerate(inputs))

    def set_weights(self, weights):
        self.weights = weights

    def __str__(self):
        return 'Weights: %s, Bias: %s' % (str(self.weights[:-1]), str(self.weights[-1]))

class NeuronLayer:
    def __init__(self, n_neurons, n_inputs):
        self.n_neurons = n_neurons
        self.neurons = [Neuron(n_inputs) for _ in range(0, self.n_neurons)]

    def __str__(self):
        return 'Layer:\n\t' + '\n\t'.join([str(neuron) for neuron in self.neurons]) + ''

class NeuralNetwork:
    def __init__(self, n_inputs, n_outputs, n_neurons_to_hl, n_hidden_layers):
        self.n_inputs = n_inputs
        self.n_outputs = n_outputs
        self.n_hidden_layers = n_hidden_layers
        self.n_neurons_to_hl = n_neurons_to_hl
        # Do not touch
        self._create_network()
        self._n_weights = None
        # end

    def _create_network(self):
        if self.n_hidden_layers > 0:
            # create the first layer
            self.layers = [NeuronLayer(self.n_neurons_to_hl, self.n_inputs)]
            # create hidden layers
            self.layers += [NeuronLayer(self.n_neurons_to_hl, self.n_neurons_to_hl) for _ in range(0, self.n_hidden_layers)]
            # hidden-to-output layer
            self.layers += [NeuronLayer(self.n_outputs, self.n_neurons_to_hl)]
        else:
            # If we don't require hidden layers
            self.layers = [NeuronLayer(self.n_outputs, self.n_inputs)]

    def get_weights(self):
        weights = []
        for layer in self.layers:
            for neuron in layer.neurons:
                weights += neuron.weights
        return weights

    @property
    def n_weights(self):
        if not self._n_weights:
            self._n_weights = 0
            for layer in self.layers:
                for neuron in layer.neurons:
                    self._n_weights += neuron.n_inputs + 1  # +1 for bias weight
        return self._n_weights

    def set_weights(self, weights):
        assert len(weights) == self.n_weights, "Incorrect amount of weights."
        stop = 0
        for layer in self.layers:
            for neuron in layer.neurons:
                start, stop = stop, stop + (neuron.n_inputs + 1)
                neuron.set_weights(weights[start:stop])
        return self

    def update(self, inputs):
        assert len(inputs) == self.n_inputs, "Incorrect amount of inputs."
        for layer in self.layers:
            outputs = []
            for neuron in layer.neurons:
                tot = neuron.sum(inputs) + neuron.weights[-1] * BIAS
                outputs.append(self.sigmoid(tot))
            inputs = outputs
        return outputs

    def sigmoid(self, activation, response=1):
        # the activation function
        try:
            return 1 / (1 + math.e ** (-activation / response))
        except OverflowError:
            return float("inf")

    def __str__(self):
        return '\n'.join([str(i + 1) + ' ' + str(layer) for i, layer in enumerate(self.layers)])
My input is a text file containing multiple rows. Each row contains a data point of dimension 16 (the first 16 elements of the row), and the 17th element is the class to which the data point belongs.
Input.txt
1,3,2,2,1,10,2,2,1,9,2,9,1,6,2,8,2
2,1,3,2,2,8,7,7,5,7,6,8,2,8,3,8,4
4,6,6,5,5,8,8,3,5,7,8,7,5,10,4,6,2
6,9,6,11,6,7,7,8,5,9,8,8,4,9,7,9,5
1,1,2,2,1,7,7,8,5,7,6,7,2,8,7,9,1
3,8,5,6,4,10,6,3,6,11,4,7,3,8,2,9,3
4,7,5,5,5,8,6,6,7,6,6,6,2,8,7,10,1
6,10,9,8,8,11,6,2,4,9,4,6,9,6,2,8,11
The class is always in the range [1, 11].
After learning the input, the expected output for each data point should be its corresponding class (it may not always be right).
I am creating an object of the NeuralNetwork class and then trying to use the update method, but I am not clear on how to use it properly. Please guide me on how to use it for the above input.
This is my open-source code.
How to use it
You have to initialize a new network object as: network = NeuralNetwork(n_inputs, n_outputs, n_neurons_to_hl, n_hidden_layers).
n_inputs: the number of input values (in your case: 16)
n_outputs: the number of output values (in your case: 11)
n_neurons_to_hl: the number of neurons in each hidden layer
n_hidden_layers: the number of hidden layers
The update(input_signals) method performs a forward calculation on the network; that is, it is the method you use to classify an input instance.
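For example, a minimal sketch (the hidden-layer sizes are arbitrary, and note that the posted class only does forward passes, so with random weights the predictions will be meaningless until you add a training or weight-setting step):

network = NeuralNetwork(16, 11, 10, 2)  # 16 inputs, 11 outputs, 10 neurons per hidden layer, 2 hidden layers

with open("Input.txt") as f:
    for line in f:
        values = [int(v) for v in line.strip().split(",")]
        features, label = values[:16], values[16]
        outputs = network.update(features)           # 11 sigmoid activations, one per class
        predicted = outputs.index(max(outputs)) + 1  # classes are in the range [1, 11]
        print("predicted:", predicted, "actual:", label)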