I'm having a hard time setting up a neural network because most of the examples I can find use images. My problem has 3 inputs, each of size N x M, where N is the number of samples and M is the number of features. I have a separate CSV file with a 1 x N binary target (0 or 1).
The network I'm trying to configure should have two hidden layers with 100 and 50 neurons, respectively, sigmoid activation functions, and cross-entropy loss to measure performance. The result should be a single probability output.
Can you please help?
EDIT:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd
import torch.nn.functional as F
#from torch.autograd import Variable
import pandas as pd
# Import Data
Input1 = pd.read_csv(r'...')
Input2 = pd.read_csv(r'...')
Input3 = pd.read_csv(r'...')
Target = pd.read_csv(r'...' )
# Convert to Tensor
Input1_tensor = torch.tensor(Input1.to_numpy()).float()
Input2_tensor = torch.tensor(Input2.to_numpy()).float()
Input3_tensor = torch.tensor(Input3.to_numpy()).float()
Target_tensor = torch.tensor(Target.to_numpy()).float()
# Use the tensors as loaded (no transpose is applied here)
input1 = Input1_tensor
input2 = Input2_tensor
input3 = Input3_tensor
y = Target_tensor
# Define the model
class Net(nn.Module):
    """Three-input network from the question.

    The constructor registers three linear layers and two sigmoid modules.
    The forward pass only applies ``layer1`` to each of the three inputs and
    concatenates the results along the feature axis.

    NOTE(review): ``layer2``, ``layer3`` and both sigmoid modules are created
    but never used in ``forward``, so the output has ``3 * hidden1_size``
    columns rather than ``num_classes``.
    """

    def __init__(self, num_inputs, hidden1_size, hidden2_size, num_classes):
        super().__init__()
        # self.criterion = nn.CrossEntropyLoss()
        # First hidden layer (shared by all three inputs in forward()).
        self.layer1 = nn.Linear(in_features=num_inputs, out_features=hidden1_size)
        self.sigmoid = nn.Sigmoid()
        # Second hidden layer.
        self.layer2 = nn.Linear(in_features=hidden1_size, out_features=hidden2_size)
        self.sigmoid2 = nn.Sigmoid()
        # Output layer.
        self.layer3 = nn.Linear(in_features=hidden2_size, out_features=num_classes)

    def forward(self, x1, x2, x3):
        """Project each input with ``layer1`` and concatenate along dim 1."""
        projected = [self.layer1(branch) for branch in (x1, x2, x3)]
        return torch.cat(projected, dim=1)
# Define loss function
loss_function = nn.CrossEntropyLoss()
# Define optimizer
optimizer = optim.SGD(model.parameters(), lr=1e-4)
for t in range(num_epochs):
# Forward pass: Compute predicted y by passing x to the model
y_pred = model(input1, input2, input3)
# Compute and print loss
loss = loss_function(y_pred, y)
print(t, loss.item())
# Zero gradients, perform a backward pass, and update the weights.
optimizer.zero_grad()
# Calculate gradient using backward pass
loss.backward()
# Update model parameters (weights)
optimizer.step()
Here I am getting an error of "
RuntimeError: 0D or 1D target tensor expected, multi-target not supported"
for line "loss = loss_function(y_pred, y)"
Where y_pred is [20000,375] and y is [20000,1]
You can use PyTorch, a Python library for deep learning and neural networks.
The code below defines such a network:
from torch import nn
import torch.nn.functional as F
class network(nn.Module):
    """Binary classifier: M inputs -> 100 -> 50 -> 1, sigmoid after every layer.

    Args:
        M: dimension of the input feature vector.

    The forward pass returns one value in (0, 1) per sample, with shape
    ``(batch, 1)``.
    """

    def __init__(self, M):
        # M is the dimension of the input features
        super(network, self).__init__()
        self.layer1 = nn.Linear(M, 100)
        self.layer2 = nn.Linear(100, 50)
        self.out = nn.Linear(50, 1)

    def forward(self, x):
        # A non-linearity between the linear layers is required; without it
        # the three layers collapse into a single linear map.
        hidden = F.sigmoid(self.layer1(x))
        hidden = F.sigmoid(self.layer2(hidden))
        return F.sigmoid(self.out(hidden))
----------
You can then refer to the pytorch documentation and finish the rest training code.
Edit:
As for RuntimeError, you can squeeze the target tensor by y.squeeze(). This will remove redundant dimension in your tensor, e.g. [20000,1] -> [20000]
Related
I have defined the model as in the code below, and I used batch normalization merging to make 3 layers into 1 linear layer.
The first layer of the model is a linear layer and there is no bias.
The second layer of the model is a batch normalization and there is no weight and bias ( affine is false )
The third layer of the model is a linear layer.
The variables named new_weight and new_bias are the weight and bias of the newly created linear layer, respectively.
My question is: Why is the output of the following two print functions different? And where is the wrong part in the code below the batch merge comment?
import torch
import torch.nn as nn
import torch.optim as optim
learning_rate = 0.01
in_nodes = 20
internal_nodes = 8
out_nodes = 9
batch_size = 100
# model define
class M(nn.Module):
    """Bias-free linear layer -> non-affine BatchNorm1d -> linear layer.

    Layer sizes come from the module-level ``in_nodes``, ``internal_nodes``
    and ``out_nodes`` settings.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(in_features=in_nodes, out_features=internal_nodes, bias=False)
        self.layer2 = nn.BatchNorm1d(num_features=internal_nodes, affine=False)
        self.layer3 = nn.Linear(in_features=internal_nodes, out_features=out_nodes)

    def forward(self, x):
        """Apply the three layers in sequence."""
        return self.layer3(self.layer2(self.layer1(x)))
# optimizer and criterion
model = M()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()
# training
for batch_num in range(1000):
model.train()
optimizer.zero_grad()
input = torch.randn(batch_size, in_nodes)
target = torch.ones(batch_size, out_nodes)
output = model(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()
# batch merge
divider = torch.sqrt(model.layer2.eps + model.layer2.running_var)
w_bn = torch.diag(torch.ones(internal_nodes) / divider)
new_weight = torch.mm(w_bn, model.layer1.weight)
new_weight = torch.mm(model.layer3.weight, new_weight)
b_bn = - model.layer2.running_mean / divider
new_bias = model.layer3.bias + torch.squeeze(torch.mm(model.layer3.weight, b_bn.reshape(-1, 1)))
input = torch.randn(batch_size, in_nodes)
print(model(input))
print(torch.t(torch.mm(new_weight, torch.t(input))) + new_bias)
Short Answer: As far as I can tell you need a model.eval() before the line
input = torch.randn(batch_size, in_nodes)
such that the end looks like this
...
model.eval()
input = torch.randn(batch_size, in_nodes)
test_input = torch.ones(batch_size,internal_nodes)/100
print(model(input))
print(torch.t(torch.mm(new_weight, torch.t(input))) + new_bias)
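With that change (I tested it) the two print statements produce the same output: in eval mode the BatchNorm layer normalizes with the stored running_mean and running_var, which are exactly the values the merged weights were computed from.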
Long Answer:
According to the PyTorch documentation, Batch Normalization uses a default momentum of 0.1 to compute running_mean and running_var. The momentum defines how strongly each newly observed batch statistic influences the running estimate relative to its previous value: running = (1 - momentum) * running + momentum * observed.
If you don't call model.eval(), the BatchNorm layer is still in training mode, so it normalizes with the statistics of the current batch (not the running ones) and also updates running_mean and running_var via the momentum in the line
print(model(input))
For further details and or confirmation: Related Question, PyTorch-Documentation
I've trained a simple neural net using skorch to make it sklearn compatible and I would like to know how to retrieve the actual estimated weights.
Here's a replicable example of what I need.
The neural net presented here uses 10 features, has one hidden layer of 2 nodes, uses ReLu activation functions and linearly combines the output of the 2 nodes.
import torch
import numpy as np
from torch.autograd import Variable
# Create example data
np.random.seed(2022)
train_size = 1000
n_features= 10
X_train = np.random.rand(n_features, train_size).astype("float32")
l2_params_1 = np.random.rand(1,n_features).astype("float32")
l2_params_2 = np.random.rand(1,n_features).astype("float32")
l1_X = np.matmul(l2_params_1, X_train)
l2_X = np.matmul(l2_params_2, X_train)
y_train = l1_X + l2_X
# Defining my NN
class NNModule(torch.nn.Module):
    """Regressor with one hidden layer of 2 ReLU units and a single linear output."""

    def __init__(self, in_features):
        super().__init__()
        self.l1 = torch.nn.Linear(in_features=in_features, out_features=2)
        self.a1 = torch.nn.ReLU()
        self.l2 = torch.nn.Linear(in_features=2, out_features=1)

    def forward(self, x):
        """Return ``l2(relu(l1(x)))``."""
        hidden = self.a1(self.l1(x))
        return self.l2(hidden)
# Initialize the NN
torch.manual_seed(200)
model = NNModule(in_features = 10)
model.l1.weight.data.uniform_(0.0, 1.0)
model.l1.bias.data.uniform_(0.0, 1.0)
# Define criterion and optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
# Train the NN
torch.manual_seed(200)
for epoch in range(100):
inputs = Variable(torch.from_numpy(np.transpose(X_train)))
labels = Variable(torch.from_numpy(np.transpose(y_train)))
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
The parameters at which I'm arriving are the following:
list(model.parameters())
[Output]:
[Parameter containing:
tensor([[0.8997, 0.8345, 0.8284, 0.6950, 0.5949, 0.1217, 0.9067, 0.1824, 0.8272,
0.2372],
[0.7525, 0.6577, 0.4358, 0.6109, 0.8817, 0.5429, 0.5263, 0.7531, 0.1552,
0.7066]], requires_grad=True),
Parameter containing:
tensor([0.6617, 0.1079], requires_grad=True),
Parameter containing:
tensor([[0.9225, 0.8339]], requires_grad=True),
Parameter containing:
tensor([0.0786], requires_grad=True)]
Now, to wrap my NNModule with skorch, I'm using this:
from skorch import NeuralNetRegressor
torch.manual_seed(200)
net = NeuralNetRegressor(
module=NNModule(in_features=10),
criterion=torch.nn.MSELoss,
optimizer=torch.optim.SGD,
optimizer__lr=0.01,
max_epochs=100,
verbose=0
)
net.fit(np.transpose(X_train), np.transpose(y_train))
And I'd like to retrieve the weights obtained in the training. I've used dir(net) to see if the weights are stored in any attributes to no avail.
To retrieve the weights one needs to output them like this:
# `module_` (trailing underscore) is the module instance skorch initialized
# and trained; `module` is only the argument passed to the constructor.
list(net.module_.parameters())
I am creating a GRU to do some classification for a project, and I'm relatively new to Pytorch and implementing GRUs. I know similar questions like this one have been answered already but I can't seem to bring the same solution over to my own problem. I understand that there is an issue with the shape/order of my fc arrays but after trying to change things I can no longer see the trees for the wood. I would appreciate it if someone could point me in the right direction.
Below I have attached my code and the error. The datasets im using contain 24 features with a label in the 25th column.
# Imports
import pandas as pd
import numpy as np
import torch
import torchvision # torch package for vision related things
import torch.nn.functional as F # Parameterless functions, like (some) activation functions
import torchvision.datasets as datasets # Standard datasets
import torchvision.transforms as transforms # Transformations we can perform on our dataset for augmentation
from torch import optim # For optimizers like SGD, Adam, etc.
from torch import nn # All neural network modules
from torch.utils.data import Dataset, DataLoader # Gives easier dataset managment by creating mini batches etc.
from tqdm import tqdm # For a nice progress bar
from sklearn.preprocessing import StandardScaler
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Hyperparameters
input_size = 24
hidden_size = 128
num_layers = 1
num_classes = 2
sequence_length = 1
learning_rate = 0.005
batch_size = 8
num_epochs = 3
# Recurrent neural network with GRU (many-to-one)
class RNN_GRU(nn.Module):
    """GRU followed by a linear classifier over the flattened GRU output.

    Reads the module-level ``sequence_length`` (to size the linear layer) and
    ``device`` (to place the initial hidden state).

    NOTE(review): ``forward`` adds the new axis at position 0, which is the
    batch axis because the GRU is built with ``batch_first=True``; the
    incoming rows therefore become time steps of a single sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size * sequence_length, num_classes)

    def forward(self, x):
        """Run the GRU on ``x`` and classify its flattened output."""
        # Insert a leading axis and build a zero initial hidden state.
        seq = x.unsqueeze(0)
        n_batch = seq.size(0)
        initial_hidden = torch.zeros(self.num_layers, n_batch, self.hidden_size).to(device)
        # Forward propagate the GRU.
        gru_out, _ = self.gru(seq, initial_hidden)
        # Flatten everything after the first axis before the linear layer.
        flat = gru_out.reshape(gru_out.shape[0], -1)
        return self.fc(flat)
class MyDataset(Dataset):
    """Dataset backed by a CSV file of feature columns followed by a label column.

    Args:
        file_name: path of the CSV file to read with ``pandas.read_csv``.
        n_features: number of leading feature columns; the label is read from
            the column at this index. Defaults to 24.

    Attributes:
        x_train: float32 tensor holding the feature columns.
        y_train: int64 tensor holding the labels.
    """

    def __init__(self, file_name, n_features=24):
        stats_df = pd.read_csv(file_name)
        x = stats_df.iloc[:, 0:n_features].values
        y = stats_df.iloc[:, n_features].values
        self.x_train = torch.tensor(x, dtype=torch.float32)
        # nn.CrossEntropyLoss with class-index targets requires integer (long)
        # labels; float labels make the loss call fail.
        self.y_train = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.y_train)

    def __getitem__(self, idx):
        return self.x_train[idx], self.y_train[idx]
nomDs=MyDataset("nomStats.csv")
atkDs=MyDataset("atkStats.csv")
train_loader=DataLoader(dataset=nomDs,batch_size=batch_size)
test_loader=DataLoader(dataset=atkDs,batch_size=batch_size)
# Initialize network (try out just using simple RNN, or GRU, and then compare with LSTM)
model = RNN_GRU(input_size, hidden_size, num_layers, num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Train Network
for epoch in range(num_epochs):
for batch_idx, (data, targets) in enumerate(tqdm(train_loader)):
# Get data to cuda if possible
data = data.to(device=device).squeeze(1)
targets = targets.to(device=device)
# forward
scores = model(data)
loss = criterion(scores, targets)
# backward
optimizer.zero_grad()
loss.backward()
# gradient descent update step/adam step
optimizer.step()
# Check accuracy on training & test to see how good our model
def check_accuracy(loader, model):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    Args:
        loader: iterable yielding ``(inputs, labels)`` batches.
        model: module whose output has one score per class along dim 1.

    Returns:
        ``num_correct / num_samples`` as a 0-dim tensor.

    The model is switched to eval mode for the duration of the check and is
    then returned to the mode it was in before the call, even if evaluation
    raises.
    """
    # Evaluate on the device the model lives on rather than a global setting.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    was_training = model.training
    num_correct = 0
    num_samples = 0

    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=target_device).squeeze(1)
                y = y.to(device=target_device)

                scores = model(x)
                # The index of the largest score is the predicted class.
                _, predictions = scores.max(1)
                num_correct += (predictions == y).sum()
                num_samples += predictions.size(0)
    finally:
        # Restore the caller's mode instead of unconditionally forcing train().
        model.train(was_training)

    return num_correct / num_samples
# Report both accuracies as percentages with two decimals.
print(f"Accuracy on training set: {check_accuracy(train_loader, model)*100:.2f}")
print(f"Accuracy on test set: {check_accuracy(test_loader, model)*100:.2f}")
Traceback (most recent call last):
File "TESTGRU.py", line 87, in <module>
scores = model(data)
File "C:\Users\steph\anaconda3\envs\FYP\lib\site-packages\torch\nn\modules\module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "TESTGRU.py", line 47, in forward
out = self.fc(out)
File "C:\Users\steph\anaconda3\envs\FYP\lib\site-packages\torch\nn\modules\module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Users\steph\anaconda3\envs\FYP\lib\site-packages\torch\nn\modules\linear.py", line 94, in forward
return F.linear(input, self.weight, self.bias)
File "C:\Users\steph\anaconda3\envs\FYP\lib\site-packages\torch\nn\functional.py", line 1753, in linear
return torch._C._nn.linear(input, weight, bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x1024 and 128x2)
It seems like these lines
# Forward propagate LSTM
out, _ = self.gru(x, h0)
out = out.reshape(out.shape[0], -1)
are the problem.
It appears that you only want to feed the hidden state of the last time step.
This could be read from the output in two ways:
If you want the output of all layers at the last time step, you should use the second return value of out, _ = self.gru(x, h0) not the first.
If you want to use just the last layer's output at the last time step (which seems to be the case), you should use
out[:, -1, :]. With this change, you may not need the
reshape operation.
This code attempts to utilize a custom implementation of dropout :
%reset -f
import torch
import torch.nn as nn
# import torchvision
# import torchvision.transforms as transforms
import torch
import torch.nn as nn
import torch.utils.data as data_utils
import numpy as np
import matplotlib.pyplot as plt
import torch.nn.functional as F
num_epochs = 1000
number_samples = 10
from sklearn.datasets import make_moons
from matplotlib import pyplot
from pandas import DataFrame
# generate 2d classification dataset
X, y = make_moons(n_samples=number_samples, noise=0.1)
# scatter plot, dots colored by class value
x_data = [a for a in enumerate(X)]
x_data_train = x_data[:int(len(x_data) * .5)]
x_data_train = [i[1] for i in x_data_train]
x_data_train
y_data = [y[i[0]] for i in x_data]
y_data_train = y_data[:int(len(y_data) * .5)]
y_data_train
x_test = [a[1] for a in x_data[::-1][:int(len(x_data) * .5)]]
y_test = [a for a in y_data[::-1][:int(len(y_data) * .5)]]
x = torch.tensor(x_data_train).float() # <2>
print(x)
y = torch.tensor(y_data_train).long()
print(y)
x_test = torch.tensor(x_test).float()
print(x_test)
y_test = torch.tensor(y_test).long()
print(y_test)
class Dropout(nn.Module):
    """Custom dropout module from the question (the implementation being asked about).

    Stores the drop probability ``p`` and an ``inplace`` flag; ``forward``
    returns a scaled NumPy binomial sample.
    """

    def __init__(self, p=0.5, inplace=False):
        # print(p)
        super(Dropout, self).__init__()
        # Reject probabilities outside [0, 1].
        if p < 0 or p > 1:
            raise ValueError("dropout probability has to be between 0 and 1, "
                             "but got {}".format(p))
        self.p = p
        self.inplace = inplace

    def forward(self, input):
        # Debug output of the incoming tensor's shape.
        print(list(input.shape))
        # NOTE(review): `dropout_percent` is not defined in this class or in
        # the surrounding snippet; `input` is only used for its length/shape,
        # and the result is a NumPy array rather than a tensor derived from
        # `input`.
        return np.random.binomial([np.ones((len(input),np.array(list(input.shape))))],1-dropout_percent)[0] * (1.0/(1-self.p))

    def __repr__(self):
        # Render as e.g. "Dropout(p=0.5)" or "Dropout(p=0.5, inplace)".
        inplace_str = ', inplace' if self.inplace else ''
        return self.__class__.__name__ + '(' \
            + 'p=' + str(self.p) \
            + inplace_str + ')'
class MyLinear(nn.Linear):
    """Linear layer that passes its weight through the custom ``Dropout`` before use."""

    def __init__(self, in_feats, out_feats, drop_p, bias=True):
        super(MyLinear, self).__init__(in_feats, out_feats, bias=bias)
        # Dropout module that forward() applies to the weight, not to the input.
        self.custom_dropout = Dropout(p=drop_p)

    def forward(self, input):
        # Whatever custom_dropout returns is used as the weight of F.linear.
        dropout_value = self.custom_dropout(self.weight)
        return F.linear(input, dropout_value, self.bias)
my_train = data_utils.TensorDataset(x, y)
train_loader = data_utils.DataLoader(my_train, batch_size=2, shuffle=True)
my_test = data_utils.TensorDataset(x_test, y_test)
# Build the test loader from the held-out split, not from the training split.
test_loader = data_utils.DataLoader(my_test, batch_size=2, shuffle=True)
# Device configuration
device = 'cpu'
print(device)
# Hyper-parameters
input_size = 2
hidden_size = 100
num_classes = 2
learning_rate = 0.0001
pred = []
# Fully connected neural network with one hidden layer
class NeuralNet(nn.Module):
    """Fully connected network: ``MyLinear`` hidden layer -> ReLU -> linear output.

    Args:
        input_size: number of input features.
        hidden_size: width of the hidden layer.
        num_classes: number of output scores.
        p: drop probability handed to ``MyLinear``.
    """

    def __init__(self, input_size, hidden_size, num_classes, p):
        super().__init__()
        # Hidden layer whose weights go through the custom dropout.
        self.fc1 = MyLinear(input_size, hidden_size, p)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the output scores for ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)
model = NeuralNet(input_size, hidden_size, num_classes, p=0.9).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
for i, (images, labels) in enumerate(train_loader):
# Move tensors to the configured device
images = images.reshape(-1, 2).to(device)
labels = labels.to(device)
# Forward pass
outputs = model(images)
loss = criterion(outputs, labels)
# Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()
if (epoch) % 100 == 0:
print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))
Custom dropout is implemented as :
class Dropout(nn.Module):
    """Custom dropout module from the question (the implementation being asked about).

    Stores the drop probability ``p`` and an ``inplace`` flag; ``forward``
    returns a scaled NumPy binomial sample.
    """

    def __init__(self, p=0.5, inplace=False):
        # print(p)
        super(Dropout, self).__init__()
        # Reject probabilities outside [0, 1].
        if p < 0 or p > 1:
            raise ValueError("dropout probability has to be between 0 and 1, "
                             "but got {}".format(p))
        self.p = p
        self.inplace = inplace

    def forward(self, input):
        # Debug output of the incoming tensor's shape.
        print(list(input.shape))
        # NOTE(review): `dropout_percent` is not defined in this class or in
        # the surrounding snippet; `input` is only used for its length/shape,
        # and the result is a NumPy array rather than a tensor derived from
        # `input`.
        return np.random.binomial([np.ones((len(input),np.array(list(input.shape))))],1-dropout_percent)[0] * (1.0/(1-self.p))

    def __repr__(self):
        # Render as e.g. "Dropout(p=0.5)" or "Dropout(p=0.5, inplace)".
        inplace_str = ', inplace' if self.inplace else ''
        return self.__class__.__name__ + '(' \
            + 'p=' + str(self.p) \
            + inplace_str + ')'
class MyLinear(nn.Linear):
    """Linear layer that passes its weight through the custom ``Dropout`` before use."""

    def __init__(self, in_feats, out_feats, drop_p, bias=True):
        super(MyLinear, self).__init__(in_feats, out_feats, bias=bias)
        # Dropout module that forward() applies to the weight, not to the input.
        self.custom_dropout = Dropout(p=drop_p)

    def forward(self, input):
        # Whatever custom_dropout returns is used as the weight of F.linear.
        dropout_value = self.custom_dropout(self.weight)
        return F.linear(input, dropout_value, self.bias)
It seems I've implemented the dropout function incorrectly ? :
np.random.binomial([np.ones((len(input),np.array(list(input.shape))))],1-dropout_percent)[0] * (1.0/(1-self.p))
How to modify in order to correctly utilize dropout ?
These posts were useful in getting to this point :
Hinton's Dropout in 3 Lines of Python :
https://iamtrask.github.io/2015/07/28/dropout/
Making a Custom Dropout Function : https://discuss.pytorch.org/t/making-a-custom-dropout-function/14053/2
It seems I've implemented the dropout function incorrectly?
np.random.binomial([np.ones((len(input),np.array(list(input.shape))))],1-dropout_percent)[0] * (1.0/(1-self.p))
In fact, the above implementation is known as Inverted Dropout. Inverted Dropout is how Dropout is implemented in practice in the various deep learning frameworks.
What is inverted dropout?
Before jump into the inverted dropout, it can be helpful to see how Dropout works for a single neuron:
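Since during the training phase a neuron is kept on with probability q (= 1 - p), during the testing phase we have to emulate the behavior of the ensemble of networks used in the training phase. To this end, the authors suggest scaling the activation function by a factor of q during the test phase in order to use the expected output produced in the training phase as the single output required in the test phase (Section 10, Multiplicative Gaussian Noise). Thus, for neuron $i$ with activation function $a$, weights $w_k$, inputs $x_k$, bias $b$ and a mask $X_i \sim \mathrm{Bernoulli}(q)$: Train phase: $O_i = X_i \, a\left(\sum_k w_k x_k + b\right)$. Test phase: $O_i = q \, a\left(\sum_k w_k x_k + b\right)$.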
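Inverted dropout is a bit different. This approach consists of scaling the activations during the training phase, leaving the test phase untouched. The scale factor is the inverse of the keep probability, 1/(1-p) = 1/q, thus: Train phase: $O_i = \frac{1}{q} X_i \, a\left(\sum_k w_k x_k + b\right)$. Test phase: $O_i = a\left(\sum_k w_k x_k + b\right)$.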
Inverted dropout lets you define the model once and just change a parameter (the keep/drop probability) to run training and testing on the same model. Direct dropout, instead, forces you to modify the network during the test phase, because if you don't multiply the output by q, the neuron will produce values that are higher than the ones expected by the subsequent neurons (so the following neurons can saturate or explode). That's why inverted dropout is the more common implementation.
References:
Dropout Regularization, coursera by Andrew NG
What is inverted dropout?
Dropout: scaling the activation versus inverting the dropout
Analysis of Dropout
How implement inverted dropout Pytorch?
class MyDropout(nn.Module):
    """Inverted dropout: zero elements with probability ``p`` and rescale the rest.

    Args:
        p: probability of dropping an element; must lie in [0, 1].

    Raises:
        ValueError: if ``p`` is outside [0, 1].

    In training mode each element is kept with probability ``1 - p`` and the
    kept elements are multiplied by ``1 / (1 - p)``. In eval mode the input
    is returned unchanged.
    """

    def __init__(self, p: float = 0.5):
        super(MyDropout, self).__init__()
        if p < 0 or p > 1:
            raise ValueError("dropout probability has to be between 0 and 1, " "but got {}".format(p))
        self.p = p

    def forward(self, X):
        if not self.training or self.p == 0:
            return X
        if self.p == 1:
            # Everything is dropped; avoids dividing by a keep probability of 0.
            return torch.zeros_like(X)
        keep_prob = 1 - self.p
        # Sample the mask with X's shape, dtype and device so the module also
        # works for inputs that do not live on the default device.
        mask = torch.bernoulli(torch.full_like(X, keep_prob))
        return X * mask / keep_prob
How to implement in Numpy?
import numpy as np
# Probability of keeping each weight.
pKeep = 0.8
weights = np.ones([1, 5])
# Boolean keep-mask: True where a uniform draw falls below pKeep.
binary_value = np.random.rand(*weights.shape) < pKeep
# Zero the dropped weights, then rescale the survivors by 1 / pKeep
# (this rescaling is the inverted dropout technique).
res = np.multiply(weights, binary_value) / pKeep
print(res)
How to implement in Tensorflow?
import tensorflow as tf
# NOTE(review): enable_eager_execution, random_uniform and div look like
# TensorFlow 1.x names — confirm against the TensorFlow version in use.
tf.enable_eager_execution()
weights = tf.ones(shape=[1, 5])
keep_prob = 0.8
# Shift a uniform sample by keep_prob so that flooring it yields a 0/1 mask.
random_tensor = keep_prob
random_tensor += tf.random_uniform(weights.shape)
# 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob)
binary_tensor = tf.floor(random_tensor)
# Rescale by 1 / keep_prob and zero the dropped entries.
ret = tf.div(weights, keep_prob) * binary_tensor
print(ret)
Implementation with Torch and bernoulli..
def forward(self, x):
    """Apply inverted dropout to ``x`` using ``self.keep_prob``.

    In training mode each element is kept with probability ``self.keep_prob``
    and the kept elements are divided by ``self.keep_prob``. Outside training
    mode ``x`` is returned unchanged.
    """
    output = x  # self.W.t() + self.bias
    if not self.training:
        return output
    keep_prob = float(self.keep_prob)
    if keep_prob <= 0:
        # Nothing is kept; avoids dividing by zero below.
        return torch.zeros_like(output)
    # Sample the mask with the input's shape, dtype and device.
    sample = torch.bernoulli(torch.full_like(output, keep_prob))
    # Dividing by keep_prob is the "inverted" part: it keeps the expected
    # activation equal between training and evaluation.
    return output * sample / keep_prob
I'm new with Pytorch, I tried to write my training class, but I had this error
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
import torch.optim as optim
import tqdm
class MLPNet(nn.Module):
    """Two-layer perceptron for 8x8 inputs: 64 -> 100 -> 10, sigmoid after each layer."""

    def __init__(self):
        super().__init__()
        self.first_fully_connected = nn.Linear(8 * 8, 100)
        self.last_fully_connected = nn.Linear(100, 10)

    def forward(self, x):
        """Flatten ``x`` to ``(batch_size, 64)`` and apply both sigmoid layers."""
        flat = x.view(-1, 8 * 8)
        hidden = F.sigmoid(self.first_fully_connected(flat))
        return F.sigmoid(self.last_fully_connected(hidden))
def training(mlp, X, y, epochs=1, lr=.2, batch_size=101):
    """Train ``mlp`` with SGD (momentum 0.9) and cross-entropy loss.

    Args:
        mlp: module mapping a float batch to per-class scores.
        X: NumPy array of inputs, indexed by sample along axis 0.
        y: NumPy array of class labels, one per sample.
            NOTE(review): labels are cast to integer class indices; confirm
            ``y`` holds class ids rather than continuous values.
        epochs: number of passes over the data.
        lr: learning rate.
        batch_size: target number of samples per batch.

    Returns:
        The same ``mlp`` object, trained in place.
    """
    solver = torch.optim.SGD(mlp.parameters(), lr=lr, momentum=0.9)
    loss = nn.CrossEntropyLoss()  # nn.NLLLoss()
    # Ceiling division: number of batches needed to cover all samples.
    n_batches = (len(X) + batch_size - 1) // batch_size

    for epoch in tqdm.tqdm(range(epochs)):
        for i in range(n_batches):
            # Strided slice: batch i takes every n_batches-th sample from i.
            slice_ = np.s_[i::n_batches]
            X_batch = torch.from_numpy(X[slice_]).float()
            # CrossEntropyLoss expects a 1-D tensor of integer class indices,
            # not a float column vector.
            y_batch = torch.from_numpy(y[slice_]).long().reshape(-1)

            # Reset gradients accumulated by the previous step.
            solver.zero_grad()
            # Forward pass to get the logits.
            prediction = mlp(X_batch)
            # Softmax + negative log-likelihood on the logits.
            loss_f = loss(prediction, y_batch)
            # Backpropagate and update the parameters.
            loss_f.backward()
            solver.step()

    return mlp
mlp = nn.Sequential(
#### Your net here
nn.Linear(2, 64),
nn.ReLU(),
nn.Linear(64, 2)
)
model_mlp = training(mlp, X_std, y_std)
But I got the error below.
I tried changing the tensor type, but I still got the same error.
I also tried changing the loss function, but the error remained.
RuntimeError Traceback (most recent call last)
in ()
----> 1 model_mlp = fit(mlp, X_std, y_std)
RuntimeError: Expected object of type Variable[torch.FloatTensor] but found type Variable[torch.LongTensor] for argument #1 'mat1'
I really appreciate any help you can provide.
Thank you so much
As pointed out in the edit to the question by @Ioannis Nasios, you originally had
X_batch = Variable(torch.from_numpy(X[slice_])).long()
Which means that your input tensors to your MLP were long integers, but the network requires floats. So you need to have:
X_batch = Variable(torch.from_numpy(X[slice_])).float()
And this should solve your error.