Optimizing with BCE is not working, nothing will change - python

I have the following code:
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
import matplotlib.pyplot as plt
import os
import keras
from random import choice
import sys

devicet = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(devicet)
if devicet == 'cpu':
    print('Using CPU')
else:
    print('Using GPU')
    cuda0 = torch.device('cuda:0')

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.step1 = nn.Linear(5, 25)
        self.step2 = nn.Linear(25, 50)
        self.step3 = nn.Linear(50, 100)
        self.step4 = nn.Linear(100, 100)
        self.step5 = nn.Linear(100, 10)
        self.step6 = nn.Linear(10, 1)

    def forward(self, x):
        x = F.relu(x)
        x = self.step1(x)
        x = F.relu(x)
        x = self.step2(x)
        x = F.relu(x)
        x = self.step3(x)
        x = F.relu(x)
        x = self.step4(x)
        x = F.relu(x)
        x = self.step5(x)
        x = F.relu(x)
        x = self.step6(x)
        x = F.relu(x)
        return (x)

net = Net()
x = torch.rand(10, 5)
num = choice(range(10))
zero_tensor = torch.zeros(num, 1)
one_tensor = torch.ones(10 - num, 1)
y = torch.cat((zero_tensor, one_tensor), 0)
x.to(devicet)
y.to(devicet)
learning_rate = 1e-3
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
loss_fn = torch.nn.BCELoss()
acc_list = []
for i in tqdm(range(1000), desc='Training'):
    y_pred = net(x)
    loss = loss_fn(y_pred, y)
    loss.backward()
    optimizer.step()
    acc_list.append(abs(net(x).detach().numpy()[0] - y.detach().numpy()[0]))
    with torch.no_grad():
        for param in net.parameters():
            param -= learning_rate * param.grad
    optimizer.zero_grad()
print('\nFinished training in {} epochs.'.format(len(acc_list)))
plt.plot(range(len(acc_list)), acc_list)
plt.show()
for i in range(10):
    print(str(net(x).detach().numpy()[i][0]) + ', ' + str(y.detach().numpy()[i][0]))
When I run this, it consistently just prints out the following:
[screenshot omitted: the printed predictions are identical and never change]
Why won't it do any training? It works if I use MSE loss (actually, it only works sometimes with MSE loss; sometimes it does the same thing as in the image). It's only when I use BCE that it stops working entirely.

Final layer activation
You are outputting only positive values; for BCELoss those should be between 0 and 1. These lines specifically:
x = F.relu(x)
return (x)
Use torch.sigmoid with BCELoss, or even better, just output x and use torch.nn.BCEWithLogitsLoss, which works on logits directly.
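For instance, a minimal sketch of the logits variant (a shortened stand-in for the question's network; the class name and layer sizes here are illustrative):

import torch
import torch.nn as nn
import torch.nn.functional as F

class LogitNet(nn.Module):
    def __init__(self):
        super(LogitNet, self).__init__()
        self.step1 = nn.Linear(5, 25)
        self.step6 = nn.Linear(25, 1)

    def forward(self, x):
        x = F.relu(self.step1(x))
        return self.step6(x)  # raw logits: no final ReLU or sigmoid

loss_fn = nn.BCEWithLogitsLoss()  # applies sigmoid internally; numerically stabler than sigmoid + BCELoss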
Training
You are using the Adam optimizer and also performing an SGD update manually here:
with torch.no_grad():
    for param in net.parameters():
        param -= learning_rate * param.grad
Essentially you are applying the optimization step twice, which is probably too much and might destroy the weights.
optimizer.step() already does this; there is no need for both!
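A corrected loop relies on optimizer.step() alone (a sketch, assuming the logits setup above):

for i in tqdm(range(1000), desc='Training'):
    optimizer.zero_grad()        # clear gradients from the previous iteration
    y_pred = net(x)              # forward pass (logits)
    loss = loss_fn(y_pred, y)
    loss.backward()              # compute gradients
    optimizer.step()             # single Adam update; no manual parameter edits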
Accuracy
This part:
abs(net(x).detach().numpy()[0]-y.detach().numpy()[0])
I assume you want to calculate accuracy. It would go like this (also, do not push data through the network twice via net(x); you already have y_pred!):
# Assuming sigmoid activation
def accuracy(y_pred, y_true):
    # For logits use
    # predicted_labels = y_pred > 0.0
    predicted_labels = y_pred > 0.5
    return torch.mean((y_true == predicted_labels).float())
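Inside the training loop you would then track, for example:

acc_list.append(accuracy(y_pred, y).item())  # reuse y_pred instead of running net(x) again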

Related

How to fit a small dataset with a perceptron model using Pytorch

I'm trying to fit a small dataset (just 7x1 in size) with a 3-layer perceptron model, but the loss can't converge. I'm new to machine learning; can someone please give me a hint to adjust my code?
import torch
import torch.nn as nn
import torch.nn.functional as F

vec_shape = [7, 1]
x_0 = [500, 1000, 2000, 4000, 5000, 8000, 10000]
y_0 = [1.171467, 1.486507, 11.7738, 34.448421, 75.402871, 225.319848, 492.262426]

# x = torch.tensor(x_0).reshape(vec_shape).float()
x = torch.log(torch.tensor(x_0).reshape(vec_shape).float())
y = torch.tensor(y_0).reshape(vec_shape).float()

class Net(nn.Module):
    def __init__(self, n_input, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden1 = nn.Linear(n_input, n_hidden)
        self.hidden2 = nn.Linear(n_hidden, n_hidden)
        self.predict = nn.Linear(n_hidden, n_output)

    def forward(self, input):
        out = self.hidden1(input)
        out = F.relu(out)
        out = self.hidden2(out)
        out = torch.sigmoid(out)
        out = self.predict(out)
        return out

    def weight_init(self):
        for op in self.modules():
            if isinstance(op, nn.Linear):
                nn.init.normal_(op.weight.data)
                nn.init.normal_(op.bias.data)

net = Net(1, 10, 1)
net.weight_init()
# print(net)
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)
loss_func = torch.nn.MSELoss()

for t in range(500):
    prediction = net(x)
    loss = loss_func(prediction, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if(t % 50 == 0):
        print('Loss = %.4f' % loss.data)
I tried to expand the model and to shrink it, but neither change works.
Rescaling and normalization are key in machine learning. Your setup is pretty good and you apply some rescaling, but simply not enough: with the limited number of datapoints you have, the range of the targets is way too large. So, just like you do with x_0, apply torch.log to y_0. You can always scale back the predictions after training. Below you can find the adapted code; I changed the following:
torch.log on y_0
Learning rate to 0.01
Number of iterations to 50000
Added a print statement to show rescaling of predictions
import torch
import torch.nn as nn
import torch.nn.functional as F

vec_shape = [7, 1]
x_0 = [500, 1000, 2000, 4000, 5000, 8000, 10000]
y_0 = [1.171467, 1.486507, 11.7738, 34.448421, 75.402871, 225.319848, 492.262426]

# x = torch.tensor(x_0).reshape(vec_shape).float()
x = torch.log(torch.tensor(x_0).reshape(vec_shape).float())
y = torch.log(torch.tensor(y_0).reshape(vec_shape).float()) # modified

class Net(nn.Module):
    def __init__(self, n_input, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden1 = nn.Linear(n_input, n_hidden)
        self.hidden2 = nn.Linear(n_hidden, n_hidden)
        self.predict = nn.Linear(n_hidden, n_output)

    def forward(self, input):
        out = self.hidden1(input)
        out = F.relu(out)
        out = self.hidden2(out)
        out = torch.sigmoid(out)
        out = self.predict(out)
        return out

    def weight_init(self):
        for op in self.modules():
            if isinstance(op, nn.Linear):
                nn.init.normal_(op.weight.data)
                nn.init.normal_(op.bias.data)

net = Net(1, 10, 1)
net.weight_init()
# print(net)
optimizer = torch.optim.SGD(net.parameters(), lr=0.01) # modified
loss_func = torch.nn.MSELoss()

for t in range(50000): # modified
    prediction = net(x)
    loss = loss_func(prediction, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if(t % 50 == 0):
        print('Loss = %.4f' % loss.data)
        print(torch.exp(net(x))) # added
I also recommend normalizing your dataset after the logarithmic rescaling, for instance by subtracting the mean and dividing by the standard deviation.
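A sketch of that normalization step (my own illustration; the statistics are computed on the log-scaled targets and undone at prediction time):

y_mean, y_std = y.mean(), y.std()
y_norm = (y - y_mean) / y_std   # zero-mean, unit-variance targets
# train on y_norm instead of y, then undo both transforms on predictions:
# torch.exp(net(x) * y_std + y_mean)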

Pytorch simple model not improving

I am making a simple PyTorch neural net to approximate the sine function on x = [0, 2pi]. This is a simple architecture I use with different deep learning libraries to test whether I understand how to use it or not. The neural net, when untrained, always produces a straight horizontal line, and when trained, produces a straight line at y = 0. In general, it always produces a straight line at y = (The mean of the function). This leads me to believe something is wrong with the forward prop portion of it, as the boundary should not just be a straight line when untrained. Here is the code for the net:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(1, 20),
            nn.Sigmoid(),
            nn.Linear(20, 50),
            nn.Sigmoid(),
            nn.Linear(50, 50),
            nn.Sigmoid(),
            nn.Linear(50, 1)
        )

    def forward(self, x):
        x = self.model(x)
        return x
Here is the training loop
def train(net, trainloader, valloader, learningrate, n_epochs):
    net = net.train()
    loss = nn.MSELoss()
    optimizer = torch.optim.SGD(net.parameters(), lr = learningrate)
    for epoch in range(n_epochs):
        for X, y in trainloader:
            X = X.reshape(-1, 1)
            y = y.view(-1, 1)
            optimizer.zero_grad()
            outputs = net(X)
            error = loss(outputs, y)
            error.backward()
            #net.parameters() net.parameters() * learningrate
            optimizer.step()
        total_loss = 0
        for X, y in valloader:
            X = X.reshape(-1, 1).float()
            y = y.view(-1, 1)
            outputs = net(X)
            error = loss(outputs, y)
            total_loss += error.data
        print('Val loss for epoch', epoch, 'is', total_loss / len(valloader))
it is called as:
net = Net()
losslist = train(net, trainloader, valloader, .0001, n_epochs = 4)
Where trainloader and valloader are the training and validation loaders. Can anyone help me see what's wrong with this? I know it's not the learning rate, since it's the one I use in other frameworks, and I know it's not the fact that I'm using SGD or sigmoid activation functions, although I have a suspicion the error is in the activation functions somewhere.
Does anyone know how to fix this? Thanks.
After a while playing with some hyperparameters, modifying the net and changing the optimizer (following this excellent recipe), I ended up changing the line optimizer = torch.optim.SGD(net.parameters(), lr = learningrate) to optimizer = torch.optim.Adam(net.parameters()) (the default optimizer parameters were used), running for 100 epochs with a batch size of 1.
The following code was used (tested on CPU only):
import torch
import torch.nn as nn
from torch.utils import data
import numpy as np
import matplotlib.pyplot as plt

# for reproducibility
torch.manual_seed(0)
np.random.seed(0)

class Dataset(data.Dataset):
    def __init__(self, init, end, n):
        self.n = n
        self.x = np.random.rand(self.n, 1) * (end - init) + init
        self.y = np.sin(self.x)

    def __len__(self):
        return self.n

    def __getitem__(self, idx):
        x = self.x[idx, np.newaxis]
        y = self.y[idx, np.newaxis]
        return torch.Tensor(x), torch.Tensor(y)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(1, 20),
            nn.Sigmoid(),
            nn.Linear(20, 50),
            nn.Sigmoid(),
            nn.Linear(50, 50),
            nn.Sigmoid(),
            nn.Linear(50, 1)
        )

    def forward(self, x):
        x = self.model(x)
        return x

def train(net, trainloader, valloader, n_epochs):
    loss = nn.MSELoss()
    # Switch the two following lines and run the code
    # optimizer = torch.optim.SGD(net.parameters(), lr = 0.0001)
    optimizer = torch.optim.Adam(net.parameters())
    for epoch in range(n_epochs):
        net.train()
        for x, y in trainloader:
            optimizer.zero_grad()
            outputs = net(x).view(-1)
            error = loss(outputs, y)
            error.backward()
            optimizer.step()
        net.eval()
        total_loss = 0
        for x, y in valloader:
            outputs = net(x)
            error = loss(outputs, y)
            total_loss += error.data
        print('Val loss for epoch', epoch, 'is', total_loss / len(valloader))
    net.eval()
    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)

    def plot_result(ax, dataloader):
        out, xx, yy = [], [], []
        for x, y in dataloader:
            out.append(net(x))
            xx.append(x)
            yy.append(y)
        out = torch.cat(out, dim=0).detach().numpy().reshape(-1)
        xx = torch.cat(xx, dim=0).numpy().reshape(-1)
        yy = torch.cat(yy, dim=0).numpy().reshape(-1)
        ax.scatter(xx, yy, facecolor='green')
        ax.scatter(xx, out, facecolor='red')
        xx = np.linspace(0.0, 3.14159*2, 1000)
        ax.plot(xx, np.sin(xx), color='green')

    plot_result(ax1, trainloader)
    plot_result(ax2, valloader)
    plt.show()

train_dataset = Dataset(0.0, 3.14159*2, 100)
val_dataset = Dataset(0.0, 3.14159*2, 30)
params = {'batch_size': 1,
          'shuffle': True,
          'num_workers': 4}
trainloader = data.DataLoader(train_dataset, **params)
valloader = data.DataLoader(val_dataset, **params)
net = Net()
losslist = train(net, trainloader, valloader, n_epochs = 100)
Result with Adam optimizer: [plot omitted]
Result with SGD optimizer: [plot omitted]
In general, it always produces a straight line at y = (The mean of the function).
Usually, this means that the NN has only successfully trained the final layer so far. You need to train it for longer or with better optimizations, as ViniciusArruda shows here.
Edit: To explain further: when only the final layer has been trained, the NN is effectively trying to guess the output y with no knowledge of the input X. In this case, the best guess it can make is the mean value. That way, it can minimize its MSE loss.
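You can check numerically that the mean is the best constant guess under MSE (a small illustration, not part of the original answer):

import numpy as np

y = np.sin(np.linspace(0, 2 * np.pi, 1000))   # the target function
cs = np.linspace(-1, 1, 201)                  # candidate constant guesses
mse = [np.mean((y - c) ** 2) for c in cs]
print(cs[np.argmin(mse)], y.mean())           # both are ~0: the best constant is the mean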

What is the proper way to implement an inverse dropout layer in keras? [duplicate]

This code attempts to utilize a custom implementation of dropout:
%reset -f
import torch
import torch.nn as nn
# import torchvision
# import torchvision.transforms as transforms
import torch
import torch.nn as nn
import torch.utils.data as data_utils
import numpy as np
import matplotlib.pyplot as plt
import torch.nn.functional as F

num_epochs = 1000
number_samples = 10

from sklearn.datasets import make_moons
from matplotlib import pyplot
from pandas import DataFrame

# generate 2d classification dataset
X, y = make_moons(n_samples=number_samples, noise=0.1)
# scatter plot, dots colored by class value

x_data = [a for a in enumerate(X)]
x_data_train = x_data[:int(len(x_data) * .5)]
x_data_train = [i[1] for i in x_data_train]
x_data_train

y_data = [y[i[0]] for i in x_data]
y_data_train = y_data[:int(len(y_data) * .5)]
y_data_train

x_test = [a[1] for a in x_data[::-1][:int(len(x_data) * .5)]]
y_test = [a for a in y_data[::-1][:int(len(y_data) * .5)]]

x = torch.tensor(x_data_train).float() # <2>
print(x)
y = torch.tensor(y_data_train).long()
print(y)
x_test = torch.tensor(x_test).float()
print(x_test)
y_test = torch.tensor(y_test).long()
print(y_test)

class Dropout(nn.Module):
    def __init__(self, p=0.5, inplace=False):
        # print(p)
        super(Dropout, self).__init__()
        if p < 0 or p > 1:
            raise ValueError("dropout probability has to be between 0 and 1, "
                             "but got {}".format(p))
        self.p = p
        self.inplace = inplace

    def forward(self, input):
        print(list(input.shape))
        return np.random.binomial([np.ones((len(input), np.array(list(input.shape))))], 1 - dropout_percent)[0] * (1.0 / (1 - self.p))

    def __repr__(self):
        inplace_str = ', inplace' if self.inplace else ''
        return self.__class__.__name__ + '(' \
            + 'p=' + str(self.p) \
            + inplace_str + ')'

class MyLinear(nn.Linear):
    def __init__(self, in_feats, out_feats, drop_p, bias=True):
        super(MyLinear, self).__init__(in_feats, out_feats, bias=bias)
        self.custom_dropout = Dropout(p=drop_p)

    def forward(self, input):
        dropout_value = self.custom_dropout(self.weight)
        return F.linear(input, dropout_value, self.bias)

my_train = data_utils.TensorDataset(x, y)
train_loader = data_utils.DataLoader(my_train, batch_size=2, shuffle=True)
my_test = data_utils.TensorDataset(x_test, y_test)
test_loader = data_utils.DataLoader(my_train, batch_size=2, shuffle=True)

# Device configuration
device = 'cpu'
print(device)

# Hyper-parameters
input_size = 2
hidden_size = 100
num_classes = 2
learning_rate = 0.0001

pred = []

# Fully connected neural network with one hidden layer
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes, p):
        super(NeuralNet, self).__init__()
        # self.drop_layer = nn.Dropout(p=p)
        # self.drop_layer = MyLinear()
        # self.fc1 = MyLinear(input_size, hidden_size, p)
        self.fc1 = MyLinear(input_size, hidden_size, p)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # out = self.drop_layer(x)
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

model = NeuralNet(input_size, hidden_size, num_classes, p=0.9).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Move tensors to the configured device
        images = images.reshape(-1, 2).to(device)
        labels = labels.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if (epoch) % 100 == 0:
        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))
Custom dropout is implemented as:
class Dropout(nn.Module):
    def __init__(self, p=0.5, inplace=False):
        # print(p)
        super(Dropout, self).__init__()
        if p < 0 or p > 1:
            raise ValueError("dropout probability has to be between 0 and 1, "
                             "but got {}".format(p))
        self.p = p
        self.inplace = inplace

    def forward(self, input):
        print(list(input.shape))
        return np.random.binomial([np.ones((len(input), np.array(list(input.shape))))], 1 - dropout_percent)[0] * (1.0 / (1 - self.p))

    def __repr__(self):
        inplace_str = ', inplace' if self.inplace else ''
        return self.__class__.__name__ + '(' \
            + 'p=' + str(self.p) \
            + inplace_str + ')'

class MyLinear(nn.Linear):
    def __init__(self, in_feats, out_feats, drop_p, bias=True):
        super(MyLinear, self).__init__(in_feats, out_feats, bias=bias)
        self.custom_dropout = Dropout(p=drop_p)

    def forward(self, input):
        dropout_value = self.custom_dropout(self.weight)
        return F.linear(input, dropout_value, self.bias)
It seems I've implemented the dropout function incorrectly?
np.random.binomial([np.ones((len(input),np.array(list(input.shape))))],1-dropout_percent)[0] * (1.0/(1-self.p))
How should I modify it in order to correctly utilize dropout?
These posts were useful in getting to this point :
Hinton's Dropout in 3 Lines of Python :
https://iamtrask.github.io/2015/07/28/dropout/
Making a Custom Dropout Function : https://discuss.pytorch.org/t/making-a-custom-dropout-function/14053/2
It seems I've implemented the dropout function incorrectly?
np.random.binomial([np.ones((len(input),np.array(list(input.shape))))],1-dropout_percent)[0] * (1.0/(1-self.p))
In fact, the above implementation is known as Inverted Dropout. Inverted Dropout is how Dropout is implemented in practice in the various deep learning frameworks.
What is inverted dropout?
Before jumping into inverted dropout, it can be helpful to see how dropout works for a single neuron:
Since during the train phase a neuron is kept on with probability q (= 1 - p), during the testing phase we have to emulate the behavior of the ensemble of networks used in the training phase. To this end, the authors suggest scaling the activation function by a factor of q during the test phase in order to use the expected output produced in the training phase as the single output required in the test phase (Section 10, Multiplicative Gaussian Noise). Thus the test-time output is scaled: O_test = q * a(sum_i w_i x_i + b).
Inverted dropout is a bit different. This approach consists of scaling the activations during the training phase, leaving the test phase untouched. The scale factor is the inverse of the keep probability, 1/(1-p) = 1/q; thus the train-time activations are multiplied by both the dropout mask and 1/q, while at test time the output is simply O_test = a(sum_i w_i x_i + b).
Inverted dropout lets you define the model once and just change a parameter (the keep/drop probability) to run train and test on the same model. Direct dropout, instead, forces you to modify the network during the test phase, because if you don't multiply the output by q, the neuron will produce values that are higher than those expected by the successive neurons (thus the following neurons can saturate or explode): that's why inverted dropout is the more common implementation.
References:
Dropout Regularization, coursera by Andrew NG
What is inverted dropout?
Dropout: scaling the activation versus inverting the dropout
Analysis of Dropout
How to implement inverted dropout in PyTorch?
class MyDropout(nn.Module):
    def __init__(self, p: float = 0.5):
        super(MyDropout, self).__init__()
        if p < 0 or p > 1:
            raise ValueError("dropout probability has to be between 0 and 1, "
                             "but got {}".format(p))
        self.p = p

    def forward(self, X):
        if self.training:
            binomial = torch.distributions.binomial.Binomial(probs=1-self.p)
            return X * binomial.sample(X.size()) * (1.0/(1-self.p))
        return X
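A quick sanity check of its train/eval behavior (hypothetical usage, not part of the original answer):

drop = MyDropout(p=0.5)
x = torch.ones(2, 4)
drop.train()
print(drop(x))  # roughly half the entries zeroed, survivors scaled by 1/(1-p) = 2
drop.eval()
print(drop(x))  # identity at test time: the input passes through unchanged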
How to implement in Numpy?
import numpy as np
pKeep = 0.8
weights = np.ones([1, 5])
binary_value = np.random.rand(weights.shape[0], weights.shape[1]) < pKeep
res = np.multiply(weights, binary_value)
res /= pKeep # this line is called inverted dropout technique
print(res)
How to implement in Tensorflow?
import tensorflow as tf
tf.enable_eager_execution()
weights = tf.ones(shape=[1, 5])
keep_prob = 0.8
random_tensor = keep_prob
random_tensor += tf.random_uniform(weights.shape)
# 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob)
binary_tensor = tf.floor(random_tensor)
ret = tf.div(weights, keep_prob) * binary_tensor
print(ret)
Implementation with Torch and Bernoulli:
def forward(self, x):
    output = x @ self.W.t() + self.bias   # linear transform
    if self.training:
        # Bernoulli(keep_prob) mask; note this variant does not rescale by 1/keep_prob
        sample = torch.distributions.bernoulli.Bernoulli(self.keep_prob).sample(output.size())
        print(sample)
        return output * sample
    return output

PyTorch CrossEntropyLoss error in simple example of NN

Hi, I am trying to make an NN model that satisfies a simple formula:
y = X1^2 + X2^2
But when I use CrossEntropyLoss as the loss function, I get two different error messages.
First, when I set the code up like this:
x = torch.randn(batch_size, 2)
y_hat = model(x)
y = answer(x).long()
optimizer.zero_grad()
loss = loss_func(y_hat, y)
loss.backward()
optimizer.step()
I get this message:
RuntimeError: Assertion `cur_target >= 0 && cur_target < n_classes' failed. at c:\programdata\miniconda3\conda-bld\pytorch_1533090623466\work\aten\src\thnn\generic/ClassNLLCriterion.c:93
Second, when I change the code like this:
x = torch.randn(batch_size, 2)
y_hat = model(x)
y = answer(x).long().view(batch_size,1,1)
optimizer.zero_grad()
loss = loss_func(y_hat, y)
loss.backward()
optimizer.step()
then I get a message like:
RuntimeError: multi-target not supported at c:\programdata\miniconda3\conda-bld\pytorch_1533090623466\work\aten\src\thnn\generic/ClassNLLCriterion.c:21
How can I solve this problem? Thanks. (Sorry for my English.)
This is my code
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

def answer(x):
    y = x[:,0].pow(2) + x[:,1].pow(2)
    return y

class Model(nn.Module):
    def __init__(self, input_size, output_size):
        super(Model, self).__init__()
        self.linear1 = nn.Linear(input_size, 10)
        self.linear2 = nn.Linear(10, 1)

    def forward(self, x):
        y = F.relu(self.linear1(x))
        y = F.relu(self.linear2(y))
        return y

model = Model(2,1)
print(model, '\n')

loss_func = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.001)

batch_size = 3
epoch_n = 100
iter_n = 100

for epoch in range(epoch_n):
    loss_avg = 0
    for i in range(iter_n):
        x = torch.randn(batch_size, 2)
        y_hat = model(x)
        y = answer(x).long().view(batch_size,1,1)
        optimizer.zero_grad()
        loss = loss_func(y_hat, y)
        loss.backward()
        optimizer.step()
        loss_avg += loss
    loss_avg = loss_avg / iter_n
    if epoch % 10 == 0:
        print(loss_avg)
    if loss_avg < 0.001:
        break
Can I make those datasets using a DataLoader in PyTorch? Thanks for your help.
You are using the wrong loss function. CrossEntropyLoss is generally used for classification problems, whereas your problem is one of regression. So you should use losses that are meant for regression-like tasks, such as Mean Squared Error Loss, L1 Loss, etc. Take a look at this, this, this and this.
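As a sketch, the minimal changes to the question's code would look like this (keeping the targets as floats and matching the prediction's shape is my assumption of the intent):

loss_func = nn.MSELoss()              # regression loss instead of CrossEntropyLoss

x = torch.randn(batch_size, 2)
y_hat = model(x)
y = answer(x).view(batch_size, 1)     # float targets, same shape as y_hat; no .long()
loss = loss_func(y_hat, y)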

XOR neural network does not learn

I am trying to solve a very simple non-linear problem: the XOR gate.
From my school knowledge, XOR can be solved by using 2 input nodes, 2 hidden-layer nodes, and 1 output node. It is a binary classification problem.
I generate 1000 random integers that are 0 or 1 and then do backpropagation. But for some unknown reason my network has not learned anything. The training accuracy is constant at 50%.
# coding: utf-8
import matplotlib
import torch
import torch.nn as nn
from torch.autograd import Variable
matplotlib.use('TkAgg') # My buggy OSX 10.13.6 requires this
import matplotlib.pyplot as plt
from torch.utils.data import Dataset
from tqdm import tqdm
import random

N = 1000
batch_size = 10
epochs = 40
hidden_size = 2
output_size = 1
lr = 0.1

def return_xor(N):
    tmp_x = []
    tmp_y = []
    for i in range(N):
        a = (random.randint(0, 1) == 1)
        b = (random.randint(0, 1) == 1)
        if (a and not b) or (not a and b):
            q = True
        else:
            q = False
        input_features = (a, b)
        output_class = q
        tmp_x.append(input_features)
        tmp_y.append(output_class)
    return tmp_x, tmp_y

# In[495]:
# Training set
x, y = return_xor(N)
x = torch.tensor(x, dtype=torch.float, requires_grad=True)
y = torch.tensor(y, dtype=torch.float, requires_grad=True)
# Test dataset
x_test, y_test = return_xor(100)
x_test = torch.tensor(x_test)
y_test = torch.tensor(y_test)

class MyDataset(Dataset):
    """Define my own `Dataset` in order to use `Variable` with `autograd`"""
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __getitem__(self, index):
        return self.x[index], self.y[index]

    def __len__(self):
        return len(self.x)

dataset = MyDataset(x, y)
test_dataset = MyDataset(x_test, y_test)
print(dataset.x.shape)
print(dataset.y.shape)

# Make data iterable by loading to a loader. Shuffle, batch_size kwargs put them here in order to remind I myself
train_loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
print(f"They are {len(train_loader)} batches in the dataset")
shown = 0
for (x, y) in train_loader:
    if shown == 1:
        break
    print(f"{x.shape} {x.dtype}")
    print(f"{y.shape} {y.dtype}")
    shown += 1

class MyModel(nn.Module):
    """
    Binary classification
    2 input nodes
    2 hidden nodes
    1 output node
    """
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = torch.nn.Linear(input_size, hidden_size)
        self.fc2 = torch.nn.Linear(hidden_size, output_size)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, out):
        out = self.fc1(out)
        out = self.fc2(out)
        out = self.sigmoid(out)
        return out

# Create my network
net = MyModel(dataset.x.shape[1], hidden_size, output_size)
CUDA = torch.cuda.is_available()
if CUDA:
    net = net.cuda()
criterion = torch.nn.BCELoss(reduction='elementwise_mean')
optimizer = torch.optim.SGD(net.parameters(), lr=lr)

# Train the network
correct_train = 0
total_train = 0
for epoch in range(epochs):
    for i, (batches, labels) in enumerate(train_loader):
        batcesh = Variable(batches.float())
        labels = Variable(labels.float())
        output = net(batches) # Forward pass
        optimizer.zero_grad()
        loss = criterion(output, labels.view(10, 1))
        loss.backward()
        optimizer.step()
        total_train += labels.size(0)
        correct_train += (predicted == labels.long()).sum()
        if (i + 1) % 10 == 0:
            print(f"""
            Epoch {epoch+1}/{epochs},
            Iteration {i+1}/{len(dataset)//batch_size},
            Training Loss: {loss.item()},
            Training Accuracy: {100*correct_train/total_train}
            """)
Solution:
I initialized the weights and used an adaptive learning rate:
https://github.com/elcolie/nnbootcamp/blob/master/Study-XOR.ipynb
I am not sure what results you are getting, as the code you have posted in the question doesn't work (it gives errors with pytorch 0.4.1, like predicted not being defined, etc.). But syntax issues apart, there are other problems.
Your model is not actually two-layer, as it does not use a non-linearity after the first layer's output. Effectively this is a one-layer network; to fix that you can modify your model's forward as follows:
def forward(self, out):
    out = torch.nn.functional.relu(self.fc1(out))
    out = self.fc2(out)
    out = self.sigmoid(out)
    return out
You can try sigmoid or tanh non-linearity as well... but the non-linearity is a must. This should fix the problem.
I also see that you are using only 2 hidden units. This might be restrictive and you might want to increase that to something like 5 or 10.
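Putting the suggestions together, a minimal self-contained check on the four canonical XOR points (the hyperparameters here are my own choices, not from the answer):

import torch
import torch.nn as nn

x = torch.tensor([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
y = torch.tensor([[0.], [1.], [1.], [0.]])

# two layers with a non-linearity in between, and 10 hidden units
net = nn.Sequential(nn.Linear(2, 10), nn.ReLU(), nn.Linear(10, 1), nn.Sigmoid())
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)
criterion = nn.BCELoss()

for _ in range(2000):
    optimizer.zero_grad()
    loss = criterion(net(x), y)
    loss.backward()
    optimizer.step()

print(net(x).round().squeeze())  # typically tensor([0., 1., 1., 0.])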
