from dataset import get_strange_symbol_loader, get_strange_symbols_test_data
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28*28, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 15)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return F.softmax(x, dim=1)
if __name__ == '__main__':
    net = Net()
    train, test = get_strange_symbol_loader(batch_size=128)
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=1e-3)
    Accuracy = []

    for epoch in range(30):
        print("epoch", epoch)

        # Train
        for data in train:
            img, label = data
            net.zero_grad()
            output = net(img.view(-1, 28*28))
            loss = F.nll_loss(output, label)
            loss.backward()
            optimizer.step()

        # Test
        correct, total = 0, 0
        with torch.no_grad():
            for data in test:
                img, label = data
                output = net(img.view(-1, 784))
                for idx, i in enumerate(output):
                    if torch.argmax(i) == label[idx]:
                        correct += 1
                        total += 1

        Accuracy.append(round(correct/total, 3))
        print("Accuracy: ", Accuracy)
Here is my neural network, made with PyTorch and based on the one by Sentdex. I'm using a dataset given to me by my university course administrators, imported via the function get_strange_symbol_loader(batch_size=128).
When I run this code it reports an accuracy of 1.0 for every epoch. However, running the #Test block on its own after the epoch loop has finished gives somewhat more realistic results. Why does this happen?
My goal here is to plot testing accuracy against the number of epochs to find the optimal number of epochs for the model before it starts to overfit.
You are incrementing both correct and total in the block
if torch.argmax(i) == label[idx]:
    correct += 1
    total += 1
therefore both always have the same value, and one divided by the other gives 1.0.
Check your indentation; I think removing one tab from total += 1 should do it.
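With the counter fixed, the test loop would look like this (a minimal sketch; total now counts every sample, correct only the matches):

for idx, i in enumerate(output):
    if torch.argmax(i) == label[idx]:
        correct += 1
    total += 1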
EDIT: I assume that by "running the #Test block after..." you mean you run another snippet, which might be slightly different (correctly indented, perhaps).
Related
I'm currently working on a project using PyTorch. I want to evaluate the accuracy of a neural network, but it does not seem to increase while the test runs. The output I get is:
As you can see, I print the accuracy at every epoch and always get the same number.
Here is the code of my classifier:
class Classifier(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = torch.nn.Linear(in_features=6, out_features=2, bias=True)
        self.layer2 = torch.nn.Linear(in_features=2, out_features=1, bias=True)
        self.activation = torch.sigmoid

    def forward(self, x):
        x = self.activation(self.layer1(x))
        x = self.activation(self.layer2(x))
        return x

model = Classifier()

def setParameters(m):
    if type(m) == torch.nn.Linear:
        torch.nn.init.uniform_(m.weight.data, -0.3, 0.3)
        torch.nn.init.constant_(m.bias.data, 1)

model.apply(setParameters)
model.layer1.bias.requires_grad = False
model.layer2.bias.requires_grad = False
The code I use to train the network is the following:
from google.colab import drive
import torch
import random

drive.mount('/content/drive')
%cd drive/MyDrive/deeplearning/ass1/data

numbers = []
results = []
with open('data.txt') as f:
    lines = f.readlines()
    random.shuffle(lines)
    for line in lines:
        digitsOfNumber = [int(x) for x in str(line[0:6])]
        resultInteger = int(line[7:8])
        numbers.append(digitsOfNumber)
        results.append(resultInteger)

numbersTensor = torch.Tensor(numbers)
resultsTensor = torch.tensor(results)
dataset = torch.utils.data.TensorDataset(numbersTensor, resultsTensor)
trainsetSize = int((80/100) * len(dataset))
trainset, testset = torch.utils.data.random_split(dataset, [trainsetSize, len(dataset) - trainsetSize])
print(len(trainset), len(testset))
testloader = torch.utils.data.DataLoader(testset, batch_size=len(testset), shuffle=False)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=len(trainset), shuffle=False)

def get_accuracy(model, dataloader):
    model.eval()
    with torch.no_grad():
        correct = 0
        for x, y in iter(dataloader):
            out = model(x)
            correct += (torch.argmax(out, axis=1) == y).sum()
    return correct / len(dataloader.dataset)

epochs = 1425
losses = []
for epoch in range(epochs):
    print("Test accuracy: ", get_accuracy(model, testloader).item())
    model.train()
    print("Epoch: ", epoch)
    for x, y in iter(trainloader):
        out = model(x)
        l = loss(out, y)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
    losses.append(l.item())

print("Final accuracy: ", get_accuracy(model, testloader))
for name, param in model.named_parameters():
    print(name, param)
The last part is what I use to print out the accuracy and to train the network accordingly. How can I fix my issue?
Thank you in advance for your time and patience.
The last layer of your model produces a tensor of shape (batch size, 1), since you have set out_features = 1. I assume your dataset has more than 1 class?
When you are calculating your accuracy, torch.argmax(out, axis=1) will therefore always return the same class index, 0 in this case. This explains why your accuracy is constant.
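A quick illustration (dim is the standard spelling of the axis argument): argmax over a (batch, 1) tensor has only one index to pick from:

import torch

out = torch.rand(4, 1)           # shaped like your model's output: (batch_size, 1)
print(torch.argmax(out, dim=1))  # tensor([0, 0, 0, 0]) -- always index 0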
I advise looking into your dataset and finding out how many classes you have, then modifying your model based on that. If you have 10 classes, the last layer should have 10 output features based on how the rest of your code is set up.
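For example, if it (hypothetically) turned out that there are 10 classes, the change would be:

self.layer2 = torch.nn.Linear(in_features=2, out_features=10, bias=True)

A multi-class setup like this would also need a matching loss (e.g. cross-entropy on the raw outputs) rather than a per-unit sigmoid.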
I have defined the model as in the code below, and I used batch normalization merging to fold the 3 layers into 1 linear layer.
The first layer of the model is a linear layer with no bias.
The second layer of the model is batch normalization with no weight and bias (affine is False).
The third layer of the model is a linear layer.
The variables new_weight and new_bias are the weight and bias of the newly created linear layer, respectively.
My question is: why do the following two print calls produce different output, and what is wrong in the code below the # batch merge comment?
import torch
import torch.nn as nn
import torch.optim as optim

learning_rate = 0.01
in_nodes = 20
internal_nodes = 8
out_nodes = 9
batch_size = 100

# model define
class M(nn.Module):
    def __init__(self):
        super(M, self).__init__()
        self.layer1 = nn.Linear(in_nodes, internal_nodes, bias=False)
        self.layer2 = nn.BatchNorm1d(internal_nodes, affine=False)
        self.layer3 = nn.Linear(internal_nodes, out_nodes)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x

# optimizer and criterion
model = M()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

# training
for batch_num in range(1000):
    model.train()
    optimizer.zero_grad()
    input = torch.randn(batch_size, in_nodes)
    target = torch.ones(batch_size, out_nodes)
    output = model(input)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()

# batch merge
divider = torch.sqrt(model.layer2.eps + model.layer2.running_var)
w_bn = torch.diag(torch.ones(internal_nodes) / divider)
new_weight = torch.mm(w_bn, model.layer1.weight)
new_weight = torch.mm(model.layer3.weight, new_weight)
b_bn = - model.layer2.running_mean / divider
new_bias = model.layer3.bias + torch.squeeze(torch.mm(model.layer3.weight, b_bn.reshape(-1, 1)))

input = torch.randn(batch_size, in_nodes)
print(model(input))
print(torch.t(torch.mm(new_weight, torch.t(input))) + new_bias)
Short Answer: As far as I can tell you need a model.eval() before the line
input = torch.randn(batch_size, in_nodes)
such that the end looks like this
...
model.eval()
input = torch.randn(batch_size, in_nodes)
test_input = torch.ones(batch_size,internal_nodes)/100
print(model(input))
print(torch.t(torch.mm(new_weight, torch.t(input))) + new_bias)
With that (I tested it) the two print statements should output the same values: eval() freezes the running statistics the BatchNorm layer uses.
Long Answer:
When using batch normalization, according to the PyTorch documentation a default momentum of 0.1 is used to compute the running_mean and running_var. The momentum defines how much weight the previously estimated statistics and the newly observed batch values each get.
Now, when you don't call model.eval(), batch normalization computes an updated running_mean and running_var (due to the momentum) during the line
print(model(input))
For further details and/or confirmation, see the related question and the PyTorch documentation.
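A minimal sketch of the difference (only torch needed): in train mode a forward pass moves the running statistics, while in eval mode it leaves them untouched:

import torch
import torch.nn as nn

bn = nn.BatchNorm1d(4)  # momentum defaults to 0.1
x = torch.randn(100, 4)

bn.train()
before = bn.running_mean.clone()
bn(x)  # train mode: running stats move towards the batch statistics
print(torch.equal(before, bn.running_mean))  # False

bn.eval()
before = bn.running_mean.clone()
bn(x)  # eval mode: running stats stay frozen
print(torch.equal(before, bn.running_mean))  # True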
I am new to PyTorch. I was trying to model a binary classifier on the Kepler dataset. The following is my dataset class.
class KeplerDataset(Dataset):
    def __init__(self, test=False):
        self.dataframe_orig = pd.read_csv(koi_cumm_path)

        if (test == False):
            self.data = df_numeric[( df_numeric.koi_disposition == 1 ) | ( df_numeric.koi_disposition == 0 )].values
        else:
            self.data = df_numeric[~(( df_numeric.koi_disposition == 1 ) | ( df_numeric.koi_disposition == 0 ))].values

        self.X_data = torch.FloatTensor(self.data[:, 1:])
        self.y_data = torch.FloatTensor(self.data[:, 0])

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.X_data[index], self.y_data[index]
Here, I created a custom classifier class with hidden layers and a single output unit that produces the sigmoid probability of being in class 1 (planet).
class KOIClassifier(nn.Module):
    def __init__(self, input_dim, out_dim):
        super(KOIClassifier, self).__init__()
        self.linear1 = nn.Linear(input_dim, 32)
        self.linear2 = nn.Linear(32, 32)
        self.linear3 = nn.Linear(32, out_dim)

    def forward(self, xb):
        out = self.linear1(xb)
        out = F.relu(out)
        out = self.linear2(out)
        out = F.relu(out)
        out = self.linear3(out)
        out = torch.sigmoid(out)
        return out
I then created a train_model function to optimize the loss using SGD.
def train_model(X, y):
    criterion = nn.BCELoss()
    optim = torch.optim.SGD(model.parameters(), lr=0.001)
    n_epochs = 100
    losses = []
    for epoch in range(n_epochs):
        y_pred = model.forward(X)
        loss = criterion(y_pred, y)
        losses.append(loss.item())
        optim.zero_grad()
        loss.backward()
        optim.step()

losses = []
for X, y in train_loader:
    losses.append(train_model(X, y))
But after performing the optimization over the train_loader, when I try predicting on the train_loader itself, the prediction values are much worse.
for features, y in train_loader:
    y_pred = model.predict(features)
    break

y_pred
> tensor([[4.5436e-02],
          [1.5024e-02],
          [2.2579e-01],
          [4.2279e-01],
          [6.0811e-02],
          .....
Why is my model not working properly? Is it a problem with the dataset, or am I doing something wrong in implementing the neural net? I will link my Kaggle notebook, since more context might be helpful. Please help.
You are optimizing many times (100 steps) on the first batch (the first samples), then moving to the next samples. This means your model will overfit on those few samples before moving on to the next batch. Your training will then be very non-smooth, diverge, and drift far from the global optimum.
Usually, in a training loop you should:
1. go over all samples (this is one epoch)
2. shuffle your dataset in order to visit your samples in a different order (set your PyTorch training loader accordingly)
3. go back to 1. until you reach the max number of epochs
Also, you should not define your optimizer (nor your criterion) on every call.
Your training loop should look like this:
criterion = nn.BCELoss()
optim = torch.optim.SGD(model.parameters(), lr=0.001)
n_epochs = 100

def train_model():
    for X, y in train_loader:
        optim.zero_grad()
        y_pred = model.forward(X)
        loss = criterion(y_pred, y)
        loss.backward()
        optim.step()

for epoch in range(n_epochs):
    train_model()
I'm new to PyTorch and deep learning generally.
The code I wrote can be seen further down.
I'm trying to learn the simple 'And' problem, which is linearly separable.
The problem is that I'm getting poor results: only around 2 out of 10 runs get to the correct answer.
Sometimes the loss.item() value is stuck at 0.250.
Just to clear things up: why does it only work 2/10 times?
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autog

data_x = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
data_y = np.array([[0, 0, 0, 1]]).T
data_x = autog.Variable(torch.FloatTensor(data_x))
data_y = autog.Variable(torch.FloatTensor(data_y), requires_grad=False)

in_dim = 2
out_dim = 1
epochs = 15000
epoch_print = epochs / 5
l_rate = 0.001

class NeuralNet(nn.Module):
    def __init__(self, input_size, output_size):
        super(NeuralNet, self).__init__()
        self.lin1 = nn.Linear(input_size, output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        out = x
        out = self.lin1(out)
        out = self.relu(out)
        return out

model = NeuralNet(in_dim, out_dim)
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=l_rate)

for epoch in range(epochs):
    pred = model(data_x)
    loss = criterion(pred, data_y)
    loss.backward()
    optimizer.step()
    if (epoch + 1) % epoch_print == 0:
        print("Epoch %d Loss %.3f" % (epoch + 1, loss.item()))

for x, y in zip(data_x, data_y):
    pred = model(x)
    print("Input", list(map(int, x)), "Pred", int(pred), "Output", int(y))
1. Using zero_grad with optimizer
You are not using optimizer.zero_grad() to clear the gradient. Your learning loop should look like this:
for epoch in range(epochs):
    optimizer.zero_grad()
    pred = model(data_x)
    loss = criterion(pred, data_y)
    loss.backward()
    optimizer.step()
    if (epoch + 1) % epoch_print == 0:
        print("Epoch %d Loss %.3f" % (epoch + 1, loss.item()))
In this particular case it will not have much of a detrimental effect; the gradient is accumulating, but as you loop over the same dataset over and over it makes barely any difference (you should get into this habit though, as you will use it throughout your deep learning journey).
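A minimal sketch of that accumulation (only torch needed): every backward() adds to the stored gradient until you zero it:

import torch

w = torch.ones(1, requires_grad=True)

(2 * w).sum().backward()
print(w.grad)  # tensor([2.])

(2 * w).sum().backward()
print(w.grad)  # tensor([4.]) -- the second gradient was added to the first

w.grad.zero_()  # this is what optimizer.zero_grad() does for each parameter
(2 * w).sum().backward()
print(w.grad)  # tensor([2.])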
2. Cost Function
You are using Mean Absolute Error, which is a regression loss function, not a classification one (what you are doing is binary classification).
Accordingly, you should use BCELoss with a sigmoid activation, or (I prefer it that way) return logits from the network and use BCEWithLogitsLoss; both of them compute binary cross-entropy (a simplified version of cross-entropy).
See below:
class NeuralNet(nn.Module):
    def __init__(self, input_size, output_size):
        super(NeuralNet, self).__init__()
        self.lin1 = nn.Linear(input_size, output_size)

    def forward(self, x):
        # You may want to use torch.nn.functional.sigmoid activation
        return self.lin1(x)

...

# Change your criterion to nn.BCELoss() if using sigmoid
criterion = nn.BCEWithLogitsLoss()

...
3. Predictions
If you use the logits version, the classifier learns to assign negative values to the 0 label and positive values to indicate 1. Your display function has to be modified to incorporate this knowledge:
for x, y in zip(data_x, data_y):
    pred = model(x)
    # See int(pred > 0), that's the only change
    print("Input", list(map(int, x)), "Pred", int(pred > 0), "Output", int(y))
This step does not apply if your forward applies sigmoid to the output. Oh, and it's better to use torch.round instead of casting to int.
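For the sigmoid variant, that rounding would look like this (pred assumed to be a single sigmoid output, as in the question):

pred_class = int(torch.round(pred))  # 0 if pred < 0.5, else 1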
I am trying to solve a very simple non-linear problem: the XOR gate.
From my school knowledge, XOR can be solved using 2 input nodes and 2 hidden-layer nodes, with 1 output. It is a binary classification problem.
I generate 1000 random integers that are 0 or 1 and then do backpropagation. But for some unknown reason my network has not learned anything; the training accuracy is constant at 50%.
# coding: utf-8
import matplotlib
import torch
import torch.nn as nn
from torch.autograd import Variable
matplotlib.use('TkAgg')  # My buggy OSX 10.13.6 requires this
import matplotlib.pyplot as plt
from torch.utils.data import Dataset
from tqdm import tqdm
import random

N = 1000
batch_size = 10
epochs = 40
hidden_size = 2
output_size = 1
lr = 0.1

def return_xor(N):
    tmp_x = []
    tmp_y = []
    for i in range(N):
        a = (random.randint(0, 1) == 1)
        b = (random.randint(0, 1) == 1)
        if (a and not b) or (not a and b):
            q = True
        else:
            q = False
        input_features = (a, b)
        output_class = q
        tmp_x.append(input_features)
        tmp_y.append(output_class)
    return tmp_x, tmp_y

# In[495]:
# Training set
x, y = return_xor(N)
x = torch.tensor(x, dtype=torch.float, requires_grad=True)
y = torch.tensor(y, dtype=torch.float, requires_grad=True)

# Test dataset
x_test, y_test = return_xor(100)
x_test = torch.tensor(x_test)
y_test = torch.tensor(y_test)

class MyDataset(Dataset):
    """Define my own `Dataset` in order to use `Variable` with `autograd`"""
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __getitem__(self, index):
        return self.x[index], self.y[index]

    def __len__(self):
        return len(self.x)

dataset = MyDataset(x, y)
test_dataset = MyDataset(x_test, y_test)
print(dataset.x.shape)
print(dataset.y.shape)

# Make data iterable by loading it into a loader. The shuffle and batch_size
# kwargs are spelled out here to remind myself
train_loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
print(f"There are {len(train_loader)} batches in the dataset")

shown = 0
for (x, y) in train_loader:
    if shown == 1:
        break
    print(f"{x.shape} {x.dtype}")
    print(f"{y.shape} {y.dtype}")
    shown += 1

class MyModel(nn.Module):
    """
    Binary classification
    2 input nodes
    2 hidden nodes
    1 output node
    """
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = torch.nn.Linear(input_size, hidden_size)
        self.fc2 = torch.nn.Linear(hidden_size, output_size)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, out):
        out = self.fc1(out)
        out = self.fc2(out)
        out = self.sigmoid(out)
        return out

# Create my network
net = MyModel(dataset.x.shape[1], hidden_size, output_size)
CUDA = torch.cuda.is_available()
if CUDA:
    net = net.cuda()
criterion = torch.nn.BCELoss(reduction='elementwise_mean')
optimizer = torch.optim.SGD(net.parameters(), lr=lr)

# Train the network
correct_train = 0
total_train = 0
for epoch in range(epochs):
    for i, (batches, labels) in enumerate(train_loader):
        batcesh = Variable(batches.float())
        labels = Variable(labels.float())
        output = net(batches)  # Forward pass
        optimizer.zero_grad()
        loss = criterion(output, labels.view(10, 1))
        loss.backward()
        optimizer.step()
        total_train += labels.size(0)
        correct_train += (predicted == labels.long()).sum()
        if (i + 1) % 10 == 0:
            print(f"""
                Epoch {epoch+1}/{epochs},
                Iteration {i+1}/{len(dataset)//batch_size},
                Training Loss: {loss.item()},
                Training Accuracy: {100*correct_train/total_train}
            """)
Solution:
I initialized the weights and used an adaptive learning rate:
https://github.com/elcolie/nnbootcamp/blob/master/Study-XOR.ipynb
I am not sure what results you are getting, as the code you posted in the question doesn't work (it gives errors with PyTorch 0.4.1, such as predicted not being defined). But syntax issues apart, there are other problems.
Your model is not actually two-layer, as it does not use a non-linearity after the first layer's output. Effectively it is a one-layer network; to fix that, you can modify your model's forward as follows:
def forward(self, out):
    out = torch.nn.functional.relu(self.fc1(out))
    out = self.fc2(out)
    out = self.sigmoid(out)
    return out
You can try sigmoid or tanh non-linearity as well... but the non-linearity is a must. This should fix the problem.
I also see that you are using only 2 hidden units. This might be restrictive; you might want to increase that to something like 5 or 10.
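For instance, reusing the MyModel class from the question (the width here is just an illustrative guess):

hidden_size = 10  # instead of 2; gives the optimizer more room
net = MyModel(input_size=2, hidden_size=hidden_size, output_size=1)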