I'm trying to implement word2vec with negative sampling in Python almost from scratch. I'm quite new to neural networks and have run into some issues; I would really appreciate any help.
So I wrote a simple network with a forward pass. I couldn't figure out which element is supposed to have a grad_fn; I kept getting an error like 'tensor has no grad_fn' until I added requires_grad_() on the returned value. Is that correct?
dataset = Word2VecNegativeSampling(data, num_negative_samples, 30000)
dataset.generate_dataset()
wordvec_dim = 10
class Word2VecNegativeSamples(nn.Module):
    def __init__(self, num_tokens):
        super(Word2VecNegativeSamples, self).__init__()
        self.input = nn.Linear(num_tokens, 10, bias=False)
        self.output = nn.Linear(10, num_tokens, bias=False)
        self.num_tokens = num_tokens

    def forward(self, input_index_batch, output_indices_batch):
        '''
        Implements forward pass with negative sampling

        Arguments:
        input_index_batch - Tensor of ints, shape: (batch_size, ), indices of input words in the batch
        output_indices_batch - Tensor of ints, shape: (batch_size, num_negative_samples+1),
            indices of the target words for every sample

        Returns:
        predictions - Tensor of floats, shape: (batch_size, num_negative_samples+1)
        '''
        results = []
        batch_size = len(input_index_batch)
        for i in range(batch_size):
            input_one_hot = torch.zeros(self.num_tokens)
            input_one_hot[input_index_batch[i]] = 1
            forward_result = self.output(self.input(input_one_hot))
            results.append(torch.tensor([forward_result[out_index] for out_index in output_indices_batch[i]]))
        return torch.stack(results).requires_grad_()
nn_model = Word2VecNegativeSamples(data.num_tokens())
nn_model.type(torch.FloatTensor)
After all that, I'm trying to train the model, but neither the loss nor the accuracy is changing. Is the code for the model prediction correct as well?
Here is the training code:
def train_neg_sample(model, dataset, train_loader, optimizer, scheduler, num_epochs):
    loss = nn.BCEWithLogitsLoss().type(torch.FloatTensor)
    loss_history = []
    train_history = []
    for epoch in range(num_epochs):
        model.train()  # Enter train mode
        dataset.generate_dataset()
        loss_accum = 0
        correct_samples = 0
        total_samples = 0
        for i_step, (inp, out, lab) in enumerate(train_loader):
            prediction = model(inp, out)
            loss_value = loss(prediction, lab)
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()
            _, indices = torch.max(prediction, 1)
            correct_samples += torch.sum(indices == 0)
            total_samples += lab.shape[0]
            loss_accum += loss_value
        scheduler.step()
        ave_loss = loss_accum / i_step
        train_accuracy = float(correct_samples) / total_samples
        loss_history.append(float(ave_loss))
        train_history.append(train_accuracy)
        print("Epoch#: %i, Average loss: %f, Train accuracy: %f" % (epoch, ave_loss, train_accuracy))
    return loss_history, train_history
If your loss function is not changing, it's highly probable that you registered the wrong set of parameters with the optimizer. Can you post the code snippet where you initialize your model and optimizer? It is supposed to look like this:
nn_model = Word2VecNegativeSamples(data.num_tokens())
optimizer = optim.SGD(nn_model.parameters(), lr=0.001, momentum=0.9)
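If you want to sanity-check that, one quick way (just a sketch, assuming the nn_model and optimizer defined above) is to print the model's parameters and confirm the optimizer actually holds them:
# Sketch: list the model's parameters and check they are registered with the optimizer
for name, param in nn_model.named_parameters():
    print(name, tuple(param.shape), "requires_grad =", param.requires_grad)

registered = {id(p) for group in optimizer.param_groups for p in group["params"]}
missing = [name for name, p in nn_model.named_parameters() if id(p) not in registered]
print("parameters missing from the optimizer:", missing)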
Following my previous question, I have written this code to train an autoencoder and then extract the features.
(There might be some changes in the variable names)
# Autoencoder class
# https://medium.com/pytorch/implementing-an-autoencoder-in-pytorch-19baa22647d1
class AE_class(nn.Module):
    def __init__(self, **kwargs):
        super().__init__()
        self.encoder_hidden_layer = nn.Linear(
            in_features=kwargs["input_shape"], out_features=128
        )
        self.encoder_output_layer = nn.Linear(
            in_features=128, out_features=128
        )
        self.decoder_hidden_layer = nn.Linear(
            in_features=128, out_features=128
        )
        self.decoder_output_layer = nn.Linear(
            in_features=128, out_features=kwargs["input_shape"]
        )

    def forward(self, features):
        #print("in forward")
        #print(type(features))
        activation = self.encoder_hidden_layer(features)
        activation = torch.relu(activation)
        code = self.encoder_output_layer(activation)
        code = torch.relu(code)
        activation = self.decoder_hidden_layer(code)
        activation = torch.relu(activation)
        activation = self.decoder_output_layer(activation)
        reconstructed = torch.relu(activation)
        return reconstructed

    def encode(self, features_h):
        activation_h = self.encoder_hidden_layer(features_h)
        activation_h = torch.relu(activation_h)
        code_h = self.encoder_output_layer(activation_h)
        code_h = torch.relu(code_h)
        return code_h
And then, for training:
def retrieve_AE_features(X_before, n_voxel_region):
    # use gpu if available
    # https://discuss.pytorch.org/t/runtimeerror-tensor-for-out-is-on-cpu-tensor-for-argument-1-self-is-on-cpu-but-expected-them-to-be-on-gpu-while-checking-arguments-for-addmm/105453
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # create a model from the `AE` autoencoder class
    # load it to the specified device, either gpu or cpu
    model_AE = AE_class(input_shape=n_voxel_region).to(device)

    # create an optimizer object
    # Adam optimizer with learning rate 1e-3
    optimizer = optim.Adam(model_AE.parameters(), lr=1e-3)

    # mean-squared error loss
    criterion = nn.MSELoss()

    X_tensor = torch.tensor(X_before, dtype=torch.float32)
    print(type(X_tensor))

    train_loader = torch.utils.data.DataLoader(
        X_tensor, batch_size=64, shuffle=True, num_workers=2, pin_memory=True
    )
    test_loader = torch.utils.data.DataLoader(
        X_tensor, batch_size=32, shuffle=False, num_workers=2
    )
    print(type(train_loader))

    for epoch in range(epochs_AE):
        loss = 0
        for batch_features in train_loader:
            # reshape mini-batch data to [N, 784] matrix
            # load it to the active device
            #batch_features = batch_features.view(-1, 784).to(device)
            #print(batch_features.shape)

            # reset the gradients back to zero
            # PyTorch accumulates gradients on subsequent backward passes
            optimizer.zero_grad()

            # compute reconstructions
            outputs = model_AE(batch_features)

            # compute training reconstruction loss
            train_loss = criterion(outputs, batch_features)

            # compute accumulated gradients
            train_loss.backward()

            # perform parameter update based on current gradients
            optimizer.step()

            # add the mini-batch training loss to epoch loss
            loss += train_loss.item()

        # compute the epoch training loss
        loss = loss / len(train_loader)

        # display the epoch training loss
        print("AE, epoch : {}/{}, loss = {:.6f}".format(epoch + 1, epochs_AE, loss))

    # After training
    hidden_features = model_AE.encode(X_before)
    return hidden_features
However, I received the following error:
Tensor for argument #2 'mat1' is on CPU, but expected it to be on GPU
(while checking arguments for addmm)
It seems some of my variables should be defined differently to be able to run on the GPU.
My questions:
How can I understand which variables will be executed on GPU and which ones on CPU?
How do I fix it? In other words, how do I define a variable so that it can run on the GPU?
Thanks in advance
I see that your model is moved to device, which is decided by this line: device = torch.device("cuda" if torch.cuda.is_available() else "cpu"). This can be either cpu or cuda.
So adding the line batch_features = batch_features.to(device) will actually move your input data to the device.
Since your model is moved to the device, you should move your input to the device as well.
The code below has that change:
for epoch in range(epochs_AE):
    loss = 0
    for batch_features in train_loader:
        batch_features = batch_features.to(device)  # this will move the input to your device
        optimizer.zero_grad()
        outputs = model_AE(batch_features)
        train_loss = criterion(outputs, batch_features)
        ...
Coming to your question: calling .to(device) directly moves the tensor to your specified device.
And if you want it to be hard-coded, call .to('cpu') or .to('cuda') on your torch tensor.
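Note that the same mismatch will likely show up again at the end of retrieve_AE_features, where model_AE.encode(X_before) is called with CPU data while the model sits on the GPU. A minimal sketch of one way to handle that last step (my assumption, not part of the original code):
# Sketch: run the trained encoder with the input and the model on the same device
with torch.no_grad():
    X_dev = torch.tensor(X_before, dtype=torch.float32).to(device)  # move the input to the model's device
    hidden_features = model_AE.encode(X_dev).cpu()                  # bring the result back to the CPU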
After each epoch, y_pred simply keeps increasing.
The input at each batch is a 64x10 tensor, and I am trying to predict the max of the vector in each row.
I thought the gradient might not be going to 0 between batches, but that wasn't the case.
I tried changing the LR, epochs, LSTM layers (LSTM to RNN), hidden size, etc.; nothing helped.
BTW, using a simple sequential network of dense and ReLU layers instead of the LSTM worked perfectly (a rough sketch of that baseline is below).
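Roughly something like this (a rough sketch, not my exact code):
import torch.nn as nn

# dense baseline: 10 values per row in, one predicted value (the row max) out
baseline = nn.Sequential(
    nn.Linear(10, 100),
    nn.ReLU(),
    nn.Linear(100, 1),
)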
Following is the LSTM code:
LR = 0.0001

class LSTM(nn.Module):
    def __init__(self, input_size=1, hidden_layer_size=100, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.lstm = nn.LSTM(input_size, hidden_layer_size)
        self.linear = nn.Linear(hidden_layer_size, output_size)
        # self.hidden_cell = (torch.zeros(1, max_array_len, self.hidden_layer_size),
        #                     torch.zeros(1, max_array_len, self.hidden_layer_size))

    def forward(self, input_seq):
        # lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq), max_array_len, 1), self.hidden_cell)
        lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq), max_array_len, 1))
        predictions = self.linear(lstm_out[:, -1, :])
        return predictions

model = LSTM()
optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=0.8)  # optimize all cnn parameters
loss_func = nn.MSELoss()  # the target label is not one-hotted
print(model)

EPOCHS = 2000
for i in range(EPOCHS):
    # model.train()
    for step, (seq, labels) in enumerate(train_data):
        model.zero_grad()
        labels = labels.view(labels.shape[0], 1)
        y_pred = model(seq)
        loss = loss_func(y_pred.float(), labels.float())
        loss.backward(retain_graph=True)
        optimizer.step()
    if i % 10 == 0:
        # print(y_pred.shape, labels.shape)
        print(y_pred)
        print(f'epoch: {i:3} train_loss: {loss.item():10.8f}')
print('Finished Training')
The y_pred I am getting is:
tensor([[0.2661],
[0.7536],
[1.4659],
[2.4905],
[3.8662],
[5.4478],
[6.8958],
[7.9347],
[8.5493],
[8.8773],
[9.0486],
[9.1409],
[9.1931],
[9.2244],
[9.2441],
[9.2570],
[9.2657],
[9.2718],
[9.2761],
[9.2792],
[9.2815],
[9.2831],
[9.2843],
[9.2853],
[9.2860],
[9.2865],
[9.2869],
[9.2872],
[9.2874],
[9.2876],
[9.2877],
[9.2878]], grad_fn=<AddmmBackward>)
I have a sample of data in my training dataset which I am able to view if I print the data, but when accessing it to train the model, I keep getting RuntimeError: Expected object of scalar type Double but got scalar type Float for argument #2 'weight' in call to _thnn_conv2d_forward. I am unable to figure out why this is happening. I have also attached an image at the end to give a better understanding of the error message.
The labels.txt file looks as follows (the image name, linking to another folder with the corresponding images, then the center point (x, y) and the radius):
0000, 0.67 , 0.69 , 0.26
0001, 0.69 , 0.33 , 0.3
0002, 0.16 , 0.27 , 0.15
0003, 0.54 , 0.33 , 0.17
0004, 0.32 , 0.45 , 0.3
0005, 0.78 , 0.26 , 0.17
0006, 0.44 , 0.49 , 0.19
EDIT: This is the loss function and optimizer I am using:
optimizer = optim.Adam(model.parameters(), lr=0.001)
nn.CrossEntropyLoss()
My validate model function is as follows:
def validate_model(model, loader):
    model.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
                  # (dropout is set to zero)
    val_running_loss = 0.0
    val_running_correct = 0
    for int, data in enumerate(loader):
        data, target = data['image'].to(device), data['labels'].to(device)
        output = model(data)
        loss = my_loss(output, target)
        val_running_loss = val_running_loss + loss.item()
        _, preds = torch.max(output.data, 1)
        val_running_correct = val_running_correct + (preds == target).sum().item()

    avg_loss = val_running_loss / len(loader.dataset)
    val_accuracy = 100. * val_running_correct / len(loader.dataset)
    #----------------------------------------------
    # implementation needed here
    #----------------------------------------------
    return avg_loss, val_accuracy
I have a fit function for working out training loss:
def fit(model, train_dataloader):
    model.train()
    train_running_loss = 0.0
    train_running_correct = 0
    for i, data in enumerate(train_dataloader):
        print(data)
        # I believe this is causing the error, but not sure why.
        data, target = data['image'].to(device), data['labels'].to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = my_loss(output, target)
        train_running_loss = train_running_loss + loss.item()
        _, preds = torch.max(output.data, 1)
        train_running_correct = train_running_correct + (preds == target).sum().item()
        loss.backward()
        optimizer.step()

    train_loss = train_running_loss / len(train_dataloader.dataset)
    train_accuracy = 100. * train_running_correct / len(train_dataloader.dataset)
    print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}')
    return train_loss, train_accuracy
and the following train_model function which stores the losses and accuracy in a list:
train_losses, train_accuracy = [], []
validation_losses, val_accuracy = [], []

def train_model(model,
                optimizer,
                train_loader,
                validation_loader,
                train_losses,
                validation_losses,
                epochs=1):
    """
    Trains a neural network.
    Args:
        model - model to be trained
        optimizer - optimizer used for training
        train_loader - loader from which data for training comes
        validation_loader - loader from which data for validation comes (maybe at the end, you use test_loader)
        train_losses - adding train loss value to this list for future analysis
        validation_losses - adding validation loss value to this list for future analysis
        epochs - number of runs over the entire data set
    """
    #----------------------------------------------
    # implementation needed here
    #----------------------------------------------
    for epoch in range(epochs):
        train_epoch_loss, train_epoch_accuracy = fit(model, train_loader)
        val_epoch_loss, val_epoch_accuracy = validate_model(model, validation_loader)
        train_losses.append(train_epoch_loss)
        train_accuracy.append(train_epoch_accuracy)
        validation_losses.append(val_epoch_loss)
        val_accuracy.append(val_epoch_accuracy)
    return
And when I run the following code I get the Runtime Error:
train_model(model,
optimizer,
train_loader,
validation_loader,
train_losses,
validation_losses,
epochs=2)
ERROR: RuntimeError: Expected object of scalar type Double but got
scalar type Float for argument #2 'weight' in call to
_thnn_conv2d_forward
Here is a screenshot of the error message as well:
[screenshot of the error message]
EDIT: This is what my model looks like. I am supposed to detect circles in an image, with the centers and radii given in the labels.txt file, and paint over them. The painting function was given; I had to create the model and the training and validation.
class CircleNet(nn.Module):  # nn.Module is parent class
    def __init__(self):
        super(CircleNet, self).__init__()  # calls init of parent class
        #----------------------------------------------
        # implementation needed here
        #----------------------------------------------

        # keep dimensions of input image: (I-F+2P)/S + 1 = (128-3+2)/1 + 1 = 128
        # RGB image = input channels = 3. Use 12 filters for first 2 convolution layers, then double
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=24, out_channels=32, kernel_size=3, stride=1, padding=1)

        # Pooling to reduce sizes, and dropout to prevent overfitting
        self.pool = nn.MaxPool2d(kernel_size=2)
        self.relu = nn.ReLU()
        self.drop = nn.Dropout2d(p=0.25)
        self.norm1 = nn.BatchNorm2d(12)
        self.norm2 = nn.BatchNorm2d(24)

        # There are 2 pooling layers, each with kernel size of 2. Output size: 128/(2*2) = 32
        # Have 3 output features, corresponding to x-pos, y-pos, radius.
        self.fc = nn.Linear(in_features=32 * 32 * 32, out_features=3)

    def forward(self, x):
        """
        Feed forward through network
        Args:
            x - input to the network
        Returns "x", which is the network's output
        """
        #----------------------------------------------
        # implementation needed here
        #----------------------------------------------
        # Conv1
        out = self.conv1(x)
        out = self.pool(out)
        out = self.relu(out)
        out = self.norm1(out)

        # Conv2
        out = self.conv2(out)
        out = self.pool(out)
        out = self.relu(out)
        out = self.norm1(out)

        # Conv3
        out = self.conv3(out)
        out = self.drop(out)

        # Conv4
        out = self.conv4(out)
        out = F.dropout(out, training=self.training)

        out = out.view(-1, 32 * 32 * 32)
        out = self.fc(out)
        return out
EDIT: In case it helps, here is my custom loss function:
criterion = nn.CrossEntropyLoss()

def my_loss(outputs, labels):
    """
    Args:
        outputs - output of network ([batch size, 3])
        labels - desired labels ([batch size, 3])
    """
    loss = torch.zeros(1, dtype=torch.float, requires_grad=True)
    loss = loss.to(device)
    loss = criterion(outputs, labels)
    #----------------------------------------------
    # implementation needed here
    #----------------------------------------------
    # Observe: If you need to iterate and add certain values to loss defined above
    # you cannot write: loss += ... because this will raise the error:
    # "Leaf variable was used in an inplace operation"
    # Instead, to avoid this error write: loss = loss + ...
    return loss
Train Loader (given to me):
train_dir = "./train/"
validation_dir = "./validation/"
test_dir = "./test/"
train_dataset = ShapesDataset(train_dir)
train_loader = DataLoader(train_dataset,
                          batch_size=32,
                          shuffle=True)

validation_dataset = ShapesDataset(validation_dir)
validation_loader = DataLoader(validation_dataset,
                               batch_size=1,
                               shuffle=False)

test_dataset = ShapesDataset(test_dir)
test_loader = DataLoader(test_dataset,
                         batch_size=1,
                         shuffle=False)
print("train loader examples :", len(train_dataset))
print("validation loader examples:", len(validation_dataset))
print("test loader examples :", len(test_dataset))
EDIT: This code to view an image, the target circle labels, and the network output was also given:
"""
View first image of a given number of batches assuming that model has been created.
Currently, lines assuming the model has been created are commented out. Without a model,
you can view target labels and the corresponding images.
This is given to you so that you may see how loaders and model can be used.
"""
loader = train_loader  # choose from which loader to show images
batches_to_show = 2

with torch.no_grad():
    for i, data in enumerate(loader, 0):  # 0 means that counting starts at zero
        inputs = (data['image']).to(device)   # has shape (batch_size, 3, 128, 128)
        labels = (data['labels']).to(device)  # has shape (batch_size, 3)
        img_fnames = data['fname']            # list of length batch_size

        #outputs = model(inputs.float())

        img = Image.open(img_fnames[0])
        print("showing image: ", img_fnames[0])

        labels_str = [float("{0:.2f}".format(x)) for x in labels[0]]  # labels_np_arr
        #outputs_np_arr = outputs[0]  # using ".numpy()" to convert tensor to numpy array
        #outputs_str = [float("{0:.2f}".format(x)) for x in outputs_np_arr]

        print("Target labels :", labels_str)
        #print("network coeffs:", outputs_str)
        print()

        #img.show()
        if (i + 1) == batches_to_show:
            break
Here is the output I'm getting; it is supposed to cover the full circle:
[output image]
Any ideas would be helpful.
I basically added (in the validate_model and fit functions) this:
_, target = torch.max(target.data, 1)
below the _, preds = torch.max(output.data, 1) line of code, to get the data and target to be of the same length. I also changed the loss function from CrossEntropyLoss to MSELoss.
Then, in the same functions, I changed the line output = model(data) to output = model(data.float()).
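Put together, the adjusted loop body looks roughly like this (a sketch using the variable names from the fit function above; the target.float() cast is my assumption for the case where the labels arrive as doubles):
data, target = data['image'].to(device), data['labels'].to(device)
optimizer.zero_grad()
output = model(data.float())            # cast the input to float32 to match the conv weights
loss = my_loss(output, target.float())  # my_loss now wraps nn.MSELoss()
train_running_loss = train_running_loss + loss.item()
_, preds = torch.max(output.data, 1)
_, target = torch.max(target.data, 1)   # so preds and target have the same shape for the comparison
train_running_correct = train_running_correct + (preds == target).sum().item()
loss.backward()
optimizer.step()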
Hi, I am trying to modify the MNIST example to match my dataset. I am only trying to use the MLP example, and it gives a strange error.
The dataset is a matrix with 2100 rows and 17 columns, and the output should be one of the 16 possible classes. The error seems to happen in the second phase of the training. The model is built correctly (log info confirmed).
Here is the error log:
ValueError: y_i value out of bounds
Apply node that caused the error:
CrossentropySoftmaxArgmax1HotWithBias(Dot22.0, b, targets)
Toposort index: 33
Inputs types: [TensorType(float64, matrix), TensorType(float64, vector), TensorType(int32, vector)]
Inputs shapes: [(100, 17), (17,), (100,)]
Inputs strides: [(136, 8), (8,), (4,)]
Inputs values: ['not shown', 'not shown', 'not shown']
Outputs clients: [[Sum{acc_dtype=float64}(CrossentropySoftmaxArgmax1HotWithBias.0)], [CrossentropySoftmax1HotWithBiasDx(Assert{msg='sm and dy do not have the same shape.'}.0, CrossentropySoftmaxArgmax1HotWithBias.1, targets)], []]
HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
Here is the code:
def build_mlp(input_var=None):
    l_in = lasagne.layers.InputLayer(shape=(None, 16),
                                     input_var=input_var)

    # Apply 20% dropout to the input data:
    l_in_drop = lasagne.layers.DropoutLayer(l_in, p=0.2)

    # Add a fully-connected layer of 800 units, using the linear rectifier, and
    # initializing weights with Glorot's scheme (which is the default anyway):
    l_hid1 = lasagne.layers.DenseLayer(
        l_in_drop, num_units=10,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform())

    # We'll now add dropout of 50%:
    l_hid1_drop = lasagne.layers.DropoutLayer(l_hid1, p=0.5)

    # Another 800-unit layer:
    l_hid2 = lasagne.layers.DenseLayer(
        l_hid1_drop, num_units=10,
        nonlinearity=lasagne.nonlinearities.rectify)

    # 50% dropout again:
    l_hid2_drop = lasagne.layers.DropoutLayer(l_hid2, p=0.5)

    # Finally, we'll add the fully-connected output layer, of 10 softmax units:
    l_out = lasagne.layers.DenseLayer(
        l_hid2_drop, num_units=17,
        nonlinearity=lasagne.nonlinearities.softmax)

    # Each layer is linked to its incoming layer(s), so we only need to pass
    # the output layer to give access to a network in Lasagne:
    return l_out
def main(model='mlp', num_epochs=300):
    # Load the dataset
    print("Loading data...")
    X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()

    # Prepare Theano variables for inputs and targets
    input_var = T.matrix('inputs')
    target_var = T.ivector('targets')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")
    if model == 'cnn':
        network = build_cnn(input_var)
    elif model == 'mlp':
        network = build_mlp(input_var)
    elif model == 'lstm':
        network = build_lstm(input_var)
    else:
        print("Unrecognized model type %r." % model)

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=0.01, momentum=0.9)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    test_loss = test_loss.mean()
    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(X_train, y_train, 100, shuffle=True):
            inputs, targets = batch
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(X_val, y_val, 100, shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
        print(" validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print(" validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100))

    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = 0
    for batch in iterate_minibatches(X_test, y_test, 100, shuffle=False):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        test_err += err
        test_acc += acc
        test_batches += 1
    print("Final results:")
    print(" test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print(" test accuracy:\t\t{:.2f} %".format(
        test_acc / test_batches * 100))
I figured out the problem:
My dataset does not have an output for every target, because it is too small! There are 17 target outputs, but my dataset has only 16 different outputs, and it is missing examples of the 17th output.
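One quick way to confirm this kind of mismatch (a sketch, assuming y_train holds integer class labels as in the MNIST example) is to compare the label range against the number of output units:
import numpy as np

# load_dataset() is the same loader used in main()
X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()

num_units = 17  # the output layer must cover every label value 0 .. num_units-1
print("label range:", y_train.min(), "to", y_train.max())
print("distinct labels:", np.unique(y_train).size)
assert y_train.max() < num_units, "a target label is out of bounds for the output layer"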
In order to resolve this problem, just replace the softmax with rectify, changing this:
l_out = lasagne.layers.DenseLayer(
    l_hid2_drop, num_units=17,
    nonlinearity=lasagne.nonlinearities.softmax)
to this:
l_out = lasagne.layers.DenseLayer(
    l_hid2_drop, num_units=17,
    nonlinearity=lasagne.nonlinearities.rectify)