Word2Vec with negative sampling Python implementation

I'm trying to implement word2vec with negative sampling in Python almost from scratch. I'm quite new to neural networks and have run into some issues; I would appreciate any help.
So, I wrote a simple NN with a forward pass. I didn't understand which element has to have a grad_fn: I kept getting errors like 'tensor has no grad_fn' until I added requires_grad_() on the returned value. Is that correct?
dataset = Word2VecNegativeSampling(data, num_negative_samples, 30000)
dataset.generate_dataset()
wordvec_dim = 10

class Word2VecNegativeSamples(nn.Module):
    def __init__(self, num_tokens):
        super(Word2VecNegativeSamples, self).__init__()
        self.input = nn.Linear(num_tokens, 10, bias=False)
        self.output = nn.Linear(10, num_tokens, bias=False)
        self.num_tokens = num_tokens

    def forward(self, input_index_batch, output_indices_batch):
        '''
        Implements forward pass with negative sampling

        Arguments:
        input_index_batch - Tensor of ints, shape: (batch_size, ), indices of input words in the batch
        output_indices_batch - Tensor of ints, shape: (batch_size, num_negative_samples+1),
                               indices of the target words for every sample

        Returns:
        predictions - Tensor of floats, shape: (batch_size, num_negative_samples+1)
        '''
        results = []
        batch_size = len(input_index_batch)
        for i in range(batch_size):
            input_one_hot = torch.zeros(self.num_tokens)
            input_one_hot[input_index_batch[i]] = 1
            forward_result = self.output(self.input(input_one_hot))
            results.append(torch.tensor([forward_result[out_index] for out_index in output_indices_batch[i]]))
        return torch.stack(results).requires_grad_()

nn_model = Word2VecNegativeSamples(data.num_tokens())
nn_model.type(torch.FloatTensor)
After all that, I'm trying to train the model, but neither the loss nor the accuracy changes. Is the code for the model prediction correct as well?
Here is training code:
def train_neg_sample(model, dataset, train_loader, optimizer, scheduler, num_epochs):
    loss = nn.BCEWithLogitsLoss().type(torch.FloatTensor)
    loss_history = []
    train_history = []

    for epoch in range(num_epochs):
        model.train()  # Enter train mode
        dataset.generate_dataset()

        loss_accum = 0
        correct_samples = 0
        total_samples = 0
        for i_step, (inp, out, lab) in enumerate(train_loader):
            prediction = model(inp, out)
            loss_value = loss(prediction, lab)
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()

            _, indices = torch.max(prediction, 1)
            correct_samples += torch.sum(indices == 0)
            total_samples += lab.shape[0]
            loss_accum += loss_value

        scheduler.step()

        ave_loss = loss_accum / i_step
        train_accuracy = float(correct_samples) / total_samples

        loss_history.append(float(ave_loss))
        train_history.append(train_accuracy)

        print("Epoch#: %i, Average loss: %f, Train accuracy: %f" % (epoch, ave_loss, train_accuracy))

    return loss_history, train_history

If your loss function is not changing, it's highly probable that you registered the wrong set of parameters with the optimizer. Can you post the code snippet where you initialize your model and optimizer? It is supposed to look like this:
nn_model = Word2VecNegativeSamples(data.num_tokens())
optimizer = optim.SGD(nn_model.parameters(), lr=0.001, momentum=0.9)
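
Beyond checking the optimizer, the forward pass in the question probably also breaks the computation graph: torch.tensor([...]) copies values out of the graph, and calling .requires_grad_() on the stacked result just turns it into a new leaf, so gradients never reach the two Linear layers. A minimal sketch of a differentiable version, assuming output_indices_batch is a LongTensor of shape (batch_size, num_negative_samples + 1):

import torch
import torch.nn as nn

class Word2VecNegativeSamples(nn.Module):
    def __init__(self, num_tokens, wordvec_dim=10):
        super().__init__()
        self.input = nn.Linear(num_tokens, wordvec_dim, bias=False)
        self.output = nn.Linear(wordvec_dim, num_tokens, bias=False)
        self.num_tokens = num_tokens

    def forward(self, input_index_batch, output_indices_batch):
        batch_size = len(input_index_batch)
        # One-hot encode the whole batch at once: (batch_size, num_tokens)
        one_hot = torch.zeros(batch_size, self.num_tokens)
        one_hot[torch.arange(batch_size), input_index_batch] = 1.0
        # Scores over the whole vocabulary: (batch_size, num_tokens)
        scores = self.output(self.input(one_hot))
        # Pick out the positive/negative columns without leaving the graph:
        return torch.gather(scores, 1, output_indices_batch)

Because every step here is a differentiable torch operation, loss.backward() can reach self.input and self.output without any requires_grad_() call.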

Related

Accuracy does not increase Pytorch

I'm currently working on a project using PyTorch. I want to evaluate the accuracy of a neural network, but it does not seem to increase while the test is running. As the (omitted) output shows, I print the accuracy at every epoch and always get the same number.
Here is the code of my classifier:
class Classifier(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = torch.nn.Linear(in_features=6, out_features=2, bias=True)
        self.layer2 = torch.nn.Linear(in_features=2, out_features=1, bias=True)
        self.activation = torch.sigmoid

    def forward(self, x):
        x = self.activation(self.layer1(x))
        x = self.activation(self.layer2(x))
        return x

model = Classifier()

def setParameters(m):
    if type(m) == torch.nn.Linear:
        torch.nn.init.uniform_(m.weight.data, -0.3, 0.3)
        torch.nn.init.constant_(m.bias.data, 1)

model.apply(setParameters)
model.layer1.bias.requires_grad = False
model.layer2.bias.requires_grad = False
The code I use to train the network is the following:
from google.colab import drive
import torch
import random

drive.mount('/content/drive')
%cd drive/MyDrive/deeplearning/ass1/data

numbers = []
results = []
with open('data.txt') as f:
    lines = f.readlines()
    random.shuffle(lines)
    for line in lines:
        digitsOfNumber = [int(x) for x in str(line[0:6])]
        resultInteger = int(line[7:8])
        numbers.append(digitsOfNumber)
        results.append(resultInteger)

numbersTensor = torch.Tensor(numbers)
resultsTensor = torch.tensor(results)
dataset = torch.utils.data.TensorDataset(numbersTensor, resultsTensor)

trainsetSize = int((80/100) * len(dataset))
trainset, testset = torch.utils.data.random_split(dataset, [trainsetSize, len(dataset) - trainsetSize])
print(len(trainset), len(testset))

testloader = torch.utils.data.DataLoader(testset, batch_size=len(testset), shuffle=False)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=len(trainset), shuffle=False)

def get_accuracy(model, dataloader):
    model.eval()
    with torch.no_grad():
        correct = 0
        for x, y in iter(dataloader):
            out = model(x)
            correct += (torch.argmax(out, axis=1) == y).sum()
        return correct / len(dataloader.dataset)

epochs = 1425
losses = []
for epoch in range(epochs):
    print("Test accuracy: ", get_accuracy(model, testloader).item())
    model.train()
    print("Epoch: ", epoch)
    for x, y in iter(trainloader):
        out = model(x)
        l = loss(out, y)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
        losses.append(l.item())

print("Final accuracy: ", get_accuracy(model, testloader))
for name, param in model.named_parameters():
    print(name, param)
The last part is the one I use to print out the accuracy and to train the network accordingly. How can I fix my issue?
Thank you in advance for your time and patience.
The last layer of your model produces a tensor of shape (batch size, 1), since you have set out_features = 1. I assume your dataset has more than 1 class?
When you are calculating your accuracy, torch.argmax(out, axis=1) will always give the same class index, namely 0, because there is only one column to choose from. This explains why your accuracy is constant.
I advise looking into your dataset and finding out how many classes you have, and modify your model based on that. If you have 10 classes, the last layer should have 10 output features based on how the rest of your code is set up.
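
To illustrate that advice, here is a minimal sketch of a classification head and accuracy check that agree with each other; the class count of 10, the SGD settings, and the switch to CrossEntropyLoss are assumptions, not taken from the question. If the task is really binary, an alternative is to keep one output unit, train with BCEWithLogitsLoss, and compare torch.sigmoid(out) > 0.5 against the labels instead of using argmax.

import torch

num_classes = 10  # hypothetical: replace with the real number of classes in data.txt

class Classifier(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = torch.nn.Linear(in_features=6, out_features=2, bias=True)
        self.layer2 = torch.nn.Linear(in_features=2, out_features=num_classes, bias=True)

    def forward(self, x):
        x = torch.sigmoid(self.layer1(x))
        return self.layer2(x)  # raw logits; CrossEntropyLoss applies log-softmax itself

model = Classifier()
loss = torch.nn.CrossEntropyLoss()  # expects logits (batch, num_classes) and integer class targets
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

# With num_classes output units, argmax over dim 1 is meaningful again:
logits = model(torch.rand(8, 6))
predictions = torch.argmax(logits, dim=1)  # shape (8,), values in [0, num_classes)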

Has anyone implemented an Optuna hyperparameter optimization for a PyTorch LSTM?

I am trying to implement an Optuna hyperparameter optimization for a PyTorch LSTM, but I do not know how to define my model correctly.
When I just use nn.Linear everything works fine, but when I use nn.LSTMCell I get the following error:
AttributeError: 'tuple' object has no attribute 'dim'
The error gets raised because the LSTM returns a tuple, not a tensor, but I do not know how to fix it and cannot find an example of a PyTorch LSTM with Optuna optimization online.
Here the Model definition:
def build_model_custom(trail):
    # Suggest the number of layers of neural network model
    n_layers = trail.suggest_int("n_layers", 1, 3)
    layers = []
    in_features = 20
    for i in range(n_layers):
        # Suggest the number of units in each layer
        out_features = trail.suggest_int("n_units_l{}".format(i), 4, 18)
        layers.append(nn.LSTMCell(in_features, out_features))
        in_features = out_features
    layers.append(nn.Linear(in_features, 2))
    return nn.Sequential(*layers)
I have implemented an example of Optuna optimizing an LSTM-style model before; I hope it will help you:
def get_best_parameters(args, Dtr, Val):
    def objective(trial):
        model = TransformerModel(args).to(args.device)
        loss_function = nn.MSELoss().to(args.device)
        optimizer = trial.suggest_categorical('optimizer',
                                              [torch.optim.SGD,
                                               torch.optim.RMSprop,
                                               torch.optim.Adam])(
            model.parameters(), lr=trial.suggest_loguniform('lr', 5e-4, 1e-2))
        print('training...')
        epochs = 10
        val_loss = 0
        for epoch in range(epochs):
            train_loss = []
            for batch_idx, (seq, target) in enumerate(Dtr, 0):
                seq, target = seq.to(args.device), target.to(args.device)
                optimizer.zero_grad()
                y_pred = model(seq)
                loss = loss_function(y_pred, target)
                train_loss.append(loss.item())
                loss.backward()
                optimizer.step()
            # validation
            val_loss = get_val_loss(args, model, Val)
            print('epoch {:03d} train_loss {:.8f} val_loss {:.8f}'.format(epoch, np.mean(train_loss), val_loss))
            model.train()
        return val_loss

    sampler = optuna.samplers.TPESampler()
    study = optuna.create_study(sampler=sampler, direction='minimize')
    study.optimize(func=objective, n_trials=5)
    pruned_trials = study.get_trials(deepcopy=False,
                                     states=tuple([TrialState.PRUNED]))
    complete_trials = study.get_trials(deepcopy=False,
                                       states=tuple([TrialState.COMPLETE]))
    best_trial = study.best_trial
    print('val_loss = ', best_trial.value)
    for key, value in best_trial.params.items():
        print("{}: {}".format(key, value))
I implemented a solution by myself. I am not sure if it is the most Pythonic, but it works.
Suggestions for improvement are welcome.
def train_and_evaluate(param, model, trail):
    # Load Data
    train_dataloader = torch.utils.data.DataLoader(Train_Dataset, batch_size=batch_size)
    Test_dataloader = torch.utils.data.DataLoader(Test_Dataset, batch_size=batch_size)
    criterion = nn.MSELoss()
    optimizer = getattr(optim, param['optimizer'])(model.parameters(), lr=param['learning_rate'])
    acc = nn.L1Loss()

    # Training Loop
    for epoch_num in range(EPOCHS):
        # Training
        total_loss_train = 0
        for train_input, train_target in train_dataloader:
            output = model.forward(train_input.float())
            batch_loss = criterion(output, train_target.float())
            total_loss_train += batch_loss.item()
            model.zero_grad()
            batch_loss.backward()
            optimizer.step()

        # Evaluation
        total_loss_val = 0
        total_mae = 0
        with torch.no_grad():
            for test_input, test_target in Test_dataloader:
                output = model(test_input.float())
                batch_loss = criterion(output, test_target)
                total_loss_val += batch_loss.item()
                batch_mae = acc(output, test_target)
                total_mae += batch_mae.item()

        accuracy = total_mae / len(Test_Dataset)

        # Add prune mechanism
        trail.report(accuracy, epoch_num)
        if trail.should_prune():
            raise optuna.exceptions.TrialPruned()

    return accuracy
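
If it helps, here is a rough sketch of how a function with this signature could be wired into an Optuna study; the search-space keys mirror the ones used above, but the ranges, the optimizer names, and the reuse of build_model_custom from the question are assumptions:

import optuna

def objective(trial):
    param = {
        'optimizer': trial.suggest_categorical('optimizer', ['Adam', 'RMSprop', 'SGD']),
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True),
    }
    model = build_model_custom(trial)              # model definition from the question
    return train_and_evaluate(param, model, trial)

study = optuna.create_study(direction='minimize')  # the returned value is an MAE, so minimize it
study.optimize(objective, n_trials=20)
print(study.best_trial.params)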

RuntimeError: shape '[4, 512]' is invalid for input of size 1024 while while evaluating test data

I am trying XLNet on the Jigsaw toxic dataset.
When I train my data with
input_ids = d["input_ids"].reshape(4,512).to(device) # batch size x seq length
it trains perfectly.
But when I try to test the model on the test data, reshaping the input_ids in the same way, it generates a runtime error:
shape '[4, 512]' is invalid for input of size 1024
This is the method I am using for training:
def train_epoch(model, data_loader, optimizer, device, scheduler, n_examples):
    model = model.train()
    losses = []
    acc = 0
    counter = 0
    for d in data_loader:
        input_ids = d["input_ids"].reshape(4, 512).to(device)
        attention_mask = d["attention_mask"].to(device)
        targets = d["targets"].to(device)
        outputs = model(input_ids=input_ids, token_type_ids=None, attention_mask=attention_mask, labels=targets)
        loss = outputs[0]
        logits = outputs[1]

        # preds = preds.cpu().detach().numpy()
        _, prediction = torch.max(outputs[1], dim=1)
        targets = targets.cpu().detach().numpy()
        prediction = prediction.cpu().detach().numpy()
        accuracy = metrics.accuracy_score(targets, prediction)
        acc += accuracy
        losses.append(loss.item())
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
        counter = counter + 1

    return acc / counter, np.mean(losses)
This is the method I am using for evaluating my test data:
def eval_model(model, data_loader, device, n_examples):
    model = model.eval()
    losses = []
    acc = 0
    counter = 0
    with torch.no_grad():
        for d in data_loader:
            # print(d["input_ids"])
            input_ids = d["input_ids"].reshape(4, 512).to(device)
            attention_mask = d["attention_mask"].to(device)
            targets = d["targets"].to(device)
            outputs = model(input_ids=input_ids, token_type_ids=None, attention_mask=attention_mask, labels=targets)
            loss = outputs[0]
            logits = outputs[1]
            _, prediction = torch.max(outputs[1], dim=1)
            targets = targets.cpu().detach().numpy()
            prediction = prediction.cpu().detach().numpy()
            accuracy = metrics.accuracy_score(targets, prediction)
            acc += accuracy
            losses.append(loss.item())
            counter += 1

    return acc / counter, np.mean(losses)
And when I try to run the eval_model method on my test data, it generates the runtime error above.
My model info: (model summary screenshot omitted)
I am unable to understand what I am doing wrong. Can anyone please help me out with this? Thank you.
I think the problem is that the training dataset's d['input_ids'] was of size 4*512 = 2048, so it could be reshaped into 4 by 512.
But the testing dataset's d['input_ids'] is of size 1024, which cannot be reshaped into 4 by 512.
Since you haven't given the model description, I can't say whether you should change it to (-1, 512) or (4, -1); using -1 in reshape tells the framework to figure that dimension out automatically.
For example, reshaping an array of 2048 elements into (4, 512) can be done with reshape(4, 512), reshape(-1, 512), or reshape(4, -1).
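
To make that concrete, here is a small sketch of how the two reshapes behave on a full batch versus a smaller final batch (the element counts are illustrative, matching the error message):

import torch

full_batch = torch.arange(4 * 512)   # 2048 elements, as during training
last_batch = torch.arange(2 * 512)   # 1024 elements, like the failing eval batch

print(full_batch.reshape(4, 512).shape)    # torch.Size([4, 512])
print(full_batch.reshape(-1, 512).shape)   # torch.Size([4, 512])
print(last_batch.reshape(-1, 512).shape)   # torch.Size([2, 512])
# last_batch.reshape(4, 512) raises: shape '[4, 512]' is invalid for input of size 1024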

Runtime error when reading data from a training dataset PyTorch

I have a sample of data in my training dataset which I am able to view if I print the data, but when accessing it to train the model, I keep getting RuntimeError: Expected object of scalar type Double but got scalar type Float for argument #2 'weight' in call to _thnn_conv2d_forward. I am unable to figure out why this is happening. I have also attached an image at the end to give a better understanding of the error message.
The labels.txt file looks as follows (name of the image, linking to another folder with the corresponding images, followed by the circle's center point (x, y) and radius):
0000, 0.67 , 0.69 , 0.26
0001, 0.69 , 0.33 , 0.3
0002, 0.16 , 0.27 , 0.15
0003, 0.54 , 0.33 , 0.17
0004, 0.32 , 0.45 , 0.3
0005, 0.78 , 0.26 , 0.17
0006, 0.44 , 0.49 , 0.19
EDIT: This is the loss function and optimizer I am using:
optimizer = optim.Adam(model.parameters(), lr=0.001)
nn.CrossEntropyLoss()
My validate model function is as follows:
def validate_model(model, loader):
    model.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
                  # (dropout is set to zero)
    val_running_loss = 0.0
    val_running_correct = 0
    for int, data in enumerate(loader):
        data, target = data['image'].to(device), data['labels'].to(device)
        output = model(data)
        loss = my_loss(output, target)
        val_running_loss = val_running_loss + loss.item()
        _, preds = torch.max(output.data, 1)
        val_running_correct = val_running_correct + (preds == target).sum().item()

    avg_loss = val_running_loss / len(loader.dataset)
    val_accuracy = 100. * val_running_correct / len(loader.dataset)
    #----------------------------------------------
    # implementation needed here
    #----------------------------------------------
    return avg_loss, val_accuracy
I have a fit function for working out training loss:
def fit(model, train_dataloader):
    model.train()
    train_running_loss = 0.0
    train_running_correct = 0
    for i, data in enumerate(train_dataloader):
        print(data)
        # I believe this is causing the error, but not sure why.
        data, target = data['image'].to(device), data['labels'].to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = my_loss(output, target)
        train_running_loss = train_running_loss + loss.item()
        _, preds = torch.max(output.data, 1)
        train_running_correct = train_running_correct + (preds == target).sum().item()
        loss.backward()
        optimizer.step()

    train_loss = train_running_loss / len(train_dataloader.dataset)
    train_accuracy = 100. * train_running_correct / len(train_dataloader.dataset)
    print(f'Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}')
    return train_loss, train_accuracy
and the following train_model function which stores the losses and accuracy in a list:
train_losses, train_accuracy = [], []
validation_losses, val_accuracy = [], []

def train_model(model,
                optimizer,
                train_loader,
                validation_loader,
                train_losses,
                validation_losses,
                epochs=1):
    """
    Trains a neural network.
    Args:
        model - model to be trained
        optimizer - optimizer used for training
        train_loader - loader from which data for training comes
        validation_loader - loader from which data for validation comes (maybe at the end, you use test_loader)
        train_losses - adding train loss value to this list for future analysis
        validation_losses - adding validation loss value to this list for future analysis
        epochs - number of runs over the entire data set
    """
    #----------------------------------------------
    # implementation needed here
    #----------------------------------------------
    for epoch in range(epochs):
        train_epoch_loss, train_epoch_accuracy = fit(model, train_loader)
        val_epoch_loss, val_epoch_accuracy = validate_model(model, validation_loader)
        train_losses.append(train_epoch_loss)
        train_accuracy.append(train_epoch_accuracy)
        validation_losses.append(val_epoch_loss)
        val_accuracy.append(val_epoch_accuracy)
    return
And when I run the following code I get the Runtime Error:
train_model(model,
            optimizer,
            train_loader,
            validation_loader,
            train_losses,
            validation_losses,
            epochs=2)
ERROR: RuntimeError: Expected object of scalar type Double but got scalar type Float for argument #2 'weight' in call to _thnn_conv2d_forward
A screenshot of the error message was attached as well (not reproduced here).
EDIT: This is what my model looks like. I am supposed to detect circles in an image, using the centers and radii given in the labels.txt file, and paint over them; the painting function was given, and I had to create the model and the training and validation.
class CircleNet(nn.Module):  # nn.Module is parent class
    def __init__(self):
        super(CircleNet, self).__init__()  # calls init of parent class
        #----------------------------------------------
        # implementation needed here
        #----------------------------------------------
        # keep dimensions of input image: (I-F+2P)/S +1 = (128-3+2)/1 + 1 = 128
        # RGB image = input channels = 3. Use 12 filters for first 2 convolution layers, then double
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=24, out_channels=32, kernel_size=3, stride=1, padding=1)

        # Pooling to reduce sizes, and dropout to prevent overfitting
        self.pool = nn.MaxPool2d(kernel_size=2)
        self.relu = nn.ReLU()
        self.drop = nn.Dropout2d(p=0.25)
        self.norm1 = nn.BatchNorm2d(12)
        self.norm2 = nn.BatchNorm2d(24)

        # There are 2 pooling layers, each with kernel size of 2. Output size: 128/(2*2) = 32
        # Have 3 output features, corresponding to x-pos, y-pos, radius.
        self.fc = nn.Linear(in_features=32 * 32 * 32, out_features=3)

    def forward(self, x):
        """
        Feed forward through network
        Args:
            x - input to the network
        Returns "x", which is the network's output
        """
        #----------------------------------------------
        # implementation needed here
        #----------------------------------------------
        # Conv1
        out = self.conv1(x)
        out = self.pool(out)
        out = self.relu(out)
        out = self.norm1(out)
        # Conv2
        out = self.conv2(out)
        out = self.pool(out)
        out = self.relu(out)
        out = self.norm1(out)
        # Conv3
        out = self.conv3(out)
        out = self.drop(out)
        # Conv4
        out = self.conv4(out)
        out = F.dropout(out, training=self.training)

        out = out.view(-1, 32 * 32 * 32)
        out = self.fc(out)
        return out
EDIT: In case it helps, here is my custom loss function:
criterion = nn.CrossEntropyLoss()

def my_loss(outputs, labels):
    """
    Args:
        outputs - output of network ([batch size, 3])
        labels - desired labels ([batch size, 3])
    """
    loss = torch.zeros(1, dtype=torch.float, requires_grad=True)
    loss = loss.to(device)
    loss = criterion(outputs, labels)
    #----------------------------------------------
    # implementation needed here
    #----------------------------------------------
    # Observe: If you need to iterate and add certain values to loss defined above
    # you cannot write: loss += ... because this will raise the error:
    # "Leaf variable was used in an inplace operation"
    # Instead, to avoid this error write: loss = loss + ...
    return loss
Train Loader (given to me):
train_dir = "./train/"
validation_dir = "./validation/"
test_dir = "./test/"
train_dataset = ShapesDataset(train_dir)
train_loader = DataLoader(train_dataset,
batch_size=32,
shuffle=True)
validation_dataset = ShapesDataset(validation_dir)
validation_loader = DataLoader(validation_dataset,
batch_size=1,
shuffle=False)
test_dataset = ShapesDataset(test_dir)
test_loader = DataLoader(test_dataset,
batch_size=1,
shuffle=False)
print("train loader examples :", len(train_dataset))
print("validation loader examples:", len(validation_dataset))
print("test loader examples :", len(test_dataset))
EDIT: This code to view images, target circle labels, and network output was also given:
"""
View first image of a given number of batches assuming that model has been created.
Currently, lines assuming model has been creatd, are commented out. Without a model,
you can view target labels and the corresponding images.
This is given to you so that you may see how loaders and model can be used.
"""
loader = train_loader # choose from which loader to show images
bacthes_to_show = 2
with torch.no_grad():
for i, data in enumerate(loader, 0): #0 means that counting starts at zero
inputs = (data['image']).to(device) # has shape (batch_size, 3, 128, 128)
labels = (data['labels']).to(device) # has shape (batch_size, 3)
img_fnames = data['fname'] # list of length batch_size
#outputs = model(inputs.float())
img = Image.open(img_fnames[0])
print ("showing image: ", img_fnames[0])
labels_str = [ float(("{0:.2f}".format(x))) for x in labels[0]]#labels_np_arr]
#outputs_np_arr = outputs[0] # using ".numpy()" to convert tensor to numpy array
#outputs_str = [ float(("{0:.2f}".format(x))) for x in outputs_np_arr]
print("Target labels :", labels_str )
#print("network coeffs:", outputs_str)
print()
#img.show()
if (i+1) == bacthes_to_show:
break
Here is the output I'm getting; it is supposed to cover the full circle: (output image omitted)
Any ideas will be helpful.
I basically added (in the validate_model and fit functions) this:
_, target = torch.max(target.data, 1)
below the _, preds = torch.max(output.data, 1) line, so that preds and target have the same shape. I also changed the loss function from CrossEntropyLoss to MSELoss.
Then, in the same functions, I changed the line output = model(data) to output = model(data.float()).
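
For reference, the Double/Float mismatch itself most likely comes from the dataset yielding float64 tensors while the Conv2d weights are float32, so either side can be converted. A minimal sketch of a float32 training step under those assumptions, reusing the loaders and CircleNet from the question and MSELoss as in the answer:

import torch

# Assumes `model`, `train_loader`, and `optimizer` are defined as in the question.
def fit_float32(model, train_loader, optimizer):
    model.train()
    for data in train_loader:
        images = data['image'].float()    # cast float64 batches down to float32
        targets = data['labels'].float()  # MSELoss wants float targets shaped like the output
        optimizer.zero_grad()
        output = model(images)
        loss = torch.nn.functional.mse_loss(output, targets)
        loss.backward()
        optimizer.step()

# Alternative: convert the model parameters to float64 once instead:
# model = CircleNet().double()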

Lasagne mlp target out of bound

Hi, I am trying to modify the MNIST example to match my dataset. I only try to use the MLP example, and it gives a strange error.
The dataset is a matrix with 2100 rows and 17 columns, and the output should be one of the 16 possible classes. The error seems to happen in the second phase of the training. The model is built correctly (confirmed by the log info).
Here is the error log:
ValueError: y_i value out of bounds
Apply node that caused the error:
CrossentropySoftmaxArgmax1HotWithBias(Dot22.0, b, targets)
Toposort index: 33
Inputs types: [TensorType(float64, matrix), TensorType(float64, vector), TensorType(int32, vector)]
Inputs shapes: [(100, 17), (17,), (100,)]
Inputs strides: [(136, 8), (8,), (4,)]
Inputs values: ['not shown', 'not shown', 'not shown']
Outputs clients: [[Sum{acc_dtype=float64}(CrossentropySoftmaxArgmax1HotWithBias.0)], [CrossentropySoftmax1HotWithBiasDx(Assert{msg='sm and dy do not have the same shape.'}.0, CrossentropySoftmaxArgmax1HotWithBias.1, targets)], []]
HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
Here is the code:
def build_mlp(input_var=None):
    l_in = lasagne.layers.InputLayer(shape=(None, 16),
                                     input_var=input_var)
    # Apply 20% dropout to the input data:
    l_in_drop = lasagne.layers.DropoutLayer(l_in, p=0.2)
    # Add a fully-connected layer of 800 units, using the linear rectifier, and
    # initializing weights with Glorot's scheme (which is the default anyway):
    l_hid1 = lasagne.layers.DenseLayer(
        l_in_drop, num_units=10,
        nonlinearity=lasagne.nonlinearities.rectify,
        W=lasagne.init.GlorotUniform())
    # We'll now add dropout of 50%:
    l_hid1_drop = lasagne.layers.DropoutLayer(l_hid1, p=0.5)
    # Another 800-unit layer:
    l_hid2 = lasagne.layers.DenseLayer(
        l_hid1_drop, num_units=10,
        nonlinearity=lasagne.nonlinearities.rectify)
    # 50% dropout again:
    l_hid2_drop = lasagne.layers.DropoutLayer(l_hid2, p=0.5)
    # Finally, we'll add the fully-connected output layer, of 10 softmax units:
    l_out = lasagne.layers.DenseLayer(
        l_hid2_drop, num_units=17,
        nonlinearity=lasagne.nonlinearities.softmax)
    # Each layer is linked to its incoming layer(s), so we only need to pass
    # the output layer to give access to a network in Lasagne:
    return l_out
def main(model='mlp', num_epochs=300):
    # Load the dataset
    print("Loading data...")
    X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()
    # Prepare Theano variables for inputs and targets
    input_var = T.matrix('inputs')
    target_var = T.ivector('targets')
    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")
    if model == 'cnn':
        network = build_cnn(input_var)
    elif model == 'mlp':
        network = build_mlp(input_var)
    elif model == 'lstm':
        network = build_lstm(input_var)
    else:
        print("Unrecognized model type %r." % model)
    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    # We could add some weight decay as well here, see lasagne.regularization.
    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
        loss, params, learning_rate=0.01, momentum=0.9)
    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    test_loss = test_loss.mean()
    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)
    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])
    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(X_train, y_train, 100, shuffle=True):
            inputs, targets = batch
            train_err += train_fn(inputs, targets)
            train_batches += 1
        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(X_val, y_val, 100, shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1
        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100))
    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = 0
    for batch in iterate_minibatches(X_test, y_test, 100, shuffle=False):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        test_err += err
        test_acc += acc
        test_batches += 1
    print("Final results:")
    print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print("  test accuracy:\t\t{:.2f} %".format(
        test_acc / test_batches * 100))
I figured out the problem:
my dataset does not have an example for every target class, because it is too small. There are 17 target outputs, but my dataset contains only 16 different outputs; it is missing examples of the 17th class.
In order to resolve this problem, just change the softmax to rectify,
from this:
l_out = lasagne.layers.DenseLayer(
    l_hid2_drop, num_units=17,
    nonlinearity=lasagne.nonlinearities.softmax)
to this:
l_out = lasagne.layers.DenseLayer(
    l_hid2_drop, num_units=17,
    nonlinearity=lasagne.nonlinearities.rectify)
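
An alternative that keeps the softmax output (and with it a proper categorical cross-entropy) is to remap the labels to a contiguous 0..K-1 range and size the output layer to K, so no target index can fall outside the number of units. A rough sketch, assuming the targets are the integer arrays returned by load_dataset():

import numpy as np

classes = np.unique(y_train)                     # the class labels that actually occur, e.g. 16 of them
class_to_index = {c: i for i, c in enumerate(classes)}

y_train = np.array([class_to_index[c] for c in y_train], dtype=np.int32)
y_val   = np.array([class_to_index[c] for c in y_val],   dtype=np.int32)
y_test  = np.array([class_to_index[c] for c in y_test],  dtype=np.int32)

num_classes = len(classes)  # pass this as num_units for the output DenseLayer instead of 17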
