Different result inside train function and outside it

Different result inside train function and outside it - python

I am playing with tensorflow 2. I did my own model similar to how it is done here.
Then I created my own fit function. Now I get the weirdest thing ever. Here is the EXACT copy/paste output from my notebook where I did the tests:
def fit(x_train, y_train, learning_rate=0.01, epochs=10, batch_size=100, normal=True, verbose=True, display_freq=100):
    """Train the module-level ``model`` with mini-batch updates.

    Relies on helpers defined elsewhere in the notebook: ``normalize``,
    ``randomize``, ``get_next_batch``, ``train_step``, ``_apply_loss``,
    ``evaluate_accuracy`` and the global ``model``.

    Args:
        x_train: Training inputs.
        y_train: Training targets; ``len(y_train)`` sets the number of batches.
        learning_rate: Forwarded to ``train_step`` for every batch.
        epochs: Number of passes over the training data.
        batch_size: Number of samples per batch.
        normal: When true, ``normalize`` is applied to ``x_train`` first.
        verbose: When true, progress lines are printed.
        display_freq: Controls how often a progress line is printed.
    """
    if normal:
        # NOTE: this rebinds only the LOCAL name. Everything below (including
        # the final "End:" metrics) sees the normalized data, while the
        # caller's x_train is presumably left un-normalized -- so metrics
        # computed after fit() returns are evaluated on different inputs.
        x_train = normalize(x_train)  # TODO: This normalize could be a bit different for each and be bad.
    num_tr_iter = len(y_train) // batch_size  # Number of training iterations in each epoch
    if verbose:
        print("Starting training...")
    for epoch in range(epochs):
        # Randomly shuffle the training data at the beginning of each epoch
        x_train, y_train = randomize(x_train, y_train)
        for iteration in range(num_tr_iter):
            # Get the batch
            start = iteration * batch_size
            end = (iteration + 1) * batch_size
            x_batch, y_batch = get_next_batch(x_train, y_train, start, end)
            # NOTE(review): the step counter uses epoch * batch_size rather
            # than epoch * num_tr_iter; kept as-is so the printed transcript
            # stays reproducible.
            if verbose and (epoch * batch_size + iteration) % display_freq == 0:
                current_loss = _apply_loss(y_train, model(x_train, training=True))
                current_acc = evaluate_accuracy(x_train, y_train)
                print("Epoch: {0}/{1}; batch {2}/{3}; loss: {4:.4f}; accuracy: {5:.2f} %"
                      .format(epoch, epochs, iteration, num_tr_iter, current_loss, current_acc*100))
            # Run optimization op (backpropagation)
            train_step(x_batch, y_batch, learning_rate)
    # Final metrics, computed on the (possibly normalized) local x_train.
    current_loss = _apply_loss(y_train, model(x_train, training=True))
    current_acc = evaluate_accuracy(x_train, y_train)
    print("End: loss: {0:.4f}; accuracy: {1:.2f} %".format(current_loss, current_acc*100))
import logging
logging.getLogger('tensorflow').disabled = True
# Train, then recompute the same metrics from outside the function.
# NOTE(review): fit() rebinds x_train to normalize(x_train) only inside its own
# scope, so the x_train used below is presumably still the un-normalized data;
# that would explain why these metrics differ from the "End:" line fit() prints.
fit(x_train, y_train)
current_loss = _apply_loss(y_train, model(x_train, training=True))
current_acc = evaluate_accuracy(x_train, y_train)
print("End: loss: {0:.4f}; accuracy: {1:.2f} %".format(current_loss, current_acc*100))
This segment outputs:
Starting training...
Epoch: 0/10; batch 0/80; loss: 0.9533; accuracy: 59.67 %
Epoch: 1/10; batch 0/80; loss: 0.9386; accuracy: 60.15 %
Epoch: 2/10; batch 0/80; loss: 0.9259; accuracy: 60.50 %
Epoch: 3/10; batch 0/80; loss: 0.9148; accuracy: 61.05 %
Epoch: 4/10; batch 0/80; loss: 0.9051; accuracy: 61.15 %
Epoch: 5/10; batch 0/80; loss: 0.8968; accuracy: 61.35 %
Epoch: 6/10; batch 0/80; loss: 0.8896; accuracy: 61.27 %
Epoch: 7/10; batch 0/80; loss: 0.8833; accuracy: 61.51 %
Epoch: 8/10; batch 0/80; loss: 0.8780; accuracy: 61.52 %
Epoch: 9/10; batch 0/80; loss: 0.8733; accuracy: 61.54 %
End: loss: 0.8733; accuracy: 61.54 %
End: loss: 0.4671; accuracy: 77.08 %
Now my question is, how is it that I get a different value on the last 2 lines!? I am doing the same thing right? I am totally puzzled here. I don't even know how to google this.

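So the problem was just stupid. It was caused by the normalize call at the start of my fit function: inside fit, x_train refers to the normalized data, while the code after the call evaluates the model on the original, un-normalized x_train. After removing the normalization, both results agree.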

Related

Deep learning model is training on very less data

I'm training a deep learning model on 100000 rows, split into 80% training data and 20% test data. The data is split as expected; however, the training output shows only 2242 per epoch instead of the number of training rows. Below are the training code (including the model) and its output. Any help will be highly appreciated.
Training Code:
import time
start_time = time.time()
from sklearn.feature_extraction.text import TfidfVectorizer
tweet_table = cleaning_table(tweet_table)
def tokenization_tweets(dataset, features):
    """Convert raw texts into a dense TF-IDF feature matrix.

    Args:
        dataset: Iterable of text documents.
        features: Passed to ``TfidfVectorizer`` as ``max_features``.

    Returns:
        The result of calling ``.toarray()`` on the TF-IDF matrix.
    """
    # NOTE(review): the fitted vectorizer is discarded, so other data (e.g. a
    # test split) cannot later be transformed with the same vocabulary.
    tokenization = TfidfVectorizer(max_features=features)
    # fit_transform learns the vocabulary and transforms in a single pass.
    return tokenization.fit_transform(dataset).toarray()
def splitting(table):
    """Split ``table`` into shuffled train and test parts (80 % / 20 %).

    Args:
        table: Object exposing ``tweet`` (inputs) and ``test`` (labels)
            attributes; presumably a pandas DataFrame.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        table.tweet, table.test, test_size=0.2, shuffle=True)
    return X_train, X_test, y_train, y_test
if __name__ == "__main__":
tweet_table['test'] = tweet_table['Overall_Sentiment'].apply(lambda x: 1 if x == 'Positive' else (0 if x == 'Negative' else 2))
if __name__ == "__main__":
X_train, X_test, y_train, y_test = splitting(tweet_table)
#print(tweet_table["test"].value_counts())
#print(tweet_table["Overall_Sentiment"].value_counts())
#print(list(set(y_train)))
#print(list(set(y_test)))
#Create a Neural Network
#Create the model
def train(X_train_mod, y_train, features, shuffle, drop, layer1, layer2, epoch, lr, epsilon, validation):
    """Build, compile and fit a three-class dense classifier.

    Args:
        X_train_mod: Feature matrix with ``features`` columns.
        y_train: Integer class labels (sparse categorical targets).
        features: Input dimensionality of the first layer.
        shuffle: Forwarded to ``fit(shuffle=...)``.
        drop: Dropout rate applied after each hidden layer.
        layer1: Units in the first (ReLU) hidden layer.
        layer2: Units in the second (sigmoid) hidden layer.
        epoch: Number of training epochs.
        lr: Adam learning rate.
        epsilon: Adam epsilon, passed through unchanged.
        validation: Fraction of the data held out via ``validation_split``.

    Returns:
        The fitted ``Sequential`` model.
    """
    model_nn = Sequential()
    model_nn.add(Dense(layer1, input_shape=(features,), activation='relu'))
    model_nn.add(Dropout(drop))
    model_nn.add(Dense(layer2, activation='sigmoid'))
    model_nn.add(Dropout(drop))
    model_nn.add(Dense(3, activation='softmax'))
    # `learning_rate` replaces the deprecated `lr` alias; `decay=0.0` was the
    # default and is rejected by newer Keras optimizers, so it is omitted.
    optimizer = keras.optimizers.Adam(learning_rate=lr, beta_1=0.9, beta_2=0.999,
                                      epsilon=epsilon, amsgrad=False)
    model_nn.compile(loss='sparse_categorical_crossentropy',
                     optimizer=optimizer,
                     metrics=['accuracy'])
    # NOTE(review): recent tf.keras progress bars count batches (steps), not
    # samples, so the bar's total is presumably about
    # n_samples * (1 - validation) / 32 rather than the row count -- confirm.
    model_nn.fit(np.array(X_train_mod), y_train,
                 batch_size=32,
                 epochs=epoch,
                 verbose=1,
                 validation_split=validation,
                 shuffle=shuffle)
    return model_nn
def test(X_test, model_nn):
    """Return ``model_nn.predict(X_test)``.

    Args:
        X_test: Inputs forwarded unchanged to ``predict``; presumably they
            must be vectorised the same way as the training data.
        model_nn: Object exposing a ``predict`` method.
    """
    return model_nn.predict(X_test)
def model1(X_train, y_train):
    """Vectorise ``X_train`` with TF-IDF and train the classifier on it.

    Uses a fixed hyper-parameter set: 3500 TF-IDF features, hidden layers of
    512 and 256 units, dropout 0.5, 5 epochs, learning rate 0.001 and a 10 %
    validation split.

    Returns:
        The model returned by ``train``.
    """
    features = 3500
    shuffle = True
    drop = 0.5
    layer1 = 512
    layer2 = 256
    epoch = 5
    lr = 0.001
    epsilon = None
    validation = 0.1
    X_train_mod = tokenization_tweets(X_train, features)
    model = train(X_train_mod, y_train, features, shuffle, drop, layer1, layer2, epoch, lr, epsilon, validation)
    return model
#model1(X_train, y_train)
#model11(X_train, y_train)
def save_model(model):
    """Write the model architecture to ``model.json`` and weights to ``model_weights.h5``.

    Args:
        model: Object exposing ``to_json()`` and ``save_weights(path)``;
            presumably a Keras model.
    """
    # lets assume `model` is main model
    model_json = model.to_json()
    with open("model.json", "w") as json_file:
        # NOTE(review): if to_json() already returns a JSON string, json.dump
        # encodes it a second time, so the file holds a quoted JSON string and
        # must be read back with json.load() before rebuilding the model.
        # Kept unchanged so existing loaders continue to work.
        json.dump(model_json, json_file)
    model.save_weights("model_weights.h5")
#print(len(X_train))
#print(len(y_train))
model_final = model1(X_train, y_train)
Output:
Epoch 1/5
2242/2242 [==============================] - 6s 3ms/step - loss: 0.3426 - accuracy: 0.8476 - val_loss: 0.2690 - val_accuracy: 0.8857
Epoch 2/5
2242/2242 [==============================] - 6s 3ms/step - loss: 0.2399 - accuracy: 0.9015 - val_loss: 0.2471 - val_accuracy: 0.8991
Epoch 3/5
2242/2242 [==============================] - 6s 3ms/step - loss: 0.1912 - accuracy: 0.9205 - val_loss: 0.2447 - val_accuracy: 0.9028
Epoch 4/5
2242/2242 [==============================] - 6s 3ms/step - loss: 0.1454 - accuracy: 0.9399 - val_loss: 0.2547 - val_accuracy: 0.9083
Epoch 5/5
2242/2242 [==============================] - 6s 3ms/step - loss: 0.1046 - accuracy: 0.9552 - val_loss: 0.2874 - val_accuracy: 0.9084
--- 192.1562056541443 seconds ---
Many Thanks

PyTorch: Different training accuracies using same random seed

I am trying to evaluate my model on the whole training set after each epoch.
This is what I did:
torch.manual_seed(1)
model = ConvNet(num_classes=num_classes)
cost_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
def compute_accuracy(model, data_loader):
    """Return the classification accuracy of ``model`` in percent.

    Args:
        model: Callable mapping a batch of features to per-class logits.
        data_loader: Iterable yielding ``(features, targets)`` batches.

    Returns:
        A zero-dimensional float tensor holding ``100 * correct / total``.

    Raises:
        ValueError: If ``data_loader`` yields no examples.
    """
    correct_pred, num_examples = 0, 0
    # Evaluation only: no autograd graph is needed for the forward passes.
    with torch.no_grad():
        for features, targets in data_loader:
            logits = model(features)
            predicted_labels = torch.argmax(logits, 1)
            num_examples += targets.size(0)
            correct_pred += (predicted_labels == targets).sum()
    if num_examples == 0:
        # Without this guard correct_pred is still the int 0 and .float() fails.
        raise ValueError("data_loader yielded no examples")
    return correct_pred.float()/num_examples * 100
# One pass over train_loader per epoch, then report training accuracy.
for epoch in range(num_epochs):
    model = model.train()
    for features, targets in train_loader:
        logits = model(features)
        cost = cost_fn(logits, targets)
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
    # Switch to evaluation mode before measuring accuracy.
    model = model.eval()
    print('Epoch: %03d/%03d training accuracy: %.2f%%' % (
        epoch+1, num_epochs,
        compute_accuracy(model, train_loader)))
the output was convincing:
Epoch: 001/005 training accuracy: 89.08%
Epoch: 002/005 training accuracy: 90.41%
Epoch: 003/005 training accuracy: 91.70%
Epoch: 004/005 training accuracy: 92.31%
Epoch: 005/005 training accuracy: 92.95%
But then I added another line at the end of the training loop, to also evaluate the model on the whole test set after each epoch:
# Same loop as before, plus an evaluation pass over test_loader each epoch.
for epoch in range(num_epochs):
    model = model.train()
    for features, targets in train_loader:
        logits = model(features)
        cost = cost_fn(logits, targets)
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
    # Switch to evaluation mode before measuring accuracy.
    model = model.eval()
    print('Epoch: %03d/%03d training accuracy: %.2f%%' % (
        epoch+1, num_epochs,
        compute_accuracy(model, train_loader)))
    # NOTE(review): iterating test_loader presumably draws from the global
    # torch RNG too (a DataLoader iterator requests a seed on creation), which
    # would shift the shuffle order of later training epochs -- confirm.
    print('\t\t testing accuracy: %.2f%%' % (compute_accuracy(model, test_loader)))
But the training accuracies started to change:
Epoch: 001/005 training accuracy: 89.08%
testing accuracy: 87.66%
Epoch: 002/005 training accuracy: 90.42%
testing accuracy: 89.04%
Epoch: 003/005 training accuracy: 91.84%
testing accuracy: 90.01%
Epoch: 004/005 training accuracy: 91.86%
testing accuracy: 89.83%
Epoch: 005/005 training accuracy: 92.45%
testing accuracy: 90.32%
Am I doing something wrong? I expected the training accuracies to remain the same because the manual seed is 1 in both cases.
Is this an expected output ?

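Setting the random seed does not stop the model from learning or from reaching a different accuracy, because the seed only initialises the pseudo-random number generator. In this case the seed (1) determines how the training data is shuffled. Any additional use of that generator, such as iterating over a second DataLoader for the test set, advances its state, so later epochs see the training data in a different order and the training accuracies change.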

Nonexistent pytorch gradients when dotting tensors in loss function

For the purposes of this MWE I'm trying to fit a linear regression using a custom loss function with multiple terms. However, I'm running into strange behavior when trying to weight the different terms in my loss function by dotting a weight vector with my losses. Just summing the losses works as expected; however, when dotting the weights and losses the backpropagation gets broken somehow and the loss function doesn't decrease.
I've tried enabling and disabling requires_grad on both tensors, but have been unable to replicate the expected behavior.
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
# Hyper-parameters
input_size = 1
output_size = 1
num_epochs = 60
learning_rate = 0.001
# Toy dataset
x_train = np.array([[3.3], [4.4], [5.5], [6.71], [6.93], [4.168],
[9.779], [6.182], [7.59], [2.167], [7.042],
[10.791], [5.313], [7.997], [3.1]], dtype=np.float32)
y_train = np.array([[1.7], [2.76], [2.09], [3.19], [1.694], [1.573],
[3.366], [2.596], [2.53], [1.221], [2.827],
[3.465], [1.65], [2.904], [1.3]], dtype=np.float32)
# Linear regression model
model = nn.Linear(input_size, output_size)
# Loss and optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
def loss_fn(outputs, targets):
    """Return the sum of the L1 and L2 norms of ``outputs - targets``.

    The terms are combined through a dot product with a weight vector. The
    value is correct, but see the note below: the result is detached from
    ``outputs``, which is the behaviour this example demonstrates.
    """
    l1loss = torch.norm(outputs - targets, 1)
    l2loss = torch.norm(outputs - targets, 2)
    # This works as expected
    # loss = 1 * l1loss + 1 * l2loss
    # Loss never changes, no matter what combination of
    # requires_grad I set
    # NOTE: torch.tensor([...]) copies the two values into a brand-new leaf
    # tensor, so the autograd graph back to `outputs` is cut and no gradient
    # reaches the model. torch.stack([l1loss, l2loss]) would keep the graph.
    loss = torch.dot(torch.tensor([1.0, 1.0], requires_grad=False),
                     torch.tensor([l1loss, l2loss], requires_grad=True))
    return loss
# Train the model
# Convert numpy arrays to torch tensors (loop-invariant, so done once)
inputs = torch.from_numpy(x_train)
targets = torch.from_numpy(y_train)
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(inputs)
    loss = loss_fn(outputs, targets)
    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if (epoch+1) % 5 == 0:
        print ('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))
# Plot the graph
predicted = model(torch.from_numpy(x_train)).detach().numpy()
plt.plot(x_train, y_train, 'ro', label='Original data')
plt.plot(x_train, predicted, label='Fitted line')
plt.legend()
plt.show()
Expected result: loss function decreases and the linear regression is fitted (see output below)
Epoch [5/60], Loss: 7.9943
Epoch [10/60], Loss: 7.7597
Epoch [15/60], Loss: 7.6619
Epoch [20/60], Loss: 7.6102
Epoch [25/60], Loss: 7.4971
Epoch [30/60], Loss: 7.4106
Epoch [35/60], Loss: 7.3942
Epoch [40/60], Loss: 7.2438
Epoch [45/60], Loss: 7.2322
Epoch [50/60], Loss: 7.1012
Epoch [55/60], Loss: 7.0701
Epoch [60/60], Loss: 6.9612
Actual result: no change in loss function
Epoch [5/60], Loss: 73.7473
Epoch [10/60], Loss: 73.7473
Epoch [15/60], Loss: 73.7473
Epoch [20/60], Loss: 73.7473
Epoch [25/60], Loss: 73.7473
Epoch [30/60], Loss: 73.7473
Epoch [35/60], Loss: 73.7473
Epoch [40/60], Loss: 73.7473
Epoch [45/60], Loss: 73.7473
Epoch [50/60], Loss: 73.7473
Epoch [55/60], Loss: 73.7473
Epoch [60/60], Loss: 73.7473
I'm pretty confused as to why such a simple operation is breaking the backpropagation gradients and would really appreciate it if anyone had some insights on why this isn't working.

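Use torch.stack((l1loss, l2loss)) instead of torch.tensor([l1loss, l2loss]): torch.tensor creates a new tensor from the values of the existing tensors, which destroys the graph. (torch.cat would not work here, because the two losses are zero-dimensional tensors and cannot be concatenated.)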
Anyway you shouldn't do that unless you are trying to generalize your loss function, it's pretty unreadable. Simple addition is way better.

Pytorch LSTM each epoch starts from 0 accuracy

I'm training a LSTM model for time series prediction and at each epoch my accuracy restarts from 0 as if I'm training for the first time.
I attach below the training method snippet:
def train(model, loader, epoch, mini_batch_size, sequence_size):
    """Run one training pass over ``loader`` and record its metrics.

    Relies on names defined elsewhere: ``criterion``, ``optimizer``,
    ``Variable``, ``padd_incomplete_sequences`` and the ``train_accuracy``,
    ``train_epochs`` and ``train_loss`` lists.

    Args:
        model: Network to train; switched to training mode.
        loader: Yields ``(inputs, labels, agreement_score)`` batches and
            exposes ``loader.dataset``.
        epoch: Epoch number, used only for logging and bookkeeping.
        mini_batch_size: Number of sequences per batch after reshaping.
        sequence_size: Number of time steps per sequence.
    """
    model.train()
    # Running counters for THIS call only. They restart at 0 each time train()
    # is invoked, so the running accuracy printed below begins at 0 % at the
    # start of every epoch.
    correct = 0
    padded_size = 0
    size_input = mini_batch_size * sequence_size
    for batch_idx, (inputs, labels, agreement_score) in enumerate(loader):
        if inputs.size(0) == size_input:
            # Full batch: fold the flat rows into (batch, sequence, features).
            inputs = inputs.clone().reshape(mini_batch_size, sequence_size, inputs.size(1))
            labels = labels.clone().squeeze().reshape(size_input)
            agreement_score = agreement_score.clone().squeeze().reshape(size_input)
        else:
            # Short batch: remember how many rows are missing and pad it.
            padded_size = size_input - inputs.size(0)
            (inputs, labels, agreement_score) = padd_incomplete_sequences(
                inputs, labels, agreement_score, mini_batch_size, sequence_size)
        inputs, labels, agreement_score = Variable(inputs.cuda()), Variable(labels.cuda()), Variable(agreement_score.cuda())
        output = model(inputs)
        # Weight the loss by the agreement score before averaging.
        loss = criterion(output, labels)
        loss = loss * agreement_score
        loss = loss.mean()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        pred = output.data.max(1, keepdim = True)[1]
        correct += pred.eq(labels.data.view_as(pred)).cuda().sum()
        total = len(loader.dataset) + padded_size
        accuracy = 100. * correct / total
        # NOTE(review): the progress percentage divides a batch index by a
        # sample count, so it stays near 0 %; kept as-is so the printed
        # transcript stays reproducible.
        print("Train: Epoch: {}, [{}/{} ({:.0f}%)]\t loss: {:.6f}, Accuracy: {}/{} ({:.0f}%)".format(
            epoch,
            batch_idx * len(output),
            total,
            100. * batch_idx / total,
            loss.item(),
            correct,
            total,
            accuracy))
    # End-of-epoch bookkeeping (loss is the last batch's loss).
    accuracy = 100. * correct / (len(loader.dataset) + padded_size)
    train_accuracy.append(accuracy)
    train_epochs.append(epoch)
    train_loss.append(loss.item())
According to that my loop looks like:
# Train and then evaluate once per epoch (epochs 1 through 9).
for epoch in range(1, 10):
    train(audio_lstm_model, train_rnn_audio_loader, epoch, MINI_BATCH_SIZE, SEQUENCE_SIZE_AUDIO)
    evaluation(audio_lstm_model,validation_rnn_audio_loader, epoch, MINI_BATCH_SIZE, SEQUENCE_SIZE_AUDIO)
Consequently, my accuracy and loss restarts at every epoch:
Train: Epoch: 1, [0/1039079 (0%)] loss: 0.921637, Accuracy: 0/1039079 (0%)
...
Train: Epoch: 1, [10368/1039079 (0%)] loss: 0.523242, Accuracy: 206010/1039079 (19%)
Test set: loss: 151.4845, Accuracy: 88222/523315 (16%)
Train: Epoch: 2, [0/1039079 (0%)] loss: 0.921497, Accuracy: 0/1039079 (0%)
If anyone has any clue about it, your help is welcomed!
Have a nice day!

The problem turned out to be that the sequence size was too small for the network to be able to make any predictions from it.
So after increasing the sequence length by some orders of magnitude, I was able to improve my model after each epoch.

How to get both score and accuracy after training

model.fit(X_train, y_train, batch_size = batch_size,
nb_epoch = 4, validation_data = (X_test, y_test),
show_accuracy = True)
score = model.evaluate(X_test, y_test,
batch_size = batch_size, show_accuracy = True, verbose=0)
gives scalar output and hence the following code doesn't work.
print("Test score", score[0])
print("Test accuracy:", score[1])
The output that I get is:
Train on 20000 samples, validate on 5000 samples
Epoch 1/4
20000/20000 [==============================] - 352s - loss: 0.4515 - val_loss: 0.4232
Epoch 2/4
20000/20000 [==============================] - 381s - loss: 0.2592 - val_loss: 0.3723
Epoch 3/4
20000/20000 [==============================] - 374s - loss: 0.1513 - val_loss: 0.4329
Epoch 4/4
20000/20000 [==============================] - 380s - loss: 0.0838 - val_loss: 0.5044
Keras version 1.0
How can I get the accuracy as well? Please help

If you use Sequential model you can try (CODE UPDATED):
nb_epochs = 4
history = model.fit(X_train, y_train, batch_size = batch_size,
nb_epoch = nb_epochs, validation_data = (X_test, y_test),
show_accuracy = True)
print("Test score", history.history["val_loss"][nb_epochs - 1])
print("Test acc", history.history["val_acc"][nb_epochs - 1])

Thanks Marcin and you are correct.
The code needs to be like this
model.compile(loss='binary_crossentropy',
optimizer = 'adam',
metrics=["accuracy"])
show_accuracy serves no purpose in model.fit and needs to be removed from there.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Different result inside train function and outside it - python

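So the problem was just stupid. It was caused by the normalize call at the start of my fit function: inside fit, x_train refers to the normalized data, while the code after the call evaluates the model on the original, un-normalized x_train. After removing the normalization, both results agree.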

Related

Deep learning model is training on very less data

PyTorch: Different training accuracies using same random seed

Nonexistent pytorch gradients when dotting tensors in loss function

Pytorch LSTM each epoch starts from 0 accuracy

How to get both score and accuracy after training

Categories

Resources