After each epoch, y_pred simply keeps increasing.
The input at each batch is a 64x10 tensor, and I am trying to predict the max of the vector in each row.
I thought the gradient might not be going to 0 between batches, but that wasn't the case.
I tried changing the LR, the number of epochs, the LSTM layers (LSTM to RNN), the hidden size, etc.; nothing helped.
BTW, using a simple sequential network of dense and ReLU layers instead of the LSTM worked perfectly.
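For reference, a minimal sketch of the kind of dense baseline I mean (the layer widths here are illustrative, not the exact ones from my run):

import torch
import torch.nn as nn

# Illustrative dense baseline: maps each row of 10 values to one prediction.
dense_model = nn.Sequential(
    nn.Linear(10, 64),
    nn.ReLU(),
    nn.Linear(64, 1),
)

x = torch.rand(64, 10)                 # one batch: 64 rows of 10 values
y = x.max(dim=1, keepdim=True).values  # target: max of each row
print(dense_model(x).shape)            # torch.Size([64, 1])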
Following is the code:
import torch
import torch.nn as nn

LR = 0.0001

class LSTM(nn.Module):
    def __init__(self, input_size=1, hidden_layer_size=100, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.lstm = nn.LSTM(input_size, hidden_layer_size)
        self.linear = nn.Linear(hidden_layer_size, output_size)
        # self.hidden_cell = (torch.zeros(1, max_array_len, self.hidden_layer_size),
        #                     torch.zeros(1, max_array_len, self.hidden_layer_size))

    def forward(self, input_seq):
        # max_array_len (the sequence length, 10) is defined elsewhere in my script
        # lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq), max_array_len, 1), self.hidden_cell)
        lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq), max_array_len, 1))
        predictions = self.linear(lstm_out[:, -1, :])
        return predictions

model = LSTM()
optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=0.8)  # optimize all model parameters
loss_func = nn.MSELoss()  # the target label is not one-hotted
print(model)

EPOCHS = 2000
for i in range(EPOCHS):
    # model.train()
    for step, (seq, labels) in enumerate(train_data):
        model.zero_grad()
        labels = labels.view(labels.shape[0], 1)
        y_pred = model(seq)
        loss = loss_func(y_pred.float(), labels.float())
        loss.backward(retain_graph=True)
        optimizer.step()
    if i % 10 == 0:
        # print(y_pred.shape, labels.shape)
        print(y_pred)
        print(f'epoch: {i:3} train_loss: {loss.item():10.8f}')
print('Finished Training')
The y_pred I am getting is:
tensor([[0.2661],
        [0.7536],
        [1.4659],
        [2.4905],
        [3.8662],
        [5.4478],
        [6.8958],
        [7.9347],
        [8.5493],
        [8.8773],
        [9.0486],
        [9.1409],
        [9.1931],
        [9.2244],
        [9.2441],
        [9.2570],
        [9.2657],
        [9.2718],
        [9.2761],
        [9.2792],
        [9.2815],
        [9.2831],
        [9.2843],
        [9.2853],
        [9.2860],
        [9.2865],
        [9.2869],
        [9.2872],
        [9.2874],
        [9.2876],
        [9.2877],
        [9.2878]], grad_fn=<AddmmBackward>)
I am trying to implement Optuna hyperparameter optimization for a PyTorch LSTM, but I do not know how to define my model correctly.
When I just use nn.Linear everything works fine, but when I use nn.LSTMCell I get the following error:
AttributeError: 'tuple' object has no attribute 'dim'
The error gets raised because the LSTM returns a tuple, not a tensor. But I do not know how to fix it, and I cannot find an example of a PyTorch LSTM with Optuna optimization online.
Here is the model definition:
def build_model_custom(trail):
    # Suggest the number of layers of the neural network model
    n_layers = trail.suggest_int("n_layers", 1, 3)
    layers = []
    in_features = 20
    for i in range(n_layers):
        # Suggest the number of units in each layer
        out_features = trail.suggest_int("n_units_l{}".format(i), 4, 18)
        layers.append(nn.LSTMCell(in_features, out_features))
        in_features = out_features
    layers.append(nn.Linear(in_features, 2))
    return nn.Sequential(*layers)
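For illustration only: the failure can be reproduced in isolation, because nn.LSTMCell returns an (h, c) tuple, so the next module in nn.Sequential receives a tuple instead of a tensor. A minimal hedged workaround is a small wrapper that keeps only the hidden state (the wrapper name below is made up for this sketch):

import torch
import torch.nn as nn

class KeepHidden(nn.Module):
    # Hypothetical wrapper: runs an LSTMCell and forwards only h, not (h, c).
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.cell = nn.LSTMCell(input_size, hidden_size)

    def forward(self, x):
        h, c = self.cell(x)  # the tuple that breaks nn.Sequential
        return h             # pass only the hidden state to the next layer

model = nn.Sequential(KeepHidden(20, 8), nn.Linear(8, 2))
print(model(torch.randn(5, 20)).shape)  # torch.Size([5, 2])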
I have implemented an example of Optuna optimizing an LSTM before; I hope it helps you:
import numpy as np
import optuna
import torch
import torch.nn as nn
from optuna.trial import TrialState

# TransformerModel and get_val_loss are helpers from my own project
def get_best_parameters(args, Dtr, Val):
    def objective(trial):
        model = TransformerModel(args).to(args.device)
        loss_function = nn.MSELoss().to(args.device)
        optimizer = trial.suggest_categorical('optimizer',
                                              [torch.optim.SGD,
                                               torch.optim.RMSprop,
                                               torch.optim.Adam])(
            model.parameters(), lr=trial.suggest_loguniform('lr', 5e-4, 1e-2))
        print('training...')
        epochs = 10
        val_loss = 0
        for epoch in range(epochs):
            train_loss = []
            for batch_idx, (seq, target) in enumerate(Dtr, 0):
                seq, target = seq.to(args.device), target.to(args.device)
                optimizer.zero_grad()
                y_pred = model(seq)
                loss = loss_function(y_pred, target)
                train_loss.append(loss.item())
                loss.backward()
                optimizer.step()
            # validation
            val_loss = get_val_loss(args, model, Val)
            print('epoch {:03d} train_loss {:.8f} val_loss {:.8f}'.format(epoch, np.mean(train_loss), val_loss))
            model.train()
        return val_loss

    sampler = optuna.samplers.TPESampler()
    study = optuna.create_study(sampler=sampler, direction='minimize')
    study.optimize(func=objective, n_trials=5)
    pruned_trials = study.get_trials(deepcopy=False,
                                     states=tuple([TrialState.PRUNED]))
    complete_trials = study.get_trials(deepcopy=False,
                                       states=tuple([TrialState.COMPLETE]))
    best_trial = study.best_trial
    print('val_loss = ', best_trial.value)
    for key, value in best_trial.params.items():
        print("{}: {}".format(key, value))
I implemented a solution myself. I am not sure if it's the most Pythonic, but it works.
Suggestions for improvement are welcome.
def train_and_evaluate(param, model, trail):
    # Load Data
    train_dataloader = torch.utils.data.DataLoader(Train_Dataset, batch_size=batch_size)
    Test_dataloader = torch.utils.data.DataLoader(Test_Dataset, batch_size=batch_size)
    criterion = nn.MSELoss()
    optimizer = getattr(optim, param['optimizer'])(model.parameters(), lr=param['learning_rate'])
    acc = nn.L1Loss()
    # Training Loop
    for epoch_num in range(EPOCHS):
        # Training
        total_loss_train = 0
        for train_input, train_target in train_dataloader:
            output = model.forward(train_input.float())
            batch_loss = criterion(output, train_target.float())
            total_loss_train += batch_loss.item()
            model.zero_grad()
            batch_loss.backward()
            optimizer.step()
        # Evaluation
        total_loss_val = 0
        total_mae = 0
        with torch.no_grad():
            for test_input, test_target in Test_dataloader:
                output = model(test_input.float())
                batch_loss = criterion(output, test_target)
                total_loss_val += batch_loss.item()
                batch_mae = acc(output, test_target)
                total_mae += batch_mae.item()
        accuracy = total_mae / len(Test_Dataset)
        # Add prune mechanism
        trail.report(accuracy, epoch_num)
        if trail.should_prune():
            raise optuna.exceptions.TrialPruned()
    return accuracy
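For completeness, a hedged sketch of how this could be wired into an Optuna study; the search-space names and ranges below are illustrative assumptions, not taken from the post above:

import optuna

def objective(trial):
    # Illustrative search space; the keys match what train_and_evaluate reads.
    param = {
        'optimizer': trial.suggest_categorical('optimizer', ['Adam', 'SGD']),
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True),
    }
    model = build_model_custom(trial)  # model factory from the question
    return train_and_evaluate(param, model, trial)

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=20)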
I'm trying to implement word2vec with negative sampling in Python almost from scratch, and I'm quite new to neural networks, so I have run into some issues. I would be very grateful for any help.
So, I wrote a simple NN with a forward pass. I didn't get which element has to have a grad_fn; I kept getting errors like 'tensor has no grad_fn' until I added requires_grad_() on the returned value. Is that correct?
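For context, a tiny illustration of when a tensor carries a grad_fn; building a brand-new tensor from plain Python floats cuts the graph, which is easy to do by accident:

import torch

w = torch.randn(3, requires_grad=True)
y = w * 2
print(y.grad_fn)  # <MulBackward0 ...>: y is connected to the graph

z = torch.tensor([float(v) for v in y])  # a brand-new leaf tensor
print(z.grad_fn)  # None: the link back to w is gone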
dataset = Word2VecNegativeSampling(data, num_negative_samples, 30000)
dataset.generate_dataset()

wordvec_dim = 10

class Word2VecNegativeSamples(nn.Module):
    def __init__(self, num_tokens):
        super(Word2VecNegativeSamples, self).__init__()
        self.input = nn.Linear(num_tokens, 10, bias=False)
        self.output = nn.Linear(10, num_tokens, bias=False)
        self.num_tokens = num_tokens

    def forward(self, input_index_batch, output_indices_batch):
        '''
        Implements forward pass with negative sampling

        Arguments:
        input_index_batch - Tensor of ints, shape: (batch_size, ), indices of input words in the batch
        output_indices_batch - Tensor of ints, shape: (batch_size, num_negative_samples+1),
                               indices of the target words for every sample

        Returns:
        predictions - Tensor of floats, shape: (batch_size, num_negative_samples+1)
        '''
        results = []
        batch_size = len(input_index_batch)
        for i in range(batch_size):
            input_one_hot = torch.zeros(self.num_tokens)
            input_one_hot[input_index_batch[i]] = 1
            forward_result = self.output(self.input(input_one_hot))
            results.append(torch.tensor([forward_result[out_index] for out_index in output_indices_batch[i]]))
        return torch.stack(results).requires_grad_()

nn_model = Word2VecNegativeSamples(data.num_tokens())
nn_model.type(torch.FloatTensor)
After all that, I'm trying to train the model, but neither the loss nor the accuracy is changing. Is the code for the model prediction correct as well?
Here is the training code:
def train_neg_sample(model, dataset, train_loader, optimizer, scheduler, num_epochs):
    loss = nn.BCEWithLogitsLoss().type(torch.FloatTensor)
    loss_history = []
    train_history = []
    for epoch in range(num_epochs):
        model.train()  # Enter train mode
        dataset.generate_dataset()
        loss_accum = 0
        correct_samples = 0
        total_samples = 0
        for i_step, (inp, out, lab) in enumerate(train_loader):
            prediction = model(inp, out)
            loss_value = loss(prediction, lab)
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()
            _, indices = torch.max(prediction, 1)
            correct_samples += torch.sum(indices == 0)
            total_samples += lab.shape[0]
            loss_accum += loss_value
        scheduler.step()
        ave_loss = loss_accum / i_step
        train_accuracy = float(correct_samples) / total_samples
        loss_history.append(float(ave_loss))
        train_history.append(train_accuracy)
        print("Epoch#: %i, Average loss: %f, Train accuracy: %f" % (epoch, ave_loss, train_accuracy))
    return loss_history, train_history
If your loss function is not changing, it's highly probable that you registered the wrong set of parameters with the optimizer. Can you post the code snippet where you initialize your model and optimizer? It is supposed to look like this:
nn_model = Word2VecNegativeSamples(data.num_tokens())
optimizer = optim.SGD(nn_model.parameters(), lr=0.001, momentum=0.9)
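If in doubt, one hedged way to sanity-check what the optimizer actually holds is to compare parameter counts:

# The optimizer should hold exactly the model's parameters.
n_model = sum(p.numel() for p in nn_model.parameters())
n_optim = sum(p.numel() for g in optimizer.param_groups for p in g['params'])
print(n_model, n_optim)  # the two counts should match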
I am building a simple autoencoder followed by an MLP neural net. Regarding the autoencoder, I am not running into any problem.
# ---- Prepare training set ----
x_data = train_set_categorical.drop(["churn"], axis=1).to_numpy()
labels = train_set_categorical.loc[:, "churn"].to_numpy()

dataset = TensorDataset(torch.Tensor(x_data), torch.Tensor(labels))
loader = DataLoader(dataset, batch_size=127)

# ---- Model Initialization ----
model = AE()

# Validation using MSE Loss function
loss_function = nn.MSELoss()

# Using an Adam Optimizer with lr = 0.1
optimizer = torch.optim.Adam(model.parameters(),
                             lr=1e-1,
                             weight_decay=1e-8)

epochs = 50
outputs = []
losses = []
for epoch in range(epochs):
    for (image, _) in loader:
        # Output of Autoencoder
        embbeding, reconstructed = model(image)

        # Calculating the loss function
        loss = loss_function(reconstructed, image)

        # The gradients are set to zero,
        # then the gradient is computed and stored.
        # .step() performs the parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Storing the losses in a list for plotting
        losses.append(loss)
        if epoch == 49:
            outputs.append(embbeding)
But then I am feeding an MLP with the outcome of the autoencoder, and this is where things start to fail.
class Feedforward(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.neural = torch.nn.Sequential(
            torch.nn.Linear(33, 260),
            torch.nn.ReLU(),
            torch.nn.Linear(260, 450),
            torch.nn.ReLU(),
            torch.nn.Linear(450, 260),
            torch.nn.ReLU(),
            torch.nn.Linear(260, 1),
            torch.nn.Sigmoid()
        )

    def forward(self, x):
        outcome = self.neural(x.float())
        return outcome

modelz = Feedforward()
criterion = torch.nn.BCELoss()
opt = torch.optim.Adam(modelz.parameters(), lr=0.01)

modelz.train()
epoch = 20
for epoch in range(epoch):
    opt.zero_grad()
    # Forward pass
    y_pred = modelz(x_train)
    # Compute loss
    loss_2 = criterion(y_pred.squeeze(), torch.tensor(y_train).to(torch.float32))
    # print('Epoch {}: train loss: {}'.format(epoch, loss.item()))
    # Backward pass
    loss_2.backward()
    opt.step()
I get the following error:
RuntimeError: Trying to backward through the graph a second time, but the saved intermediate results have already been freed. Specify retain_graph=True when calling .backward() or autograd.grad() the first time.
Of course, I have tried adding retain_graph=True to both backward calls, or only to the first one, but it does not solve the problem. If I run the two pieces of code independently of one another, each works; run as a sequence, it does not, and I don't know why.
You should be able to disconnect the output of the autoencoder from the model by calling embbeding.detach() before appending it to outputs.
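Concretely, a minimal sketch of that change in the autoencoder loop (only the appended value changes):

if epoch == 49:
    # detach() cuts the autograd history, so the later MLP backward pass
    # no longer tries to traverse the autoencoder's (already freed) graph
    outputs.append(embbeding.detach())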
I am learning PyTorch, trying to do a basic linear regression on data created this way:
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression

x, y = make_regression(n_samples=100, n_features=1, noise=15, random_state=42)
y = y.reshape(-1, 1)
print(x.shape, y.shape)
plt.scatter(x, y)
I know that using TensorFlow this code solves it:
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(units=1, activation='linear', input_shape=(x.shape[1], )))
model.compile(optimizer=tf.keras.optimizers.SGD(lr=0.05), loss='mse')
hist = model.fit(x, y, epochs=15, verbose=0)
but I need to know what the PyTorch equivalent would look like. What I tried to do was this:
# Model Class
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.linear = nn.Linear(1, 1)

    def forward(self, x):
        x = self.linear(x)
        return x

    def predict(self, x):
        return self.forward(x)

model = Net()
loss_fn = F.mse_loss
opt = torch.optim.SGD(model.parameters(), lr=0.05)

# Function to train the model
def fit(num_epochs, model, loss_fn, opt, train_dl):
    # Repeat for given number of epochs
    for epoch in range(num_epochs):
        # Train with batches of data
        for xb, yb in train_dl:
            # 1. Generate predictions
            pred = model(xb)
            # 2. Calculate loss
            loss = loss_fn(pred, yb)
            # 3. Compute gradients
            loss.backward()
            # 4. Update parameters using gradients
            opt.step()
            # 5. Reset the gradients to zero
            opt.zero_grad()
        # Print the progress
        if (epoch + 1) % 10 == 0:
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, loss.item()))

# Training
fit(200, model, loss_fn, opt, data_loader)
But the model doesn't learn anything, and I don't know what else to try.
The input/output dimensions are (1/1).
Dataset
First of all, you should define a torch.utils.data.Dataset:
import torch
from sklearn.datasets import make_regression

class RegressionDataset(torch.utils.data.Dataset):
    def __init__(self):
        data = make_regression(n_samples=100, n_features=1, noise=0.1, random_state=42)
        self.x = torch.from_numpy(data[0]).float()
        self.y = torch.from_numpy(data[1]).float()

    def __len__(self):
        return len(self.x)

    def __getitem__(self, index):
        return self.x[index], self.y[index]
It converts the numpy data to PyTorch tensors inside __init__ and casts the data to float (numpy uses double by default, while PyTorch's default is float, in order to use less memory).
Apart from that, it will simply return a tuple of features and the respective regression target.
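A quick, illustrative check of what the dataset yields:

dataset = RegressionDataset()
print(len(dataset))        # 100
x0, y0 = dataset[0]        # a single (feature, target) pair
print(x0.shape, y0.shape)  # torch.Size([1]) torch.Size([])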
Fit
Almost there, but you have to flatten the output from the model (described below). torch.nn.Linear will return tensors of shape (batch, 1) while your targets are of shape (batch,); flatten() will remove the unnecessary 1 dimension.
# 2. Calculate Loss
loss = criterion(pred.flatten(), yb)
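To make the shape mismatch concrete, a small illustrative check:

import torch

pred = torch.nn.Linear(1, 1)(torch.randn(32, 1))
yb = torch.randn(32)
print(pred.shape, pred.flatten().shape, yb.shape)
# torch.Size([32, 1]) torch.Size([32]) torch.Size([32])
# Without flatten(), MSELoss would broadcast (32, 1) against (32,)
# and quietly compute the loss over a (32, 32) comparison.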
Model
That is all you need actually:
model = torch.nn.Linear(1, 1)
Any layer can be called directly, no need for forward and inheritance for simple models.
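For example (illustrative):

import torch

layer = torch.nn.Linear(1, 1)
out = layer(torch.randn(4, 1))  # calling the layer runs its forward pass
print(out.shape)                # torch.Size([4, 1])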
Calling
The rest is almost okay; you just have to create a torch.utils.data.DataLoader and pass it an instance of our dataset. What a DataLoader does is issue __getitem__ of the dataset multiple times and create a batch of the specified size (there is some other funny business, but that's the idea):
dataset = RegressionDataset()
dataloader = torch.utils.data.DataLoader(dataset, batch_size=32)
model = torch.nn.Linear(1, 1)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=3e-4)
fit(5000, model, criterion, optimizer, dataloader)
Also notice I've used torch.nn.MSELoss(); since we are passing an object, it looks better than a function in this case.
Whole code
To make it easier:
import torch
from sklearn.datasets import make_regression

class RegressionDataset(torch.utils.data.Dataset):
    def __init__(self):
        data = make_regression(n_samples=100, n_features=1, noise=0.1, random_state=42)
        self.x = torch.from_numpy(data[0]).float()
        self.y = torch.from_numpy(data[1]).float()

    def __len__(self):
        return len(self.x)

    def __getitem__(self, index):
        return self.x[index], self.y[index]

# Function to train the model
def fit(num_epochs, model, criterion, optimizer, train_dl):
    # Repeat for given number of epochs
    for epoch in range(num_epochs):
        # Train with batches of data
        for xb, yb in train_dl:
            # 1. Generate predictions
            pred = model(xb)
            # 2. Calculate loss
            loss = criterion(pred.flatten(), yb)
            # 3. Compute gradients
            loss.backward()
            # 4. Update parameters using gradients
            optimizer.step()
            # 5. Reset the gradients to zero
            optimizer.zero_grad()
        # Print the progress
        if (epoch + 1) % 10 == 0:
            print(
                "Epoch [{}/{}], Loss: {:.4f}".format(epoch + 1, num_epochs, loss.item())
            )

dataset = RegressionDataset()
dataloader = torch.utils.data.DataLoader(dataset, batch_size=32)
model = torch.nn.Linear(1, 1)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=3e-4)
fit(5000, model, criterion, optimizer, dataloader)
You should get a loss of around 0.053 or so; vary noise or other params for a harder/easier regression task.
I am new to PyTorch. I was trying to build a binary classifier on the Kepler dataset. The following is my dataset class:
class KeplerDataset(Dataset):
    def __init__(self, test=False):
        self.dataframe_orig = pd.read_csv(koi_cumm_path)

        if (test == False):
            self.data = df_numeric[( df_numeric.koi_disposition == 1 ) | ( df_numeric.koi_disposition == 0 )].values
        else:
            self.data = df_numeric[~(( df_numeric.koi_disposition == 1 ) | ( df_numeric.koi_disposition == 0 ))].values

        self.X_data = torch.FloatTensor(self.data[:, 1:])
        self.y_data = torch.FloatTensor(self.data[:, 0])

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.X_data[index], self.y_data[index]
Here, I created a custom classifier class with hidden layers and a single output unit that produces the sigmoid probability of being in class 1 (planet).
class KOIClassifier(nn.Module):
    def __init__(self, input_dim, out_dim):
        super(KOIClassifier, self).__init__()
        self.linear1 = nn.Linear(input_dim, 32)
        self.linear2 = nn.Linear(32, 32)
        self.linear3 = nn.Linear(32, out_dim)

    def forward(self, xb):
        out = self.linear1(xb)
        out = F.relu(out)
        out = self.linear2(out)
        out = F.relu(out)
        out = self.linear3(out)
        out = torch.sigmoid(out)
        return out
I then created a train_model function to optimize the loss using SGD.
def train_model(X, y):
    criterion = nn.BCELoss()
    optim = torch.optim.SGD(model.parameters(), lr=0.001)
    n_epochs = 100
    losses = []
    for epoch in range(n_epochs):
        y_pred = model.forward(X)
        loss = criterion(y_pred, y)
        losses.append(loss.item())
        optim.zero_grad()
        loss.backward()
        optim.step()

losses = []
for X, y in train_loader:
    losses.append(train_model(X, y))
But after performing the optimization over the train_loader, when I try predicting on the train_loader itself, the prediction values are much worse.
for features, y in train_loader:
    y_pred = model.predict(features)
    break

y_pred
> tensor([[4.5436e-02],
        [1.5024e-02],
        [2.2579e-01],
        [4.2279e-01],
        [6.0811e-02],
        .....
Why is my model not working properly? Is it a problem with the dataset, or am I doing something wrong in implementing the neural net? I will link my Kaggle notebook, because more context might be helpful. Please help.
You are optimizing many times (100 steps) on the first batch (the first samples) before moving on to the next samples. This means your model will overfit each small batch before it ever sees the next one. Your training will then be very non-smooth, may diverge, and will end up far from the global optimum.
Usually, in a training loop you should:
1. go over all samples (this is one epoch)
2. shuffle your dataset in order to visit your samples in a different order (set your PyTorch training loader accordingly; see the sketch after this list)
3. go back to 1. until you reach the max number of epochs
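For step 2, shuffling is a single flag on the loader; a minimal sketch, assuming train_dataset is the KeplerDataset instance from the question and the batch size is an illustrative value:

from torch.utils.data import DataLoader

# shuffle=True re-orders the samples at the start of every epoch
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)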
Also, you should not define your optimizer (nor your criterion) each time.
Your training loop should look like this:
criterion = nn.BCELoss()
optim = torch.optim.SGD(model.parameters(), lr=0.001)
n_epochs = 100

def train_model():
    for X, y in train_loader:
        optim.zero_grad()
        y_pred = model.forward(X)
        loss = criterion(y_pred, y)
        loss.backward()
        optim.step()

for epoch in range(n_epochs):
    train_model()