I have a model in TensorFlow that I converted to PyTorch. I want to check if the two models are the same or if I'm mixing things up. Here is my code in TensorFlow:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

model = Sequential()
model.add(LSTM(5, input_shape=(4, 1000)))
model.add(Dense(1, activation='tanh'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=100, batch_size=27)
I built the equivalent model in PyTorch in this way:
import numpy as np
import torch
import torch.nn as nn

class LSTM1(nn.Module):
    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length):
        super(LSTM1, self).__init__()
        self.num_classes = num_classes  # number of classes
        self.num_layers = num_layers    # number of stacked LSTM layers
        self.input_size = input_size    # input size
        self.seq_length = seq_length    # sequence length
        self.hidden_size = hidden_size  # hidden state size
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)  # lstm
        self.fc = nn.Linear(self.hidden_size, num_classes)  # fully connected last layer
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        h_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)  # hidden state
        c_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)  # cell state
        # Propagate input through LSTM
        output, (hn, cn) = self.lstm(x, (h_0, c_0))  # lstm with input, hidden, and cell state
        hn = hn.view(-1, self.hidden_size)  # reshape the final hidden states for the Dense layer
        out = self.sigmoid(hn)
        out = self.fc(out)  # final output
        out = self.sigmoid(out)
        return out
num_epochs = 100 #100 epochs
learning_rate = 0.001 #0.001 lr
input_size = 1000 #number of features
num_layers = 5 #number of stacked lstm layers
hidden_size = 1
num_classes = 1 #number of output classes
X_train = np.concatenate((X_phage, X_bac))
y_train = np.concatenate((np.ones(len(X_phage)), np.zeros(len(X_bac))))
X_train_tensors_final = torch.Tensor(X_train)
y_train_tensors = torch.Tensor(y_train)
print(X_train_tensors_final.shape)
model = LSTM1(num_classes, input_size, hidden_size, num_layers, X_train_tensors_final.shape[1]) #our lstm class
print("Training Shape", X_train_tensors_final.shape, y_train_tensors.shape)
print(model)
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for epoch in range(num_epochs):
    outputs = model(X_train_tensors_final)  # forward pass
    optimizer.zero_grad()  # reset the gradients to zero
    # obtain the loss
    outputs = outputs[-20:]
    y_train_tensors = y_train_tensors.type(torch.LongTensor)
    y_train_tensors = torch.reshape(y_train_tensors, (20, 1))
    loss = criterion(outputs, y_train_tensors.float())
    loss.backward()  # compute the gradients
    optimizer.step()  # update the weights, i.e. backprop
    train_acc = torch.sum((outputs > 0.5).bool().float() == y_train_tensors)
    final_train_acc = train_acc / 20
    print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))
    print('Accuracy: ', final_train_acc)
My data is 20 instances, where each instance has 1000 features and 4 timesteps, so the input is shaped (20, 4, 1000). However, when I run the two models, I get different results (different loss and accuracy), so I'm afraid I have missed something and the models are not the same.
I would appreciate it if someone could confirm whether these models are equivalent and, if they are, explain why I'm getting different results.
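A quick sanity check is to compare the number of trainable parameters on each side; if the architectures really match, the counts must match too. A minimal sketch (model_tf and model_pt are placeholder names for the two models above, which are both just called model in the snippets):

# Sketch: compare trainable parameter counts across the two frameworks
tf_params = model_tf.count_params()
pt_params = sum(p.numel() for p in model_pt.parameters() if p.requires_grad)
print("Keras parameters:  ", tf_params)
print("PyTorch parameters:", pt_params)
# If these disagree, the architectures differ: LSTM(5) in Keras is one layer
# with 5 units, while nn.LSTM(..., hidden_size=1, num_layers=5) is five
# stacked layers with 1 unit each.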
Related
I am trying to build a multiclass text classifier using PyTorch and torchtext, but I receive this error whenever the output dimension of the last layer is 2; it runs fine when the output dimension is 1. I know there is a problem with the batch size and the data shape, but I don't know the fix.
Constructing the iterator:

# set batch size
BATCH_SIZE = 16

train_iterator, valid_iterator = BucketIterator.splits(
    (train_data, valid_data),
    batch_size=BATCH_SIZE,
    sort_key=lambda x: len(x.text),
    sort_within_batch=True,
    device=device)
Model class:
class classifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers,
                 bidirectional, dropout):
        super(classifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.gru = nn.GRU(embedding_dim,
                          hidden_dim,
                          num_layers=n_layers,
                          bidirectional=bidirectional,
                          dropout=dropout,
                          batch_first=True)
        self.fc1 = nn.Linear(hidden_dim * 2, 128)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(64, 16)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(16, output_dim)
        self.act = nn.Sigmoid()

    def forward(self, text, text_lengths):
        embedded = self.embedding(text)
        # embedded = [batch size, sent_len, emb dim]
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths.to('cpu'), batch_first=True)
        packed_output, hidden = self.gru(packed_embedded)
        # concatenate the final forward and backward hidden states
        hidden = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
        dense_1 = self.fc1(hidden)
        x = self.relu1(dense_1)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        x = self.relu3(x)
        dense_outputs = self.fc4(x)
        # final activation function
        outputs = self.act(dense_outputs)
        return outputs
Instantiating the model:

size_of_vocab = len(TEXT.vocab)
embedding_dim = 300
num_hidden_nodes = 256
num_output_nodes = 2
num_layers = 4
bidirection = True
dropout = 0.2

model = classifier(size_of_vocab, embedding_dim, num_hidden_nodes, num_output_nodes, num_layers,
                   bidirectional=True, dropout=dropout).to(device)
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')
pretrained_embeddings = TEXT.vocab.vectors
model.embedding.weight.data.copy_(pretrained_embeddings)
print(pretrained_embeddings.shape)
Optimizer and criterion used:
optimizer = optim.Adam(model.parameters())
criterion = nn.BCELoss()
model = model.to(device)
criterion = criterion.to(device)
Training function:
import torchmetrics as tm
metrics = tm.Accuracy()

def train(model, iterator, optimizer, criterion):
    # initialize every epoch
    epoch_loss = 0
    epoch_acc = 0

    # set the model in training phase
    model.train()

    for batch in iterator:
        # reset the gradients after every batch
        optimizer.zero_grad()

        # retrieve text and no. of words
        text, text_lengths = batch.text

        # convert to 1D tensor
        predictions = model(text, text_lengths).squeeze()

        # compute the loss
        loss = criterion(predictions, batch.label)

        # compute the binary accuracy
        # acc = binary_accuracy(predictions, batch.label)
        acc = metrics(predictions, batch.label)

        # backpropagate the loss and compute the gradients
        loss.backward()

        # update the weights
        optimizer.step()

        # accumulate loss and accuracy
        epoch_loss += loss.item()
        epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)
Full error
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-60-eeabf5bacadf> in <module>()
5
6 #train the model
----> 7 train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
8
9 #evaluate the model
3 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in binary_cross_entropy(input, target, weight, size_average, reduce, reduction)
2906 raise ValueError(
2907 "Using a target size ({}) that is different to the input size ({}) is deprecated. "
-> 2908 "Please ensure they have the same size.".format(target.size(), input.size())
2909 )
2910
ValueError: Using a target size (torch.Size([16])) that is different to the input size (torch.Size([16, 2])) is deprecated. Please ensure they have the same size.
What you want is CrossEntropyLoss instead of BCELoss. BCELoss expects its target to have the same shape as its input, so with output_dim=2 the [16, 2] predictions can never match the [16] labels; nn.CrossEntropyLoss instead takes raw logits of shape [batch, num_classes] and integer class labels of shape [batch].
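A minimal sketch of the change, assuming integer class labels in batch.label; note that nn.CrossEntropyLoss applies log-softmax internally, so the final nn.Sigmoid() (self.act) should be removed from the model and fc4's raw logits returned directly:

criterion = nn.CrossEntropyLoss()

# inside the training loop:
predictions = model(text, text_lengths)            # shape [batch, 2], raw logits
loss = criterion(predictions, batch.label.long())  # labels: shape [batch], dtype long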
I have created a model with an LSTM layer as shown below and want to get the internal state (hidden state and cell state) after the training step and save it. After the training step, I will use the network for a prediction and want to reinitialize the LSTM with the saved internal state before the next training step. This way I can continue from the same point after each training step. I haven't been able to find anything helpful for the current version of TensorFlow, i.e. 2.x.
import tensorflow as tf

class LTSMNetwork(object):
    def __init__(self, num_channels, num_hidden_neurons, learning_rate, time_steps, batch_size):
        self.num_channels = num_channels
        self.num_hidden_neurons = num_hidden_neurons
        self.learning_rate = learning_rate
        self.time_steps = time_steps
        self.batch_size = batch_size

    def lstm_model(self):
        self.model = tf.keras.Sequential()
        self.model.add(tf.keras.layers.LSTM(batch_input_shape=(self.batch_size, self.time_steps, self.num_channels),
                                            units=self.num_hidden_neurons[0],
                                            activation='tanh', recurrent_activation='sigmoid',
                                            return_sequences=True, stateful=True))
        #self.model.add(tf.keras.layers.LSTM(units=self.num_hidden_neurons[1], stateful=True))
        hidden_layer = tf.keras.layers.Dense(units=self.num_hidden_neurons[1], activation=tf.nn.sigmoid)
        self.model.add(hidden_layer)
        self.model.add(tf.keras.layers.Dense(units=self.num_channels, name="output_layer", activation=tf.nn.tanh))
        self.model.compile(optimizer=tf.optimizers.Adam(learning_rate=self.learning_rate),
                           loss='mse', metrics=['binary_accuracy'])
        return self.model

if __name__ == '__main__':
    num_channels = 3
    num_hidden_neurons = [150, 100]
    learning_rate = 0.001
    time_steps = 1
    batch_size = 1

    lstm_network = LTSMNetwork(num_channels=num_channels, num_hidden_neurons=num_hidden_neurons,
                               learning_rate=learning_rate, time_steps=time_steps, batch_size=batch_size)
    model = lstm_network.lstm_model()
    model.summary()
You can define a custom Callback that saves the hidden and cell states at the end of every epoch, for example. Afterwards, you can choose the epoch from which you want to extract the states and then use lstm_layer.reset_states(...) to set the initial state again:
import tensorflow as tf

class LTSMNetwork(object):
    def __init__(self, num_channels, num_hidden_neurons, learning_rate, time_steps, batch_size):
        self.num_channels = num_channels
        self.num_hidden_neurons = num_hidden_neurons
        self.learning_rate = learning_rate
        self.time_steps = time_steps
        self.batch_size = batch_size

    def lstm_model(self):
        self.model = tf.keras.Sequential()
        self.model.add(tf.keras.layers.LSTM(batch_input_shape=(self.batch_size, self.time_steps, self.num_channels),
                                            units=self.num_hidden_neurons[0],
                                            activation='tanh', recurrent_activation='sigmoid',
                                            return_sequences=True, stateful=True))
        hidden_layer = tf.keras.layers.Dense(units=self.num_hidden_neurons[1], activation=tf.nn.sigmoid)
        self.model.add(hidden_layer)
        self.model.add(tf.keras.layers.Dense(units=self.num_channels, name="output_layer", activation=tf.nn.tanh))
        self.model.compile(optimizer=tf.optimizers.Adam(learning_rate=self.learning_rate),
                           loss='mse', metrics=['binary_accuracy'])
        return self.model

states = {}

class CustomCallback(tf.keras.callbacks.Callback):
    def __init__(self, lstm_layer):
        super().__init__()
        self.lstm_layer = lstm_layer

    def on_epoch_end(self, epoch, logs=None):
        states[epoch] = self.lstm_layer.states

num_channels = 3
num_hidden_neurons = [150, 100]
learning_rate = 0.001
time_steps = 1
batch_size = 1

lstm_network = LTSMNetwork(num_channels=num_channels, num_hidden_neurons=num_hidden_neurons,
                           learning_rate=learning_rate, time_steps=time_steps, batch_size=batch_size)
model = lstm_network.lstm_model()
lstm_layer = model.layers[0]
x = tf.random.normal((1, 1, 3))
y = tf.random.normal((1, 1, 3))
model.fit(x, y, epochs=5, callbacks=[CustomCallback(lstm_layer)])
model.summary()
lstm_layer.reset_states(states[0])  # sets the hidden state from the first epoch
states now holds the LSTM's internal state (hidden state and cell state) for each of the 5 epochs.
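For example, the entry saved after the first epoch can be inspected like this (a small sketch; with batch_size=1 and 150 units, each state has shape (1, 150)):

h, c = states[0]         # hidden and cell state saved after the first epoch
print(h.shape, c.shape)  # (1, 150) each: (batch_size, units)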
I have managed to save the internal state of the LSTM after the training step and reinitialize the LSTM with the saved internal states before the next training step. Since lstm_layer.states returns references to variables that keep changing as training continues, the trick is to create a new variable and assign it the currently stored value (see: How can I copy a variable in tensorflow):
import numpy as np

states_ = {}

# Save the hidden state
internal_state_h = lstm_layer.states[0]
v1 = tf.Variable(initial_value=np.zeros((1, 150)), dtype=tf.float32, shape=(1, 150))
copy_state_h = v1.assign(internal_state_h)

# Save the cell state
internal_state_c = lstm_layer.states[1]
v2 = tf.Variable(initial_value=np.zeros((1, 150)), dtype=tf.float32, shape=(1, 150))
copy_state_c = v2.assign(internal_state_c)

# Create a tuple and add it to the dictionary
states_[0] = (copy_state_h, copy_state_c)

# Reset the internal state
lstm_layer.reset_states(states_[0])
A call to predict() changes the internal states; however, by following these steps, you can restore the internal states of the RNN to what they were before the prediction.
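Putting the pieces together, a minimal sketch of the save/predict/restore cycle (assuming model, lstm_layer, and the 150-unit shapes from the snippets above):

# Snapshot the current states as copies, not references
saved_h = tf.Variable(lstm_layer.states[0])
saved_c = tf.Variable(lstm_layer.states[1])

# Prediction mutates the stateful LSTM's internal states
prediction = model.predict(tf.random.normal((1, 1, 3)))

# Restore the snapshot so the next training step continues from the same point
lstm_layer.reset_states([saved_h.numpy(), saved_c.numpy()])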
I'm trying to implement a program that compares LSTM performance vs. GRU performance for word prediction. I am using the same parameters for both of them; however, while I am getting good perplexity values for the LSTM, the GRU values I'm getting are absolutely terrible.
I recently attempted to debug the training function, since it originally only ran for the LSTM model and not for the GRU model. As I already said, both models should get similar values; however, for now the LSTM model starts at around ~150 perplexity and converges to a normal value, while the GRU model starts at some random value in the 1000s and does not converge at all.
I am quite new to all the RNN, LSTM, and GRU stuff, so forgive me if there's something obvious I'm missing.
Any help would be appreciated!
I use the following two models:
class LSTM_Model(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, num_layers, dropout=0):
        super(LSTM_Model, self).__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, hidden_state):
        x = self.embed(x)
        out, (hidden_state, cell_state) = self.lstm(x, hidden_state)
        out = out.reshape(out.size(0) * out.size(1), out.size(2))  # reshape output to (batch_size*sequence_length, hidden_size)
        out = self.fc(out)
        return out, (hidden_state, cell_state)

class GRU_Model(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, num_layers, dropout=0):
        super(GRU_Model, self).__init__()
        self.embed = nn.Embedding(vocab_size, embed_size)
        self.gru = nn.GRU(embed_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, hidden_state):
        x = self.embed(x)
        out, hidden_state = self.gru(x, hidden_state)
        out = out.reshape(out.size(0) * out.size(1), out.size(2))  # reshape output to (batch_size*sequence_length, hidden_size)
        out = self.fc(out)
        return out, hidden_state
Training function:
def run_model(model, epochs=epochs, learning_rate=learning_rate, clip=clip, momentum=momentum, LSTM=True, GRU=False, Dropout=False):
    # Define loss criterion and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=step_size, gamma=decay_rate)

    train_perplexity, test_perplexity, valid_perplexity = [], [], []

    # Train the model
    for e in range(epochs):
        # Initialize the hidden (and cell) states
        train_states = init_states(LSTM, GRU, num_layers, batch_size, hidden_size)
        test_states = init_states(LSTM, GRU, num_layers, batch_size, hidden_size)
        valid_states = init_states(LSTM, GRU, num_layers, batch_size, hidden_size)

        # RUN TRAINING SET #
        model.train()
        for i in range(0, ids.size(1) - seq_length, seq_length):
            # Set train_inputs and train_targets
            train_inputs = ids[:, i:i+seq_length].to(device)
            train_targets = ids[:, (i+1):(i+1)+seq_length].to(device)

            # Forward pass
            model.zero_grad()
            if LSTM:
                train_states = [state.detach() for state in train_states]  # detach the states from how they were previously produced
            if GRU:
                train_states = train_states.detach()
            train_outputs, train_states = model(train_inputs, train_states)
            train_loss = criterion(train_outputs, train_targets.reshape(-1))

            # Backward and optimize
            train_loss.backward()
            clip_grad_norm_(model.parameters(), clip)
            optimizer.step()
        lr_scheduler.step()

        model.eval()
        with torch.no_grad():
            pass  # test and validation loops removed to reduce length
        model.train()  # reset to train mode after iterating through validation data

        train_perplexity.append(math.exp(train_loss.item()))
        test_perplexity.append(np.exp(np.mean(test_losses)))
        valid_perplexity.append(np.exp(np.mean(valid_losses)))

        print('Epoch ' + str(e+1) + '/' + str(epochs) + ': ')
        print('Train Perplexity - ' + str(train_perplexity[e]))
        print('Test Perplexity - ' + str(test_perplexity[e]))
        print('Validation Perplexity - ' + str(valid_perplexity[e]))
        print("----------------------------------------------------")

    return train_perplexity, test_perplexity, valid_perplexity
Hidden state initialization:
def init_states(LSTM, GRU, num_layers=num_layers, batch_size=batch_size, hidden_size=hidden_size):
    if LSTM:
        return (torch.FloatTensor(num_layers, batch_size, hidden_size).uniform_(r1, r2).to(device),
                torch.FloatTensor(num_layers, batch_size, hidden_size).uniform_(r1, r2).to(device))
    if GRU:
        return torch.FloatTensor(num_layers, batch_size, hidden_size).uniform_(r1, r2).to(device)
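For reference, the common convention is zero initialization, which is also what PyTorch's RNN modules default to when no initial state is passed; a zero-init variant of the function above would be:

def init_states_zeros(LSTM, GRU, num_layers=num_layers, batch_size=batch_size, hidden_size=hidden_size):
    # Zero initialization: the PyTorch default when no state is passed to nn.LSTM/nn.GRU
    if LSTM:
        return (torch.zeros(num_layers, batch_size, hidden_size).to(device),
                torch.zeros(num_layers, batch_size, hidden_size).to(device))
    if GRU:
        return torch.zeros(num_layers, batch_size, hidden_size).to(device)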
I am building an autoencoder and found two snippets of code with the same architecture, one in Keras and one in PyTorch. But when I run them, there is a large difference in training time even though they use the same architecture. Could you please explain why I am encountering such a huge difference in time, as well as in performance?
PyTorch Code
class Encoder(nn.Module):
    def __init__(self, seq_len, n_features, embedding_dim=64):
        super(Encoder, self).__init__()
        self.seq_len, self.n_features = seq_len, n_features
        self.embedding_dim, self.hidden_dim = embedding_dim, 2 * embedding_dim
        self.rnn1 = nn.LSTM(
            input_size=n_features,
            hidden_size=self.hidden_dim,
            num_layers=1,
            batch_first=True
        )
        self.rnn2 = nn.LSTM(
            input_size=self.hidden_dim,
            hidden_size=embedding_dim,
            num_layers=1,
            batch_first=True
        )

    def forward(self, x):
        x = x.reshape((1, self.seq_len, self.n_features))
        x, (_, _) = self.rnn1(x)
        x, (hidden_n, _) = self.rnn2(x)
        return hidden_n.reshape((self.n_features, self.embedding_dim))

class Decoder(nn.Module):
    def __init__(self, seq_len, input_dim=64, n_features=1):
        super(Decoder, self).__init__()
        self.seq_len, self.input_dim = seq_len, input_dim
        self.hidden_dim, self.n_features = 2 * input_dim, n_features
        self.rnn1 = nn.LSTM(
            input_size=input_dim,
            hidden_size=input_dim,
            num_layers=1,
            batch_first=True
        )
        self.rnn2 = nn.LSTM(
            input_size=input_dim,
            hidden_size=self.hidden_dim,
            num_layers=1,
            batch_first=True
        )
        self.output_layer = nn.Linear(self.hidden_dim, n_features)

    def forward(self, x):
        x = x.repeat(self.seq_len, self.n_features)
        x = x.reshape((self.n_features, self.seq_len, self.input_dim))
        x, (hidden_n, cell_n) = self.rnn1(x)
        x, (hidden_n, cell_n) = self.rnn2(x)
        x = x.reshape((self.seq_len, self.hidden_dim))
        return self.output_layer(x)

class RecurrentAutoencoder(nn.Module):
    def __init__(self, seq_len, n_features, embedding_dim=64):
        super(RecurrentAutoencoder, self).__init__()
        self.encoder = Encoder(seq_len, n_features, embedding_dim).to(device)
        self.decoder = Decoder(seq_len, embedding_dim, n_features).to(device)

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
model = RecurrentAutoencoder(seq_len, n_features, 128)
import copy
import time
import numpy as np

time_dict = {}

def train_model(model, train_dataset, val_dataset, n_epochs):
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.L1Loss(reduction='sum').to(device)
    history = dict(train=[], val=[])
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 10000.0
    time_dict[0] = time.time()

    for epoch in range(1, n_epochs + 1):
        model = model.train()
        train_losses = []
        for seq_true in train_dataset:
            optimizer.zero_grad()
            seq_true = seq_true.to(device)
            seq_pred = model(seq_true)
            loss = criterion(seq_pred, seq_true)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())

        val_losses = []
        model = model.eval()
        with torch.no_grad():
            for seq_true in val_dataset:
                seq_true = seq_true.to(device)
                seq_pred = model(seq_true)
                loss = criterion(seq_pred, seq_true)
                val_losses.append(loss.item())

        train_loss = np.mean(train_losses)
        val_loss = np.mean(val_losses)
        history['train'].append(train_loss)
        history['val'].append(val_loss)
        if val_loss < best_loss:
            best_loss = val_loss
            best_model_wts = copy.deepcopy(model.state_dict())
        print(f'Epoch {epoch}: train loss {train_loss} val loss {val_loss}')
        stop = time.time()
        time_dict[epoch] = stop

    model.load_state_dict(best_model_wts)
    return model.eval(), history

model, history = train_model(
    model,
    train_dataset,
    val_dataset,
    n_epochs=10)
Output of print(model):

RecurrentAutoencoder(
  (encoder): Encoder(
    (rnn1): LSTM(1, 256, batch_first=True)
    (rnn2): LSTM(256, 128, batch_first=True)
  )
  (decoder): Decoder(
    (rnn1): LSTM(128, 128, batch_first=True)
    (rnn2): LSTM(128, 256, batch_first=True)
    (output_layer): Linear(in_features=256, out_features=1, bias=True)
  )
)
Keras Code

model = keras.Sequential()
model.add(keras.layers.LSTM(
    units=256,
    input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))
model.add(keras.layers.LSTM(units=128, return_sequences=True))
model.add(keras.layers.LSTM(units=256, return_sequences=True))
model.add(keras.layers.TimeDistributed(keras.layers.Dense(units=X_train.shape[2])))
model.compile(loss='mae', optimizer='adam')

nb_epoch = 10
model.compile(optimizer='adam',
              loss='mean_squared_error')  # note: this recompile overrides the 'mae' loss above
tensorboard = TensorBoard(log_dir='/tmp/logs',
                          histogram_freq=0,
                          write_graph=True,  # to visualize
                          write_images=True)
history = model.fit(X_train, X_train,
                    epochs=nb_epoch,
                    shuffle=True,
                    validation_data=(X_validate, X_validate),
                    verbose=1,
                    callbacks=[tensorboard]).history
Output of print(model.summary()):

Layer (type)                 Output Shape     Param #
lstm_6 (LSTM)                (None, 1, 256)   266240
lstm_7 (LSTM)                (None, 1, 128)   197120
lstm_8 (LSTM)                (None, 1, 256)   394240
time_distributed (TimeDistri (None, 1, 3)     771
The architectures of these two models look the same, but there is a difference in performance and training time.
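One difference worth checking, beyond the architecture itself: the PyTorch loop above performs a separate forward/backward pass for every single sequence in train_dataset, while Keras' fit defaults to batch_size=32, so the two runs may not be doing the same amount of work per optimizer step. A hedged sketch of batching the PyTorch side for a fairer comparison (the Encoder above hard-codes a batch size of 1 via x.reshape((1, ...)), so the model would need adapting to batched input first):

from torch.utils.data import DataLoader

# Hypothetical: batch the training data the way Keras' fit() does by default
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

for seq_batch in train_loader:
    optimizer.zero_grad()
    seq_batch = seq_batch.to(device)
    seq_pred = model(seq_batch)       # assumes a batch-capable model
    loss = criterion(seq_pred, seq_batch)
    loss.backward()
    optimizer.step()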
I am using an LSTM to perform binary classification. When I plot the test loss, it is not decreasing over time; rather, it fluctuates a lot and looks extremely weird. The training loss, on the other hand, looks normal and decreases over time.
Here's a picture of how it looks.
This is my code for the model definition and configuration.
# Create LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(LSTMModel, self).__init__()
        # Number of hidden dimensions
        self.hidden_dim = hidden_dim
        # Number of hidden layers
        self.layer_dim = layer_dim
        # LSTM
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True, dropout=0.2)
        # Readout layer
        self.f1 = nn.Linear(hidden_dim, output_dim)
        self.softmax = nn.Sigmoid()  # despite the name, this is a sigmoid, not a softmax

    def forward(self, x):
        # Initialize hidden state with zeros
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).cuda()
        # Initialize cell state
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim).cuda()
        out, (hn, cn) = self.lstm(x, (h0, c0))
        out = self.f1(hn[-1])
        out = self.softmax(out)
        return out
# LSTM Configuration
batch_size = 10000
num_epochs = 200
learning_rate = 0.001  # try lowering the rate

# Create LSTM
input_dim = 1    # input dimension
hidden_dim = 50  # hidden layer dimension
layer_dim = 2    # number of hidden layers
output_dim = 1   # output dimension

model = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)
model.cuda()
error = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
This is my code for training and testing:

from tensorboardcolab import TensorBoardColab
from tqdm import tqdm

globaliter = 0
globaliter2 = 0
tb = TensorBoardColab()

for epoch in tqdm(range(num_epochs)):
    # Train
    model.train()
    for i, (inputs, targets) in enumerate(train_loader):
        train = inputs.type(torch.FloatTensor).cuda()
        targets = targets.type(torch.FloatTensor).cuda()

        optimizer.zero_grad()
        outputs = model(train)
        loss = error(outputs, targets)
        loss_list_train.append(loss.item())
        loss.backward()
        optimizer.step()

        tb.save_value('Train Loss', 'train_loss', globaliter, loss.item())
        globaliter += 1
        tb.flush_line('train_loss')

    # Test
    model.eval()
    for inputs, targets in test_loader:
        inputs = inputs.type(torch.FloatTensor).cuda()
        targets = targets.type(torch.FloatTensor).cuda()

        outputs = model(inputs)
        loss_test = error(outputs, targets)
        loss_list_test.append(loss_test.item())

        tb.save_value('Test Loss', 'test_loss', globaliter2, loss_test.item())
        globaliter2 += 1
        tb.flush_line('test_loss')
I'd really be grateful if someone could help me figure this out or offer suggestions or advice.