Difference between Keras and PyTorch code with the same architecture - python

I am building an autoencoder model and have found two snippets of code with the same architecture, one in Keras and one in PyTorch. But when I run them, there is a large difference in training time even though they use the same architecture. Could you please explain why I am seeing such a huge difference in time as well as performance?
PyTorch Code
import copy

import numpy as np
import torch
import torch.nn as nn

class Encoder(nn.Module):
    def __init__(self, seq_len, n_features, embedding_dim=64):
        super(Encoder, self).__init__()
        self.seq_len, self.n_features = seq_len, n_features
        self.embedding_dim, self.hidden_dim = embedding_dim, 2 * embedding_dim
        self.rnn1 = nn.LSTM(
            input_size=n_features,
            hidden_size=self.hidden_dim,
            num_layers=1,
            batch_first=True
        )
        self.rnn2 = nn.LSTM(
            input_size=self.hidden_dim,
            hidden_size=embedding_dim,
            num_layers=1,
            batch_first=True
        )

    def forward(self, x):
        x = x.reshape((1, self.seq_len, self.n_features))
        x, (_, _) = self.rnn1(x)
        x, (hidden_n, _) = self.rnn2(x)
        return hidden_n.reshape((self.n_features, self.embedding_dim))


class Decoder(nn.Module):
    def __init__(self, seq_len, input_dim=64, n_features=1):
        super(Decoder, self).__init__()
        self.seq_len, self.input_dim = seq_len, input_dim
        self.hidden_dim, self.n_features = 2 * input_dim, n_features
        self.rnn1 = nn.LSTM(
            input_size=input_dim,
            hidden_size=input_dim,
            num_layers=1,
            batch_first=True
        )
        self.rnn2 = nn.LSTM(
            input_size=input_dim,
            hidden_size=self.hidden_dim,
            num_layers=1,
            batch_first=True
        )
        self.output_layer = nn.Linear(self.hidden_dim, n_features)

    def forward(self, x):
        x = x.repeat(self.seq_len, self.n_features)
        x = x.reshape((self.n_features, self.seq_len, self.input_dim))
        x, (hidden_n, cell_n) = self.rnn1(x)
        x, (hidden_n, cell_n) = self.rnn2(x)
        x = x.reshape((self.seq_len, self.hidden_dim))
        return self.output_layer(x)


class RecurrentAutoencoder(nn.Module):
    def __init__(self, seq_len, n_features, embedding_dim=64):
        super(RecurrentAutoencoder, self).__init__()
        self.encoder = Encoder(seq_len, n_features, embedding_dim).to(device)
        self.decoder = Decoder(seq_len, embedding_dim, n_features).to(device)

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x


model = RecurrentAutoencoder(seq_len, n_features, 128)
import time

time_dict = {}

def train_model(model, train_dataset, val_dataset, n_epochs):
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.L1Loss(reduction='sum').to(device)
    history = dict(train=[], val=[])
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 10000.0
    time_dict[0] = time.time()
    for epoch in range(1, n_epochs + 1):
        model = model.train()
        train_losses = []
        for seq_true in train_dataset:
            optimizer.zero_grad()
            seq_true = seq_true.to(device)
            seq_pred = model(seq_true)
            loss = criterion(seq_pred, seq_true)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())
        val_losses = []
        model = model.eval()
        with torch.no_grad():
            for seq_true in val_dataset:
                seq_true = seq_true.to(device)
                seq_pred = model(seq_true)
                loss = criterion(seq_pred, seq_true)
                val_losses.append(loss.item())
        train_loss = np.mean(train_losses)
        val_loss = np.mean(val_losses)
        history['train'].append(train_loss)
        history['val'].append(val_loss)
        if val_loss < best_loss:
            best_loss = val_loss
            best_model_wts = copy.deepcopy(model.state_dict())
        print(f'Epoch {epoch}: train loss {train_loss} val loss {val_loss}')
        stop = time.time()
        time_dict[epoch] = stop
    model.load_state_dict(best_model_wts)
    return model.eval(), history

model, history = train_model(
    model,
    train_dataset,
    val_dataset,
    n_epochs=10)
Output of print(model)
RecurrentAutoencoder(
  (encoder): Encoder(
    (rnn1): LSTM(1, 256, batch_first=True)
    (rnn2): LSTM(256, 128, batch_first=True)
  )
  (decoder): Decoder(
    (rnn1): LSTM(128, 128, batch_first=True)
    (rnn2): LSTM(128, 256, batch_first=True)
    (output_layer): Linear(in_features=256, out_features=1, bias=True)
  )
)
Keras Code
from tensorflow import keras  # assuming the tf.keras API for this snippet
from tensorflow.keras.callbacks import TensorBoard

model = keras.Sequential()
model.add(keras.layers.LSTM(
    units=256,
    input_shape=(X_train.shape[1], X_train.shape[2]),
    return_sequences=True))
model.add(keras.layers.LSTM(units=128, return_sequences=True))
model.add(keras.layers.LSTM(units=256, return_sequences=True))
model.add(keras.layers.TimeDistributed(keras.layers.Dense(units=X_train.shape[2])))
model.compile(loss='mae', optimizer='adam')

nb_epoch = 10
tensorboard = TensorBoard(log_dir='/tmp/logs',
                          histogram_freq=0,
                          write_graph=True,  # to visualize
                          write_images=True)
history = model.fit(X_train, X_train,
                    epochs=nb_epoch,
                    shuffle=True,
                    validation_data=(X_validate, X_validate),
                    verbose=1,
                    callbacks=[tensorboard]).history
Output of print(model.summary())
Layer (type)                  Output Shape     Param #
lstm_6 (LSTM)                 (None, 1, 256)   266240
lstm_7 (LSTM)                 (None, 1, 128)   197120
lstm_8 (LSTM)                 (None, 1, 256)   394240
time_distributed (TimeDistri  (None, 1, 3)     771
The architecture of both these models looks the same, but there is a difference in performance and training time.
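One concrete difference is visible in the snippets above: the PyTorch loop feeds sequences one at a time (the encoder reshapes its input to a batch of 1 and the optimizer steps once per sequence), while Keras model.fit uses its default batch_size of 32 and runs each training step in compiled code. A minimal sketch of what a batched PyTorch loop might look like, assuming train_dataset is a sequence of (seq_len, n_features) tensors and the encoder/decoder were rewritten to keep the batch dimension instead of hard-coding it to 1:

from torch.utils.data import DataLoader

# Hypothetical batched loop; model, optimizer, criterion, device as in the question.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

for seq_batch in train_loader:            # seq_batch: (32, seq_len, n_features)
    seq_batch = seq_batch.to(device)
    optimizer.zero_grad()
    seq_pred = model(seq_batch)           # requires batch-agnostic reshapes in the model
    loss = criterion(seq_pred, seq_batch)
    loss.backward()
    optimizer.step()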

Related

RuntimeError: size mismatch (got input: [6], target: [64])

I wrote a PyTorch model for time series classification, but I am getting a lot of errors.
batch_size = 64

trainloader = DataLoader(TensorDataset(x_train, y_train),
                         batch_size=batch_size,
                         shuffle=True)
testloader = DataLoader(TensorDataset(x_test, y_test),
                        batch_size=batch_size,
                        shuffle=False)

class LSTMClassifier(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, num_class):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.rnn = torch.nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = torch.nn.Linear(hidden_dim, num_class)
        self.batch_size = None
        self.hidden = None

    def forward(self, x):
        x, (h, c) = self.rnn(x)
        logits = self.fc(h[-1])
        return logits

input_dim = x_train.shape[1]
hidden_dim = 256
layer_dim = 12
num_class = len(y_train.unique())

model = LSTMClassifier(input_dim, hidden_dim, layer_dim, num_class).to(device)
loss_fn = torch.nn.CrossEntropyLoss()
lr = 1e-3
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        logits = model(X)
        loss = loss_fn(logits, torch.max(y, 1)[1])
        loss = loss_fn(logits, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")

epochs = 15
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(trainloader, model, loss_fn, optimizer)
    test(testloader, model)
print("Done!")
When I run this I get the error
RuntimeError: size mismatch (got input: [6], target: [64])
on this line: ----> 8 loss = loss_fn(logits, torch.max(y, 1)[1])
Can anyone please help me with this? What should I do to solve it?
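For reference, a mismatch between the logits batch size (6) and the target batch size (64) usually means the LSTM is not receiving the data as (batch, seq_len, features) with batch_first=True, so the batch dimension gets swallowed. The sketch below only illustrates the shapes nn.LSTM and CrossEntropyLoss expect; all sizes here are made up and it is not a drop-in fix for the data above:

import torch

batch_size, seq_len, n_features, hidden_dim, num_class = 64, 30, 6, 256, 4  # hypothetical sizes

rnn = torch.nn.LSTM(n_features, hidden_dim, num_layers=2, batch_first=True)
fc = torch.nn.Linear(hidden_dim, num_class)
loss_fn = torch.nn.CrossEntropyLoss()

x = torch.randn(batch_size, seq_len, n_features)   # batch_first=True -> (batch, seq, features)
y = torch.randint(0, num_class, (batch_size,))     # class indices, shape (batch,)

out, (h, c) = rnn(x)          # h: (num_layers, batch, hidden_dim)
logits = fc(h[-1])            # (batch, num_class)
loss = loss_fn(logits, y)     # batch sizes now match: 64 vs 64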

Building an LSTM model in PyTorch equivalent to another one in Keras

I have a model in TensorFlow that I converted to PyTorch. I want to check whether the two models are the same or if I'm mixing things up. Here is my code in TensorFlow:
model = Sequential()
model.add(
    LSTM(5, input_shape=(4, 1000))
)
model.add(
    Dense(1, activation='tanh')
)
model.compile(
    loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']
)
model.fit(
    X_train, y_train, epochs=100, batch_size=27
)
I built the equivalent model in Pytorch in this way:
class LSTM1(nn.Module):
    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length):
        super(LSTM1, self).__init__()
        self.num_classes = num_classes  # number of classes
        self.num_layers = num_layers    # number of layers
        self.input_size = input_size    # input size
        self.seq_length = seq_length    # sequence length
        self.hidden_size = hidden_size  # hidden state
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)  # lstm
        self.fc = nn.Linear(self.hidden_size, num_classes)  # fully connected last layer
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        h_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))  # hidden state
        c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))  # internal state
        # Propagate input through LSTM
        output, (hn, cn) = self.lstm(x, (h_0, c_0))  # lstm with input, hidden, and internal state
        hn = hn.view(-1, self.hidden_size)  # reshaping the data for Dense layer next
        out = self.sigmoid(hn)
        out = self.fc(out)  # Final Output
        out = self.sigmoid(out)
        return out

num_epochs = 100       # 100 epochs
learning_rate = 0.001  # 0.001 lr
input_size = 1000      # number of features
num_layers = 5         # number of stacked lstm layers
hidden_size = 1
num_classes = 1        # number of output classes

X_train = np.concatenate((X_phage, X_bac))
y_train = np.concatenate((np.ones(len(X_phage)), np.zeros(len(X_bac))))
X_train_tensors_final = Variable(torch.Tensor(X_train))
y_train_tensors = Variable(torch.Tensor(y_train))
print(X_train_tensors_final.shape)

model = LSTM1(num_classes, input_size, hidden_size, num_layers, X_train_tensors_final.shape[1])  # our lstm class
print("Training Shape", X_train_tensors_final.shape, y_train_tensors.shape)
print(model)

criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    outputs = model.forward(X_train_tensors_final)  # forward pass
    optimizer.zero_grad()  # calculate the gradient, manually setting to 0
    # obtain the loss function
    outputs = outputs[-20:]
    y_train_tensors = y_train_tensors.type(torch.LongTensor)
    y_train_tensors = torch.reshape(y_train_tensors, (20, 1))
    loss = criterion(outputs, y_train_tensors.float())
    loss.backward()   # calculates the loss of the loss function
    optimizer.step()  # improve from loss, i.e. backprop
    train_acc = torch.sum((outputs > 0.5).bool().float() == y_train_tensors)
    final_train_acc = train_acc / 20
    print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))
    print('Accuracy: ', final_train_acc)
My data is 20 instances, where each instance has 1000 features and 4 timesteps, so it is shaped (20, 4, 1000). However, when I run the two models I get different results (different loss and accuracy), so I am afraid I have missed something and the models are not the same.
I would appreciate it if someone could confirm whether these models are the same, and if they are, explain why I am getting different results.
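One structural difference stands out when comparing the two snippets: the Keras model is a single LSTM layer with 5 units followed by Dense(1, activation='tanh'), while the PyTorch class stacks num_layers=5 LSTM layers of hidden_size=1 and applies a sigmoid both before and after the linear layer, so the two are not parameter-for-parameter equivalent. As a rough illustration only (the class name and defaults below are mine, not from the question), a PyTorch module closer to the Keras layout might look like:

import torch
import torch.nn as nn

class KerasLikeLSTM(nn.Module):  # hypothetical name
    def __init__(self, input_size=1000, hidden_size=5):
        super().__init__()
        # one LSTM layer with 5 units, like Keras LSTM(5, input_shape=(4, 1000))
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=1, batch_first=True)
        # Dense(1, activation='tanh')
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):                  # x: (batch, 4, 1000)
        out, (h, c) = self.lstm(x)         # h: (1, batch, 5)
        return torch.tanh(self.fc(h[-1]))  # (batch, 1)

Even with matching layers, identical numbers should not be expected: the two frameworks initialize weights differently, and the Keras run uses shuffled mini-batches of 27 while the PyTorch loop does full-batch updates on all 20 samples.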

RuntimeError: Mat1 and mat2 shapes cannot be multiplied (256x65536 and 1024x4096) - Python

I am trying to use AlexNet to classify spectrogram images generated for 3s audio segments. I am aware that the input image to AlexNet must be 224x224 and have transformed the train and test datasets accordingly. I am encountering the following error: mat1 and mat2 shapes cannot be multiplied (256x65536 and 1024x4096 - see link for full error message) and I am not entirely sure why. Can anyone help me figure out where I am going wrong?
https://i.stack.imgur.com/cnVKp.png
Transform data
data_transform_train = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(256),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean_train, norm_std_train),
])
Create dataloaders
train_size = int(len(train_data_df))
test_size = int(len(test_data_df))

ins_dataset_train = Audio(
    df=train_data_df[:train_size],
    transform=data_transform_train,
)
ins_dataset_test = Audio(
    df=test_data_df[:test_size],
    transform=data_transform_test,
)

train_loader = torch.utils.data.DataLoader(
    ins_dataset_train,
    batch_size=256,
    shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    ins_dataset_test,
    batch_size=256,
    shuffle=True
)
AlexNet Model
class AlexNet(nn.Module):
    def __init__(self, output_dim):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, 3, 2, 1),  # in_channels, out_channels, kernel_size, stride, padding
            nn.MaxPool2d(2),            # kernel_size
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 192, 3, padding=1),
            nn.MaxPool2d(2),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 384, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, 3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, 3, padding=1),
            nn.MaxPool2d(2),
            nn.ReLU(inplace=True)
        )
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(256 * 2 * 2, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, output_dim),
        )

    def forward(self, x):
        x = self.features(x)
        h = x.view(x.shape[0], -1)
        x = self.classifier(h)
        return x, h

output_dim = 2
model = AlexNet(output_dim)

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

def initialize_parameters(m):
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight.data, nonlinearity='relu')
        nn.init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.Linear):
        nn.init.xavier_normal_(m.weight.data, gain=nn.init.calculate_gain('relu'))
        nn.init.constant_(m.bias.data, 0)

model.apply(initialize_parameters)
Learning Rate Finder
class LRFinder:
    def __init__(self, model, optimizer, criterion, device):
        self.optimizer = optimizer
        self.model = model
        self.criterion = criterion
        self.device = device
        torch.save(model.state_dict(), 'init_params.pt')

    def range_test(self, iterator, end_lr=10, num_iter=100, smooth_f=0.05, diverge_th=5):
        lrs = []
        losses = []
        best_loss = float('inf')
        lr_scheduler = ExponentialLR(self.optimizer, end_lr, num_iter)
        iterator = IteratorWrapper(iterator)
        for iteration in range(num_iter):
            loss = self._train_batch(iterator)
            lrs.append(lr_scheduler.get_last_lr()[0])
            lr_scheduler.step()
            if iteration > 0:
                loss = smooth_f * loss + (1 - smooth_f) * losses[-1]
            if loss < best_loss:
                best_loss = loss
            losses.append(loss)
            if loss > diverge_th * best_loss:
                print("Stopping early, the loss has diverged")
                break
        model.load_state_dict(torch.load('init_params.pt'))
        return lrs, losses

    def _train_batch(self, iterator):
        self.model.train()
        self.optimizer.zero_grad()
        x, y = iterator.get_batch()
        x = x.to(self.device)
        y = y.to(self.device)
        y_pred, _ = self.model(x)
        loss = self.criterion(y_pred, y)
        loss.backward()
        self.optimizer.step()
        return loss.item()

from torch.optim.lr_scheduler import _LRScheduler

class ExponentialLR(_LRScheduler):
    def __init__(self, optimizer, end_lr, num_iter, last_epoch=-1):
        self.end_lr = end_lr
        self.num_iter = num_iter
        super(ExponentialLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        curr_iter = self.last_epoch
        r = curr_iter / self.num_iter
        return [base_lr * (self.end_lr / base_lr) ** r
                for base_lr in self.base_lrs]

class IteratorWrapper:
    def __init__(self, iterator):
        self.iterator = iterator
        self._iterator = iter(iterator)

    def __next__(self):
        try:
            inputs, labels = next(self._iterator)
        except StopIteration:
            self._iterator = iter(self.iterator)
            inputs, labels, *_ = next(self._iterator)
        return inputs, labels

    def get_batch(self):
        return next(self)

start_learning_rate = 1e-7
optimizer = optim.Adam(model.parameters(), lr=start_learning_rate)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.CrossEntropyLoss()
model = model.to(device)
criterion = criterion.to(device)

END_LR = 10
NUM_ITER = 100

lr_finder = LRFinder(model, optimizer, criterion, device)
lrs, losses = lr_finder.range_test(train_loader, END_LR, NUM_ITER)
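For what it is worth, the numbers in the error line up with the code above: the classifier's first nn.Linear expects 256 * 2 * 2 = 1024 features (what this feature extractor produces for a 32x32 input), but with Resize(256)/CenterCrop(256) it outputs 256 x 16 x 16 = 65536 features per image, and with batch_size=256 that gives the (256x65536) vs (1024x4096) mismatch. A standalone sketch (mirroring the layers above) that one could use to check the flattened size for a given input resolution and size the first Linear accordingly:

import torch
import torch.nn as nn

# Run a dummy tensor through a copy of the feature extractor to see how many
# values reach the classifier for a given input size.
features = nn.Sequential(
    nn.Conv2d(3, 64, 3, 2, 1), nn.MaxPool2d(2), nn.ReLU(inplace=True),
    nn.Conv2d(64, 192, 3, padding=1), nn.MaxPool2d(2), nn.ReLU(inplace=True),
    nn.Conv2d(192, 384, 3, padding=1), nn.ReLU(inplace=True),
    nn.Conv2d(384, 256, 3, padding=1), nn.ReLU(inplace=True),
    nn.Conv2d(256, 256, 3, padding=1), nn.MaxPool2d(2), nn.ReLU(inplace=True),
)

with torch.no_grad():
    print(features(torch.zeros(1, 3, 256, 256)).flatten(1).shape)  # (1, 65536) = 256*16*16
    print(features(torch.zeros(1, 3, 32, 32)).flatten(1).shape)    # (1, 1024)  = 256*2*2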

Using a target size (torch.Size([16])) that is different to the input size (torch.Size([16, 2])) is deprecated

I am trying to build a multiclass text classifier using PyTorch and torchtext, but I receive this error whenever the output dimension of the last hidden layer is 2; it runs fine with an output dimension of 1. I know there is a problem with the batch size and data shape, but I don't know the fix. What should I do?
Constructing iterator:
# set batch size
BATCH_SIZE = 16

train_iterator, valid_iterator = BucketIterator.splits(
    (train_data, valid_data),
    batch_size=BATCH_SIZE,
    sort_key=lambda x: len(x.text),
    sort_within_batch=True,
    device=device)
Model class:
class classifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers,
                 bidirectional, dropout):
        super(classifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.gru = nn.GRU(embedding_dim,
                          hidden_dim,
                          num_layers=n_layers,
                          bidirectional=bidirectional,
                          dropout=dropout,
                          batch_first=True)
        self.fc1 = nn.Linear(hidden_dim * 2, 128)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(64, 16)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(16, output_dim)
        self.act = nn.Sigmoid()

    def forward(self, text, text_lengths):
        embedded = self.embedding(text)
        # embedded = [batch size, sent_len, emb dim]
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths.to('cpu'), batch_first=True)
        packed_output, hidden = self.gru(packed_embedded)
        hidden = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
        dense_1 = self.fc1(hidden)
        x = self.relu1(dense_1)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        x = self.relu3(x)
        dense_outputs = self.fc4(x)
        # Final activation function
        outputs = self.act(dense_outputs)
        return outputs
Instantiating the model:
size_of_vocab = len(TEXT.vocab)
embedding_dim = 300
num_hidden_nodes = 256
num_output_nodes = 2
num_layers = 4
bidirection = True
dropout = 0.2

model = classifier(size_of_vocab, embedding_dim, num_hidden_nodes, num_output_nodes, num_layers,
                   bidirectional=True, dropout=dropout).to(device)

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

pretrained_embeddings = TEXT.vocab.vectors
model.embedding.weight.data.copy_(pretrained_embeddings)
print(pretrained_embeddings.shape)
Optimizer and criterion used:
optimizer = optim.Adam(model.parameters())
criterion = nn.BCELoss()
model = model.to(device)
criterion = criterion.to(device)
Training function:
import torchmetrics as tm
metrics = tm.Accuracy()

def train(model, iterator, optimizer, criterion):
    # initialize every epoch
    epoch_loss = 0
    epoch_acc = 0
    # set the model in training phase
    model.train()
    for batch in iterator:
        # resets the gradients after every batch
        optimizer.zero_grad()
        # retrieve text and no. of words
        text, text_lengths = batch.text
        # convert to 1D tensor
        predictions = model(text, text_lengths).squeeze()
        # compute the loss
        loss = criterion(predictions, batch.label)
        # compute the binary accuracy
        # acc = binary_accuracy(predictions, batch.label)
        acc = metrics(predictions, batch.label)
        # backpropagate the loss and compute the gradients
        loss.backward()
        # update the weights
        optimizer.step()
        # loss and accuracy
        epoch_loss += loss.item()
        epoch_acc += acc.item()
    return epoch_loss / len(iterator), epoch_acc / len(iterator)
Full error
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-60-eeabf5bacadf> in <module>()
5
6 #train the model
----> 7 train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
8
9 #evaluate the model
3 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in binary_cross_entropy(input, target, weight, size_average, reduce, reduction)
2906 raise ValueError(
2907 "Using a target size ({}) that is different to the input size ({}) is deprecated. "
-> 2908 "Please ensure they have the same size.".format(target.size(), input.size())
2909 )
2910
ValueError: Using a target size (torch.Size([16])) that is different to the input size (torch.Size([16, 2])) is deprecated. Please ensure they have the same size.
What you want is CrossEntropyLoss instead of BCELoss.
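A minimal sketch of what that change looks like shape-wise, assuming batch.label holds integer class indices and that the final self.act = nn.Sigmoid() is dropped so fc4 returns raw logits (the sizes below just mirror the error message):

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()  # replaces nn.BCELoss()

batch_size, num_output_nodes = 16, 2
logits = torch.randn(batch_size, num_output_nodes)          # raw model outputs, shape (16, 2)
labels = torch.randint(0, num_output_nodes, (batch_size,))  # batch.label as int64, shape (16,)

loss = criterion(logits, labels)  # (16, 2) logits with (16,) targets: no size mismatch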

Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same

I'm trying to implement ResNet18 in PyTorch but I'm having some trouble with it. My code is this:
device = torch.device("cuda:0")

class ResnetBlock(nn.Module):
    def __init__(self, strides, nf, nf0, reps, bn):
        super(ResnetBlock, self).__init__()
        self.adapt = strides == 2
        self.layers = []
        self.relus = []
        self.adapt_layer = nn.Conv2d(nf0, nf, kernel_size=1, stride=strides, padding=0) if self.adapt else None
        for i in range(reps):
            self.layers.append(nn.Sequential(
                nn.Conv2d(nf0, nf, kernel_size=3, stride=strides, padding=1),
                nn.BatchNorm2d(nf, eps=0.001, momentum=0.99),
                nn.ReLU(),
                nn.Conv2d(nf, nf, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(nf, eps=0.001, momentum=0.99)))
            self.relus.append(nn.ReLU())
            strides = 1
            nf0 = nf

    def forward(self, x):
        for i, (layer, relu) in enumerate(zip(self.layers, self.relus)):
            rama = layer(x)
            if self.adapt and i == 0:
                x = self.adapt_layer(x)
            x = x + rama
            x = relu(x)
        return x

class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.blocks = nn.Sequential(
            ResnetBlock(1, 64, 64, 2, bn),
            ResnetBlock(2, 128, 64, 2, bn),
            ResnetBlock(2, 256, 128, 2, bn),
            ResnetBlock(2, 512, 256, 2, bn))
        self.fcout = nn.Linear(512, 10)

    def forward(self, x):
        out = self.layer1(x)
        out = self.blocks(out)
        out = out.reshape(out.size(0), -1)
        out = self.fcout(out)
        return out

num_epochs = 50
num_classes = 10
batch_size = 50
learning_rate = 0.00001

trans = transforms.ToTensor()
train_dataset = torchvision.datasets.CIFAR10(root="./dataset_pytorch", train=True, download=True, transform=trans)
test_dataset = torchvision.datasets.CIFAR10(root="./dataset_pytorch", train=False, download=True, transform=trans)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

def weights_init(m):
    if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight.data)
        nn.init.zeros_(m.bias.data)

model = ConvNet()
model.apply(weights_init)
model.to(device)
summary(model, (3, 32, 32))
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, eps=1e-6)

# Train the model
total_step = len(train_loader)
loss_list = []
acc_list = []
acc_list_test = []
for epoch in range(num_epochs):
    total = 0
    correct = 0
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        # Run the forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss_list.append(loss.item())
        # Backprop and perform Adam optimisation
        loss.backward()
        optimizer.step()
        # Track the accuracy
        total += labels.size(0)
        _, predicted = torch.max(outputs.data, 1)
        correct += (predicted == labels).sum().item()
    acc_list.append(correct / total)
    print("Train")
    print('Epoch [{}/{}], Accuracy: {:.2f}%'
          .format(epoch + 1, num_epochs, (correct / total) * 100))

    total_test = 0
    correct_test = 0
    for i, (images, labels) in enumerate(test_loader):
        images = images.to(device)
        labels = labels.to(device)
        # Run the forward pass
        outputs = model(images)
        # Track the accuracy
        total_test += labels.size(0)
        _, predicted = torch.max(outputs.data, 1)
        correct_test += (predicted == labels).sum().item()
    acc_list_test.append(correct_test / total_test)
    print("Test")
    print('Epoch [{}/{}], Accuracy: {:.2f}%'
          .format(epoch + 1, num_epochs, (correct_test / total_test) * 100))
It's weird because it throws the error Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same even though I've moved both the model and the data to CUDA.
I guess it's related to how I defined or used ResnetBlock, because if I remove those blocks from ConvNet (removing the line out = self.blocks(out)), the code works. But I don't know what I'm doing wrong.
A first thing to tidy up is this line:
model.to(device)
For an nn.Module, to() modifies the module in place and also returns it, so the conventional form is:
model = model.to(device)
but that alone does not explain the error.
EDIT: The actual problem is that a vanilla Python list cannot be tracked by PyTorch: the sub-modules stored in self.layers and self.relus are never registered, so model.to(device) never moves their weights to the GPU, which is exactly what the input/weight type mismatch is complaining about. You need to use nn.ModuleList.
From
self.layers = []
self.relus = []
To
self.layers = nn.ModuleList()
self.relus = nn.ModuleList()
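A tiny illustration of why this matters (the module names below are made up): sub-modules kept in a plain Python list are invisible to parameters() and are not moved by .to(device).

import torch.nn as nn

class WithList(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = [nn.Linear(4, 4)]                 # not registered

class WithModuleList(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.ModuleList([nn.Linear(4, 4)])  # registered

print(sum(p.numel() for p in WithList().parameters()))        # 0
print(sum(p.numel() for p in WithModuleList().parameters()))  # 20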
