I am training a multilabel classifier in Keras and want to convert it to PyTorch. I am mostly confused about how to handle the loss. This is what the code looks like:
model = keras.applications.densenet.DenseNet121(include_top=False, input_shape=(224, 224, 3))
x = model.output
x = Flatten()(x)
x = Dense(512)(x)
x = Activation('relu')(x)
x = Dropout(0.5)(x)
output1 = Dense(1, activation = 'sigmoid')(x)
output2 = Dense(1, activation = 'sigmoid')(x)
output3 = Dense(1, activation = 'sigmoid')(x)
output4 = Dense(1, activation = 'sigmoid')(x)
output5 = Dense(1, activation = 'sigmoid')(x)
output6 = Dense(1, activation = 'sigmoid')(x)
output7 = Dense(1, activation = 'sigmoid')(x)
output8 = Dense(1, activation = 'sigmoid')(x)
model = Model(model.inputs,[output1,output2,output3,output4,output5, output6, output7, output8])
# print(model.summary())
model.compile(optimizers.rmsprop(lr=0.0001, decay=1e-6),
              loss=["binary_crossentropy"] * 8,  # one binary cross-entropy per output
              metrics=["accuracy"])
How can I do this in PyTorch? This is what I have so far:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(2304, 256)
        self.fc2 = nn.Linear(256, 8)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(x.size(0), -1)  # flatten layer
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return torch.sigmoid(x)  # F.sigmoid is deprecated
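As an aside on the backbone: the Keras model above starts from DenseNet121, while this Net is a small CNN, so the two will not be comparable in accuracy. torchvision ships a DenseNet121 that could be adapted instead; a minimal sketch, assuming torchvision is available (torchvision global-average-pools to 1024 features before the classifier, whereas the Keras code flattens the 7x7 map, so the head is slightly smaller here):

import torch.nn as nn
import torchvision

# pretrained= is the older torchvision argument; newer versions use weights=
backbone = torchvision.models.densenet121(pretrained=True)
n_feats = backbone.classifier.in_features  # 1024 for DenseNet121
backbone.classifier = nn.Sequential(
    nn.Linear(n_feats, 512),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(512, 8),  # 8 labels, raw logits (apply sigmoid or use BCEWithLogitsLoss)
)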
model = Net().cuda()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
def train(epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # data, target = data.cuda(non_blocking=True), target.cuda(non_blocking=True)  # on GPU (async= was renamed non_blocking=)
        data, target = data.cuda(), target.float().cuda()  # Variable is deprecated; tensors work directly
        optimizer.zero_grad()
        output = model(data)
        loss = F.binary_cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

for epoch in range(1, 10):
    train(epoch)
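On the loss question specifically: the eight Keras binary_crossentropy losses collapse to a single multilabel binary cross-entropy in PyTorch, so F.binary_cross_entropy(output, target) with a (batch, 8) float target, as above, is already the right call. A slightly more stable variant drops the final sigmoid from forward and lets the loss apply it internally; a minimal sketch:

criterion = nn.BCEWithLogitsLoss()  # fuses sigmoid + binary cross-entropy

# inside train(), assuming forward() returns self.fc2(x) without the sigmoid:
logits = model(data)                      # shape (batch, 8), raw scores
loss = criterion(logits, target.float())  # target shape (batch, 8), entries in {0, 1}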
Thanks in advance; any suggestions would be very helpful.
Related
I have some code that I am trying to run, but I have a problem with it. The code comes from a web page. My data is a multiple sequence alignment (MSA) of a specific protein. I first applied one-hot encoding and then tried to feed it into a variational autoencoder for training, but it failed: when I ran the code, I got the error message
Input 0 of layer "encoder" is incompatible with the layer: expected shape=(None, 28, 28, 1), found shape=(None, 13, 1)
As I am a beginner in this field, I don't know where I made the mistake. Could you please help with this issue, or explain how to train it on multiple sequence alignment data?
###################################
datax = pd.DataFrame({'sequence':['ACKIENIKYKGKEVESKLGSQLIDIFNDLDRAKEEYDKLSSPEFIAKFGDWINDEVERNVNEDGEPLLIQDVRQDSSKHYFFILKNGERFDLLTR','---------TGKEVQSSLFDQILELVKDPQLAQDAYAQIRTPEFIEKFGDWINDPYESKLDDNGEPILIE-------------------------','-------PNTGKEVQSSLFDQIIELIKDPKLAQDAYEQVRTPEFIEKFGDWINDPYAAKLDDNGEPILVERD-----------------------','-------PNQGKEVESKLYNDLVNLTGSEKAADEAYEQVHHPNFLRWFGDWINNKVSNVVDENGEPLIV--------------------------','ACRIIISLNKGKEVKSKLFDSLLDLTQSEAKAEEAYKKVFSEEFINWFGDWINTPASKVVDENGEPLMVYRFTQEE-------------------','-------PNKGKEVRSKLYDDLLELTGDDNAALEAYKKVHHPNFLRWFGDWINNKVSKVVDENGEPLIVY-------------------------','-------PNKGKEVESKLYNDLVNLTGSEKAADEAYTKVHHPNFLRWFGDWMTNPSSKVVDENGEPKIV--------------------------','-------PNKGQRVDSILYNDLLSLTGNEKSADKAYTKAHTSSFLNWFGDWINTNIQDNVDQNGEPKV---------------------------','-----------------------------NLTSEQYKLVRTPEFIAWFGNWMDDPNASVIDENGEPLVCFH-GTDNSFHIF--------------','---------------------------------KQWVQVRTPAFIEWFGDWMNDPASKGVDENGEPLVVYHGTENKFTQYDFD------------','-------------------------------TPKQYKLVRTLEFKAWFGDWENDPASKVVDENGEPLVVYH--GTDSKHNVFSYEQ---------','--------------GSSLYEQYVVIIDSSNLTTEQYKLVRTPEFKKWFGDWENNPSEAVVDENGEPLVVYH------------------------','------------------------------LTPEQYKLVRTPEFKAWFGDWENNPSSKVVDDNGEPMVVY---HGSRKKAFTEFK----------']})
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# note: these imports have to run before the DataFrame above is built,
# since `pd` is already used there
encoder = OneHotEncoder(sparse=False)
onehot = encoder.fit_transform(datax)
print(onehot)
class Sampling(layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""

    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon
latent_dim = 10
encoder_inputs = keras.Input(shape=(28, 28, 1))
x = layers.Conv2D(32, 3, activation="relu", strides=2, padding="same")(encoder_inputs)
x = layers.Conv2D(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Flatten()(x)
x = layers.Dense(16, activation="relu")(x)
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
z = Sampling()([z_mean, z_log_var])
encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")
encoder.summary()
latent_inputs = keras.Input(shape=(latent_dim,))
x = layers.Dense(7 * 7 * 64, activation="relu")(latent_inputs)
x = layers.Reshape((7, 7, 64))(x)
x = layers.Conv2DTranspose(64, 3, activation="relu", strides=2, padding="same")(x)
x = layers.Conv2DTranspose(32, 3, activation="relu", strides=2, padding="same")(x)
decoder_outputs = layers.Conv2DTranspose(1, 3, activation="sigmoid", padding="same")(x)
decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")
decoder.summary()
class VAE(keras.Model):
    def __init__(self, encoder, decoder, **kwargs):
        super(VAE, self).__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

    @property  # was '#property', which silently drops the decorator
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(
                    # compare against the batch (`data`), not the global `datax`
                    keras.losses.binary_crossentropy(data, reconstruction), axis=(1, 2)
                )
            )
            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
            total_loss = reconstruction_loss + kl_loss
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }
data_digits = np.concatenate([train, test], axis=0)  # `train`/`test` are not defined in this snippet
data_digits = np.expand_dims(data_digits, -1).astype("float32") / 255
vae = VAE(encoder, decoder)
vae.compile(optimizer=keras.optimizers.Adam())
vae.fit(data_digits, epochs=1, batch_size=128)  # was `mnist_digits`, which is undefined here
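For what it's worth, the shape (None, 13, 1) in the error suggests the mismatch begins at the encoding step: OneHotEncoder applied to a single string column treats each whole sequence as one category, so the 13 sequences become a 13x13 matrix rather than a per-residue encoding. A minimal sketch of encoding each alignment position instead (hypothetical variable names; the encoder input shape would then be taken from the data rather than hard-coded to (28, 28, 1)):

import numpy as np

seqs = datax['sequence'].tolist()
alphabet = sorted(set(''.join(seqs)))            # amino acids plus the gap symbol '-'
char_to_idx = {c: i for i, c in enumerate(alphabet)}

# (n_sequences, alignment_length, alphabet_size) one-hot tensor
onehot = np.zeros((len(seqs), len(seqs[0]), len(alphabet)), dtype='float32')
for i, seq in enumerate(seqs):
    for j, ch in enumerate(seq):
        onehot[i, j, char_to_idx[ch]] = 1.0

# e.g. encoder_inputs = keras.Input(shape=onehot.shape[1:]) instead of (28, 28, 1)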
##########################################
I'm trying to train a CNN with PyTorch. The error message I'm getting is:
Given groups=1, weight of size [8, 32, 3], expected input[1, 9999, 5024] to have 32 channels, but got 9999 channels instead
Before starting to train, I hand my data and labels to a TensorDataset and DataLoader:
images_batch = torch.from_numpy(np.array(X))
labels_batch = torch.from_numpy(np.array(y))
dataset_train = TensorDataset(images_batch, labels_batch)  # was TensorDataset(X, y), which passes the raw arrays
train_loader = DataLoader(dataset_train, batch_size=32, shuffle=True)
The dimension of X is (5024, 9999, 1), with 5024 being the number of instances and 9999 the sequence length. The dimension of y is (5024, 1).
My current code for the model is the following:
class Model(nn.Module):
    def __init__(self, **kwargs):
        super().__init__()
        self.conv1 = nn.Conv1d(32, 8, kernel_size=3, stride=1, padding=0)
        self.conv2 = nn.Conv1d(8, 16, kernel_size=3, stride=1, padding=0)
        self.conv3 = nn.Conv1d(16, 32, kernel_size=3, stride=1, padding=0)
        # self.fc1 = nn.Linear(32, 2)

    def forward(self, X):
        X = F.relu(self.conv1(X))
        X = F.relu(self.conv2(X))
        X = F.max_pool2d(X, 2)
        X = self.conv3(X)
        X = F.max_pool2d(X, 2)
        # X = self.fc1(X)
        return F.softmax(X, dim=1)
device = torch.device('cuda') if torch.cuda.is_available() else 'cpu'
model = Model().to(device)  # was .to('cpu'), which conflicts with the device used below

# loss and optimizer
criterion = nn.CrossEntropyLoss()  # was `loss = ...`, but the training loop calls `criterion`
optimizer = optim.Adam(model.parameters(), lr=lr)
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)
            scores = model(x)
            _, predictions = scores.max(1)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)
        print(
            f"Got {num_correct} / {num_samples} with accuracy"
            f" {float(num_correct) / float(num_samples) * 100:.2f}"
        )
    model.train()

check_accuracy(train_loader, model)
for epoch in range(num_epochs):
    for batch_idx, (data, targets) in enumerate(train_loader):
        data = data.to(device=device)      # .to() is not in-place; assign the result
        targets = targets.to(device=device)
        data = data.reshape(data[0], 1)
        scores = model(data)
        loss = criterion(scores, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
I'm aware that the tensor ordering in TensorFlow (channels-last) differs from the ordering in PyTorch (channels-first).
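Exactly: nn.Conv1d expects input of shape (batch, channels, length), so with one feature channel the batches should be (batch, 1, 9999) and the first convolution should take in_channels=1 (the error arises because the 9999-long axis is being read as 9999 channels). A minimal sketch of the fix (dtype choices are assumptions):

import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset

# (5024, 9999, 1) -> (5024, 1, 9999): channels-first, as nn.Conv1d expects
images_batch = torch.from_numpy(np.array(X, dtype=np.float32)).permute(0, 2, 1)
labels_batch = torch.from_numpy(np.array(y)).long().squeeze(1)  # CrossEntropyLoss wants class indices of shape (N,)

dataset_train = TensorDataset(images_batch, labels_batch)
train_loader = DataLoader(dataset_train, batch_size=32, shuffle=True)

# and in the model:
# self.conv1 = nn.Conv1d(1, 8, kernel_size=3, stride=1, padding=0)
# with F.max_pool1d instead of F.max_pool2d for the 1-D feature maps

This resolves the channel mismatch; the head would still need pooling or flattening plus a Linear layer before CrossEntropyLoss (which also applies softmax internally, so the explicit F.softmax should go).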
I am trying to calculate the dot product of the gradients of the same layer from two different epochs, but when I use print(model.layer1[0].weight.grad) it returns
tensor([[[[-1.1855e-03, -3.7884e-03, -2.8973e-03, -2.8847e-03, -9.6510e-04],
[-2.0213e-03, -4.4927e-03, -5.4852e-03, -6.6060e-03, -3.5726e-03],
[ 7.4499e-04, -1.8440e-03, -5.0472e-03, -5.6322e-03, -1.9532e-03],
[-4.5696e-04, 9.6445e-04, -1.4923e-03, -2.9467e-03, -1.4610e-03],
[ 2.4987e-04, 2.2086e-03, -7.6576e-04, -2.7009e-03, -2.8571e-03]]],
[[[ 2.1447e-03, 3.1090e-03, 6.8175e-03, 6.4778e-03, 3.0501e-03],
[ 2.0214e-03, 3.9936e-03, 7.9528e-03, 6.0224e-03, 1.7545e-03],
[ 3.8781e-03, 5.6659e-03, 6.6901e-03, 5.4041e-03, 7.8014e-04],
[ 4.4273e-03, 3.4548e-03, 5.7185e-03, 4.1650e-03, 9.9067e-04],
[ 4.6075e-03, 4.1176e-03, 6.8392e-03, 3.4005e-03, 1.0009e-03]]],
[[[-3.8654e-04, -2.9567e-03, -6.1341e-03, -8.3991e-03, -8.2343e-03],
[-2.9113e-03, -5.4605e-03, -6.3008e-03, -8.2075e-03, -9.6702e-03],
[-1.5218e-03, -4.4105e-03, -5.5651e-03, -6.8926e-03, -6.6076e-03],
[-6.0357e-04, -3.1118e-03, -4.4441e-03, -4.0519e-03, -3.9733e-03],
[-2.8683e-04, -1.6281e-03, -4.2213e-03, -5.5304e-03, -5.0142e-03]]],
[[[-3.7607e-04, -1.7234e-04, -1.4569e-03, -3.5825e-04, 1.4530e-03],
[ 2.6226e-04, 8.5076e-04, 1.2195e-03, 2.7885e-03, 2.5953e-03],
[-7.7404e-04, 1.0984e-03, 7.8208e-04, 5.1286e-03, 4.6842e-03],
[-1.8183e-03, 8.9730e-04, 1.0955e-03, 4.9259e-03, 6.4677e-03],
[ 1.1674e-03, 4.0651e-03, 4.5886e-03, 8.3678e-03, 8.9893e-03]]],
Are those the gradients? If so, why are they not a vector? Below is my neural network:
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.drop_out = nn.Dropout()
        self.fc1 = nn.Linear(7 * 7 * 64, 1000)
        self.fc2 = nn.Linear(1000, 10)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.drop_out(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out
Below is the code for how I train the model and collect the gradients:
model = ConvNet()
klisi = []

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)
loss_list = []
acc_list = []
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Run the forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss_list.append(loss.item())

        # Backprop and perform Adam optimisation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Track the accuracy
        total = labels.size(0)
        _, predicted = torch.max(outputs.data, 1)
        correct = (predicted == labels).sum().item()
        acc_list.append(correct / total)
        if (i + 1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Accuracy: {:.2f}%'
                  .format(epoch + 1, num_epochs, i + 1, total_step, loss.item(),
                          (correct / total) * 100))

    print(model.layer1[0].weight.grad)
    klisi.append(model.layer1[0].weight.grad)
    print(optimizer.param_groups[0]['lr'])
    optimizer.param_groups[0]['lr'] *= 0.9999
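Those are indeed gradients: a Conv2d weight gradient has the same shape as the weight itself, here (out_channels, in_channels, kernel_h, kernel_w) = (32, 1, 5, 5), which is why it prints as a 4-D tensor rather than a vector. Two details matter for the dot product: flatten the tensors first, and store a detached copy, because .grad is overwritten on every backward pass, so appending the live tensor leaves klisi full of references to the latest gradient. A minimal sketch:

# inside the loop: store a snapshot, not a live reference
klisi.append(model.layer1[0].weight.grad.detach().clone())

# afterwards: dot product of the gradients from two epochs
g_first, g_last = klisi[0].flatten(), klisi[-1].flatten()
print(torch.dot(g_first, g_last).item())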
I am building an autoencoder and have found two snippets of code with the same architecture, one in Keras and one in PyTorch. But when I run them, there is a large difference in training time even though they use the same architecture. Could you please explain why I encounter such a huge difference in time as well as in performance?
PyTorch Code
class Encoder(nn.Module):
    def __init__(self, seq_len, n_features, embedding_dim=64):
        super(Encoder, self).__init__()
        self.seq_len, self.n_features = seq_len, n_features
        self.embedding_dim, self.hidden_dim = embedding_dim, 2 * embedding_dim
        self.rnn1 = nn.LSTM(
            input_size=n_features,
            hidden_size=self.hidden_dim,
            num_layers=1,
            batch_first=True
        )
        self.rnn2 = nn.LSTM(
            input_size=self.hidden_dim,
            hidden_size=embedding_dim,
            num_layers=1,
            batch_first=True
        )

    def forward(self, x):
        x = x.reshape((1, self.seq_len, self.n_features))
        x, (_, _) = self.rnn1(x)
        x, (hidden_n, _) = self.rnn2(x)
        return hidden_n.reshape((self.n_features, self.embedding_dim))


class Decoder(nn.Module):
    def __init__(self, seq_len, input_dim=64, n_features=1):
        super(Decoder, self).__init__()
        self.seq_len, self.input_dim = seq_len, input_dim
        self.hidden_dim, self.n_features = 2 * input_dim, n_features
        self.rnn1 = nn.LSTM(
            input_size=input_dim,
            hidden_size=input_dim,
            num_layers=1,
            batch_first=True
        )
        self.rnn2 = nn.LSTM(
            input_size=input_dim,
            hidden_size=self.hidden_dim,
            num_layers=1,
            batch_first=True
        )
        self.output_layer = nn.Linear(self.hidden_dim, n_features)

    def forward(self, x):
        x = x.repeat(self.seq_len, self.n_features)
        x = x.reshape((self.n_features, self.seq_len, self.input_dim))
        x, (hidden_n, cell_n) = self.rnn1(x)
        x, (hidden_n, cell_n) = self.rnn2(x)
        x = x.reshape((self.seq_len, self.hidden_dim))
        return self.output_layer(x)


class RecurrentAutoencoder(nn.Module):
    def __init__(self, seq_len, n_features, embedding_dim=64):
        super(RecurrentAutoencoder, self).__init__()
        self.encoder = Encoder(seq_len, n_features, embedding_dim).to(device)
        self.decoder = Decoder(seq_len, embedding_dim, n_features).to(device)

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
model = RecurrentAutoencoder(seq_len, n_features, 128)
import time

time_dict = {}

def train_model(model, train_dataset, val_dataset, n_epochs):
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.L1Loss(reduction='sum').to(device)
    history = dict(train=[], val=[])
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 10000.0
    time_dict[0] = time.time()
    for epoch in range(1, n_epochs + 1):
        model = model.train()
        train_losses = []
        for seq_true in train_dataset:
            optimizer.zero_grad()
            seq_true = seq_true.to(device)
            seq_pred = model(seq_true)
            loss = criterion(seq_pred, seq_true)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())
        val_losses = []
        model = model.eval()
        with torch.no_grad():
            for seq_true in val_dataset:
                seq_true = seq_true.to(device)
                seq_pred = model(seq_true)
                loss = criterion(seq_pred, seq_true)
                val_losses.append(loss.item())
        train_loss = np.mean(train_losses)
        val_loss = np.mean(val_losses)
        history['train'].append(train_loss)
        history['val'].append(val_loss)
        if val_loss < best_loss:
            best_loss = val_loss
            best_model_wts = copy.deepcopy(model.state_dict())
        print(f'Epoch {epoch}: train loss {train_loss} val loss {val_loss}')
        stop = time.time()
        time_dict[epoch] = stop
    model.load_state_dict(best_model_wts)
    return model.eval(), history

model, history = train_model(
    model,
    train_dataset,
    val_dataset,
    n_epochs=10)
Output of print(model)
RecurrentAutoencoder(
  (encoder): Encoder(
    (rnn1): LSTM(1, 256, batch_first=True)
    (rnn2): LSTM(256, 128, batch_first=True)
  )
  (decoder): Decoder(
    (rnn1): LSTM(128, 128, batch_first=True)
    (rnn2): LSTM(128, 256, batch_first=True)
    (output_layer): Linear(in_features=256, out_features=1, bias=True)
  )
)
Keras Code
model = keras.Sequential()
model.add(keras.layers.LSTM(
    units=256,
    input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))
model.add(keras.layers.LSTM(units=128, return_sequences=True))
model.add(keras.layers.LSTM(units=256, return_sequences=True))
model.add(keras.layers.TimeDistributed(keras.layers.Dense(units=X_train.shape[2])))
model.compile(loss='mae', optimizer='adam')

nb_epoch = 10
model.compile(optimizer='adam',
              loss='mean_squared_error')  # this second compile overrides the 'mae' loss above; the snippet originally called it on an undefined `autoencoder`
tensorboard = TensorBoard(log_dir='/tmp/logs',
                          histogram_freq=0,
                          write_graph=True,  # to visualize
                          write_images=True)
history = model.fit(X_train, X_train,
                    epochs=nb_epoch,
                    shuffle=True,
                    validation_data=(X_validate, X_validate),
                    verbose=1,
                    callbacks=[tensorboard]).history
Output of print(model.summary())
Layer (type)                  Output Shape             Param #
lstm_6 (LSTM)                 (None, 1, 256)           266240
lstm_7 (LSTM)                 (None, 1, 128)           197120
lstm_8 (LSTM)                 (None, 1, 256)           394240
time_distributed (TimeDistri  (None, 1, 3)             771
The architectures of these two models look the same, but there is a difference in performance and training time.
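One plausible cause of the gap (an assumption, since the data pipeline is not shown): the PyTorch loop feeds one sequence at a time, and the encoder even hard-codes a batch of 1 in its reshape, while Keras's fit defaults to batches of 32, so the Keras run launches far fewer, larger kernels per epoch. A sketch of batching the PyTorch side, assuming train_dataset holds tensors of shape (seq_len, n_features) and the reshape calls in Encoder/Decoder are generalized to use -1 for the batch dimension:

from torch.utils.data import DataLoader

loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
for seq_batch in loader:            # shape (batch, seq_len, n_features)
    seq_batch = seq_batch.to(device)
    optimizer.zero_grad()
    seq_pred = model(seq_batch)     # requires batch-aware reshapes inside the model
    loss = criterion(seq_pred, seq_batch)
    loss.backward()
    optimizer.step()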
I'm trying to implement ResNet18 in PyTorch but I'm having some trouble with it. My code is this:
device = torch.device("cuda:0")
class ResnetBlock(nn.Module):
    def __init__(self, strides, nf, nf0, reps, bn):
        super(ResnetBlock, self).__init__()
        self.adapt = strides == 2
        self.layers = []
        self.relus = []
        self.adapt_layer = nn.Conv2d(nf0, nf, kernel_size=1, stride=strides, padding=0) if self.adapt else None
        for i in range(reps):
            self.layers.append(nn.Sequential(
                nn.Conv2d(nf0, nf, kernel_size=3, stride=strides, padding=1),
                nn.BatchNorm2d(nf, eps=0.001, momentum=0.99),
                nn.ReLU(),
                nn.Conv2d(nf, nf, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(nf, eps=0.001, momentum=0.99)))
            self.relus.append(nn.ReLU())
            strides = 1
            nf0 = nf

    def forward(self, x):
        for i, (layer, relu) in enumerate(zip(self.layers, self.relus)):
            rama = layer(x)
            if self.adapt and i == 0:
                x = self.adapt_layer(x)
            x = x + rama
            x = relu(x)
        return x
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.blocks = nn.Sequential(
            ResnetBlock(1, 64, 64, 2, bn),
            ResnetBlock(2, 128, 64, 2, bn),
            ResnetBlock(2, 256, 128, 2, bn),
            ResnetBlock(2, 512, 256, 2, bn))
        self.fcout = nn.Linear(512, 10)

    def forward(self, x):
        out = self.layer1(x)
        out = self.blocks(out)
        out = out.reshape(out.size(0), -1)
        out = self.fcout(out)
        return out
num_epochs = 50
num_classes = 10
batch_size = 50
learning_rate = 0.00001

trans = transforms.ToTensor()
train_dataset = torchvision.datasets.CIFAR10(root="./dataset_pytorch", train=True, download=True, transform=trans)
test_dataset = torchvision.datasets.CIFAR10(root="./dataset_pytorch", train=False, download=True, transform=trans)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

def weights_init(m):
    if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight.data)
        nn.init.zeros_(m.bias.data)

model = ConvNet()
model.apply(weights_init)
model.to(device)
summary(model, (3, 32, 32))
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, eps=1e-6)

# Train the model
total_step = len(train_loader)
loss_list = []
acc_list = []
acc_list_test = []
for epoch in range(num_epochs):
    total = 0
    correct = 0
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()

        # Run the forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss_list.append(loss.item())

        # Backprop and perform Adam optimisation
        loss.backward()
        optimizer.step()

        # Track the accuracy
        total += labels.size(0)
        _, predicted = torch.max(outputs.data, 1)
        correct += (predicted == labels).sum().item()
        acc_list.append(correct / total)

    print("Train")
    print('Epoch [{}/{}], Accuracy: {:.2f}%'
          .format(epoch + 1, num_epochs, (correct / total) * 100))

    total_test = 0
    correct_test = 0
    for i, (images, labels) in enumerate(test_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Run the forward pass
        outputs = model(images)

        # Track the accuracy
        total_test += labels.size(0)
        _, predicted = torch.max(outputs.data, 1)
        correct_test += (predicted == labels).sum().item()
        acc_list_test.append(correct_test / total_test)

    print("Test")
    print('Epoch [{}/{}], Accuracy: {:.2f}%'
          .format(epoch + 1, num_epochs, (correct_test / total_test) * 100))
It's weird, because it throws the error "Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same" even though I've moved both the model and the data to CUDA.
I guess it's related to how I defined or used ResnetBlock, because if I remove those blocks from ConvNet (dropping the line out = self.blocks(out)), the code works. But I don't know what I'm doing wrong.
The first thing to check is this line:
model.to(device)
For tensors, .to() is not in-place: it returns the converted object, so the safe idiom is to assign the result:
model = model.to(device)
(For an nn.Module the call does move the registered parameters in place, so this alone does not explain the error.)
EDIT: The real problem: a vanilla Python list is not tracked by PyTorch, so the modules stored in self.layers and self.relus are never registered, and model.to(device) never moves them to the GPU. You need to use nn.ModuleList.
From
self.layers = []
self.relus = []
To
self.layers = nn.ModuleList()
self.relus = nn.ModuleList()
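A quick way to see the original problem, assuming the class definition from the question: submodules stored in a plain Python list are invisible to PyTorch, so parameters() misses them and model.to(device) never moves them.

block = ResnetBlock(strides=1, nf=64, nf0=64, reps=2, bn=False)

# with plain lists, only adapt_layer would be registered, and here it is None,
# so PyTorch reports zero parameters even though the conv/bn layers exist
print(sum(p.numel() for p in block.parameters()))   # 0 before the fix

# after switching to nn.ModuleList, the same line counts the conv/bn weights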