This is my CNN class:
class CNN(nn.Module):
def __init__(
self,
vocab_size,
emb_dim,
out_channels,
kernel_sizes,
dropout,
):
super().__init__()
self.embedding = nn.Embedding(vocab_size, emb_dim)
self.conv_0 = nn.Conv2d(in_channels=1, out_channels=out_channels, kernel_size=(kernel_sizes[0], emb_dim))
self.conv_1 = nn.Conv2d(in_channels=1, out_channels=out_channels, kernel_size=(kernel_sizes[1], emb_dim))
self.conv_2 = nn.Conv2d(in_channels=1, out_channels=out_channels, kernel_size=(kernel_sizes[2], emb_dim))
self.fc = nn.Linear(len(kernel_sizes) * out_channels, 1)
self.dropout = nn.Dropout(dropout)
def forward(self, text):
embedded = self.embedding(text)
print('embedded', embedded.shape)
embedded = embedded.unsqueeze(1) # may be reshape here
print('embedded', embedded.shape)
conved_0 = F.relu(self.conv_0(embedded)).squeeze(3) # may be reshape here
print('conved_0', conved_0.shape)
conved_1 = F.relu(self.conv_1(embedded)).squeeze(3) # may be reshape here
print('conved_1', conved_1.shape)
conved_2 = F.relu(self.conv_2(embedded)).squeeze(3) # may be reshape here
print('conved_2', conved_2.shape)
pooled_0 = F.max_pool1d(conved_0, conved_0.shape[2]).squeeze(2)
print('pooled_0', pooled_0.shape)
pooled_1 = F.max_pool1d(conved_1, conved_1.shape[2]).squeeze(2)
print('pooled_1', pooled_1.shape)
pooled_2 = F.max_pool1d(conved_2, conved_2.shape[2]).squeeze(2)
print('pooled_2', pooled_2.shape)
cat = self.dropout(torch.cat((pooled_0, pooled_1, pooled_2), dim=1))
print('cat', cat.shape)
return self.fc(cat)
Variables:
kernel_sizes = [3, 4, 5]
vocab_size = len(TEXT.vocab)
out_channels = 64
dropout = 0.2
dim = 300
model = CNN(vocab_size=vocab_size, emb_dim=dim, out_channels=out_channels,
kernel_sizes=kernel_sizes, dropout=dropout)
And the training loop:
import numpy as np
min_loss = np.inf
cur_patience = 0
for epoch in range(1, max_epochs + 1):
train_loss = 0.0
model.train()
pbar = tqdm(enumerate(train_iter), total=len(train_iter), leave=False)
pbar.set_description(f"Epoch {epoch}")
for it, batch in pbar:
#YOUR CODE GOES HERE
opt.zero_grad()
input = batch.text[0].to(device)
output = model(input)
loss = loss_func(output, batch.label)
loss.backward()
opt.step()
train_loss += loss.item()
train_loss /= len(train_iter)
val_loss = 0.0
model.eval()
pbar = tqdm(enumerate(valid_iter), total=len(valid_iter), leave=False)
pbar.set_description(f"Epoch {epoch}")
for it, batch in pbar:
# YOUR CODE GOES HERE
input = batch.text[0].to(device)
output = model(input)
loss = loss_func(output, batch.label)
val_loss += loss.item()
val_loss /= len(valid_iter)
if val_loss < min_loss:
min_loss = val_loss
best_model = model.state_dict()
else:
cur_patience += 1
if cur_patience == patience:
cur_patience = 0
break
print('Epoch: {}, Training Loss: {}, Validation Loss: {}'.format(epoch, train_loss, val_loss))
model.load_state_dict(best_model)
I get this error:
RuntimeError: Expected 4-dimensional input for 4-dimensional weight [64, 1, 3, 300], but got 3-dimensional input of size [894, 1, 300] instead
in this line:
---> 32 conved_0 = F.relu(self.conv_0(embedded)).squeeze(3)
I've tried using Conv1d, but still had problems with the dimensions. Could somebody please explain what I should fix here for the network to train?
EDIT:
This is my class but with Conv1d:
class CNN(nn.Module):
def __init__(
self,
vocab_size,
emb_dim,
out_channels,
kernel_sizes,
dropout,
):
super().__init__()
self.embedding = nn.Embedding(vocab_size, emb_dim)
self.conv_0 = nn.Conv1d(in_channels=1, out_channels=out_channels, kernel_size=kernel_sizes[0])
self.conv_1 = nn.Conv1d(in_channels=1, out_channels=out_channels, kernel_size=kernel_sizes[1])
self.conv_2 = nn.Conv1d(in_channels=1, out_channels=out_channels, kernel_size=kernel_sizes[2])
self.fc = nn.Linear(len(kernel_sizes) * out_channels, 1)
self.dropout = nn.Dropout(dropout)
def forward(self, text):
embedded = self.embedding(text)
print('embedded', embedded.shape)
embedded = embedded.unsqueeze(1) # may be reshape here
print('embedded', embedded.shape)
conved_0 = F.relu(self.conv_0(embedded)) # may be reshape here
print('conved_0', conved_0.shape)
conved_1 = F.relu(self.conv_1(embedded)) # may be reshape here
print('conved_1', conved_1.shape)
conved_2 = F.relu(self.conv_2(embedded)) # may be reshape here
print('conved_2', conved_2.shape)
pooled_0 = F.max_pool1d(conved_0, conved_0.shape[2]).squeeze(2)
print('pooled_0', pooled_0.shape)
pooled_1 = F.max_pool1d(conved_1, conved_1.shape[2]).squeeze(2)
print('pooled_1', pooled_1.shape)
pooled_2 = F.max_pool1d(conved_2, conved_2.shape[2]).squeeze(2)
print('pooled_2', pooled_2.shape)
cat = self.dropout(torch.cat((pooled_0, pooled_1, pooled_2), dim=1))
print('cat', cat.shape)
return self.fc(cat)
Dimensions output:
embedded torch.Size([1115, 300])
embedded torch.Size([1115, 1, 300])
conved_0 torch.Size([1115, 64, 298])
conved_1 torch.Size([1115, 64, 297])
conved_2 torch.Size([1115, 64, 296])
pooled_0 torch.Size([1115, 64])
pooled_1 torch.Size([1115, 64])
pooled_2 torch.Size([1115, 64])
cat torch.Size([1115, 192])
Error:
ValueError: Target size (torch.Size([128])) must be the same as input size (torch.Size([1115, 1]))
What I was missing is that I had the batch_first parameter set to True, which swapped batch_size and seq_len. Once I set it to False, everything worked perfectly.
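For anyone hitting the same thing, here is a minimal sketch (plain PyTorch, made-up sizes) of why the axis order matters: the conv layers treat dim 0 as the batch dimension, so after unsqueeze(1) the embedded tensor has to be [batch_size, 1, seq_len, emb_dim].
import torch
import torch.nn as nn

emb = nn.Embedding(100, 300)
batch_first_text = torch.randint(0, 100, (128, 50))   # [batch_size, seq_len]
seq_first_text = batch_first_text.t()                 # [seq_len, batch_size]

print(emb(batch_first_text).unsqueeze(1).shape)  # torch.Size([128, 1, 50, 300]) -> Conv2d sees 128 samples
print(emb(seq_first_text).unsqueeze(1).shape)    # torch.Size([50, 1, 128, 300]) -> Conv2d sees 50 "samples"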
I want to learn from 100 features over 3000 days and predict the next day's 100 values, but I don't know why this error happens or how to resolve it. Please help me.
batchsize = 100, hidden_dim = 10, seq_length = 60, data_dim = 100, output_dim = 100
class Net(nn.Module):
def __init__(self, input_dim, hidden_dim, batch_size, output_dim, layers):
super(Net, self).__init__()
self.hidden_dim = hidden_dim
self.batch_size = batch_size
self.output_dim = output_dim
self.layers = layers
self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=layers,
batch_first=True)
self.fc = nn.Linear(hidden_dim, output_dim, bias = True)
self.hidden = self.reset_hidden_state()
def reset_hidden_state(self):
return (
torch.zeros(self.layers, self.batch_size, self.hidden_dim),
torch.zeros(self.layers, self.batch_size, self.hidden_dim))
def forward(self, x):
x, self.hidden = self.lstm(x, self.hidden)
x = self.fc(x[:, -1,:]) # [batch_size, seq_len, hidden_dim]
return x
# Train part
def train_model(model, train_df, num_epochs = None, lr = None, verbose = 10, patience = 10):
criterion = nn.MSELoss().to(device)
optimizer = optim.Adam(model.parameters(), lr = learning_rate)
nb_epochs = num_epochs
train_hist = np.zeros(nb_epochs)
for epoch in range(nb_epochs):
avg_cost = 0
total_batch = len(train_df)
for batch_idx, samples in enumerate(train_df):
x_train, y_train = samples
# reset hidden state for each sequence
model.reset_hidden_state()
model.hidden = [hidden.to(device) for hidden in model.reset_hidden_state()]
outputs = model(x_train)
loss = criterion(outputs, y_train)
optimizer.zero_grad()
loss.backward()
optimizer.step()
avg_cost += loss/total_batch
train_hist[epoch] = avg_cost
if epoch % verbose == 0:
print('Epoch:', '%04d' % (epoch), 'train loss :', '{:.4f}'.format(avg_cost))
if (epoch % patience == 0) & (epoch != 0):
if train_hist[epoch-patience] < train_hist[epoch]:
print('\n Early Stopping')
break
return model.eval(), train_hist
Prediction part:
net = Net(data_dim, hidden_dim, batch , output_dim, 1).to(device)
model, train_hist = train_model(net, dataloader, num_epochs = nb_epochs, lr = learning_rate, verbose = 20, patience = 10)
with torch.no_grad():
pred = np.zeros((2940,1,100))
for pr in range(len(trainX_tensor)):
model.reset_hidden_state()
predicted = model(torch.unsqueeze(trainX_tensor[pr], 0)).cpu()
print(predicted.shape)
#predicted = predicted.item()
pred[pr,:,:] = predicted
#print(pr)
# INVERSE
pred_inverse = scaler.inverse_transform(pred)
I get the error: Expected hidden[0] size (1, 1, 10), got [1, 100, 10]
How can I fix the hidden[0] size?
I'm implementing an ELMo model (paper + GRU architecture) in PyTorch for a sentiment analysis task (2 classes).
My problem is that after training the model for 3 epochs (which takes almost 7 hours), the parameters are almost constant: they do get updated, but the gradient of every parameter is almost zero and the parameters change very slowly.
After training the model on almost 100 samples (just as a test, since each epoch takes a long time), I printed the model's output on the trained samples (64 sentences); as you can see, all of the outputs are almost 0.61 or 0.62 (the model's output before applying the sigmoid is almost zero):
[0.6190, 0.6177, 0.6218, 0.6209, 0.6216, 0.6177, 0.6218, 0.6248, 0.6187,
0.6209, 0.6208, 0.6197, 0.6208, 0.6201, 0.6164, 0.6204, 0.6187, 0.6186,
0.6172, 0.6227, 0.6180, 0.6176, 0.6177, 0.6189, 0.6167, 0.6162, 0.6204,
0.6212, 0.6212, 0.6170, 0.6175, 0.6188, 0.6200, 0.6207, 0.6211, 0.6186,
0.6171, 0.6190, 0.6171, 0.6215, 0.6204, 0.6166, 0.6169, 0.6189, 0.6192,
0.6171, 0.6198, 0.6210, 0.6217, 0.6182, 0.6205, 0.6167, 0.6185, 0.6185,
0.6247, 0.6201, 0.6183, 0.6172, 0.6248, 0.6156, 0.6187, 0.6221, 0.6184,
0.6200]
mean gradient value for the first layer (character-based embedding) over 7 iterations (with batch size 4):
-3.2057e-08
-1.0591e-07
8.0309e-10
-3.1149e-08
1.7176e-08
1.0479e-08
-5.9668e-08
loss values:
0.6922
0.6888
0.6932
0.6933
0.705
0.6812
0.7068
first layer parameters (before training):
Parameter containing:
tensor([[-0.8127, 0.0848, -1.8994, ..., -0.4188, 0.0737, 1.7480],
[-0.9858, 1.2334, -1.5336, ..., -0.1520, -0.8097, 1.5319],
[-0.3637, 0.2356, -0.6203, ..., -0.2677, 0.3540, -0.8167],
...,
[ 0.5995, 0.0444, 0.5533, ..., -0.6380, -0.2782, 0.4377],
[-1.1214, 0.1163, 0.6494, ..., 0.9082, 0.0925, -2.0435],
[ 1.1774, 2.0876, 1.2902, ..., 0.1933, 0.6906, -0.9966]],
device='cuda:0', requires_grad=True)
first layer parameters (after training on 1000 iterations):
Parameter containing:
tensor([[ 0.4986, -0.1885, -2.1546, ..., 1.6023, 1.0103, -0.0118],
[-0.2110, -0.0524, -0.5779, ..., -1.7709, -0.6997, 1.7685],
[-0.8088, -0.0187, 0.4958, ..., 0.2945, -0.8318, 0.5191],
...,
[ 0.0324, 0.6847, 0.7107, ..., -0.5620, 1.1643, -0.1883],
[ 0.3290, -1.5829, -1.2789, ..., -0.6205, -1.9693, -0.8639],
[ 1.1525, 1.1839, 1.4262, ..., 0.1396, -0.0622, -1.1427]],
device='cuda:0', requires_grad=True)
conv1d_embed module (Embedding + Convolution 1D):
class Conv1d_Embed(nn.Module):
def __init__(self, embed_dim, filters_list):
super(Conv1d_Embed, self).__init__()
self.filters_list = filters_list
self.embed = nn.Embedding(num_embeddings=chars_count, embedding_dim=embed_dim, device=device)
self.conv_list = nn.ModuleList(modules=None)
self.conv_norm_layer = nn.LayerNorm([100, np.sum(np.array(self.filters_list)[:, 0])])
for filter in filters_list:
conv = nn.Conv1d(in_channels=embed_dim, out_channels=filter[0], kernel_size=filter[1], stride=1, padding=0, dilation=1, device=device)
self.conv_list.append(conv)
def forward(self, X):
X = self.embed(X).permute(0, 1, 3, 2)
X_conv = torch.empty(size=(X.shape[0], X.shape[1], np.sum(np.array(self.filters_list)[:, 0])))
for sentence_idx in range(X.shape[0]):
idx_sum = 0
for convolution in self.conv_list:
torch.cuda.empty_cache()
conv_result = convolution(X[sentence_idx])
conv_result = torch.max(conv_result, dim=2).values
seq_columns = convolution.out_channels
X_conv[sentence_idx][:, idx_sum:idx_sum + seq_columns] = conv_result
idx_sum += seq_columns
X_conv = self.conv_norm_layer(X_conv)
X_conv = torch.relu(X_conv)
torch.cuda.empty_cache()
return X_conv
highway network module:
class Highway_Network(nn.Module):
def __init__(self, H_act:str, in_dim:int):
super(Highway_Network, self).__init__()
if H_act == 'relu': self.H_act = nn.ReLU()
elif H_act == 'tanh': self.H_act = nn.Tanh()
else: self.H_act = nn.Sigmoid()
self.in_dim = in_dim
self.H = nn.Linear(in_features=in_dim, out_features=in_dim, bias=False, device=device)
self.T = nn.Linear(in_features=in_dim, out_features=in_dim, bias=True, device=device)
def forward(self, X):
T = torch.sigmoid(self.T(X))
H = self.H_act(self.H(X))
y = (H * T) + (X * (1 - T))
torch.cuda.empty_cache()
return y
ELMo module:
class ELMo(nn.Module):
def __init__(self, in_dim_for_highway, embed_dim, filters_list, proj_size, rnn_hidden_size):
super(ELMo, self).__init__()
self.conv1d_embed = Conv1d_Embed(embed_dim, filters_list)
self.highway_layer1 = Highway_Network(H_act='tanh', in_dim=in_dim_for_highway)
self.highway_layer2 = Highway_Network(H_act='tanh', in_dim=in_dim_for_highway)
self.proj_after_highway = nn.Linear(in_features=in_dim_for_highway, out_features=proj_size, bias=True, device=device)
self.norm_after_highway = nn.LayerNorm([100, proj_size], device=device)
self.rnn_layer1_forward = nn.GRU(input_size=proj_size, hidden_size=rnn_hidden_size, num_layers=1, bias=True,
batch_first=True, dropout=0, bidirectional=False, device=device)
self.rnn_layer1_backward = nn.GRU(input_size=proj_size, hidden_size=rnn_hidden_size, num_layers=1, bias=True,
batch_first=True, dropout=0, bidirectional=False, device=device)
self.rnn_layer2_forward = nn.GRU(input_size=proj_size, hidden_size=rnn_hidden_size, num_layers=1, bias=True,
batch_first=True, dropout=0, bidirectional=False, device=device)
self.rnn_layer2_backward = nn.GRU(input_size=proj_size, hidden_size=rnn_hidden_size, num_layers=1, bias=True,
batch_first=True, dropout=0, bidirectional=False, device=device)
self.proj_after_rnn1_forward = nn.Linear(in_features=rnn_hidden_size, out_features=proj_size, bias=True, device=device)
self.proj_after_rnn1_backward = nn.Linear(in_features=rnn_hidden_size, out_features=proj_size, bias=True, device=device)
self.proj_after_rnn2_forward = nn.Linear(in_features=rnn_hidden_size, out_features=proj_size, bias=True, device=device)
self.proj_after_rnn2_backward = nn.Linear(in_features=rnn_hidden_size, out_features=proj_size, bias=True, device=device)
self.output_layer = nn.Linear(in_features=102400, out_features=1, bias=True, device=device)
def forward(self, X):
output = self.conv1d_embed(X).to(device)
output = self.highway_layer1(output)
output = self.highway_layer2(output)
output = self.proj_after_highway(output)
output = self.norm_after_highway(output)
output = torch.relu(output)
forward_output = self.rnn_layer1_forward(output)[0] # forward
forward_output = torch.relu(forward_output)
forward_output = self.proj_after_rnn1_forward(forward_output)
forward_output = torch.relu(forward_output)
backward_output = self.rnn_layer1_backward(torch.flip(output, dims=[1]))[0] # backward
backward_output = torch.relu(backward_output)
backward_output = self.proj_after_rnn1_backward(backward_output)
backward_output = torch.relu(backward_output)
forward_output = self.rnn_layer2_forward(forward_output)[0]
forward_output = torch.relu(forward_output)
forward_output = self.proj_after_rnn2_forward(forward_output)
forward_output = torch.relu(forward_output)
backward_output = self.rnn_layer2_backward(backward_output)[0]
backward_output = torch.relu(backward_output)
backward_output = self.proj_after_rnn2_backward(backward_output)
backward_output = torch.relu(backward_output)
backward_output = torch.flip(backward_output, dims=[1])
output = torch.concat((forward_output, backward_output), dim=2)
output = output.reshape((output.shape[0], output.shape[1] * output.shape[2]))
output = self.output_layer(output)
output = torch.sigmoid(output)
return output
some other details:
embed_dim = 50
model_location = 'drive/MyDrive/elmo_dataset_words_lower_100/elmo_model.mdl'
optimizer_location = 'drive/MyDrive/elmo_dataset_words_lower_100/elmo_optimizer.optm'
filters_list = [[32, 1], [32, 2], [64, 3], [128, 4], [256, 5], [512, 6], [1024, 7]]
in_dim_for_highway = np.sum(np.array(filters_list)[:, 0])
proj_size = 512
rnn_hidden_size = 4096
Model setup and training loop (forward + backward passes):
model = ELMo(in_dim_for_highway, embed_dim, filters_list, proj_size, rnn_hidden_size)
optimizer = optim.Adam(params=model.parameters(), lr=1e-5)
# model.load_state_dict(torch.load(model_location))
# optimizer.load_state_dict(torch.load(optimizer_location))
print(summary(model))
batch_size = 4
epochs = 5 # Started by 5
bce = nn.BCELoss()
new_slices = slices = pd.read_csv('drive/MyDrive/elmo_dataset_words_lower_100/slice_list.csv').drop(columns=['Unnamed: 0']) # slice 10 is for test
for slice_idx in range(len(slices)):
slice_path = slices.iloc[slice_idx, :].values[0]
print(f'Training ELMo on {slice_path}...')
dataset = np.load(slice_path)
labels = torch.Tensor(dataset['labels'].astype(np.float32)).to('cpu')
dataset = torch.Tensor(dataset['data']).type(torch.int32).to('cpu')
for label_idx in range(len(labels)):
if labels[label_idx] == -1: labels[label_idx] = 0
# elif labels[label_idx] == 0: labels[label_idx] = 1
elif labels[label_idx] == 1: labels[label_idx] = 1
dataset_size = dataset.shape[0]
dataset_loss = list()
idx = torch.randperm(dataset.shape[0])
dataset = dataset[idx] # Randomization
labels = labels[idx] # Randomization
for batch in range(batch_size, dataset.shape[0] + batch_size, batch_size):
optimizer.zero_grad()
X = dataset[batch - batch_size:batch].to(device)
y = labels[batch - batch_size:batch].to(device)
output = model(X).squeeze()
loss = bce(output, y)
loss.backward()
optimizer.step()
print(torch.mean(list(model.parameters())[0].grad))
loss_value = loss.item()
dataset_loss.append(loss_value)
print(f'Batch: {batch} - Loss: {loss_value} - Dataset size: {dataset_size}')
print('---------------------')
torch.save(model.state_dict(), model_location)
torch.save(optimizer.state_dict(), optimizer_location)
print(f'Dataset slice: {slice_path} - Loss: {np.mean(dataset_loss)}')
print(f'Trained model saved in {model_location}')
print(f'Optimizer saved in {optimizer_location}')
print('---------------------')
new_slices = new_slices.drop(index=slice_idx)
new_slices.to_csv('drive/MyDrive/elmo_dataset_words_lower_100/slice_list.csv')
del X, y, dataset, labels, output
collect()
I have tested every hyperparameter you can think of (batch size, learning rate, activation functions, projection size, etc.) and checked the labels.
What is the problem? I think there is a mistake in how I'm using PyTorch modules, e.g. autograd...
I'm currently using this repo to perform NLP and learn more about CNNs on my own dataset, and I keep running into an error about a shape mismatch:
ValueError: Target size (torch.Size([64])) must be the same as input size (torch.Size([15]))
10 }
11 for epoch in tqdm(range(params['epochs'])):
---> 12 train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
13 valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
14 epoch_mins, epoch_secs = epoch_time(start_time, end_time)
57 print("PredictionShapeAfter:")
58 print(predictions.shape)
---> 59 loss = criterion(predictions, batch.l)
60
61 acc = binary_accuracy(predictions, batch.l)
Doing some digging, I found that my CNN's prediction has a different size from the training-data truth it's being compared to:
Input Shape:
torch.Size([15, 64])
Truth Shape:
torch.Size([64])
embedded unsqueezed: torch.Size([15, 1, 64, 100])
cat shape: torch.Size([15, 300])
Prediction Shape Before Squeeze:
torch.Size([15, 1])
PredictionShapeAfter:
torch.Size([15])
The model is producing predictions whose size (the last value in this list) equals the first dimension of the input rather than the batch size. Is this a common problem, and is there a way to rectify this issue?
My Model:
class CNN(nn.Module):
def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim,
dropout, pad_idx):
super().__init__()
self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx = pad_idx)
self.convs = nn.ModuleList([
nn.Conv2d(in_channels = 1,
out_channels = n_filters,
kernel_size = (fs, embedding_dim))
for fs in filter_sizes
])
self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)
self.dropout = nn.Dropout(dropout)
def forward(self, text):
embedded = self.embedding(text)
embedded = embedded.unsqueeze(1)
print(f"embedded unsqueezed: {embedded.shape}")
conved = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs]
pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
cat = self.dropout(torch.cat(pooled, dim = 1))
print(f"cat shape: {cat.shape}")
return self.fc(cat)
My Training function:
def train(model, iterator, optimizer, criterion):
epoch_loss = 0
epoch_acc = 0
model.train()
for batch in iterator:
optimizer.zero_grad()
print("InputShape:")
print(batch.t.shape)
print("Truth Shape:")
print(batch.l.shape)
predictions = model(batch.t)
print("Prediction Shape Before Squeeze:")
print(predictions.shape)
predictions = predictions.squeeze(1)
print("PredictionShapeAfter:")
print(predictions.shape)
loss = criterion(predictions, batch.l)
acc = binary_accuracy(predictions, batch.l)
loss.backward()
optimizer.step()
epoch_loss += loss.item()
epoch_acc += acc.item()
return epoch_loss / len(iterator), epoch_acc / len(iterator)
My full code can be found at this link.
Your issue is here:
self.convs = nn.ModuleList([
nn.Conv2d(in_channels = 1,
out_channels = n_filters,
kernel_size = (fs, embedding_dim))
for fs in filter_sizes
])
You are inputting data of shape [15, 1, 64, 100], which the convolutions are interpreting as batches of size 15, of 1-channel images of HxW 64x100.
What it appears you want is a batch of size 64, so swap those dimensions first:
...
embedded = embedded.swapdims(0,2)
conved = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs]
...
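If it helps, a quick sanity check of that fix with dummy data matching the shapes printed above (Tensor.swapdims is just an alias for transpose):
import torch

embedded = torch.randn(15, 1, 64, 100)  # [seq_len, 1, batch, emb_dim], as in the question
embedded = embedded.swapdims(0, 2)       # -> batch of 64, one channel, 15 x 100 "image"
print(embedded.shape)                    # torch.Size([64, 1, 15, 100])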
I want to train a 1D CNN on time series data. I get the following error message: "1D target tensor expected, multi-target not supported".
Here is the code, with simulated data matching the structure of my data, as well as the error message.
import torch
from torch.utils.data import DataLoader
import torch.utils.data as data
import torch.nn as nn
import numpy as np
import random
from tqdm.notebook import tqdm
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
train_dataset = []
n_item = 20
for i in range(0,n_item):
train_data = np.random.uniform(-10, 10, 500)
train_dataset.append(train_data)
train_dataset = np.asarray(train_dataset)
train_dataset.shape
ecg_train = torch.from_numpy(train_dataset).float()
labels_train = np.random.randint(2, size=n_item)
labels_train = torch.from_numpy(labels_train).long()
val_dataset = []
n_item = 10
for i in range(0,n_item):
val_data = np.random.uniform(-10, 10, 500)
val_dataset.append(val_data)
val_dataset = np.asarray(val_dataset)
val_dataset.shape
ecg_validation = torch.from_numpy(val_dataset).float()
labels_validation = np.random.randint(2, size=n_item)
labels_validation = torch.from_numpy(labels_validation).long()
class ECGNet(data.Dataset):
"""ImageNet Limited dataset."""
def __init__(self, ecgs, labls, transform=None):
self.ecg = ecgs
self.target = labls
self.transform = transform
def __getitem__(self, idx):
ecgVec = self.ecg[idx] #.reshape(10, -1)
labelID = self.target[idx].reshape(1)
return ecgVec,labelID
def __len__(self):
return len(self.ecg)
train_data = ECGNet(ecg_train,
labels_train,
)
print("size of Training dataset: {}".format(len(train_data)))
validation_data = ECGNet(ecg_validation,
labels_validation,
)
print("size of Training dataset: {}".format(len(validation_data)))
batch_size = 1
train_dataloader = DataLoader(dataset = train_data,
batch_size=batch_size,
shuffle = True,
num_workers = 0)
val_dataloader = DataLoader(dataset = validation_data,
batch_size=batch_size,
shuffle = True,
num_workers = 0)
def train_epoch(model, train_dataloader, optimizer, loss_fn):
losses = []
correct_predictions = 0
# Iterate mini batches over training dataset
for images, labels in tqdm(train_dataloader):
images = images.to(device)
#labels = labels.squeeze_()
labels = labels.to(device)
#labels = labels.to(device=device, dtype=torch.int64)
# Run predictions
output = model(images)
# Set gradients to zero
optimizer.zero_grad()
# Compute loss
loss = loss_fn(output, labels)
# Backpropagate (compute gradients)
loss.backward()
# Make an optimization step (update parameters)
optimizer.step()
# Log metrics
losses.append(loss.item())
predicted_labels = output.argmax(dim=1)
correct_predictions += (predicted_labels == labels).sum().item()
accuracy = 100.0 * correct_predictions / len(train_dataloader.dataset)
# Return loss values for each iteration and accuracy
mean_loss = np.array(losses).mean()
return mean_loss, accuracy
def evaluate(model, dataloader, loss_fn):
losses = []
correct_predictions = 0
with torch.no_grad():
for images, labels in dataloader:
images = images.to(device)
#labels = labels.squeeze_()
labels = labels.to(device=device, dtype=torch.int64)
# Run predictions
output = model(images)
# Compute loss
loss = loss_fn(output, labels)
# Save metrics
predicted_labels = output.argmax(dim=1)
correct_predictions += (predicted_labels == labels).sum().item()
losses.append(loss.item())
mean_loss = np.array(losses).mean()
accuracy = 100.0 * correct_predictions / len(dataloader.dataset)
# Return mean loss and accuracy
return mean_loss, accuracy
def train(model, train_dataloader, val_dataloader, optimizer, n_epochs, loss_function):
# We will monitor loss functions as the training progresses
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []
for epoch in range(n_epochs):
model.train()
train_loss, train_accuracy = train_epoch(model, train_dataloader, optimizer, loss_fn)
model.eval()
val_loss, val_accuracy = evaluate(model, val_dataloader, loss_fn)
train_losses.append(train_loss)
val_losses.append(val_loss)
train_accuracies.append(train_accuracy)
val_accuracies.append(val_accuracy)
print('Epoch {}/{}: train_loss: {:.4f}, train_accuracy: {:.4f}, val_loss: {:.4f}, val_accuracy: {:.4f}'.format(epoch+1, n_epochs,
train_losses[-1],
train_accuracies[-1],
val_losses[-1],
val_accuracies[-1]))
return train_losses, val_losses, train_accuracies, val_accuracies
class Simple1DCNN(torch.nn.Module):
def __init__(self):
super(Simple1DCNN, self).__init__()
self.layer1 = torch.nn.Conv1d(in_channels=50,
out_channels=20,
kernel_size=5,
stride=2)
self.act1 = torch.nn.ReLU()
self.layer2 = torch.nn.Conv1d(in_channels=20,
out_channels=10,
kernel_size=1)
self.fc1 = nn.Linear(10* 3, 2)
def forward(self, x):
print(x.shape)
x = x.view(1, 50,-1)
print(x.shape)
x = self.layer1(x)
print(x.shape)
x = self.act1(x)
print(x.shape)
x = self.layer2(x)
print(x.shape)
x = x.view(1,-1)
print(x.shape)
x = self.fc1(x)
print(x.shape)
print(x)
return x
model_a = Simple1DCNN()
model_a = model_a.to(device)
criterion = nn.CrossEntropyLoss()
loss_fn = torch.nn.CrossEntropyLoss()
n_epochs_a = 50
learning_rate_a = 0.01
alpha_a = 1e-5
momentum_a = 0.9
optimizer = torch.optim.SGD(model_a.parameters(),
momentum = momentum_a,
nesterov = True,
weight_decay = alpha_a,
lr=learning_rate_a)
train_losses_a, val_losses_a, train_acc_a, val_acc_a = train(model_a,
train_dataloader,
val_dataloader,
optimizer,
n_epochs_a,
loss_fn
)
Error message:
cpu
size of Training dataset: 20
size of Training dataset: 10
0%| | 0/20 [00:00<?, ?it/s]
torch.Size([1, 500])
torch.Size([1, 50, 10])
torch.Size([1, 20, 3])
torch.Size([1, 20, 3])
torch.Size([1, 10, 3])
torch.Size([1, 30])
torch.Size([1, 2])
tensor([[ 0.5785, -1.0169]], grad_fn=<AddmmBackward>)
Traceback (most recent call last):
File "SO_question.py", line 219, in <module>
train_losses_a, val_losses_a, train_acc_a, val_acc_a = train(model_a,
File "SO_question.py", line 137, in train
train_loss, train_accuracy = train_epoch(model, train_dataloader, optimizer, loss_fn)
File "SO_question.py", line 93, in train_epoch
loss = loss_fn(output, labels)
File "/Users/mymac/Documents/programming/python/mainenv/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/Users/mymac/Documents/programming/python/mainenv/lib/python3.8/site-packages/torch/nn/modules/loss.py", line 961, in forward
return F.cross_entropy(input, target, weight=self.weight,
File "/Users/mymac/Documents/programming/python/mainenv/lib/python3.8/site-packages/torch/nn/functional.py", line 2468, in cross_entropy
return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
File "/Users/mymac/Documents/programming/python/mainenv/lib/python3.8/site-packages/torch/nn/functional.py", line 2264, in nll_loss
ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
RuntimeError: 1D target tensor expected, multi-target not supported
What am I doing wrong?
You are using nn.CrossEntropyLoss as the criterion for your training. You correctly passed the labels as indices of the ground truth class: 0s and 1s. However, as the error message suggests, it needs to be a 1D tensor!
Simply remove the reshape in ECGNet's __getitem__:
def __getitem__(self, idx):
ecgVec = self.ecg[idx]
labelID = self.target[idx]
return ecgVec,labelID
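To make the shape requirement concrete, a tiny sketch (made-up batch of 4) of what nn.CrossEntropyLoss expects:
import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
logits = torch.randn(4, 2)            # [batch_size, num_classes]
targets = torch.tensor([0, 1, 1, 0])  # 1D tensor of class indices, shape [batch_size]
print(criterion(logits, targets))     # works

# targets.reshape(-1, 1) would have shape [batch_size, 1] and raise
# "1D target tensor expected, multi-target not supported"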
Edit
I want to increase the batch_size to 8. But now I get the error [...]
You are doing a lot of reshaping (flattening), which will certainly affect the batch size. As a general rule of thumb, never fiddle with axis=0. For instance, if you have an input shape of (8, 500), you have a problem straight away when doing x.view(1, 50, -1), since the resulting tensor will be (1, 50, 80) (the desired shape would have been (8, 50, 10)). Instead, you could reshape with x.view(x.size(0), 50, -1).
The same goes for x.view(1, -1) further down in forward. You are looking to flatten the tensor, but you should not flatten it across the batch dimension; the batches need to stay separate! It's safer to use torch.flatten, yet I prefer nn.Flatten, which flattens from axis=1 to axis=-1 by default.
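A tiny illustration (arbitrary tensor) of the difference:
import torch
import torch.nn as nn

x = torch.randn(8, 10, 3)        # pretend batch of 8
print(nn.Flatten()(x).shape)     # torch.Size([8, 30])  -- batch axis preserved
print(x.view(1, -1).shape)       # torch.Size([1, 240]) -- batch axis destroyed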
My personal advice is to start with a simple setup (without train loops etc...) to verify the architecture and intermediate output shapes. Then, add the necessary logic to handle the training.
class ECGNet(data.Dataset):
"""ImageNet Limited dataset."""
def __init__(self, ecgs, labls, transform=None):
self.ecg = ecgs
self.target = labls
self.transform = transform
def __getitem__(self, idx):
ecgVec = self.ecg[idx]
labelID = self.target[idx]
return ecgVec, labelID
def __len__(self):
return len(self.ecg)
class Simple1DCNN(nn.Module):
def __init__(self):
super(Simple1DCNN, self).__init__()
self.layer1 = nn.Conv1d(in_channels=50,
out_channels=20,
kernel_size=5,
stride=2)
self.act1 = nn.ReLU()
self.layer2 = nn.Conv1d(in_channels=20,
out_channels=10,
kernel_size=1)
self.fc1 = nn.Linear(10*3, 2)
self.flatten = nn.Flatten()
def forward(self, x):
x = x.view(x.size(0), 50, -1)
x = self.layer1(x)
x = self.act1(x)
x = self.layer2(x)
x = self.flatten(x)
x = self.fc1(x)
return x
batch_size = 8
train_data = ECGNet(ecg_train, labels_train)
train_dl = DataLoader(dataset=train_data,
batch_size=batch_size,
shuffle=True,
num_workers=0)
model = Simple1DCNN()
criterion = nn.CrossEntropyLoss()
Then
>>> x, y = next(iter(train_dl))
>>> y_hat = model(x)
>>> y_hat.shape
torch.Size([8, 2])
Also, make sure your loss works:
>>> criterion(y_hat, y)
tensor(..., grad_fn=<NllLossBackward>)
I'm trying to implement ResNet18 in PyTorch but I'm having some trouble with it. My code is this:
device = torch.device("cuda:0")
class ResnetBlock(nn.Module):
def __init__(self, strides, nf, nf0, reps, bn):
super(ResnetBlock, self).__init__()
self.adapt = strides == 2
self.layers = []
self.relus = []
self.adapt_layer = nn.Conv2d(nf0, nf, kernel_size=1, stride=strides, padding=0) if self.adapt else None
for i in range(reps):
self.layers.append(nn.Sequential(
nn.Conv2d(nf0, nf, kernel_size=3, stride=strides, padding=1),
nn.BatchNorm2d(nf, eps=0.001, momentum=0.99),
nn.ReLU(),
nn.Conv2d(nf, nf, kernel_size=3, stride=1, padding=1),
nn.BatchNorm2d(nf, eps=0.001, momentum=0.99)))
self.relus.append(nn.ReLU())
strides = 1
nf0 = nf
def forward(self, x):
for i, (layer, relu) in enumerate(zip(self.layers, self.relus)):
rama = layer(x)
if self.adapt and i == 0:
x = self.adapt_layer(x)
x = x + rama
x = relu(x)
return x
class ConvNet(nn.Module):
def __init__(self):
super(ConvNet, self).__init__()
self.layer1 = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
nn.MaxPool2d(kernel_size=2, stride=2))
self.blocks = nn.Sequential(
ResnetBlock(1, 64, 64, 2, bn),
ResnetBlock(2, 128, 64, 2, bn),
ResnetBlock(2, 256, 128, 2, bn),
ResnetBlock(2, 512, 256, 2, bn))
self.fcout = nn.Linear(512, 10)
def forward(self, x):
out = self.layer1(x)
out = self.blocks(out)
out = out.reshape(out.size(0), -1)
out = self.fcout(out)
return out
num_epochs = 50
num_classes = 10
batch_size = 50
learning_rate = 0.00001
trans = transforms.ToTensor()
train_dataset = torchvision.datasets.CIFAR10(root="./dataset_pytorch", train=True, download=True, transform=trans)
test_dataset = torchvision.datasets.CIFAR10(root="./dataset_pytorch", train=False, download=True, transform=trans)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
def weights_init(m):
if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
nn.init.xavier_uniform_(m.weight.data)
nn.init.zeros_(m.bias.data)
model = ConvNet()
model.apply(weights_init)
model.to(device)
summary(model, (3,32,32))
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, eps=1e-6)
# Train the model
total_step = len(train_loader)
loss_list = []
acc_list = []
acc_list_test = []
for epoch in range(num_epochs):
total = 0
correct = 0
for i, (images, labels) in enumerate(train_loader):
images = images.to(device)
labels = labels.to(device)
optimizer.zero_grad()
# Run the forward pass
outputs = model(images)
loss = criterion(outputs, labels)
loss_list.append(loss.item())
# Backprop and perform Adam optimisation
loss.backward()
optimizer.step()
# Track the accuracy
total += labels.size(0)
_, predicted = torch.max(outputs.data, 1)
correct += (predicted == labels).sum().item()
acc_list.append(correct / total)
print("Train")
print('Epoch [{}/{}], Accuracy: {:.2f}%'
.format(epoch + 1, num_epochs, (correct / total) * 100))
total_test = 0
correct_test = 0
for i, (images, labels) in enumerate(test_loader):
images = images.to(device)
labels = labels.to(device)
# Run the forward pass
outputs = model(images)
# Track the accuracy
total_test += labels.size(0)
_, predicted = torch.max(outputs.data, 1)
correct_test += (predicted == labels).sum().item()
acc_list_test.append(correct_test / total_test)
print("Test")
print('Epoch [{}/{}], Accuracy: {:.2f}%'
.format(epoch + 1, num_epochs, (correct_test / total_test) * 100))
It's weird, because it throws the error Input type (torch.cuda.FloatTensor) and weight type (torch.FloatTensor) should be the same even though I've moved both the model and the data to CUDA.
I guess it's related to how I defined or used "ResnetBlock", because if I remove those blocks from ConvNet (removing the line out = self.blocks(out)), the code works. But I don't know what I'm doing wrong.
One issue is this line:
model.to(device)
For tensors, .to() is not in-place and returns a converted copy; nn.Module.to(), on the other hand, does modify the module in place, so this line is not wrong by itself, but reassigning the result is the safer, more explicit idiom:
model = model.to(device)
EDIT: The actual cause of the error is that a vanilla Python list cannot be tracked by PyTorch, so the layers stored in it are never registered and never moved to the GPU. You need to use nn.ModuleList.
From
self.layers = []
self.relus = []
To
self.layers = nn.ModuleList()
self.relus = nn.ModuleList()
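For the record, a minimal sketch (toy modules, hypothetical sizes) of why this matters: layers kept in a plain Python list are not registered, so .parameters(), .to(device) and state_dict() never see them, which is exactly why those convolutions stayed on the CPU here:
import torch.nn as nn

class Plain(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = [nn.Linear(4, 4)]                 # invisible to PyTorch

class Registered(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.ModuleList([nn.Linear(4, 4)])  # properly registered

print(sum(p.numel() for p in Plain().parameters()))       # 0
print(sum(p.numel() for p in Registered().parameters()))  # 20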