RuntimeError: size mismatch (got input: [6], target: [64]) - python

I wrote a PyTorch model for time series classification, but I've got a lot of errors.
batch_size = 64
trainloader = DataLoader(TensorDataset(x_train, y_train),
                         batch_size=batch_size,
                         shuffle=True)
testloader = DataLoader(TensorDataset(x_test, y_test),
                        batch_size=batch_size,
                        shuffle=False)
class LSTMClassifier(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, num_class):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.rnn = torch.nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = torch.nn.Linear(hidden_dim, num_class)
        self.batch_size = None
        self.hidden = None

    def forward(self, x):
        x, (h, c) = self.rnn(x)
        logits = self.fc(h[-1])
        return logits
input_dim = x_train.shape[1]
hidden_dim = 256
layer_dim = 12
num_class = len(y_train.unique())

model = LSTMClassifier(input_dim, hidden_dim, layer_dim, num_class).to(device)
loss_fn = torch.nn.CrossEntropyLoss()
lr = 1e-3
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        logits = model(X)
        loss = loss_fn(logits, torch.max(y, 1)[1])
        loss = loss_fn(logits, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
epochs = 15
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(trainloader, model, loss_fn, optimizer)
    test(testloader, model)
print("Done!")
Running this, I get the error

RuntimeError: size mismatch (got input: [6], target: [64])

on the line loss = loss_fn(logits, torch.max(y, 1)[1]).
Can anyone help me with this? What should I do to solve it?
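
The numbers in the error point at the likely cause: the logits come out as a flat vector of 6 class scores while the target has 64 entries (the batch size). If x_train is 2-D, nn.LSTM treats it as a single unbatched sequence, so h[-1] has shape [hidden_dim] and the linear layer returns just [num_class] with no batch dimension. A minimal sketch of the fix, assuming each row of x_train is a univariate series (the sequence length 140 and the 6 classes below are illustrative):

import torch

rnn = torch.nn.LSTM(input_size=1, hidden_size=256, num_layers=12, batch_first=True)
fc = torch.nn.Linear(256, 6)

x = torch.randn(64, 140)   # [batch, seq_len]: 2-D, so the LSTM would see ONE unbatched sequence
x = x.unsqueeze(-1)        # -> [64, 140, 1]: a batch of 64 univariate sequences, input_dim = 1
out, (h, c) = rnn(x)
logits = fc(h[-1])         # [64, 6]: one row of class scores per sample

With the input reshaped this way, construct the model with input_dim = 1 rather than x_train.shape[1]. And if y_train already holds integer class indices, pass it to CrossEntropyLoss directly as loss_fn(logits, y) instead of taking torch.max(y, 1)[1].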

Related

How can I solve RuntimeError: CUDA error: device-side assert triggered

gc.collect()
torch.cuda.empty_cache()
class KcBERTDataset(Dataset):
    def __init__(self, subjects, targets, tokenizer, max_len):
        self.subjects = subjects
        self.targets = targets
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.subjects)

    def __getitem__(self, item):
        subject = str(self.subjects[item])
        target = self.targets[item]
        encoding = self.tokenizer.encode_plus(
            subject,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        return {
            'subject_text': subject,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'targets': torch.tensor(target, dtype=torch.long)
        }
def create_data_loader(df, tokenizer, max_len, batch_size, shuffle_=False, valid=False):
    if valid:
        ds = KcBERTDataset(
            subjects=df["article"].to_numpy(),
            targets=np.zeros(len(df)),
            tokenizer=tokenizer,
            max_len=max_len
        )
    else:
        ds = KcBERTDataset(
            subjects=df["article"].to_numpy(),
            targets=df["label"].to_numpy(),
            tokenizer=tokenizer,
            max_len=max_len
        )
    return DataLoader(
        ds,
        batch_size=batch_size,
        num_workers=4,
        shuffle=shuffle_
    )
BATCH_SIZE = 32
MAX_LEN = 32
train = df_train
df_valid = df_test
tokenizer_kcbert = AutoTokenizer.from_pretrained("beomi/kcbert-large")
train_data_loader = create_data_loader(train, tokenizer_kcbert, MAX_LEN, BATCH_SIZE, shuffle_=True)
valid_data_loader = create_data_loader(df_valid, tokenizer_kcbert, MAX_LEN, BATCH_SIZE, valid=True)
class KcBERTClassifier(nn.Module):
    def __init__(self, n_classes):
        super(KcBERTClassifier, self).__init__()
        self.bert = BertModel.from_pretrained("beomi/kcbert-large")
        self.drop = nn.Dropout(p=0.5)
        self.out = nn.Linear(self.bert.config.hidden_size, n_classes)

    def forward(self, input_ids, attention_mask):
        _, pooled_output = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=False
        )
        output = self.drop(pooled_output)
        return self.out(output)
device = torch.device("cuda")
EPOCHS = 2
loss_fn = nn.CrossEntropyLoss().to(device)
from tqdm import tqdm

def train_epoch(model, data_loader, loss_fn, optimizer, device, scheduler, n_examples):
    model = model.train()
    losses = []
    correct_predictions = 0
    for d in tqdm(data_loader):
        input_ids = d["input_ids"].to(device)
        attention_mask = d["attention_mask"].to(device)
        targets = d["targets"].to(device)
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask
        )
        _, preds = torch.max(outputs, dim=1)
        loss = loss_fn(outputs, targets)
        correct_predictions += torch.sum(preds == targets)
        losses.append(loss.item())
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
    return correct_predictions.double() / n_examples, np.mean(losses)
EPOCHS = 2
model_kcbert = KcBERTClassifier(n_classes=20).to(device)
optimizer = AdamW(model_kcbert.parameters(), lr=1e-5)
total_steps = len(train_data_loader) * EPOCHS
scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=int(total_steps * 0.1), num_training_steps=total_steps)

for epoch in range(EPOCHS):
    print(f'Epoch {epoch + 1}/{EPOCHS}')
    print('-' * 10)
    train_acc, train_loss = train_epoch(
        model_kcbert,
        train_data_loader,
        loss_fn,
        optimizer,
        device,
        scheduler,
        len(train)
    )
    print(f'Train loss {train_loss} accuracy {train_acc}')

y_review_texts, y_pred, y_pred_probs = get_predictions(
    model_kcbert,
    valid_data_loader
)
answers.append(y_pred.tolist())
In this code, I get "RuntimeError: CUDA error: device-side assert triggered" on the line model_kcbert = KcBERTClassifier(n_classes=20).to(device).
I looked it up and found that it usually means the input and output sizes don't match, but I don't know how to fix it.
In the data there is a column with the articles and a column that analyzes each article's sentiment and labels it -1, 0, or 1.
How can I solve "CUDA error: device-side assert triggered"?
I've tried n_classes=3, and I've also tried 20, but the same error happened.
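
A likely cause, given the labels: nn.CrossEntropyLoss requires integer targets in the range [0, n_classes - 1], and a target of -1 trips an assert inside the CUDA kernel. Because CUDA kernels run asynchronously, the error surfaces on an unrelated line such as the model construction. A minimal sketch of the fix, assuming the -1/0/1 labels live in df_train["label"]:

import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # set before any CUDA work so errors report the real line

# Remap the sentiment labels -1/0/1 into the valid range 0/1/2
df_train["label"] = df_train["label"].map({-1: 0, 0: 1, 1: 2})

# Three classes, matching the three remapped labels
model_kcbert = KcBERTClassifier(n_classes=3).to(device)

n_classes=20 fails for the same reason: no matter how large n_classes is, a target of -1 is never valid.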

PyTorch LSTM time series prediction hidden[0] size mismatch error

I want to train on 100 values per day over 3000 days and predict the 100 values for the next day, but I don't know why this error happens or how to resolve it. Please help me.

batchsize = 100, hidden_dim = 10, seq_length = 60, data_dim = 100, output_dim = 100
class Net(nn.Module):
    def __init__(self, input_dim, hidden_dim, batch_size, output_dim, layers):
        super(Net, self).__init__()
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.output_dim = output_dim
        self.layers = layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=layers,
                            batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim, bias=True)
        self.hidden = self.reset_hidden_state()

    def reset_hidden_state(self):
        return (
            torch.zeros(self.layers, self.batch_size, self.hidden_dim),
            torch.zeros(self.layers, self.batch_size, self.hidden_dim))

    def forward(self, x):
        x, self.hidden = self.lstm(x, self.hidden)
        x = self.fc(x[:, -1, :])  # x: [batch_size, seq_len, hidden_dim]
        return x
# Train part
def train_model(model, train_df, num_epochs=None, lr=None, verbose=10, patience=10):
    criterion = nn.MSELoss().to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    nb_epochs = num_epochs
    train_hist = np.zeros(nb_epochs)
    for epoch in range(nb_epochs):
        avg_cost = 0
        total_batch = len(train_df)
        for batch_idx, samples in enumerate(train_df):
            x_train, y_train = samples
            # reset the hidden state for each sequence
            model.reset_hidden_state()
            model.hidden = [hidden.to(device) for hidden in model.reset_hidden_state()]
            outputs = model(x_train)
            loss = criterion(outputs, y_train)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            avg_cost += loss / total_batch
        train_hist[epoch] = avg_cost
        if epoch % verbose == 0:
            print('Epoch:', '%04d' % (epoch), 'train loss :', '{:.4f}'.format(avg_cost))
        if (epoch % patience == 0) & (epoch != 0):
            if train_hist[epoch - patience] < train_hist[epoch]:
                print('\n Early Stopping')
                break
    return model.eval(), train_hist
# Prediction part
net = Net(data_dim, hidden_dim, batch, output_dim, 1).to(device)
model, train_hist = train_model(net, dataloader, num_epochs=nb_epochs, lr=learning_rate, verbose=20, patience=10)

with torch.no_grad():
    pred = np.zeros((2940, 1, 100))
    for pr in range(len(trainX_tensor)):
        model.reset_hidden_state()
        predicted = model(torch.unsqueeze(trainX_tensor[pr], 0)).cpu()
        print(predicted.shape)
        # predicted = predicted.item()
        pred[pr, :, :] = predicted
        # print(pr)

# INVERSE
pred_inverse = scaler.inverse_transform(pred)
This fails with:

Expected hidden[0] size (1, 1, 10), got [1, 100, 10]

How can I fix the hidden[0] size mismatch?
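
The hidden state is the problem: reset_hidden_state builds zeros of shape [layers, self.batch_size, hidden_dim] with batch_size = 100, but at prediction time each call passes a single sequence via torch.unsqueeze(..., 0), so the LSTM expects a hidden state of size (1, 1, 10). Note also that model.reset_hidden_state() only returns the tensors; on its own it never reassigns model.hidden. A minimal sketch of one fix, sizing the hidden state from the input inside forward (the class is trimmed to the relevant parts):

import torch
import torch.nn as nn

class Net(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, layers):
        super().__init__()
        self.layers, self.hidden_dim = layers, hidden_dim
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim, bias=True)

    def forward(self, x):
        # Build zero states matching the batch actually passed in, so
        # training (batch 100) and prediction (batch 1) both work.
        h0 = torch.zeros(self.layers, x.size(0), self.hidden_dim, device=x.device)
        c0 = torch.zeros(self.layers, x.size(0), self.hidden_dim, device=x.device)
        out, _ = self.lstm(x, (h0, c0))
        return self.fc(out[:, -1, :])

Alternatively, call self.lstm(x) with no hidden argument: nn.LSTM then creates correctly sized zero states itself, which removes the need for reset_hidden_state entirely.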

Input contains NaN, infinity or a value too large for dtype('float32'). PyTorch

I am trying to train a model, but in vain. I see the error

Input contains NaN, infinity or a value too large for dtype('float32').

I think it may be connected with the MSE loss function, because with MAE it somehow works, and with RMSE it also somehow works (on the second epoch I get RMSE = 10). I can't figure out what I am doing wrong.
# Count Nan
df = pd.read_csv('data.txt.zip', header=None)
X = df.iloc[:, 1:].values
y = df.iloc[:, 0].values
train_size = 463715
X_train = X[:train_size, :]
y_train = y[:train_size]
X_test = X[train_size:, :]
y_test = y[train_size:]

#ToTensor
X_train = torch.FloatTensor(X_train)
y_train = torch.FloatTensor(y_train)
X_test = torch.FloatTensor(X_test)
y_test = torch.FloatTensor(y_test)

# Create TensorDataset
train_ds = TensorDataset(X_train, y_train)
test_ds = TensorDataset(X_test, y_test)

val_num = 92743
train_num = 370972

# Divide train data into train and validation data
train_ds, val_ds = random_split(train_ds, [train_num, val_num])

# Evaluate accuracy
def accuracy(y_true, y_pred):
    return r2_score(y_true, y_pred)
# create Class
class BaselineModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(BaselineModel, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.linear1 = nn.Linear(90, 45)
        self.linear2 = nn.Linear(45, 1)
        self.linear3 = nn.Linear(45, 15)
        self.linear4 = nn.Linear(15, 1)
        self.batch = nn.BatchNorm2d(hidden_size)
        self.relu = nn.ReLU()
        self.lreku = nn.LeakyReLU()
        self.elu = nn.ELU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.elu(self.linear1(x))
        return self.linear2(x)

    def training_step(self, criterion, batch):
        x_train, y_train = batch
        y_pred = self(x_train)
        loss = criterion(y_pred, y_train.unsqueeze(1))
        return loss

    def validation_step(self, criterion, batch):
        x_val, y_val = batch
        y_pred = self(x_val)
        loss = criterion(y_pred, y_val.unsqueeze(1))
        acc = accuracy(y_val, y_pred)
        return {'val_loss': loss, 'val_acc': acc}

    def validation_epoch_end(self, y_pred):
        batch_losses = [x['val_loss'] for x in y_pred]
        epoch_loss = torch.stack(batch_losses).mean()
        batch_accs = [x['val_acc'] for x in y_pred]
        epoch_acc = np.mean(batch_accs)
        #epoch_acc = torch.stack(batch_accs).mean()
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

    def epoch_end(self, epoch, result):
        print(f"Epoch {epoch}, val_loss: {result['val_loss']}, val_acc: {result['val_acc']} ")
model = BaselineModel(input_size=90, hidden_size=45, output_size=1)

# Evaluate
def evaluate(model, criterion, val_loader):
    with torch.no_grad():
        y_pred = [model.validation_step(criterion, batch) for batch in val_loader]
        return model.validation_epoch_end(y_pred)

# Train
def train(model, criterion, optimizer, train_loader, val_loader, lr, epochs):
    history = []
    for epoch in range(epochs):
        for batch in train_loader:
            optimizer.zero_grad()
            loss = model.training_step(criterion, batch)
            loss.backward()
            optimizer.step()
        result = evaluate(model, criterion, val_loader)
        model.epoch_end(epoch, result)
        history.append(result)
    #return history

# Create train_loader & val_loader
batch_size = 128
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=True)

# Create parameters and Train
lr = 0.05
optimizer = torch.optim.SGD(model.parameters(), lr, momentum=0.9)
criterion = F.mse_loss
epochs = 10
train(model, criterion, optimizer, train_loader, val_loader, lr, epochs)
Yes, it is because of your loss function. If the loss becomes very small or very large after some epochs, then when it is used in backpropagation to train the model, you face this error. To handle that, you should use early stopping to halt the training; you can implement it as a callback, which provides a way to execute code and interact with the training process automatically.
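
Plain PyTorch has no callback mechanism of its own, so the usual approach is a hand-rolled early-stopping check in the epoch loop. A minimal sketch built on the evaluate function above (the patience value is illustrative):

best_loss = float('inf')
patience = 3              # stop after this many epochs without improvement
epochs_no_improve = 0

for epoch in range(epochs):
    for batch in train_loader:
        optimizer.zero_grad()
        loss = model.training_step(criterion, batch)
        loss.backward()
        optimizer.step()
    result = evaluate(model, criterion, val_loader)
    model.epoch_end(epoch, result)
    if result['val_loss'] < best_loss:
        best_loss = result['val_loss']
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print(f"Early stopping at epoch {epoch}")
            break

A lower learning rate than 0.05 (for example 1e-3), or gradient clipping, may also keep the MSE loss from diverging to NaN in the first place.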

Using a target size (torch.Size([16])) that is different to the input size (torch.Size([16, 2])) is deprecated

I am trying to build a multiclass text classifier using PyTorch and torchtext, but I receive this error whenever the output dim of the last hidden layer is 2; it runs fine with an output dim of 1. I know there is a problem with the batch size and data shape, but I don't know the fix.
Constructing iterator:
#set batch size
BATCH_SIZE = 16

train_iterator, valid_iterator = BucketIterator.splits(
    (train_data, valid_data),
    batch_size=BATCH_SIZE,
    sort_key=lambda x: len(x.text),
    sort_within_batch=True,
    device=device)
Model class:
class classifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers,
                 bidirectional, dropout):
        super(classifier, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.gru = nn.GRU(embedding_dim,
                          hidden_dim,
                          num_layers=n_layers,
                          bidirectional=bidirectional,
                          dropout=dropout,
                          batch_first=True)
        self.fc1 = nn.Linear(hidden_dim * 2, 128)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(64, 16)
        self.relu3 = nn.ReLU()
        self.fc4 = nn.Linear(16, output_dim)
        self.act = nn.Sigmoid()

    def forward(self, text, text_lengths):
        embedded = self.embedding(text)
        #embedded = [batch size, sent_len, emb dim]
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths.to('cpu'), batch_first=True)
        packed_output, hidden = self.gru(packed_embedded)
        hidden = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
        dense_1 = self.fc1(hidden)
        x = self.relu1(dense_1)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        x = self.relu3(x)
        dense_outputs = self.fc4(x)
        #Final activation function
        outputs = self.act(dense_outputs)
        return outputs
instantiating the model:
size_of_vocab = len(TEXT.vocab)
embedding_dim = 300
num_hidden_nodes = 256
num_output_nodes = 2
num_layers = 4
bidirection = True
dropout = 0.2

model = classifier(size_of_vocab, embedding_dim, num_hidden_nodes, num_output_nodes, num_layers,
                   bidirectional=True, dropout=dropout).to(device)

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

pretrained_embeddings = TEXT.vocab.vectors
model.embedding.weight.data.copy_(pretrained_embeddings)
print(pretrained_embeddings.shape)
Optimizer and criterion used:
optimizer = optim.Adam(model.parameters())
criterion = nn.BCELoss()
model = model.to(device)
criterion = criterion.to(device)
Training function:
import torchmetrics as tm
metrics = tm.Accuracy()

def train(model, iterator, optimizer, criterion):
    #initialize every epoch
    epoch_loss = 0
    epoch_acc = 0
    #set the model in training phase
    model.train()
    for batch in iterator:
        #reset the gradients after every batch
        optimizer.zero_grad()
        #retrieve text and no. of words
        text, text_lengths = batch.text
        #convert to 1D tensor
        predictions = model(text, text_lengths).squeeze()
        #compute the loss
        loss = criterion(predictions, batch.label)
        #compute the accuracy
        # acc = binary_accuracy(predictions, batch.label)
        acc = metrics(predictions, batch.label)
        #backpropagate the loss and compute the gradients
        loss.backward()
        #update the weights
        optimizer.step()
        #loss and accuracy
        epoch_loss += loss.item()
        epoch_acc += acc.item()
    return epoch_loss / len(iterator), epoch_acc / len(iterator)
Full error
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-60-eeabf5bacadf> in <module>()
      5
      6 #train the model
----> 7 train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
      8
      9 #evaluate the model

3 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in binary_cross_entropy(input, target, weight, size_average, reduce, reduction)
   2906     raise ValueError(
   2907         "Using a target size ({}) that is different to the input size ({}) is deprecated. "
-> 2908         "Please ensure they have the same size.".format(target.size(), input.size())
   2909     )
   2910

ValueError: Using a target size (torch.Size([16])) that is different to the input size (torch.Size([16, 2])) is deprecated. Please ensure they have the same size.
What you want is CrossEntropyLoss instead of BCELoss. BCELoss requires the input and target to have the same shape, whereas CrossEntropyLoss takes logits of shape [batch, n_classes] together with integer class labels of shape [batch], which is exactly the mismatch in the error.
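
A minimal, self-contained sketch of the change (the shapes mirror the error message; also remove the final nn.Sigmoid and the .squeeze() so the model returns raw logits):

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()      # applies log-softmax internally, so no Sigmoid

logits = torch.randn(16, 2)            # model output: [batch, n_classes]
labels = torch.randint(0, 2, (16,))    # integer class indices: [batch]

loss = criterion(logits, labels)       # the shapes are allowed to differ here

In the training loop that means loss = criterion(predictions, batch.label.long()) with predictions left as [16, 2].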

Difference between Keras and Pytorch code with same architecture

I am building an autoencoder model, and I have found two snippets of code with the same architecture, one in Keras and one in PyTorch. But when I run them, there is a large difference in training time even though they use the same architecture. Could you please explain why I am encountering such a huge difference in time as well as performance?
PyTorch Code
class Encoder(nn.Module):
    def __init__(self, seq_len, n_features, embedding_dim=64):
        super(Encoder, self).__init__()
        self.seq_len, self.n_features = seq_len, n_features
        self.embedding_dim, self.hidden_dim = embedding_dim, 2 * embedding_dim
        self.rnn1 = nn.LSTM(
            input_size=n_features,
            hidden_size=self.hidden_dim,
            num_layers=1,
            batch_first=True
        )
        self.rnn2 = nn.LSTM(
            input_size=self.hidden_dim,
            hidden_size=embedding_dim,
            num_layers=1,
            batch_first=True
        )

    def forward(self, x):
        x = x.reshape((1, self.seq_len, self.n_features))
        x, (_, _) = self.rnn1(x)
        x, (hidden_n, _) = self.rnn2(x)
        return hidden_n.reshape((self.n_features, self.embedding_dim))

class Decoder(nn.Module):
    def __init__(self, seq_len, input_dim=64, n_features=1):
        super(Decoder, self).__init__()
        self.seq_len, self.input_dim = seq_len, input_dim
        self.hidden_dim, self.n_features = 2 * input_dim, n_features
        self.rnn1 = nn.LSTM(
            input_size=input_dim,
            hidden_size=input_dim,
            num_layers=1,
            batch_first=True
        )
        self.rnn2 = nn.LSTM(
            input_size=input_dim,
            hidden_size=self.hidden_dim,
            num_layers=1,
            batch_first=True
        )
        self.output_layer = nn.Linear(self.hidden_dim, n_features)

    def forward(self, x):
        x = x.repeat(self.seq_len, self.n_features)
        x = x.reshape((self.n_features, self.seq_len, self.input_dim))
        x, (hidden_n, cell_n) = self.rnn1(x)
        x, (hidden_n, cell_n) = self.rnn2(x)
        x = x.reshape((self.seq_len, self.hidden_dim))
        return self.output_layer(x)

class RecurrentAutoencoder(nn.Module):
    def __init__(self, seq_len, n_features, embedding_dim=64):
        super(RecurrentAutoencoder, self).__init__()
        self.encoder = Encoder(seq_len, n_features, embedding_dim).to(device)
        self.decoder = Decoder(seq_len, embedding_dim, n_features).to(device)

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
model = RecurrentAutoencoder(seq_len, n_features, 128)

import time
time_dict = {}

def train_model(model, train_dataset, val_dataset, n_epochs):
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.L1Loss(reduction='sum').to(device)
    history = dict(train=[], val=[])
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 10000.0
    time_dict[0] = time.time()
    for epoch in range(1, n_epochs + 1):
        model = model.train()
        train_losses = []
        for seq_true in train_dataset:
            optimizer.zero_grad()
            seq_true = seq_true.to(device)
            seq_pred = model(seq_true)
            loss = criterion(seq_pred, seq_true)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())
        val_losses = []
        model = model.eval()
        with torch.no_grad():
            for seq_true in val_dataset:
                seq_true = seq_true.to(device)
                seq_pred = model(seq_true)
                loss = criterion(seq_pred, seq_true)
                val_losses.append(loss.item())
        train_loss = np.mean(train_losses)
        val_loss = np.mean(val_losses)
        history['train'].append(train_loss)
        history['val'].append(val_loss)
        if val_loss < best_loss:
            best_loss = val_loss
            best_model_wts = copy.deepcopy(model.state_dict())
        print(f'Epoch {epoch}: train loss {train_loss} val loss {val_loss}')
        stop = time.time()
        time_dict[epoch] = stop
    model.load_state_dict(best_model_wts)
    return model.eval(), history

model, history = train_model(
    model,
    train_dataset,
    val_dataset,
    n_epochs=10)
Output of print(model)
RecurrentAutoencoder(
  (encoder): Encoder(
    (rnn1): LSTM(1, 256, batch_first=True)
    (rnn2): LSTM(256, 128, batch_first=True)
  )
  (decoder): Decoder(
    (rnn1): LSTM(128, 128, batch_first=True)
    (rnn2): LSTM(128, 256, batch_first=True)
    (output_layer): Linear(in_features=256, out_features=1, bias=True)
  )
)
Keras Code
model = keras.Sequential()
model.add(keras.layers.LSTM(
    units=256,
    input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))
model.add(keras.layers.LSTM(units=128, return_sequences=True))
model.add(keras.layers.LSTM(units=256, return_sequences=True))
model.add(keras.layers.TimeDistributed(keras.layers.Dense(units=X_train.shape[2])))
model.compile(loss='mae', optimizer='adam')

nb_epoch = 10
autoencoder.compile(optimizer='adam',
                    loss='mean_squared_error')
tensorboard = TensorBoard(log_dir='/tmp/logs',
                          histogram_freq=0,
                          write_graph=True,  # to visualize
                          write_images=True)
history = autoencoder.fit(X_train, X_train,
                          epochs=nb_epoch,
                          shuffle=True,
                          validation_data=(X_validate, X_validate),
                          verbose=1,
                          callbacks=[tensorboard]).history
Output of print(model.summary())
Layer (type)                  Output Shape            Param #
lstm_6 (LSTM)                 (None, 1, 256)          266240
lstm_7 (LSTM)                 (None, 1, 128)          197120
lstm_8 (LSTM)                 (None, 1, 256)          394240
time_distributed (TimeDistri  (None, 1, 3)            771
The architectures of both models look the same, but there is a difference in performance and training time.
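
One likely driver of the gap is the training loop rather than the architecture: the PyTorch version feeds sequences one at a time (the Encoder reshapes to a leading dimension of 1, and train_model iterates over individual sequences), while Keras' fit batches the data, with batch_size defaulting to 32, so each Keras step amortizes kernel-launch and Python overhead over 32 sequences. A minimal sketch of batching the PyTorch loop, assuming train_dataset is a tensor or list of equally shaped sequences (the batch size of 32 mirrors Keras' default):

from torch.utils.data import DataLoader

loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

for seq_true in loader:                # [32, seq_len, n_features]
    seq_true = seq_true.to(device)
    optimizer.zero_grad()
    seq_pred = model(seq_true)
    loss = criterion(seq_pred, seq_true)
    loss.backward()
    optimizer.step()

For this to work, the Encoder and Decoder would also need to derive the batch size from x.size(0) instead of hard-coding 1 in their reshape calls.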
