How to deal with NotImplementedError in training Unet model? - python

def train_fn(data_loader, model, optimizer):
model.train()
total_loss = 0.0
for images, masks in tqdm(data_loader):
images = images.to(DEVICE)
masks = masks.to(DEVICE)
optimizer.zero_grad()
logits, loss = model(images,masks)
loss.backward()
optimizer.step()
total_loss += loss.item()
return total_loss/ len(data_loader)
def eval_fn(data_loader, model):
model.eval()
total_loss = 0.0
with torch.no_grad():
for images, masks in tqdm(data_loader):
images = images.to(DEVICE)
masks = masks.to(DEVICE)
logits, loss = model(images,masks)
total_loss += loss.item()
return total_loss/ len(data_loader)
optimizer = torch.optim.Adam(model.parameters(), lr = LR)
best_valid_loss = np.Inf
for i in range(EPOCHS):
train_loss = train_fn(trainloader, model, optimizer)
valid_loss = eval_fn(validloader, model)
if valid_loss < best_valid_loss:
torch.save(model.state_dict(), 'best_model.pt')
print("SAVED_MODEL")
best_valid_loss = valid_loss
print(f"Epoch : {i+1} Train_loss: {train_loss} Valid_loss: {valid_loss}")
I get the following error when I try to train the model:
0%| | 0/15 [00:00<?, ?it/s]
NotImplementedError Traceback (most recent call last)
in ()
4
5
----> 6 train_loss = train_fn(trainloader, model, optimizer)
7 valid_loss = eval_fn(validloader, model)
8
2 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _forward_unimplemented(self, *input)
199 registered hooks while the latter silently ignores them.
200 """
--> 201 # raise NotImplementedError
202
203
NotImplementedError:
How do I deal with this?

Looking at the link you provided in the comment, your model definition looks like this:
class SegmentationModel(nn.Module):
def __init__(self):
super(SegmentationModel,self).__init__()
self.arc = smp.Unet(
encoder_name = ENCODER,
encoder_weights = WEIGHTS,
in_channels = 3,
classes = 1,
activation = None
)
def forward(self, images, masks = None):
logits = self.arc(images)
if masks != None:
loss1 = DiceLoss(mode = 'binary')(logits, masks)
loss2 = nn.BCEWithLogitsLoss()(logits,masks)
return logits, loss1 + loss2
return logits
If you look close, you'll see forward() has an erratic extra indentation, making it an internal function inside __init__() rather than a method of a SegmentationModel. Shift it a bit to left, and it should work fine:
class SegmentationModel(nn.Module):
def __init__(self):
super(SegmentationModel,self).__init__()
self.arc = smp.Unet(
encoder_name = ENCODER,
encoder_weights = WEIGHTS,
in_channels = 3,
classes = 1,
activation = None
)
def forward(self, images, masks = None):
logits = self.arc(images)
if masks != None:
loss1 = DiceLoss(mode = 'binary')(logits, masks)
loss2 = nn.BCEWithLogitsLoss()(logits,masks)
return logits, loss1 + loss2
return logits

Related

TypeError: 'GraphModule' object is not subscriptable in PyTorch quantization

Based on these tutorials, I want to apply torch.quantization API on Federated Learning:
https://colab.research.google.com/github/pytorch/tutorials/blob/gh-pages/_downloads/static_quantization_tutorial.ipynb#scrollTo=UBwjcUDJ7L_1
https://pytorch.org/tutorials/prototype/fx_graph_mode_ptq_static.html
But it seems that it doesn't test with FL yet! Although it supposes to be applicable "if the the code is symbolic traceable"! Based on a torch expert's answer here https://discuss.pytorch.org/t/quantization-in-federated-learning/152885
(what does that means?)
I tried to test it, but got an error! This is the code:
# model
class CNNCifar(nn.Module):
def __init__(self):
super(CNNCifar, self).__init__()
self.conv1 = nn.Conv2d(3, 6, 5)
self.pool = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16 * 5 * 5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 100)
def forward(self, x):
x = self.pool(F.relu(self.conv1(x)))
x = self.pool(F.relu(self.conv2(x)))
x = x.reshape(-1, 16 * 5 * 5)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return F.log_softmax(x, dim=1)
# training CIFAR100 dataset with this model is not shown here, but it works with no issues
# quantization
import torch.quantization.quantize_fx as quantize_fx
import copy
# client models quantize
c_model_q = []
for i in range(3):
model= client_models[i]
model.train()
model_to_quantize = copy.deepcopy(model)
qconfig_dict = {"": torch.quantization.get_default_qat_qconfig('qnnpack')}
model_prepared = quantize_fx.prepare_qat_fx(model_to_quantize, qconfig_dict)
c_model_q.append(model_prepared)
# golbal quantization
model_fp = global_model
# quantization aware training for static quantization
model_to_quantize = copy.deepcopy(model_fp)
qconfig_dict = {"": torch.quantization.get_default_qat_qconfig('qnnpack')} # or ...("fbgemm")
model_to_quantize.train()
# prepare
g_model_q = quantize_fx.prepare_qat_fx(model_to_quantize, qconfig_dict)
print(g_model_q.graph)
# train the model
# global model
g_model_q = g_model_q.cpu()
cmodel_q = []
for i in range(3):
cmodel = c_model_q[i]
cmodel.cpu()
cmodel_q.append(cmodel)
for model in cmodel_q:
model.load_state_dict(g_model_q.state_dict()) # initial synchronizing with global model
opt = [optim.SGD(model.parameters(), lr=0.1) for model in cmodel_q]
# helper function for local training in num_selected
def q_client_update(cmodel_q, optimizer, train_loader, epoch=5):
model.train()
for e in range(epoch):
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.cpu(), target.cpu()
optimizer.zero_grad()
output = cmodel_q(data)
loss = F.nll_loss(output, target)
loss.backward()
optimizer.step()
return loss.item()
# take the mean of the weights and aggregated into the global weights
def q_server_aggregate(g_model_q, cmodel_q):
global_dict = g_model_q.state_dict()
for k in global_dict.keys():
global_dict[k] = torch.stack([cmodel_q[i].state_dict()[k].float() for i in range(len(cmodel_q))], 0).mean(0)
g_model_q.load_state_dict(global_dict)
for model in cmodel_q:
model.load_state_dict(g_model_q.state_dict())
def q_test(g_model_q, test_loader):
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
data, target = data.cpu(), target.cpu()
output = g_model_q(data)
test_loss += F.nll_loss(output, target, reduction='sum').item()
pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
acc = correct / len(test_loader.dataset)
return test_loss, acc
losses_train = []
losses_test = []
acc_train = []
acc_test = []
# Runnining FL
for r in range(num_rounds):
# select random clients
client_idx = np.random.permutation(num_clients)[:num_selected]
# client update
loss = 0
for i in tqdm(range(num_selected)):
loss += q_client_update(cmodel_q[i], opt[i], train_loader[client_idx[i]], epoch=epochs)
losses_train.append(loss)
# server aggregate
q_server_aggregate(g_model_q, cmodel_q)
cmodel_q = quantize_fx.convert_fx(cmodel_q[i])
cmodel_q.eval()
g_model_q = quantize_fx.convert_fx(g_model_q)
g_model_q.eval()
test_loss, acc = q_test(g_model_q, test_loader)
losses_test.append(test_loss)
acc_test.append(acc)
print('%d-th round' % r)
print('average train loss %0.3g | test loss %0.3g | test acc: %0.3f' % (loss / num_selected, test_loss, acc))
100%|██████████| 3/3 [01:02<00:00, 20.82s/it]
/usr/local/lib/python3.7/dist-packages/torch/nn/quantized/_reference/modules/linear.py:41: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
torch.tensor(weight_qparams["scale"], dtype=torch.float, device=device))
/usr/local/lib/python3.7/dist-packages/torch/nn/quantized/_reference/modules/linear.py:46: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
dtype=torch.int, device=device))
0-th round
average train loss 3.3 | test loss 3.23 | test acc: 0.220
0%| | 0/3 [00:00<?, ?it/s]
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-21-0b68c58ee902> in <module>()
11 loss = 0
12 for i in tqdm(range(num_selected)):
---> 13 loss += q_client_update(cmodel_q[i], opt[i], train_loader[client_idx[i]], epoch=epochs)
14
15 losses_train.append(loss)
TypeError: 'GraphModule' object is not subscriptable
The model type before quantization is: __main__.CNNCifar
After quantization: torch.fx.graph_module.GraphModule.__new__.<locals>.GraphModuleImpl
What does object is not subscriptable mean? How I can fix it?

PyTorch adapt binary classification model to output probabilities of both classes

My dataset has 14 features and a target containing {0,1}.
I have trained this binary classifier:
class SimpleBinaryClassifier(nn.Module):
def __init__(self,input_shape):
super().__init__()
self.fc1 = nn.Linear(input_shape,64)
self.fc2 = nn.Linear(64,32)
self.dropout = nn.Dropout(p=0.1)
self.fc3 = nn.Linear(32,1)
def forward(self,x):
x = torch.relu(self.fc1(x))
x = torch.relu(self.fc2(x))
x = self.dropout(x)
x = self.fc3(x)
return x
with the following criterion and training loop:
criterion = nn.BCEWithLogitsLoss()
def binary_acc(y_pred, y_test):
y_pred_tag = torch.round(torch.sigmoid(y_pred))
correct_results_sum = (y_pred_tag == y_test).sum().float()
acc = correct_results_sum/y_test.shape[0]
acc = torch.round(acc * 100)
return acc
model.train()
for e in range(1, EPOCHS+1):
epoch_loss = 0
epoch_acc = 0
for X_batch, y_batch in train_loader:
X_batch, y_batch = X_batch.to(device), y_batch.to(device)
optimizer.zero_grad()
y_pred = model(X_batch)
loss = criterion(y_pred, y_batch.unsqueeze(1))
acc = binary_acc(y_pred, y_batch.unsqueeze(1))
loss.backward()
optimizer.step()
epoch_loss += loss.item()
epoch_acc += acc.item()
print(f'Epoch {e+0:03}: | Loss: {epoch_loss/len(train_loader):.5f} | Acc: {epoch_acc/len(train_loader):.3f}')
This model, when called like sigmoid(model(input_tensor)) outputs a single number in [0,1]. The pipeline I'm working with, expects a model to output probabilities [p_class1, p_class2].
How can I adapt the model and the training loop?
If I set the output of the last layer to 2, I have problems with the criterion inside the training loop.
class SimpleBinaryClassifier2(nn.Module):
def __init__(self,input_shape):
super().__init__()
self.fc1 = nn.Linear(input_shape,64)
self.fc2 = nn.Linear(64,32)
self.dropout = nn.Dropout(p=0.1)
self.fc3 = nn.Linear(32,2) # now it's 2
def forward(self,x):
x = torch.relu(self.fc1(x))
x = torch.relu(self.fc2(x))
x = self.dropout(x)
x = self.fc3(x)
return x
I use the CrossEntropy
model = SimpleBinaryClassifier2(input_shape=14)
model.to(device)
print(model)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss()
and replace y_pred_tag = torch.round(torch.sigmoid(y_pred)) with argmax(softmax)
def binary_acc2(y_pred, y_test):
y_pred_tag = torch.argmax(torch.softmax(y_pred), dim=1)
correct_results_sum = (y_pred_tag == y_test).sum().float()
acc = correct_results_sum/y_test.shape[0]
acc = torch.round(acc * 100)
return acc
Then the train loop rises an error:
model.train()
for e in range(1, EPOCHS+1):
epoch_loss = 0
epoch_acc = 0
for X_batch, y_batch in train_loader:
X_batch, y_batch = X_batch.to(device), y_batch.to(device)
optimizer.zero_grad()
y_pred = model(X_batch)
loss = criterion(y_pred, y_batch)
acc = binary_acc(y_pred, y_batch)
loss.backward()
optimizer.step()
epoch_loss += loss.item()
epoch_acc += acc.item()
​
print(f'Epoch {e+0:03}: | Loss: {epoch_loss/len(train_loader):.5f} | Acc: {epoch_acc/len(train_loader):.3f}')
The error is the following:
RuntimeError: "nll_loss_forward_reduce_cuda_kernel_2d_index" not implemented for 'Float'
I already looked up on this other post where the cause of that error was that the element was a Float and not a tensor, but in my case the datasets are tensors:
train_dataset = GenericDataset(torch.FloatTensor(X_train), torch.FloatTensor(y_train))
test_dataset = GenericDataset(torch.FloatTensor(X_test), torch.FloatTensor(y_test))
According to nn.CrossEntropyLoss description it expects target as long and not float, while in your train_dataset you clearly convert it to float

Error:Dimension out of range. Multi-class text classification with nn.CrossEntropyLoss()

I' a newbie in text classification and I'm trying to create a multiclass text classification. First this code worked like a charm for a binary classification but I'm trying to convert it into multiclass clasification. I switched loss function from nn.BCELoss() to nn.CrossEntropyLoss() and I removed the softmax because CrossEntropyLoss has that included but now I get an error IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
Please let me know if I need to add anything else
n_epochs = 3
best_valid_loss = float('inf')
t_loss=[]
v_loss=[]
for epoch in range(n_epochs):
train_loss, train_acc = train(model, train_iterator,optimizer, criterion)
valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
if valid_loss < best_valid_loss:
best_valid_loss = valid_loss
torch.save(model.state_dict(), 'saved_weights.pt')
t_loss.append(train_loss)
v_loss.append(valid_loss)
print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')
IndexError Traceback (most recent call last)
<ipython-input-130-88e45751bc44> in <module>()
6 for epoch in range(n_epochs):
7
----> 8 train_loss, train_acc = train(model, train_iterator,optimizer, criterion)
9
10 valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
3 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
2822 if size_average is not None or reduce is not None:
2823 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2824 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
2825
2826
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
Here is the full code to recreate:
import numpy as np
import pandas as pd
from pandas import DataFrame
import re
import spacy
import string
import matplotlib.pyplot as plt
import torch
import torchtext
from torchtext.legacy import data
from torchtext.legacy import datasets
#from torchtext import data
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
seed=50
torch.manual_seed(seed)
torch.backends.cudnn.deterministic = True
Data loader:
class DataFrameDataset(data.Dataset):
def __init__(self, df, fields, is_test=False, **kwargs):
examples = []
for i, row in df.iterrows():
label = row.target if not is_test else None
text = row.text
examples.append(data.Example.fromlist([text, label], fields))
super().__init__(examples, fields, **kwargs)
#staticmethod
def sort_key(ex):
return len(ex.text)
#classmethod
def splits(cls, fields, train_df, val_df=None, test_df=None, **kwargs):
train_data, val_data, test_data = (None, None, None)
data_field = fields
if train_df is not None:
train_data = cls(train_df.copy(), data_field, **kwargs)
if val_df is not None:
val_data = cls(val_df.copy(), data_field, **kwargs)
if test_df is not None:
test_data = cls(test_df.copy(), data_field, True, **kwargs)
return tuple(d for d in (train_data, val_data, test_data) if d is not None)
Data preparation
def data_preparation(train, valid, batch_size):
text=data.Field(tokenize='spacy', batch_first=True, include_lengths=True) #spacy je tokenizer
label=data.LabelField(dtype=torch.float, batch_first=True)
fields = [('text',text),('label', label)]
train_ds, valid_ds=DataFrameDataset.splits(fields, train_df=train, val_df=valid)
text.build_vocab(train_ds, min_freq=3, vectors="glove.6B.200d")
label.build_vocab(train_ds)
print("Size of TEXT vocabulary:",len(text.vocab))
print("Size of LABEL vocabulary:",len(label.vocab))
train_iterator, valid_iterator=data.BucketIterator.splits((train_ds,valid_ds),
batch_size=batch_size, sort_key= lambda x:len(x.text),
sort_within_batch=True,
device=device)
vocab_size=len(text.vocab)
return text, vocab_size, train_iterator, valid_iterator
&
text, vocab_size, train_iterator, valid_iterator=data_preparation(train_d, valid_d,batch_size=128)
Defining the classifier
class classifier(nn.Module):
def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers,
bidirectional, dropout):
super().__init__()
self.embedding = nn.Embedding(vocab_size, embedding_dim)
self.lstm = nn.LSTM(embedding_dim,
hidden_dim,
num_layers= n_layers,
dropout=dropout,
batch_first=True)
self.fc = nn.Linear(hidden_dim*2 , output_dim)
#self.softmax = nn.Softmax(dim=1)
def forward(self, text, text_lengths):
embedded = self.embedding(text)
packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths.cpu(),batch_first=True)
packed_output, (hidden, cell) = self.lstm(packed_embedded)
hidden = torch.cat((hidden[-2,:,:], hidden[-1,:,:]), dim = 1)
#dense_outputs=self.fc(hidden)
#outputs=self.softmax(dense_outputs)
outputs=self.fc(hidden)
return outputs
Parameters & defining model, loss function, optimizer...
size_of_vocab = vocab_size
embedding_dim = 50
num_hidden_nodes = 20
num_output_nodes = 1
num_layers = 3
bidirectional = True
dropout = 0.2
model = classifier(size_of_vocab, embedding_dim, num_hidden_nodes,num_output_nodes, num_layers,
bidirectional, dropout = dropout)
optimizer = torch.optim.Adam(model.parameters(), lr=0.002)
criterion = nn.CrossEntropyLoss()
model = model.to(device)
criterion = criterion.to(device)
def binary_accuracy(preds, y):
rounded_preds = torch.round(preds)
correct = (rounded_preds == y).float()
acc = correct.sum() / len(correct)
return acc
Model training and evaluating
def train(model, iterator,optimizer, criterion):
epoch_loss = 0
epoch_acc = 0
model.train()
for batch in iterator:
optimizer.zero_grad()
text, text_lengths = batch.text
predictions = model(text, text_lengths).squeeze()
loss = criterion(predictions, batch.label)
acc = binary_accuracy(predictions, batch.label)
loss.backward()
optimizer.step()
epoch_loss += loss.item()
epoch_acc += acc.item()
return epoch_loss / len(iterator), epoch_acc / len(iterator)
&
def evaluate(model, iterator, criterion):
epoch_loss = 0
epoch_acc = 0
model.eval()
with torch.no_grad():
for batch in iterator:
text, text_lengths = batch.text
predictions = model(text, text_lengths).squeeze()
loss = criterion(predictions, batch.label)
acc = binary_accuracy(predictions, batch.label)
epoch_loss += loss.item()
epoch_acc += acc.item()
return epoch_loss / len(iterator), epoch_acc / len(iterator)
&
n_epochs = 3
best_valid_loss = float('inf')
t_loss=[]
v_loss=[]
for epoch in range(n_epochs):
train_loss, train_acc = train(model, train_iterator,optimizer, criterion)
valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
if valid_loss < best_valid_loss:
best_valid_loss = valid_loss
torch.save(model.state_dict(), 'saved_weights.pt')
t_loss.append(train_loss)
v_loss.append(valid_loss)
print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')

Python(PyTorch): TypeError: string indices must be integers

I have written the following code to train a bert model on my dataset but when I execute it I get an error at the part where I implement tqdm. I have written the entire training code below with full description of the error. How to fix this?
Code
Model
TRANSFORMERS = {
"bert-multi-cased": (BertModel, BertTokenizer, "bert-base-uncased"),
}
class Transformer(nn.Module):
def __init__(self, model, num_classes=1):
"""
Constructor
Arguments:
model {string} -- Transformer to build the model on. Expects "camembert-base".
num_classes {int} -- Number of classes (default: {1})
"""
super().__init__()
self.name = model
model_class, tokenizer_class, pretrained_weights = TRANSFORMERS[model]
bert_config = BertConfig.from_json_file(MODEL_PATHS[model] + 'bert_config.json')
bert_config.output_hidden_states = True
self.transformer = BertModel(bert_config)
self.nb_features = self.transformer.pooler.dense.out_features
self.pooler = nn.Sequential(
nn.Linear(self.nb_features, self.nb_features),
nn.Tanh(),
)
self.logit = nn.Linear(self.nb_features, num_classes)
def forward(self, tokens):
"""
Usual torch forward function
Arguments:
tokens {torch tensor} -- Sentence tokens
Returns:
torch tensor -- Class logits
"""
_, _, hidden_states = self.transformer(
tokens, attention_mask=(tokens > 0).long()
)
hidden_states = hidden_states[-1][:, 0] # Use the representation of the first token of the last layer
ft = self.pooler(hidden_states)
return self.logit(ft)
Training
def fit(model, train_dataset, val_dataset, epochs=1, batch_size=8, warmup_prop=0, lr=5e-4):
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
optimizer = AdamW(model.parameters(), lr=lr)
num_warmup_steps = int(warmup_prop * epochs * len(train_loader))
num_training_steps = epochs * len(train_loader)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps)
loss_fct = nn.BCEWithLogitsLoss(reduction='mean').cuda()
for epoch in range(epochs):
model.train()
start_time = time.time()
optimizer.zero_grad()
avg_loss = 0
for step, (x, y_batch) in tqdm(enumerate(train_loader), total=len(train_loader)):
y_pred = model(x.to(device))
loss = loss_fct(y_pred.view(-1).float(), y_batch.float().to(device))
loss.backward()
avg_loss += loss.item() / len(train_loader)
xm.optimizer_step(optimizer, barrier=True)
#optimizer.step()
scheduler.step()
model.zero_grad()
optimizer.zero_grad()
model.eval()
preds = []
truths = []
avg_val_loss = 0.
with torch.no_grad():
for x, y_batch in tqdm(val_loader):
y_pred = model(x.to(device))
loss = loss_fct(y_pred.detach().view(-1).float(), y_batch.float().to(device))
avg_val_loss += loss.item() / len(val_loader)
probs = torch.sigmoid(y_pred).detach().cpu().numpy()
preds += list(probs.flatten())
truths += list(y_batch.numpy().flatten())
score = roc_auc_score(truths, preds)
dt = time.time() - start_time
lr = scheduler.get_last_lr()[0]
print(f'Epoch {epoch + 1}/{epochs} \t lr={lr:.1e} \t t={dt:.0f}s \t loss={avg_loss:.4f} \t val_loss={avg_val_loss:.4f} \t val_auc={score:.4f}')
Error
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<timed eval> in <module>
<ipython-input-19-e47eae808597> in fit(model, train_dataset, val_dataset, epochs, batch_size, warmup_prop, lr)
22 for step, (x, y_batch) in tqdm(enumerate(train_loader), total=len(train_loader)):
23
---> 24 y_pred = model(x.to(device))
25
26 loss = loss_fct(y_pred.view(-1).float(), y_batch.float().to(device))
/opt/conda/lib/python3.6/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
724 result = self._slow_forward(*input, **kwargs)
725 else:
--> 726 result = self.forward(*input, **kwargs)
727 for hook in itertools.chain(
728 _global_forward_hooks.values(),
<ipython-input-11-2002cc7ec843> in forward(self, tokens)
41 )
42
---> 43 hidden_states = hidden_states[-1][:, 0] # Use the representation of the first token of the last layer
44
45 ft = self.pooler(hidden_states)
TypeError: string indices must be integers
Your code is designed for an older version of the transformers library:
AttributeError: 'str' object has no attribute 'dim' in pytorch
As such you will need to either downgrade to version 3.0.0, or adapt the code to deal with the new-format output of bert.

Implementing a custom dataset with PyTorch

I'm attempting to modify this feedforward network taken from https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/feedforward_neural_network/main.py
to utilize my own dataset.
I define a custom dataset of two 1 dim arrays as input and two scalars the corresponding output :
x = torch.tensor([[5.5, 3,3,4] , [1 , 2,3,4], [9 , 2,3,4]])
print(x)
y = torch.tensor([1,2,3])
print(y)
import torch.utils.data as data_utils
my_train = data_utils.TensorDataset(x, y)
my_train_loader = data_utils.DataLoader(my_train, batch_size=50, shuffle=True)
I've updated the hyperparameters to match new input_size (2) & num_classes (3).
I've also changed images = images.reshape(-1, 28*28).to(device) to images = images.reshape(-1, 4).to(device)
As the training set is minimal I've changed the batch_size to 1.
Upon making these modifications I receive error when attempting to train :
RuntimeError Traceback (most recent call
last) in ()
51
52 # Forward pass
---> 53 outputs = model(images)
54 loss = criterion(outputs, labels)
55
/home/.local/lib/python3.6/site-packages/torch/nn/modules/module.py in call(self, *input, **kwargs)
489 result = self._slow_forward(*input, **kwargs)
490 else:
--> 491 result = self.forward(*input, **kwargs)
492 for hook in self._forward_hooks.values():
493 hook_result = hook(self, input, result)
in forward(self, x)
31
32 def forward(self, x):
---> 33 out = self.fc1(x)
34 out = self.relu(out)
35 out = self.fc2(out)
/home/.local/lib/python3.6/site-packages/torch/nn/modules/module.py in call(self, *input, **kwargs)
489 result = self._slow_forward(*input, **kwargs)
490 else:
--> 491 result = self.forward(*input, **kwargs)
492 for hook in self._forward_hooks.values():
493 hook_result = hook(self, input, result)
/home/.local/lib/python3.6/site-packages/torch/nn/modules/linear.py in forward(self, input)
53
54 def forward(self, input):
---> 55 return F.linear(input, self.weight, self.bias)
56
57 def extra_repr(self):
/home/.local/lib/python3.6/site-packages/torch/nn/functional.py
in linear(input, weight, bias)
990 if input.dim() == 2 and bias is not None:
991 # fused op is marginally faster
--> 992 return torch.addmm(bias, input, weight.t())
993
994 output = input.matmul(weight.t())
RuntimeError: size mismatch, m1: [3 x 4], m2: [2 x 3] at
/pytorch/aten/src/THC/generic/THCTensorMathBlas.cu:249
How to amend code to match expected dimensionality ? I'm unsure what code to change as I've changed all parameters that require updating ?
Source prior to changes :
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Hyper-parameters
input_size = 784
hidden_size = 500
num_classes = 10
num_epochs = 5
batch_size = 100
learning_rate = 0.001
# MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='../../data',
train=True,
transform=transforms.ToTensor(),
download=True)
test_dataset = torchvision.datasets.MNIST(root='../../data',
train=False,
transform=transforms.ToTensor())
# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
batch_size=batch_size,
shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
batch_size=batch_size,
shuffle=False)
# Fully connected neural network with one hidden layer
class NeuralNet(nn.Module):
def __init__(self, input_size, hidden_size, num_classes):
super(NeuralNet, self).__init__()
self.fc1 = nn.Linear(input_size, hidden_size)
self.relu = nn.ReLU()
self.fc2 = nn.Linear(hidden_size, num_classes)
def forward(self, x):
out = self.fc1(x)
out = self.relu(out)
out = self.fc2(out)
return out
model = NeuralNet(input_size, hidden_size, num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
for i, (images, labels) in enumerate(train_loader):
# Move tensors to the configured device
images = images.reshape(-1, 28*28).to(device)
labels = labels.to(device)
# Forward pass
outputs = model(images)
loss = criterion(outputs, labels)
# Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()
if (i+1) % 100 == 0:
print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
.format(epoch+1, num_epochs, i+1, total_step, loss.item()))
# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
correct = 0
total = 0
for images, labels in test_loader:
images = images.reshape(-1, 28*28).to(device)
labels = labels.to(device)
outputs = model(images)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))
# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')
Source post changes :
x = torch.tensor([[5.5, 3,3,4] , [1 , 2,3,4], [9 , 2,3,4]])
print(x)
y = torch.tensor([1,2,3])
print(y)
import torch.utils.data as data_utils
my_train = data_utils.TensorDataset(x, y)
my_train_loader = data_utils.DataLoader(my_train, batch_size=50, shuffle=True)
print(my_train)
print(my_train_loader)
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Hyper-parameters
input_size = 2
hidden_size = 3
num_classes = 3
num_epochs = 5
batch_size = 1
learning_rate = 0.001
# MNIST dataset
train_dataset = my_train
# Data loader
train_loader = my_train_loader
# Fully connected neural network with one hidden layer
class NeuralNet(nn.Module):
def __init__(self, input_size, hidden_size, num_classes):
super(NeuralNet, self).__init__()
self.fc1 = nn.Linear(input_size, hidden_size)
self.relu = nn.ReLU()
self.fc2 = nn.Linear(hidden_size, num_classes)
def forward(self, x):
out = self.fc1(x)
out = self.relu(out)
out = self.fc2(out)
return out
model = NeuralNet(input_size, hidden_size, num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
for i, (images, labels) in enumerate(train_loader):
# Move tensors to the configured device
images = images.reshape(-1, 4).to(device)
labels = labels.to(device)
# Forward pass
outputs = model(images)
loss = criterion(outputs, labels)
# Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()
if (i+1) % 100 == 0:
print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
.format(epoch+1, num_epochs, i+1, total_step, loss.item()))
# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
correct = 0
total = 0
for images, labels in test_loader:
images = images.reshape(-1, 4).to(device)
labels = labels.to(device)
outputs = model(images)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))
# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')
You need to change input_size to 4 (2*2), and not 2 as your modified code currently shows.
If you compare it to the original MNIST example, you'll see that input_size is set to 784 (28*28) and not just to 28.

Categories

Resources