I'm trying to implement the Laplace Posteriori Approximation on the last layer for the classification results obtained by BERT model. I get an error regarding input size, and after I fix it by extracting just embeddings and class labels from BERT to feed them into Laplace, I get another bunch of errors regarding input dimensions that I don't know how to debug.
As this is something I didn't find on the internet, and includes relatively new libraries, I will post here just the first error I got, code that might help in debugging and useful links.
I will update post if needed.
Of course, if someone knows how to implement Laplace Posteriori Approximation with BERT in some other library like Scikit or Trax, it would be helpful. Also, some other Transformer classification model with some other confidence approximation will be useful for me. Any help is appreciated!
Code:
# Import
import pandas as pd
import torch
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
from torch import nn
from transformers import BertTokenizer
from transformers import BertModel
from transformers import BertForSequenceClassification
from sklearn.model_selection import train_test_split
import time
import os
#Toy Data
data_a_b_c = ["""category a. This is category a. In category a we talk about animals.
This category includes lions, fish, tigers, birds, elephants, mouses, dogs, cats, and all other animals."""] * 60 \
+ ["""category b. This is category b. In category b we talk about people. This category members are
Abraham Maslow, John Lennon, Drazen Petrovic, Nikola Tesla, Slavoljub Penkala, Nenad Bakic and Larry Page."""] * 60 \
+ ["""category c. This is category c. Category c is dedicated to car brands like Lamborgini, Rimac-Buggati, BMW, Mercedes,
Honda, Opel, Wolkswagen, and etc."""] * 60
label_0_1_2 = [0] * 60 + [1] * 60 + [2] * 60
d = {'text': data_a_b_c, 'labels': label_0_1_2}
df = pd.DataFrame(data=d)
print(df.head(3))
print(df.tail(3))
print(df.info())
# Parameters
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
batch_size = 2
learning_rate = 3e-4
epochs = 3
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
labels = pd.Series(df.labels.values).to_dict()
num_classes = 3
print(f'Tokenizer: {tokenizer}, Batch size:{batch_size}, Learning rate:{learning_rate}, Epochs:{epochs}')
print('Device: ', device)
print('Number of possible classes: ', num_classes)
# Model Architecture
class TransformerModel(nn.Module):
def __init__(self, num_classes, dropout=0.5):
super(TransformerModel, self).__init__()
self.bert = BertModel.from_pretrained('bert-base-multilingual-cased')
self.dropout = nn.Dropout(dropout)
self.linear = nn.Linear(768, num_classes)
self.relu = nn.ReLU()
def forward(self, input_id, mask):
_, pooled_output = self.bert(input_ids=input_id, attention_mask=mask, return_dict=False)
dropout_output = self.dropout(pooled_output)
linear_output = self.linear(dropout_output)
final_layer = self.relu(linear_output)
return final_layer
# Prepare Data Function
def prepare_data(data, labels):
texts = tokenizer(data, padding='max_length', max_length=512, truncation=True, return_tensors="pt")
input_ids = texts['input_ids']
attention_mask = texts['attention_mask']
train_dataset = TensorDataset(input_ids, attention_mask, torch.LongTensor(labels))
dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
return dataloader
#Run Training Function
def run_training(train_dataloader, val_dataloader, epochs=epochs, lr=learning_rate):
def train(dataloader):
model.train()
total_acc, total_count = 0, 0
log_interval = 128
start_time = time.time()
for idx, (input_id, mask, label) in enumerate(train_dataloader):
# print(idx)
mask = mask.to(device)
input_id = input_id.to(device)
label = label.type(torch.LongTensor).to(device)
output = model(input_id, mask)
optimizer.zero_grad()
loss = criterion(output, label)
loss.backward()
# torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
optimizer.step()
total_acc += (output.argmax(1) == label).sum().item()
total_count += label.size(0)
if idx % log_interval == 0 and idx > 0:
elapsed = time.time() - start_time
print('| epoch {:3d} | {:5d}/{:5d} batches '
'| accuracy {:8.3f}'.format(epoch, idx, len(dataloader),
total_acc / total_count))
total_acc, total_count = 0, 0
start_time = time.time()
def evaluate(dataloader):
model.eval()
total_acc, total_count = 0, 0
with torch.no_grad():
for idx, (input_id, mask, label) in enumerate(dataloader):
mask = mask.to(device)
input_id = input_id.to(device)
label = label.to(device)
output = model(input_id, mask)
total_acc += (output.argmax(1) == label).sum().item()
total_count += label.size(0)
return total_acc / total_count
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.1)
cuda = torch.cuda.is_available()
device = torch.device("cuda" if cuda else "cpu")
device = 'cuda'
model.to(device)
total_accu = None
for epoch in range(1, epochs + 1):
epoch_start_time = time.time()
train(train_dataloader)
accu_val = evaluate(val_dataloader)
if total_accu is not None and total_accu > accu_val:
scheduler.step()
else:
total_accu = accu_val
print('-' * 59)
print('| end of epoch {:3d} | time: {:5.2f}s | '
'valid accuracy {:8.3f} '.format(epoch,
time.time() - epoch_start_time,
accu_val))
print('-' * 59)
# Data Split And Preparation
X_train, X_test, y_train, y_test = train_test_split(df.text.values.tolist(), df.labels.values.tolist(), test_size=0.2, random_state=2)
train_dataloader = prepare_data(X_train, y_train)
val_dataloader = prepare_data(X_test, y_test)
# Run The Model
model = TransformerModel(num_classes)
run_training(train_dataloader, val_dataloader)
print('finished')
# Save And Load The Model (if needed)
PATH = ".../Torch_BERT_model"
torch.save(model, os.path.join(PATH, "Toy_Data_BERT.pth"))
model = torch.load(os.path.join(PATH, "Toy_Data_BERT.pth"))
print(model)
# Laplace
from laplace import Laplace
la = Laplace(model, 'classification', subset_of_weights='last_layer', hessian_structure='full')
la.fit(train_dataloader)
Error I get:
--------------------------------------------------------------------------- ValueError Traceback (most recent call
last) ~\AppData\Local\Temp\ipykernel_7144\3779742208.py in <cell line:
2>()
1 la = Laplace(model, 'classification', subset_of_weights='last_layer', hessian_structure='full')
----> 2 la.fit(train_dataloader)
~\anaconda3\lib\site-packages\laplace\lllaplace.py in fit(self,
train_loader, override)
98
99 if self.model.last_layer is None:
--> 100 X, _ = next(iter(train_loader))
101 with torch.no_grad():
102 try:
ValueError: too many values to unpack (expected 2)
Useful link for Laplace implementation with examples:
https://aleximmer.github.io/Laplace/#full-example-optimization-of-the-marginal-likelihood-and-prediction
Code that might help in debugging:
for x in train_dataloader:
print("The length of batch is:", len(x))
print()
print("The batch looks like:", x)
print()
print("The length of the first element in the batch is:") #embedding
print(len(x[0]))
print("The length of the second element in the batch is:") #1 if place is filled with word, 0 if it's empty?
print(len(x[1]))
print("The length of the third element in the batch is:") #category
print(len(x[2]))
print()
print("The lengths of the first tensor and second tensor in the first element in the batch is:")
print(len(x[0][0]), len(x[0][1])) # = max_length (512)
print("The lengths of the first tensor and second tensor in the second element in the batch is:")
print(len(x[1][0]), len(x[1][1])) # = max_length (512)
print()
print()
The laplace library expects that the dataloader returns two parameters (X,y) and that the model requires exactly one argument to make its prediction (code). But your model forward pass requires two arguments, namely input_id and mask, and your dataloader returns three arguments input_id, mask, and labels.
There are several ways to work around this limitation (e.g. return a dict with input_ids and attention_mask). The way that requires the least understanding of the internals of the laplace library is to generate the attention mask at runtime in the forward pass (not great for the performance):
class TransformerModel(nn.Module):
def __init__(self, num_classes, pad_id, dropout=0.5):
super(TransformerModel, self).__init__()
self.bert = BertModel.from_pretrained('bert-base-multilingual-cased')
self.dropout = nn.Dropout(dropout)
self.linear = nn.Linear(768, num_classes)
self.relu = nn.ReLU()
self.pad_id = pad_id
def forward(self, input_id):
mask = (input_ids!=self.pad_id).type(input_ids.dtype)
_, pooled_output = self.bert(input_ids=input_id, attention_mask=mask, return_dict=False)
dropout_output = self.dropout(pooled_output)
linear_output = self.linear(dropout_output)
final_layer = self.relu(linear_output)
return final_layer
model = TransformerModel(num_classes, tokenizer.pad_token_id)
Related
I'm trying to reconstruct this paper about hierarchical autoencoder for paragraphs.
The idea is: Break a paragraph into sentences, then encode each sentence using an LSTM, and then using these encoding as an input for another LSTM that encode the entire paragraph.
Then, using a mirror decoder, decode the encoded paragraph using an LSTM into multiple sentences, and then use another LSTM to decode each word, with a linear layer on top and predicts the word.
The objective is to try to predict the original paragraph.
I've done some preprocessing, and right now I save each paragraph as a tensor of (maxSentence,maxWordsPerSentence,VocabSize), using one hot encoding.
My problem is, there model is not learning. The loss stays exactly the same and it doesn't seem as anything is happening.. I wasn't sure on how to calculate the loss (I've ran a batch all together and decoded it into multiple paragraphs, and then calculated the loss against the entire batch predictions, my train function is added below. I don't know if that is the problem (maybe I should calculate loss sentence by sentence instead the entire paragraph?) or maybe I have a problem in my model.
Encoder code:
class Encoder(nn.Module):
def __init__(self, input_dim, emb_dim, enc_hid_dim, dec_hid_dim, dropout):
super().__init__()
#self.embedding = nn.Embedding(input_dim, emb_dim)
self.rnn_sent = nn.GRU(input_dim, enc_hid_dim, bidirectional = True)
self.rnn_par = nn.GRU(enc_hid_dim*2, dec_hid_dim, bidirectional = True)
def forward(self, src):
outputs, hidden = self.rnn_sent(src[:,0,0])
total_out = outputs.unsqueeze(0).permute(1,0,2)
for i in range(1,src.shape[1]):
for j in range(src.shape[2]):
outputs, hidden = self.rnn_sent(src[:,i,j],hidden)
total_out = torch.cat((total_out,outputs.unsqueeze(0).permute(1,0,2)),dim=1)
outputs_par, hidden_par = self.rnn_par(total_out[:,0])
for i in range(total_out.shape[1]):
outputs_par, hidden_par = self.rnn_par(total_out[:,i],hidden_par)
return outputs_par, hidden_par
Decoder code:
class Decoder(nn.Module):
def __init__(self, output_dim, emb_dim, enc_hid_dim, dec_hid_dim, dropout, attention):
super().__init__()
self.output_dim = output_dim
self.attention = attention
#self.embedding = nn.Embedding(output_dim, emb_dim)
self.rnn_par = nn.GRU((enc_hid_dim * 2), dec_hid_dim*2)
self.rnn_sen = nn.GRU(output_dim, dec_hid_dim*2)
self.fc_out = nn.Linear(dec_hid_dim*2, output_dim)
self.dropout = nn.Dropout(dropout)
def forward(self, input, hidden, encoder_outputs):
output, hidden = self.rnn_par(encoder_outputs)
all_par = output.unsqueeze(0).permute(1,0,2)
for i in range(1,max_par_len):
output,hidden = self.rnn_par(output,hidden)
all_par = torch.cat((all_par,output.unsqueeze(0).permute(1,0,2)),dim=1)
for i in range(max_par_len):
output_arg = self.fc_out(all_par[:,i])
#output_argmax = F.one_hot(output_arg.argmax(dim = 1), self.output_dim).to(torch.float)
output_argmax = torch.softmax(output_arg,dim=1)
output_sen, hidden_sen = self.rnn_sen(output_argmax)
all_par_sen = output_argmax.unsqueeze(0).permute(1,0,2)
for j in range(max_sen_len - 1):
output_sen,hidden_sen = self.rnn_sen(output_argmax,hidden_sen)
output_arg = self.fc_out(output_sen)
output_argmax = torch.softmax(output_arg,dim=1)
all_par_sen = torch.cat((all_par_sen,output_argmax.unsqueeze(0).permute(1,0,2)),dim=1)
if i == 0:
all_doc = all_par_sen.unsqueeze(0).permute(1,0,2,3)
else:
all_doc = torch.cat((all_doc,all_par_sen.unsqueeze(0).permute(1,0,2,3)),dim=1)
i+=1
return all_doc ,hidden_sen
And my train function:
def train(model, iterator, optimizer, criterion, clip, epoch):
model.train()
epoch_loss = 0
data = tqdm(iterator)
for i, batch in enumerate(data):
src = batch[0].to(device)#.to(torch.long)#.reshape(batch[0].shape[0],-1)
trg = batch[0].to(device)#.to(torch.long)#.reshape(batch[0].shape[0],-1)
target = torch.argmax(trg,dim=3).view(-1)
print(target)
optimizer.zero_grad()
output = model(src, trg).view(-1,OUTPUT_DIM)
loss = criterion(output, target)
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
optimizer.step()
epoch_loss += loss.item()
N_EPOCHS = 20
CLIP = 1
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss(ignore_index = vocabulary['<pad>'])
best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):
start_time = time.time()
train_loader, valid_loader = data_loaders['train_loader'], data_loaders['test_loader']
train_loss = train(model, train_loader, optimizer, criterion, CLIP,f'{epoch+1}/{N_EPOCHS}')
#valid_loss = evaluate(model, valid_loader, criterion)
end_time = time.time()
epoch_mins, epoch_secs = epoch_time(start_time, end_time)
print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
I'm new to Neural Networks and I'm trying to train a CNN model on a custom dataset (cats and dogs images in a single directory). So I guess I do the very usual stuff here which is in the most tutorials, but just in case I will give here my full code.
First I generate .csv file to be processed:
import os
import torch
device = ("cuda" if torch.cuda.is_available() else "cpu")
train_df = pd.DataFrame(columns=["img_name","label"])
train_df["img_name"] = os.listdir("train/")
for idx, i in enumerate(os.listdir("train/")):
if "cat" in i:
train_df["label"][idx] = 0
if "dog" in i:
train_df["label"][idx] = 1
train_df.to_csv (r'train_csv.csv', index = False, header=True)
Then I prepare the dataset:
from torch.utils.data import Dataset
import pandas as pd
import os
from PIL import Image
import torch
class CatsAndDogsDataset(Dataset):
def __init__(self, root_dir, annotation_file, transform=None):
self.root_dir = root_dir
self.annotations = pd.read_csv(annotation_file)
self.transform = transform
def __len__(self):
return len(self.annotations)
def __getitem__(self, index):
img_id = self.annotations.iloc[index, 0]
img = Image.open(os.path.join(self.root_dir, img_id)).convert("RGB")
y_label = torch.tensor(float(self.annotations.iloc[index, 1]))
if self.transform is not None:
img = self.transform(img)
return (img, y_label)
This is my model:
import torch.nn as nn
import torchvision.models as models
class CNN(nn.Module):
def __init__(self, train_CNN=False, num_classes=1):
super(CNN, self).__init__()
self.train_CNN = train_CNN
self.inception = models.inception_v3(pretrained=True, aux_logits=False)
self.inception.fc = nn.Linear(self.inception.fc.in_features, num_classes)
self.relu = nn.ReLU()
self.dropout = nn.Dropout(0.5)
self.sigmoid = nn.Sigmoid()
def forward(self, images):
features = self.inception(images)
return self.sigmoid(self.dropout(self.relu(features))).squeeze(1)
This is my hyper-params, transformations and dataloaders:
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
num_epochs = 10
learning_rate = 0.00001
train_CNN = False
batch_size = 32
shuffle = True
pin_memory = True
num_workers = 0
transform = transforms.Compose(
[
transforms.Resize((356, 356)),
transforms.RandomCrop((299, 299)),
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]
)
dataset = CatsAndDogsDataset("train","train_csv.csv",transform=transform)
print(len(dataset))
train_set, validation_set = torch.utils.data.random_split(dataset,[162,40])
train_loader = DataLoader(dataset=train_set, shuffle=shuffle, batch_size=batch_size,num_workers=num_workers,pin_memory=pin_memory)
validation_loader = DataLoader(dataset=validation_set, shuffle=shuffle, batch_size=batch_size,num_workers=num_workers, pin_memory=pin_memory)
model = CNN().to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for name, param in model.inception.named_parameters():
if "fc.weight" in name or "fc.bias" in name:
param.requires_grad = True
else:
param.requires_grad = train_CNN
and accuracy check:
def check_accuracy(loader, model):
if loader == train_loader:
print("Checking accuracy on training data")
else:
print("Checking accuracy on validation data")
num_correct = 0
num_samples = 0
model.eval()
with torch.no_grad():
for x, y in loader:
x = x.to(device=device)
y = y.to(device=device)
scores = model(x)
predictions = torch.tensor([1.0 if i >= 0.5 else 0.0 for i in scores]).to(device)
num_correct += (predictions == y).sum()
num_samples += predictions.size(0)
print(
f"Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}"
)
model.train()
return f"{float(num_correct)/float(num_samples)*100:.2f}"
And this is my training function:
from tqdm import tqdm
def train():
model.train()
for epoch in range(num_epochs):
loop = tqdm(train_loader, total = len(train_loader), leave = True)
if epoch % 2 == 0:
loop.set_postfix(val_acc = check_accuracy(validation_loader, model))
for imgs, labels in loop:
imgs = imgs.to(device)
labels = labels.to(device)
outputs = model(imgs)
loss = criterion(outputs, labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()
loop.set_description(f"Epoch [{epoch}/{num_epochs}]")
loop.set_postfix(loss = loss.item())
if __name__ == "__main__":
train()
0%| | 0/6 [00:00<?, ?it/s]Checking accuracy on validation data
0%| | 0/6 [01:13<?, ?it/s, val_acc=60.00]Got 24 / 40 with accuracy 60.00
Epoch [0/10]: 100%|██████████| 6/6 [06:02<00:00, 60.39s/it, loss=0.693]
Epoch [1/10]: 100%|██████████| 6/6 [04:49<00:00, 48.23s/it, loss=0.693]
...
Epoch [8/10]: 100%|██████████| 6/6 [06:07<00:00, 61.29s/it, loss=0.693]
Epoch [9/10]: 100%|██████████| 6/6 [04:55<00:00, 49.19s/it, loss=0.781]
The model gets trained fine but when I try to use it for prediction I get different results each time I run this last piece in my Jupyter Notebooks:
model.eval()
img = Image.open('train/cat.22.png').convert("RGB")
img_t = transform(img)
batch_t = torch.unsqueeze(img_t, 0)
out = model(batch_t)
print(out)
tensor([0.5276], grad_fn=)
tensor([0.5000], grad_fn=)
tensor([0.5064], grad_fn=)
etc. Each time different result for the same image. Is this normal? Why this is happening?
I don't see you loading your trained model. This means every time you initialize the CNN module, the inception.fc layer will get initialized with random weights, this is most probably the reason why you are getting different results on each inference.
Edit: You have a random transform in your transformation pipeline, namely RandomCrop.
According to this answer on the use of model.eval(), I believe you might want to ensure that you have the lower half of the code cell wrapped in a with torch.no_grad(): context. I think it may still be learning/updating parameters unless inside that context.
I have trained a CNN model in PyTorch to detect skin diseases in 6 different classes. My model came out with an accuracy of 92% and I saved it in a .pth file. I wish to use this model for predictions but I don't know how to do so. If anyone can aid me in the necessary steps, I will be grateful.
I have tried just taking the image input straight from the folder, resizing it, and then running it through the model for predictions. The error I face is a ModuleAttributeAError which says there is no attribute named predict. Now I do not understand where I went wrong and I know this is a simple task for most but I was hoping for some guidance in this regard. The dataset I used is the Skin Cancer MNIST: HAM10000 dataset from Kaggle and trained it on ResNet18. If anyone has any pointers on fine-tuning the model, I would greatly appreciate it.
TLDR: I get an error called ModuleAttributeError that says the 'ResNet' module has no attribute 'predict'.
The image is preprocessed here as follows:
import os, cv2,itertools
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
from tqdm import tqdm
from glob import glob
from PIL import Image
# pytorch libraries
import torch
from torch import optim,nn
from torch.autograd import Variable
from torch.utils.data import DataLoader,Dataset
from torchvision import models,transforms
# sklearn libraries
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
np.random.seed(10)
torch.manual_seed(10)
torch.cuda.manual_seed(10)
print(os.listdir("/content/drive/My Drive/input"))
from google.colab import drive
drive.mount('/content/drive')
"""**Data analysis and preprocessing**"""
data_dir = '/content/drive/My Drive/input'
all_image_path = glob(os.path.join(data_dir, '*', '*.jpg'))
imageid_path_dict = {os.path.splitext(os.path.basename(x))[0]: x for x in all_image_path}
lesion_type_dict = {
'nv': 'Melanocytic nevi',
'mel': 'Melanoma',
'bkl': 'Benign keratosis-like lesions ',
'bcc': 'Basal cell carcinoma',
'akiec': 'Actinic keratoses',
'vasc': 'Vascular lesions',
'df': 'Dermatofibroma'
}
def compute_img_mean_std(image_paths):
"""
computing the mean and std of three channel on the whole dataset,
first we should normalize the image from 0-255 to 0-1
"""
img_h, img_w = 224, 224
imgs = []
means, stdevs = [], []
for i in tqdm(range(len(image_paths))):
img = cv2.imread(image_paths[i])
img = cv2.resize(img, (img_h, img_w))
imgs.append(img)
imgs = np.stack(imgs, axis=3)
print(imgs.shape)
imgs = imgs.astype(np.float32) / 255.
for i in range(3):
pixels = imgs[:, :, i, :].ravel() # resize to one row
means.append(np.mean(pixels))
stdevs.append(np.std(pixels))
means.reverse() # BGR --> RGB
stdevs.reverse()
print("normMean = {}".format(means))
print("normStd = {}".format(stdevs))
return means,stdevs
# norm_mean,norm_std = compute_img_mean_std(all_image_path)
norm_mean = (0.763035, 0.54564625, 0.5700399)
norm_std = (0.1409281, 0.15261264, 0.16997051)
df_original = pd.read_csv(os.path.join(data_dir, 'HAM10000_metadata.csv'))
df_original['path'] = df_original['image_id'].map(imageid_path_dict.get)
df_original['cell_type'] = df_original['dx'].map(lesion_type_dict.get)
df_original['cell_type_idx'] = pd.Categorical(df_original['cell_type']).codes
df_original.head()
# this will tell us how many images are associated with each lesion_id
df_undup = df_original.groupby('lesion_id').count()
# now we filter out lesion_id's that have only one image associated with it
df_undup = df_undup[df_undup['image_id'] == 1]
df_undup.reset_index(inplace=True)
df_undup.head()
# here we identify lesion_id's that have duplicate images and those that have only one image.
def get_duplicates(x):
unique_list = list(df_undup['lesion_id'])
if x in unique_list:
return 'unduplicated'
else:
return 'duplicated'
# create a new colum that is a copy of the lesion_id column
df_original['duplicates'] = df_original['lesion_id']
# apply the function to this new column
df_original['duplicates'] = df_original['duplicates'].apply(get_duplicates)
df_original.head()
df_original['duplicates'].value_counts()
# now we filter out images that don't have duplicates
df_undup = df_original[df_original['duplicates'] == 'unduplicated']
df_undup.shape
# now we create a val set using df because we are sure that none of these images have augmented duplicates in the train set
y = df_undup['cell_type_idx']
_, df_val = train_test_split(df_undup, test_size=0.2, random_state=101, stratify=y)
df_val.shape
df_val['cell_type_idx'].value_counts()
# This set will be df_original excluding all rows that are in the val set
# This function identifies if an image is part of the train or val set.
def get_val_rows(x):
# create a list of all the lesion_id's in the val set
val_list = list(df_val['image_id'])
if str(x) in val_list:
return 'val'
else:
return 'train'
# identify train and val rows
# create a new colum that is a copy of the image_id column
df_original['train_or_val'] = df_original['image_id']
# apply the function to this new column
df_original['train_or_val'] = df_original['train_or_val'].apply(get_val_rows)
# filter out train rows
df_train = df_original[df_original['train_or_val'] == 'train']
print(len(df_train))
print(len(df_val))
df_train['cell_type_idx'].value_counts()
df_val['cell_type'].value_counts()
# Copy fewer class to balance the number of 7 classes
data_aug_rate = [15,10,5,50,0,40,5]
for i in range(7):
if data_aug_rate[i]:
df_train=df_train.append([df_train.loc[df_train['cell_type_idx'] == i,:]]*(data_aug_rate[i]-1), ignore_index=True)
df_train['cell_type'].value_counts()
# # We can split the test set again in a validation set and a true test set:
# df_val, df_test = train_test_split(df_val, test_size=0.5)
df_train = df_train.reset_index()
df_val = df_val.reset_index()
# df_test = df_test.reset_index()
Here is where I build the model:
# feature_extract is a boolean that defines if we are finetuning or feature extracting.
# If feature_extract = False, the model is finetuned and all model parameters are updated.
# If feature_extract = True, only the last layer parameters are updated, the others remain fixed.
def set_parameter_requires_grad(model, feature_extracting):
if feature_extracting:
for param in model.parameters():
param.requires_grad = False
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
# Initialize these variables which will be set in this if statement. Each of these
# variables is model specific.
model_ft = None
input_size = 0
if model_name == "resnet":
""" Resnet18, resnet34, resnet50, resnet101
"""
model_ft = models.resnet18(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, num_classes)
input_size = 224
elif model_name == "vgg":
""" VGG11_bn
"""
model_ft = models.vgg11_bn(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.classifier[6].in_features
model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
input_size = 224
elif model_name == "densenet":
""" Densenet121
"""
model_ft = models.densenet121(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.classifier.in_features
model_ft.classifier = nn.Linear(num_ftrs, num_classes)
input_size = 224
elif model_name == "inception":
""" Inception v3
"""
model_ft = models.inception_v3(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
# Handle the auxilary net
num_ftrs = model_ft.AuxLogits.fc.in_features
model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
# Handle the primary net
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs,num_classes)
input_size = 299
else:
print("Invalid model name, exiting...")
exit()
return model_ft, input_size
# resnet,vgg,densenet,inception
model_name = 'resnet'
num_classes = 7
feature_extract = False
# Initialize the model for this run
model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True)
# Define the device:
device = torch.device('cuda:0')
# Put the model on the device:
model = model_ft.to(device)
# norm_mean = (0.49139968, 0.48215827, 0.44653124)
# norm_std = (0.24703233, 0.24348505, 0.26158768)
# define the transformation of the train images.
train_transform = transforms.Compose([transforms.Resize((input_size,input_size)),transforms.RandomHorizontalFlip(),
transforms.RandomVerticalFlip(),transforms.RandomRotation(20),
transforms.ColorJitter(brightness=0.1, contrast=0.1, hue=0.1),
transforms.ToTensor(), transforms.Normalize(norm_mean, norm_std)])
# define the transformation of the val images.
val_transform = transforms.Compose([transforms.Resize((input_size,input_size)), transforms.ToTensor(),
transforms.Normalize(norm_mean, norm_std)])
# Define a pytorch dataloader for this dataset
class HAM10000(Dataset):
def __init__(self, df, transform=None):
self.df = df
self.transform = transform
def __len__(self):
return len(self.df)
def __getitem__(self, index):
# Load data and get label
X = Image.open(self.df['path'][index])
y = torch.tensor(int(self.df['cell_type_idx'][index]))
if self.transform:
X = self.transform(X)
return X, y
# Define the training set using the table train_df and using our defined transitions (train_transform)
training_set = HAM10000(df_train, transform=train_transform)
train_loader = DataLoader(training_set, batch_size=64, shuffle=True, num_workers=4)
# Same for the validation set:
validation_set = HAM10000(df_val, transform=train_transform)
val_loader = DataLoader(validation_set, batch_size=64, shuffle=False, num_workers=4)
# we use Adam optimizer, use cross entropy loss as our loss function
optimizer = optim.Adam(model.parameters(), lr=1e-5)
criterion = nn.CrossEntropyLoss().to(device)
Lastly, is the training process with a prediction function:
# this function is used during training process, to calculation the loss and accuracy
class AverageMeter(object):
def __init__(self):
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
total_loss_train, total_acc_train = [],[]
def train(train_loader, model, criterion, optimizer, epoch):
model.train()
train_loss = AverageMeter()
train_acc = AverageMeter()
curr_iter = (epoch - 1) * len(train_loader)
for i, data in enumerate(train_loader):
images, labels = data
N = images.size(0)
# print('image shape:',images.size(0), 'label shape',labels.size(0))
images = Variable(images).to(device)
labels = Variable(labels).to(device)
optimizer.zero_grad()
outputs = model(images)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
prediction = outputs.max(1, keepdim=True)[1]
train_acc.update(prediction.eq(labels.view_as(prediction)).sum().item()/N)
train_loss.update(loss.item())
curr_iter += 1
if (i + 1) % 100 == 0:
print('[epoch %d], [iter %d / %d], [train loss %.5f], [train acc %.5f]' % (
epoch, i + 1, len(train_loader), train_loss.avg, train_acc.avg))
total_loss_train.append(train_loss.avg)
total_acc_train.append(train_acc.avg)
return train_loss.avg, train_acc.avg
def validate(val_loader, model, criterion, optimizer, epoch):
model.eval()
val_loss = AverageMeter()
val_acc = AverageMeter()
with torch.no_grad():
for i, data in enumerate(val_loader):
images, labels = data
N = images.size(0)
images = Variable(images).to(device)
labels = Variable(labels).to(device)
outputs = model(images)
prediction = outputs.max(1, keepdim=True)[1]
val_acc.update(prediction.eq(labels.view_as(prediction)).sum().item()/N)
val_loss.update(criterion(outputs, labels).item())
print('------------------------------------------------------------')
print('[epoch %d], [val loss %.5f], [val acc %.5f]' % (epoch, val_loss.avg, val_acc.avg))
print('------------------------------------------------------------')
return val_loss.avg, val_acc.avg
import cv2
from PIL import Image, ImageOps
import numpy as np
model = model_ft
model.load_state_dict(torch.load("/content/drive/MyDrive/input/trainbest.pth"))
model.eval()
def import_and_predict(image_data, model):
size = (224, 224)
image = ImageOps.fit(image_data, size, Image.ANTIALIAS)
img = np.asarray(image)
image_reshape = img[np.newaxis,...]
prediction = model.predict(img_reshape)
return prediction
image = Image.open('/content/0365-0596-abd-88-05-0712-gf03.jpg')
# st.image(image, use_column_width = True)
predictions = import_and_predict(image, model)
class_names = ["Melanocytic nevi", "dermatofibroma", "Benign keratosis-like lesions", "Basal cell carcinoma", "Actinic keratoses", "Vascular lesions", "Dermatofibroma"]
string = "It is: " + class_names[np.argmax(predictions)]
print(string)
Here is the error that comes immediately after this is executed.
---------------------------------------------------------------------------
ModuleAttributeError Traceback (most recent call last)
<ipython-input-219-d563271b78c6> in <module>()
32 image = Image.open('/content/0365-0596-abd-88-05-0712-gf03.jpg')
33 # st.image(image, use_column_width = True)
---> 34 predictions = import_and_predict(image, model)
35 class_names = ["Melanocytic nevi", "dermatofibroma", "Benign keratosis-like lesions", "Basal cell carcinoma", "Actinic keratoses", "Vascular lesions", "Dermatofibroma"]
36 string = "It is: " + class_names[np.argmax(predictions)]
1 frames
<ipython-input-219-d563271b78c6> in import_and_predict(image_data, model)
27 img = np.asarray(image)
28 image_reshape = img[np.newaxis,...]
---> 29 prediction = model.predict(img_reshape)
30 return prediction
31
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __getattr__(self, name)
777 return modules[name]
778 raise ModuleAttributeError("'{}' object has no attribute '{}'".format(
--> 779 type(self).__name__, name))
780
781 def __setattr__(self, name: str, value: Union[Tensor, 'Module']) -> None:
ModuleAttributeError: 'ResNet' object has no attribute 'predict'
If anyone can help me fix the issue and get this to work as a classifier for skin diseases, I would be ever so thankful.
nn.Module don't have a predict function, just call the object for inference:
prediction = model(img_reshape)
This will call the object's __call__ function which, in turns, callsthe model forward function.
I'm working on a project (my first AI project) and I've hit a bit of a wall. When performing testing on my trained classifier, it's predicting that everything is of class 1. Now the data set is heavily biased to class 1; however, I've implemented weights to compensate for this. Just concerned that I've coded this wrong or missed something. Please let me know if you see anything.
This is the setup and training
batchSize = 50
trainingLoad = DataLoader(trainingData, shuffle = True, batch_size = batchSize, drop_last=True)
validationLoad = DataLoader(validationData, shuffle = True, batch_size = batchSize, drop_last=True)
testingLoad = DataLoader(testingData, shuffle = True, batch_size = batchSize, drop_last=True)
vocabularySize = len(wordToNoDict)
output = 3
embedding = 400
hiddenDimension = 524
layers = 4
classifierModel = Classifier.HateSpeechDetector(device, vocabularySize, output, embedding, hiddenDimension, layers)
classifierModel.to(device)
path = 'Program\data\state_dict2.pt'
weights = torch.tensor([1203/1203, 1203/15389, 1203/3407])
criterion = nn.CrossEntropyLoss(weight = weights)
trainClassifier(classifierModel, trainingLoad, validationLoad, device, batchSize, criterion, path)
test(classifierModel, path, testingLoad, batchSize, device, criterion)
def trainClassifier(model, trainingData, validationData, device, batchSize, criterion, path):
epochs = 5
counter = 0
testWithValiEvery = 10
clip = 5
valid_loss_min = np.Inf
lr=0.0001
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
model.train()
for i in range(epochs):
h = model.init_hidden(batchSize, device)
for inputs, labels in trainingData:
h = tuple([e.data for e in h])
inputs, labels = inputs.to(device), labels.to(device)
model.zero_grad()
output, h = model(inputs, h)
loss = criterion(output.squeeze(), labels.long())
loss.backward()
nn.utils.clip_grad_norm_(model.parameters(), clip)
optimizer.step()
counter += 1
print(counter)
if counter%testWithValiEvery == 0:
print("validating")
val_h = model.init_hidden(batchSize, device)
val_losses = []
model.eval()
for inp, lab in validationData:
val_h = tuple([each.data for each in val_h])
inp, lab = inp.to(device), lab.to(device)
out, val_h = model(inp, val_h)#
val_loss = criterion(out.squeeze(), lab.long())
val_losses.append(val_loss.item())
model.train()
print("Epoch: {}/{}...".format(i+1, epochs),
"Step: {}...".format(counter),
"Loss: {:.6f}...".format(loss.item()),
"Val Loss: {:.6f}".format(np.mean(val_losses)))
if np.mean(val_losses) <= valid_loss_min:
torch.save(model.state_dict(), path)
print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(valid_loss_min,np.mean(val_losses)))
print('model saved')
valid_loss_min = np.mean(val_losses)
This is the classifier - Fair amount of random commenting here where i've meddled with bits
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as op
import torchvision
from torch.utils.data import TensorDataset, DataLoader
from torchvision import transforms, datasets
class HateSpeechDetector(nn.Module):
def __init__(self, device, vocabularySize, output, embedding, hidden, layers, dropProb=0.5):
super(HateSpeechDetector, self).__init__()
#Number of outputs (Classes/Categories)
self.output = output
#Number of layers in the LSTM
self.numLayers = layers
#Number of hidden neurons in each LSTM layer
self.hiddenDimensions = hidden
#Device being used for by model (CPU or GPU)
self.device = device
#Embedding layer finds correlations in words by converting word integers into vectors
self.embedding = nn.Embedding(vocabularySize, embedding)
#LSTM stores important data in memory, using it to help with future predictions
self.lstm = nn.LSTM(embedding,hidden,layers,dropout=dropProb,batch_first=True)
#Dropout is used to randomly drop nodes. This helps to prevent overfitting of the model during training
self.dropout = nn.Dropout(dropProb)
#Establishing 4 simple layers and a sigmoid output
self.fc = nn.Linear(hidden, hidden)
self.fc2 = nn.Linear(hidden, hidden)
self.fc3 = nn.Linear(hidden, hidden)
self.fc4 = nn.Linear(hidden, hidden)
self.fc5 = nn.Linear(hidden, hidden)
self.fc6 = nn.Linear(hidden, output)
self.softmax = nn.Softmax(dim=2)
def forward(self, x, hidden):
batchSize = x.size(0)
x = x.long()
embeds = self.embedding(x)
lstm_out, hidden = self.lstm(embeds, hidden)
#Tensor changes here from 250,33,524 to 8250,524
# lstm_out = lstm_out.contiguous().view(-1,self.hiddenDimensions)
out = self.dropout(lstm_out)
out = self.fc(out)
out = self.fc2(out)
out = self.fc3(out)
out = self.fc4(out)
out = self.fc5(out)
out = self.fc6(out)
out = self.softmax(out)
out = out[:,-1,:]
# myTensor = torch.Tensor([0,0,0])
# newOut = torch.zeros(batchSize, self.output)
# count = 0
# row = 0
# for tensor in out:
# if(count == 33):
# newOut[row] = myTensor/33
# myTensor = torch.Tensor([0,0,0])
# row += 1
# count = 0
# myTensor += tensor
# count += 1
return out, hidden
def init_hidden(self, batchSize, device):
weight = next(self.parameters()).data
hidden = (weight.new(self.numLayers, batchSize, self.hiddenDimensions).zero_().to(device), weight.new(self.numLayers, batchSize, self.hiddenDimensions).zero_().to(device))
return hidden
You've added weights to the cross-entropy loss, and the weights bias towards the first class already ([1.0, 0.08, 0.35]).
Having a higher weight for a certain class means that the model will be more heavily penalized for getting that class wrong, and it's possible for the model to learn to just predict everything as the class with highest weight. Usually you don't need to manually assign weights.
Also, check your data to see if there's label imbalance, i.e., whether you have more training examples that are of the first class. An imbalanced training set has similar effects as setting different weights on the loss.
summary
I'm adding alphabets to captcha recognition, but pytorch's CTC seems to not working properly when alphabets are added.
What I've tried
At first, I modified BLANK_LABEL to 62 since there are 62 labels(0-9, a-z, A-Z), but it gives me runtime error blank must be in label range. I also tried BLANK_LABEL=0 and then assigning 1~63 as nonblank labels but it outputs NaN as loss.
The code
This is the colab link for the current version of my code: here
below are just core parts of the code.
Constants:
DATASET_PATH = "/home/ik1ne/Downloads/numbers"
MODEL_PATH = "/home/ik1ne/Downloads"
BATCH_SIZE = 50
TRAIN_BATCHES = 180
TEST_BATCHES = 20
TOTAL_BATCHES = TRAIN_BATCHES+TEST_BATCHES
TOTAL_DATASET = BATCH_SIZE*TOTAL_BATCHES
BLANK_LABEL = 63
dataset generation:
!pip install captcha
from captcha.image import ImageCaptcha
import itertools
import os
import random
import string
if not os.path.exists(DATASET_PATH):
os.makedirs(DATASET_PATH)
characters = "0123456789"+string.ascii_lowercase + string.ascii_uppercase
while(len(list(Path(DATASET_PATH).glob('*'))) < TOTAL_BATCHES):
captcha_str = "".join(random.choice(characters) for x in range(6))
if captcha_str in list(Path(DATASET_PATH).glob('*')):
continue
ImageCaptcha().write(captcha_str, f"{DATASET_PATH}/{captcha_str}.png")
dataset:
def convert_strseq_to_numseq(s):
for c in s:
if c >= '0' and c <= '9':
return int(c)
elif c>='a' and c <='z':
return ord(c)-ord('a')+10
else:
return ord(c)-ord('A')+36
class CaptchaDataset(Dataset):
"""CAPTCHA dataset."""
def __init__(self, root_dir, transform=None):
self.root_dir = root_dir
self.image_paths = list(Path(root_dir).glob('*'))
self.transform = transform
def __getitem__(self, index):
image = Image.open(self.image_paths[index])
if self.transform:
image = self.transform(image)
label_sequence = [convert_strseq_to_numseq(c) for c in self.image_paths[index].stem]
return (image, torch.tensor(label_sequence))
def __len__(self):
return len(self.image_paths)
model:
class StackedLSTM(nn.Module):
def __init__(self, input_size=60, output_size=11, hidden_size=512, num_layers=2):
super(StackedLSTM, self).__init__()
self.hidden_size = hidden_size
self.num_layers = num_layers
self.dropout = nn.Dropout()
self.fc = nn.Linear(hidden_size, output_size)
self.lstm = nn.LSTM(input_size, hidden_size, num_layers)
def forward(self, inputs, hidden):
batch_size, seq_len, input_size = inputs.shape
outputs, hidden = self.lstm(inputs, hidden)
outputs = self.dropout(outputs)
outputs = torch.stack([self.fc(outputs[i]) for i in range(width)])
outputs = F.log_softmax(outputs, dim=2)
return outputs, hidden
def init_hidden(self, batch_size):
weight = next(self.parameters()).data
return (weight.new(self.num_layers, batch_size, self.hidden_size).zero_(),
weight.new(self.num_layers, batch_size, self.hidden_size).zero_())
net = StackedLSTM().to(device)
training:
net.train() # set network to training phase
epochs = 30
# for each pass of the training dataset
for epoch in range(epochs):
train_loss, train_correct, train_total = 0, 0, 0
h = net.init_hidden(BATCH_SIZE)
# for each batch of training examples
for batch_index, (inputs, targets) in enumerate(train_dataloader):
inputs, targets = inputs.to(device), targets.to(device)
h = tuple([each.data for each in h])
BATCH_SIZE, channels, height, width = inputs.shape
# reshape inputs: NxCxHxW -> WxNx(HxC)
inputs = (inputs
.permute(3, 0, 2, 1)
.contiguous()
.view((width, BATCH_SIZE, -1)))
optimizer.zero_grad() # zero the parameter gradients
outputs, h = net(inputs, h) # forward pass
# compare output with ground truth
input_lengths = torch.IntTensor(BATCH_SIZE).fill_(width)
target_lengths = torch.IntTensor([len(t) for t in targets])
loss = criterion(outputs, targets, input_lengths, target_lengths)
loss.backward() # backpropagation
nn.utils.clip_grad_norm_(net.parameters(), 10) # clip gradients
optimizer.step() # update network weights
# record statistics
prob, max_index = torch.max(outputs, dim=2)
train_loss += loss.item()
train_total += len(targets)
for i in range(BATCH_SIZE):
raw_pred = list(max_index[:, i].cpu().numpy())
pred = [c for c, _ in groupby(raw_pred) if c != BLANK_LABEL]
target = list(targets[i].cpu().numpy())
if pred == target:
train_correct += 1
# print statistics every 10 batches
if (batch_index + 1) % 10 == 0:
print(f'Epoch {epoch + 1}/{epochs}, ' +
f'Batch {batch_index + 1}/{len(train_dataloader)}, ' +
f'Train Loss: {(train_loss/1):.5f}, ' +
f'Train Accuracy: {(train_correct/train_total):.5f}')
train_loss, train_correct, train_total = 0, 0, 0
This error will occur when the index of blank is larger than the total number of classes, which equals number of chars + blank. What's more, the index starts from 0, instead of 1, so if you have 62 characters in total, their index should be 0-61 and the index of blank should be 62 instead of 63. (Or you can set blank as 0, other characters from 1-62)
You should also check the shape of the output tensor, it should has shape [T, B, C], where T is the time step length, B is the batch size, C is the class num, remember to add blank in to the class num or you will meet the problem
Most probably there is some problem with net shape when it's sent to CTC loss, but you should have provided the dataset to us to see the net's shape. It should be (T,N,C) , where T=input length, N=batch size, C=number of classes. And as I understand blank symbol id should in the 0..C range. Also, you should add blank symbol, for example '-' to the alphabet.