I have trained a CNN model in PyTorch to detect skin diseases across 6 classes. The model reached 92% accuracy, and I saved it in a .pth file. I want to use this model for predictions but I don't know how to do so. I would be grateful for any help with the necessary steps.
I have tried taking the image input straight from the folder, resizing it, and then running it through the model for predictions. The error I face is a ModuleAttributeError which says there is no attribute named predict. I don't understand where I went wrong; I know this is a simple task for most, but I was hoping for some guidance. The dataset I used is the Skin Cancer MNIST: HAM10000 dataset from Kaggle, and I trained a ResNet18 on it. If anyone has any pointers on fine-tuning the model, I would greatly appreciate those too.
TL;DR: I get a ModuleAttributeError that says the 'ResNet' module has no attribute 'predict'.
The data is preprocessed as follows:
import os, cv2,itertools
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
from tqdm import tqdm
from glob import glob
from PIL import Image
# pytorch libraries
import torch
from torch import optim,nn
from torch.autograd import Variable
from torch.utils.data import DataLoader,Dataset
from torchvision import models,transforms
# sklearn libraries
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
np.random.seed(10)
torch.manual_seed(10)
torch.cuda.manual_seed(10)
from google.colab import drive
drive.mount('/content/drive')
print(os.listdir("/content/drive/My Drive/input"))
"""**Data analysis and preprocessing**"""
data_dir = '/content/drive/My Drive/input'
all_image_path = glob(os.path.join(data_dir, '*', '*.jpg'))
imageid_path_dict = {os.path.splitext(os.path.basename(x))[0]: x for x in all_image_path}
lesion_type_dict = {
'nv': 'Melanocytic nevi',
'mel': 'Melanoma',
'bkl': 'Benign keratosis-like lesions ',
'bcc': 'Basal cell carcinoma',
'akiec': 'Actinic keratoses',
'vasc': 'Vascular lesions',
'df': 'Dermatofibroma'
}
def compute_img_mean_std(image_paths):
"""
computing the per-channel mean and std over the whole dataset;
first we normalize the images from 0-255 to 0-1
"""
img_h, img_w = 224, 224
imgs = []
means, stdevs = [], []
for i in tqdm(range(len(image_paths))):
img = cv2.imread(image_paths[i])
img = cv2.resize(img, (img_h, img_w))
imgs.append(img)
imgs = np.stack(imgs, axis=3)
print(imgs.shape)
imgs = imgs.astype(np.float32) / 255.
for i in range(3):
pixels = imgs[:, :, i, :].ravel() # flatten the channel into one row
means.append(np.mean(pixels))
stdevs.append(np.std(pixels))
means.reverse() # BGR --> RGB
stdevs.reverse()
print("normMean = {}".format(means))
print("normStd = {}".format(stdevs))
return means,stdevs
# norm_mean,norm_std = compute_img_mean_std(all_image_path)
norm_mean = (0.763035, 0.54564625, 0.5700399)
norm_std = (0.1409281, 0.15261264, 0.16997051)
df_original = pd.read_csv(os.path.join(data_dir, 'HAM10000_metadata.csv'))
df_original['path'] = df_original['image_id'].map(imageid_path_dict.get)
df_original['cell_type'] = df_original['dx'].map(lesion_type_dict.get)
df_original['cell_type_idx'] = pd.Categorical(df_original['cell_type']).codes
df_original.head()
# this will tell us how many images are associated with each lesion_id
df_undup = df_original.groupby('lesion_id').count()
# now we filter out lesion_id's that have only one image associated with it
df_undup = df_undup[df_undup['image_id'] == 1]
df_undup.reset_index(inplace=True)
df_undup.head()
# here we identify lesion_id's that have duplicate images and those that have only one image.
def get_duplicates(x):
unique_list = list(df_undup['lesion_id'])
if x in unique_list:
return 'unduplicated'
else:
return 'duplicated'
# create a new column that is a copy of the lesion_id column
df_original['duplicates'] = df_original['lesion_id']
# apply the function to this new column
df_original['duplicates'] = df_original['duplicates'].apply(get_duplicates)
df_original.head()
df_original['duplicates'].value_counts()
# now we filter out images that don't have duplicates
df_undup = df_original[df_original['duplicates'] == 'unduplicated']
df_undup.shape
# now we create a val set from df_undup because we are sure that none of these images have augmented duplicates in the train set
y = df_undup['cell_type_idx']
_, df_val = train_test_split(df_undup, test_size=0.2, random_state=101, stratify=y)
df_val.shape
df_val['cell_type_idx'].value_counts()
# This set will be df_original excluding all rows that are in the val set
# This function identifies if an image is part of the train or val set.
def get_val_rows(x):
# create a list of all the lesion_id's in the val set
val_list = list(df_val['image_id'])
if str(x) in val_list:
return 'val'
else:
return 'train'
# identify train and val rows
# create a new column that is a copy of the image_id column
df_original['train_or_val'] = df_original['image_id']
# apply the function to this new column
df_original['train_or_val'] = df_original['train_or_val'].apply(get_val_rows)
# filter out train rows
df_train = df_original[df_original['train_or_val'] == 'train']
print(len(df_train))
print(len(df_val))
df_train['cell_type_idx'].value_counts()
df_val['cell_type'].value_counts()
# Oversample the under-represented classes to balance the 7 classes
data_aug_rate = [15,10,5,50,0,40,5]
for i in range(7):
    if data_aug_rate[i]:
        # DataFrame.append is deprecated in newer pandas; pd.concat duplicates the rows instead
        dup = df_train.loc[df_train['cell_type_idx'] == i, :]
        df_train = pd.concat([df_train] + [dup] * (data_aug_rate[i] - 1), ignore_index=True)
df_train['cell_type'].value_counts()
# # We can split the test set again in a validation set and a true test set:
# df_val, df_test = train_test_split(df_val, test_size=0.5)
df_train = df_train.reset_index()
df_val = df_val.reset_index()
# df_test = df_test.reset_index()
Here is where I build the model:
# feature_extract is a boolean that defines if we are finetuning or feature extracting.
# If feature_extract = False, the model is finetuned and all model parameters are updated.
# If feature_extract = True, only the last layer parameters are updated, the others remain fixed.
def set_parameter_requires_grad(model, feature_extracting):
if feature_extracting:
for param in model.parameters():
param.requires_grad = False
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
# Initialize these variables which will be set in this if statement. Each of these
# variables is model specific.
model_ft = None
input_size = 0
if model_name == "resnet":
""" Resnet18, resnet34, resnet50, resnet101
"""
model_ft = models.resnet18(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, num_classes)
input_size = 224
elif model_name == "vgg":
""" VGG11_bn
"""
model_ft = models.vgg11_bn(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.classifier[6].in_features
model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
input_size = 224
elif model_name == "densenet":
""" Densenet121
"""
model_ft = models.densenet121(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.classifier.in_features
model_ft.classifier = nn.Linear(num_ftrs, num_classes)
input_size = 224
elif model_name == "inception":
""" Inception v3
"""
model_ft = models.inception_v3(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
# Handle the auxiliary net
num_ftrs = model_ft.AuxLogits.fc.in_features
model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
# Handle the primary net
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs,num_classes)
input_size = 299
else:
print("Invalid model name, exiting...")
exit()
return model_ft, input_size
# resnet,vgg,densenet,inception
model_name = 'resnet'
num_classes = 7
feature_extract = False
# Initialize the model for this run
model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True)
# Define the device:
device = torch.device('cuda:0')
# Put the model on the device:
model = model_ft.to(device)
# norm_mean = (0.49139968, 0.48215827, 0.44653124)
# norm_std = (0.24703233, 0.24348505, 0.26158768)
# define the transformation of the train images.
train_transform = transforms.Compose([transforms.Resize((input_size,input_size)),transforms.RandomHorizontalFlip(),
transforms.RandomVerticalFlip(),transforms.RandomRotation(20),
transforms.ColorJitter(brightness=0.1, contrast=0.1, hue=0.1),
transforms.ToTensor(), transforms.Normalize(norm_mean, norm_std)])
# define the transformation of the val images.
val_transform = transforms.Compose([transforms.Resize((input_size,input_size)), transforms.ToTensor(),
transforms.Normalize(norm_mean, norm_std)])
# Define a pytorch dataloader for this dataset
class HAM10000(Dataset):
def __init__(self, df, transform=None):
self.df = df
self.transform = transform
def __len__(self):
return len(self.df)
def __getitem__(self, index):
# Load data and get label
X = Image.open(self.df['path'][index])
y = torch.tensor(int(self.df['cell_type_idx'][index]))
if self.transform:
X = self.transform(X)
return X, y
# Define the training set using the table df_train and our defined transformations (train_transform)
training_set = HAM10000(df_train, transform=train_transform)
train_loader = DataLoader(training_set, batch_size=64, shuffle=True, num_workers=4)
# Same for the validation set (use val_transform here, not train_transform, so validation images aren't randomly augmented):
validation_set = HAM10000(df_val, transform=val_transform)
val_loader = DataLoader(validation_set, batch_size=64, shuffle=False, num_workers=4)
# we use Adam optimizer, use cross entropy loss as our loss function
optimizer = optim.Adam(model.parameters(), lr=1e-5)
criterion = nn.CrossEntropyLoss().to(device)
Lastly, here are the training and validation functions, followed by my prediction code:
# this class is used during training to track the running loss and accuracy
class AverageMeter(object):
def __init__(self):
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
total_loss_train, total_acc_train = [],[]
def train(train_loader, model, criterion, optimizer, epoch):
model.train()
train_loss = AverageMeter()
train_acc = AverageMeter()
curr_iter = (epoch - 1) * len(train_loader)
for i, data in enumerate(train_loader):
images, labels = data
N = images.size(0)
# print('image shape:',images.size(0), 'label shape',labels.size(0))
images = Variable(images).to(device)
labels = Variable(labels).to(device)
optimizer.zero_grad()
outputs = model(images)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
prediction = outputs.max(1, keepdim=True)[1]
train_acc.update(prediction.eq(labels.view_as(prediction)).sum().item()/N)
train_loss.update(loss.item())
curr_iter += 1
if (i + 1) % 100 == 0:
print('[epoch %d], [iter %d / %d], [train loss %.5f], [train acc %.5f]' % (
epoch, i + 1, len(train_loader), train_loss.avg, train_acc.avg))
total_loss_train.append(train_loss.avg)
total_acc_train.append(train_acc.avg)
return train_loss.avg, train_acc.avg
def validate(val_loader, model, criterion, optimizer, epoch):
model.eval()
val_loss = AverageMeter()
val_acc = AverageMeter()
with torch.no_grad():
for i, data in enumerate(val_loader):
images, labels = data
N = images.size(0)
images = Variable(images).to(device)
labels = Variable(labels).to(device)
outputs = model(images)
prediction = outputs.max(1, keepdim=True)[1]
val_acc.update(prediction.eq(labels.view_as(prediction)).sum().item()/N)
val_loss.update(criterion(outputs, labels).item())
print('------------------------------------------------------------')
print('[epoch %d], [val loss %.5f], [val acc %.5f]' % (epoch, val_loss.avg, val_acc.avg))
print('------------------------------------------------------------')
return val_loss.avg, val_acc.avg
import cv2
from PIL import Image, ImageOps
import numpy as np
model = model_ft
model.load_state_dict(torch.load("/content/drive/MyDrive/input/trainbest.pth"))
model.eval()
def import_and_predict(image_data, model):
size = (224, 224)
image = ImageOps.fit(image_data, size, Image.ANTIALIAS)
img = np.asarray(image)
image_reshape = img[np.newaxis,...]
prediction = model.predict(img_reshape)
return prediction
image = Image.open('/content/0365-0596-abd-88-05-0712-gf03.jpg')
# st.image(image, use_column_width = True)
predictions = import_and_predict(image, model)
class_names = ["Melanocytic nevi", "dermatofibroma", "Benign keratosis-like lesions", "Basal cell carcinoma", "Actinic keratoses", "Vascular lesions", "Dermatofibroma"]
string = "It is: " + class_names[np.argmax(predictions)]
print(string)
Here is the error that comes immediately after this is executed.
---------------------------------------------------------------------------
ModuleAttributeError Traceback (most recent call last)
<ipython-input-219-d563271b78c6> in <module>()
32 image = Image.open('/content/0365-0596-abd-88-05-0712-gf03.jpg')
33 # st.image(image, use_column_width = True)
---> 34 predictions = import_and_predict(image, model)
35 class_names = ["Melanocytic nevi", "dermatofibroma", "Benign keratosis-like lesions", "Basal cell carcinoma", "Actinic keratoses", "Vascular lesions", "Dermatofibroma"]
36 string = "It is: " + class_names[np.argmax(predictions)]
1 frames
<ipython-input-219-d563271b78c6> in import_and_predict(image_data, model)
27 img = np.asarray(image)
28 image_reshape = img[np.newaxis,...]
---> 29 prediction = model.predict(img_reshape)
30 return prediction
31
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __getattr__(self, name)
777 return modules[name]
778 raise ModuleAttributeError("'{}' object has no attribute '{}'".format(
--> 779 type(self).__name__, name))
780
781 def __setattr__(self, name: str, value: Union[Tensor, 'Module']) -> None:
ModuleAttributeError: 'ResNet' object has no attribute 'predict'
If anyone can help me fix the issue and get this to work as a classifier for skin diseases, I would be ever so thankful.
nn.Module doesn't have a predict function; just call the object itself for inference:
prediction = model(img_reshape)
This invokes the object's __call__ method, which in turn calls the model's forward function.
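For context, the model also expects a normalized float tensor of shape (1, 3, 224, 224), not a raw numpy array, so the preprocessing needs adjusting as well. Here is a minimal inference sketch, assuming the val_transform, model, and device defined above (the image path is the one from the question):
import torch
from PIL import Image

def import_and_predict(image_path, model, transform, device):
    model.eval()
    image = Image.open(image_path).convert('RGB')
    # the transform resizes to 224x224, converts to a CxHxW tensor, and normalizes;
    # unsqueeze(0) adds the batch dimension the model expects
    x = transform(image).unsqueeze(0).to(device)
    with torch.no_grad():
        logits = model(x)           # forward pass via __call__
        probs = torch.softmax(logits, dim=1)
    return probs

probs = import_and_predict('/content/0365-0596-abd-88-05-0712-gf03.jpg', model, val_transform, device)
print('Predicted class index:', probs.argmax(dim=1).item())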
Related
I'm trying to implement the Laplace posterior approximation on the last layer for the classification results obtained by a BERT model. I get an error regarding input size, and after I fix it by extracting just the embeddings and class labels from BERT to feed into Laplace, I get another bunch of errors regarding input dimensions that I don't know how to debug.
As I didn't find anything about this on the internet, and it involves relatively new libraries, I will post just the first error I got, along with code that might help in debugging and some useful links.
I will update the post if needed.
Of course, if someone knows how to implement the Laplace posterior approximation with BERT in some other library like scikit-learn or Trax, that would be helpful. Another Transformer classification model with some other confidence approximation would also work for me. Any help is appreciated!
Code:
# Import
import pandas as pd
import torch
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
from torch import nn
from transformers import BertTokenizer
from transformers import BertModel
from transformers import BertForSequenceClassification
from sklearn.model_selection import train_test_split
import time
import os
#Toy Data
data_a_b_c = ["""category a. This is category a. In category a we talk about animals.
This category includes lions, fish, tigers, birds, elephants, mouses, dogs, cats, and all other animals."""] * 60 \
+ ["""category b. This is category b. In category b we talk about people. This category members are
Abraham Maslow, John Lennon, Drazen Petrovic, Nikola Tesla, Slavoljub Penkala, Nenad Bakic and Larry Page."""] * 60 \
+ ["""category c. This is category c. Category c is dedicated to car brands like Lamborgini, Rimac-Buggati, BMW, Mercedes,
Honda, Opel, Wolkswagen, and etc."""] * 60
label_0_1_2 = [0] * 60 + [1] * 60 + [2] * 60
d = {'text': data_a_b_c, 'labels': label_0_1_2}
df = pd.DataFrame(data=d)
print(df.head(3))
print(df.tail(3))
print(df.info())
# Parameters
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
batch_size = 2
learning_rate = 3e-4
epochs = 3
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
labels = pd.Series(df.labels.values).to_dict()
num_classes = 3
print(f'Tokenizer: {tokenizer}, Batch size:{batch_size}, Learning rate:{learning_rate}, Epochs:{epochs}')
print('Device: ', device)
print('Number of possible classes: ', num_classes)
# Model Architecture
class TransformerModel(nn.Module):
def __init__(self, num_classes, dropout=0.5):
super(TransformerModel, self).__init__()
self.bert = BertModel.from_pretrained('bert-base-multilingual-cased')
self.dropout = nn.Dropout(dropout)
self.linear = nn.Linear(768, num_classes)
self.relu = nn.ReLU()
def forward(self, input_id, mask):
_, pooled_output = self.bert(input_ids=input_id, attention_mask=mask, return_dict=False)
dropout_output = self.dropout(pooled_output)
linear_output = self.linear(dropout_output)
final_layer = self.relu(linear_output)
return final_layer
# Prepare Data Function
def prepare_data(data, labels):
texts = tokenizer(data, padding='max_length', max_length=512, truncation=True, return_tensors="pt")
input_ids = texts['input_ids']
attention_mask = texts['attention_mask']
train_dataset = TensorDataset(input_ids, attention_mask, torch.LongTensor(labels))
dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
return dataloader
#Run Training Function
def run_training(train_dataloader, val_dataloader, epochs=epochs, lr=learning_rate):
def train(dataloader):
model.train()
total_acc, total_count = 0, 0
log_interval = 128
start_time = time.time()
for idx, (input_id, mask, label) in enumerate(train_dataloader):
# print(idx)
mask = mask.to(device)
input_id = input_id.to(device)
label = label.type(torch.LongTensor).to(device)
output = model(input_id, mask)
optimizer.zero_grad()
loss = criterion(output, label)
loss.backward()
# torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
optimizer.step()
total_acc += (output.argmax(1) == label).sum().item()
total_count += label.size(0)
if idx % log_interval == 0 and idx > 0:
elapsed = time.time() - start_time
print('| epoch {:3d} | {:5d}/{:5d} batches '
'| accuracy {:8.3f}'.format(epoch, idx, len(dataloader),
total_acc / total_count))
total_acc, total_count = 0, 0
start_time = time.time()
def evaluate(dataloader):
model.eval()
total_acc, total_count = 0, 0
with torch.no_grad():
for idx, (input_id, mask, label) in enumerate(dataloader):
mask = mask.to(device)
input_id = input_id.to(device)
label = label.to(device)
output = model(input_id, mask)
total_acc += (output.argmax(1) == label).sum().item()
total_count += label.size(0)
return total_acc / total_count
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.1)
cuda = torch.cuda.is_available()
device = torch.device("cuda" if cuda else "cpu")
model.to(device)
total_accu = None
for epoch in range(1, epochs + 1):
epoch_start_time = time.time()
train(train_dataloader)
accu_val = evaluate(val_dataloader)
if total_accu is not None and total_accu > accu_val:
scheduler.step()
else:
total_accu = accu_val
print('-' * 59)
print('| end of epoch {:3d} | time: {:5.2f}s | '
'valid accuracy {:8.3f} '.format(epoch,
time.time() - epoch_start_time,
accu_val))
print('-' * 59)
# Data Split And Preparation
X_train, X_test, y_train, y_test = train_test_split(df.text.values.tolist(), df.labels.values.tolist(), test_size=0.2, random_state=2)
train_dataloader = prepare_data(X_train, y_train)
val_dataloader = prepare_data(X_test, y_test)
# Run The Model
model = TransformerModel(num_classes)
run_training(train_dataloader, val_dataloader)
print('finished')
# Save And Load The Model (if needed)
PATH = ".../Torch_BERT_model"
torch.save(model, os.path.join(PATH, "Toy_Data_BERT.pth"))
model = torch.load(os.path.join(PATH, "Toy_Data_BERT.pth"))
print(model)
# Laplace
from laplace import Laplace
la = Laplace(model, 'classification', subset_of_weights='last_layer', hessian_structure='full')
la.fit(train_dataloader)
Error I get:
--------------------------------------------------------------------------- ValueError Traceback (most recent call
last) ~\AppData\Local\Temp\ipykernel_7144\3779742208.py in <cell line:
2>()
1 la = Laplace(model, 'classification', subset_of_weights='last_layer', hessian_structure='full')
----> 2 la.fit(train_dataloader)
~\anaconda3\lib\site-packages\laplace\lllaplace.py in fit(self,
train_loader, override)
98
99 if self.model.last_layer is None:
--> 100 X, _ = next(iter(train_loader))
101 with torch.no_grad():
102 try:
ValueError: too many values to unpack (expected 2)
Useful link for Laplace implementation with examples:
https://aleximmer.github.io/Laplace/#full-example-optimization-of-the-marginal-likelihood-and-prediction
Code that might help in debugging:
for x in train_dataloader:
print("The length of batch is:", len(x))
print()
print("The batch looks like:", x)
print()
print("The length of the first element in the batch is:") #embedding
print(len(x[0]))
print("The length of the second element in the batch is:") #1 if place is filled with word, 0 if it's empty?
print(len(x[1]))
print("The length of the third element in the batch is:") #category
print(len(x[2]))
print()
print("The lengths of the first tensor and second tensor in the first element in the batch is:")
print(len(x[0][0]), len(x[0][1])) # = max_length (512)
print("The lengths of the first tensor and second tensor in the second element in the batch is:")
print(len(x[1][0]), len(x[1][1])) # = max_length (512)
print()
print()
The laplace library expects the dataloader to return two values (X, y) and the model to require exactly one argument for its prediction (code). But your model's forward pass requires two arguments, namely input_id and mask, and your dataloader returns three: input_id, mask, and labels.
There are several ways to work around this limitation (e.g. return a dict with input_ids and attention_mask). The way that requires the least understanding of the laplace library's internals is to generate the attention mask at runtime in the forward pass (not great for performance):
class TransformerModel(nn.Module):
def __init__(self, num_classes, pad_id, dropout=0.5):
super(TransformerModel, self).__init__()
self.bert = BertModel.from_pretrained('bert-base-multilingual-cased')
self.dropout = nn.Dropout(dropout)
self.linear = nn.Linear(768, num_classes)
self.relu = nn.ReLU()
self.pad_id = pad_id
def forward(self, input_id):
    # rebuild the attention mask at runtime from the padding token id
    mask = (input_id != self.pad_id).type(input_id.dtype)
_, pooled_output = self.bert(input_ids=input_id, attention_mask=mask, return_dict=False)
dropout_output = self.dropout(pooled_output)
linear_output = self.linear(dropout_output)
final_layer = self.relu(linear_output)
return final_layer
model = TransformerModel(num_classes, tokenizer.pad_token_id)
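Since the laplace library also expects the dataloader itself to yield exactly (X, y), prepare_data has to stop returning the mask as a separate tensor; the model above now rebuilds it internally. A minimal sketch of the adjusted function, assuming the tokenizer and batch_size defined earlier:
def prepare_data(data, labels):
    texts = tokenizer(data, padding='max_length', max_length=512, truncation=True, return_tensors="pt")
    # only (input_ids, labels): the attention mask is reconstructed inside forward() from pad_id
    train_dataset = TensorDataset(texts['input_ids'], torch.LongTensor(labels))
    return DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
With that change, la.fit(train_dataloader) receives two-tuples and passes input_ids straight into the single-argument forward.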
I'm trying to use a U-Net to apply image segmentation to the RUGD dataset. I get the following error and I'm stuck.
'lrs':lrs}
print('Total time: {:.2f}m' .format((time.time()-fit_time)/60))
return history
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-17-112aada9db24> in <module>
98
99 #iou
--> 100 val_iou.append(val_iou_score/len(val_loader))
101 train_iou.append(iou_score/len(train_loader))
102 train_acc.append(accuracy/len(train_loader))
NameError: name 'val_iou' is not defined
Installing segmentation-models for pytorch
!pip install -q segmentation-models-pytorch
!pip install -q torchsummary
Installing Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T
import torchvision
import torch.nn.functional as F
from torch.autograd import Variable
from PIL import Image
import cv2
import albumentations as A
import os
import time
from tqdm.notebook import tqdm
from torchsummary import summary
import segmentation_models_pytorch as smp
device = torch.device("cpu" if torch.cuda.is_available() else "cpu")
Name and direct paths
IMAGE_PATH = '/data/users/mypath/Rugd_dataset/images/train/'
MASK_PATH = '/data/users/mypath/Rugd_dataset/mask/train/'
Create dataframe
n_classes = 24
def create_df():
name = []
for dirname, _, filenames in os.walk(IMAGE_PATH):
for filename in filenames:
name.append(filename.split('.')[0])
return pd.DataFrame({'id': name}, index = np.arange(0, len(name)))
df = create_df()
print('Total Images: ',len(df))
Total Images: 7387
Split Test and Train
X_trainval, X_test = train_test_split(df['id'].values, test_size=0.1, random_state=19)
X_train, X_val = train_test_split(X_trainval, test_size=0.15, random_state=19)
print('Train Size : ', len(X_train))
print('Val Size : ', len(X_val))
print('Test Size : ', len(X_test))
# show original picture with overlay of mask
img = Image.open(IMAGE_PATH + df['id'][0] + '.png')
mask = Image.open(MASK_PATH + df['id'][0] + '.png')
print('Image size', np.asarray(img).shape)
print('Mask size', np.asarray(mask).shape)
plt.imshow(img)
plt.imshow(mask, alpha=0.6)
plt.title('Picture with Mask Applied')
plt.show()
Image size (550, 688, 3)
Mask size (550, 688, 3)
image rendered
Define U-net
class RugdDataset(Dataset):
def __init__(self, img_path, mask_path, X, mean, std, transform=None, patch=False):
self.img_path = img_path
self.mask_path = mask_path
self.X = X
self.transform = transform
self.patches = patch
self.mean = mean
self.std = std
def __len__(self):
return len(self.X)
def __getitem__(self, idx):
img = cv2.imread(self.img_path + self.X[idx] + '.png')
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
mask = cv2.imread(self.mask_path + self.X[idx] + '.png', cv2.IMREAD_GRAYSCALE)
if self.transform is not None:
aug = self.transform(image=img, mask=mask)
img = Image.fromarray(aug['image'])
mask = aug['mask']
if self.transform is None:
img = Image.fromarray(img)
t = T.Compose([T.ToTensor(), T.Normalize(self.mean, self.std)])
img = t(img)
mask = torch.from_numpy(mask).long()
'''
if self.patches:
img, mask = self.tiles(img, mask)
return img, mask
'''
'''
def tiles(self, img, mask):
img_patches = img.unfold(1, 512, 512).unfold(2, 768, 768)
im_patches = img_patches.contiguous().view(3, -1, 512, 768)
img_patches = img_patches.permute(1,0,2,3)
mask_patches = mask.unfold(0, 512, 512).unfold(1, 768, 768)
mask_patches = mask_patches.contiguous().view(-1, 512, 768)
return img_patches, mask_patches
'''
Apply augmentation and initiate dataloader
mean=[0.485, 0.456, 0.406]
std=[0.229, 0.224, 0.225]
t_train = A.Compose([A.Resize(512, 512, interpolation=cv2.INTER_NEAREST), A.HorizontalFlip(), A.VerticalFlip(),
A.GridDistortion(p=0.2), A.RandomBrightnessContrast((0,0.5),(0,0.5)), A.GaussNoise()])
t_val = A.Compose([A.Resize(512, 512, interpolation=cv2.INTER_NEAREST), A.HorizontalFlip(), A.GridDistortion(p=0.2)])
# dataset
train_set = RugdDataset(IMAGE_PATH, MASK_PATH, X_train, mean, std, t_train, patch=False)
val_set = RugdDataset(IMAGE_PATH, MASK_PATH, X_val, mean, std, t_val, patch=False)
# dataloader
batch_size = 3
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=True)
build the model
model = smp.Unet('mobilenet_v2', encoder_weights='imagenet', classes=24, activation=None,
encoder_depth=5, decoder_channels=[256, 128, 64, 32, 16])
render the model
model
Define pixel accuracy
def pixel_accuracy(output, mask):
with torch.no_grad():
output = torch.argmax(F.softmax(output, dim=1), dim=1)
correct = torch.eq(output, mask).int()
accuracy = float(correct.sum()) / float(correct.numel())
return accuracy
Define mIoU
def mIoU(pred_mask, mask, smooth=1e-10, n_classes=24):
with torch.no_grad():
pred_mask = F.softmax(pred_mask, dim=1)
pred_mask = torch.argmax(pred_mask, dim=1)
pred_mask = pred_mask.contiguous().view(-1)
mask = mask.contiguous().view(-1)
iou_per_class = []
for clas in range(0, n_classes): #loop per pixel class
true_class = pred_mask == clas
true_label = mask == clas
if true_label.long().sum().item() == 0: # no exist label in this loop
iou_per_class.append(np.nan)
else:
intersect = torch.logical_and(true_class, true_label).sum().float().item()
union = torch.logical_or(true_class, true_label).sum().float().item()
iou = (intersect + smooth) / (union + smooth)
iou_per_class.append(iou)
return np.nanmean(iou_per_class)
def get_lr(optimizer):
for param_group in optimizer.param_groups:
return param_group['lr']
def fit(epochs, model, train_loader, val_loader, criterion, optimizer, scheduler, patch=False):
torch.cuda.empty_cache()
train_losses = []
test_losses = []
val_iou = []; val_acc = []
train_iou = []; train_acc = []
lrs = []
min_loss = np.inf
decrease = 1 ; not_improve=0
model.to(device)
fit_time = time.time()
for e in range(epochs):
since = time.time()
running_loss = 0
iou_score = 0
accuracy = 0
# training loop
model.train()
'''
for i, data in enumerate(tqdm(train_loader)):
# training phase
image_tiles, mask_tiles = data
if patch:
bs, n_tiles, c, h, w = image_tiles.size()
image_tiles = image_tiles.view(-1,c, h, w)
mask_tiles = mask_tiles.view(-1, h, w)
'''
image = img.to(device); mask = mask.to(device);
# forward
output = model(image)
loss = criterion(output, mask)
# evaluation metrics
iou_score += mIoU(output, mask)
accuracy += pixel_accuracy(output, mask)
# backward
loss.backward()
optimizer.step() #update weight
optimizer.zero_grad() #reset gradient
# step the learning rate
lrs.append(get_lr(optimizer))
scheduler.step()
running_loss += loss.item()
else:
model.eval()
test_loss = 0
test_accuracy = 0
val_iou_score = 0
# validation loop
with torch.no_grad():
for i, data in enumerate(tqdm(val_loader)):
# reshape to 9 patches
image, mask = data
if patch:
bs, n, c, h, w = image.size()
image = image.view(-1, c, h, w)
mask = mask.view(-1, h, w)
image = img.to(device); mask = mask.to(device);
output = model(image)
# evaluation metrics
val_iou_score += mIoU(output, mask)
test_accuracy += pixel_accuracy(output, mask)
# loss
loss = criterion(output, mask)
test_loss += loss.item()
# calculate mean for each batch
train_losses.append(running_loss/len(train_loader))
test_losses.append(test_loss/len(val_loader))
if min_loss > (test_loss/len(val_loader)):
print('Loss Decreasing.. {:.3f} >> {:.3f} ' .format(min_loss, (test_loss/len(val_loader))))
min_loss = (test_loss/len(val_loader))
decrease += 1
if decrease % 5 == 0:
print('saving model...')
torch.save(model, 'Unet-mobilnet_v2_mIoU-{:.3f}.pt'.format(val_iou_score/len(val_loader)))
if (test_loss/len(val_loader)) > min_loss:
not_improve += 1
min_loss = (test_loss/len(val_loader))
print(f'Loss Not Decrease for {not_improve} time')
if not_improve == 7:
print('Loss not decrease for 7 times, Stop Training')
# break
#iou
val_iou.append(val_iou_score/len(val_loader))
train_iou.append(iou_score/len(train_loader))
train_acc.append(accuracy/len(train_loader))
val_acc.append(test_accuracy/len(val_loader))
print("Epoch:{}/{}".format(e+1, epochs),
"Train Loss: {:.3f}..".format(running_loss/len(train_loader)),
"Val Loss: {:.3f}..".format(test_loss/len(val_loader)),
"Train mIoU:{:.3f}..".format(iou_score/len(train_loader)),
"Val mIoU:{:.3f}..".format(val_iou_score/len(val_loader)),
"Val ACC:{:.3f}..".format(test_accuracy/len(val_loader)),
"Time: {:.2f}m".format((time.time()-since)/60))
history = {'train_loss' : train_losses, 'val_loss': test_losses,
'train_miou' : train_iou, 'val_miou':val_iou,
'train_acc' :train_acc, 'val_acc':val_acc,
'lrs':lrs}
print('Total time: {:.2f}m' .format((time.time()-fit_time)/60))
return history
This is the point where the model fails, with the same snippet and NameError traceback as shown at the top of the question.
Remainder of the code
max_lr = 1e-3
epoch = 15
weight_decay = 1e-4
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=max_lr, weight_decay=weight_decay)
sched = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr, epochs=epoch, steps_per_epoch=len(train_loader))
history = fit(epoch, model, train_loader, val_loader, criterion, optimizer, sched)
I'm currently learning NNs and trying to classify the public RVL-CDIP dataset. I'm using InceptionResNetV2 adapted to 16 classes.
I get good results, ~80%, using the Keras ImageDataGenerator class to build my train and validation sets (Image Training).
Now I'm trying to train my model on .npy files: I exported ~30k images to (299,299) np arrays and built a custom DataGenerator using Sequences.
The training results are similar to the first training, but the validation results are ~1% (NPY Training).
It seems like I'm experiencing overfitting, although:
I've been using the same data for both the Image training and the .npy training.
I checked the image-to-.npy export; I plotted some arrays as images without problems.
The main difference I spotted is the creation of the training and validation sets. In the first training I use the flow_from_directory() method, while in the second I use a pandas.DataFrame containing the path and class of each element in the dataset.
Below is my code for the creation of the generators:
def get_custom_data_generator(df_dataset, batch_size, df_train=pd.DataFrame(), df_validation=pd.DataFrame()):
global STEPS_PER_EPOCH_TRAIN
global STEPS_PER_EPOCH_VALIDATION
if len(df_dataset) > 0:
config.class_names = df_dataset['CLASS'].unique()
num_classes = len(config.class_names)
# Shuffling the dataset :
df_dataset = df_dataset.sample(frac=1)
split_row = int(df_dataset.shape[0] * config.dataset_creation.split.train)
df_splits = np.split(df_dataset, [split_row], axis=0)
df_train = df_splits[0]
df_validation = df_splits[1]
STEPS_PER_EPOCH_TRAIN = df_train.shape[0] // batch_size
STEPS_PER_EPOCH_VALIDATION = df_validation.shape[0] // batch_size
if len(df_train) > 0 and len(df_validation) > 0:
train_set_generator = DataGenerator(df_train,
x_col = ['PATH'],
y_col = ['CLASS'],
batch_size = batch_size,
num_classes = num_classes)
validation_set_generator = DataGenerator(df_validation,
x_col = ['PATH'],
y_col = ['CLASS'],
batch_size = batch_size,
num_classes = num_classes)
return train_set_generator, validation_set_generator
return None, None
And here is my DataGenerator class:
class DataGenerator(tf.keras.utils.Sequence):
def __init__(self, df, x_col, y_col=None, batch_size=32, num_classes=None, shuffle=True, width=299, height = 299):
self.batch_size = batch_size
self.df = df
self.indices = self.df.index.tolist()
self.num_classes = num_classes
self.shuffle = shuffle
self.x_col = x_col
self.y_col = y_col
self.width = width
self.height = height
self.dim = (width,height)
self.n_channels = 1
self.map_classes = map_classes(df['CLASS'].unique())
self.on_epoch_end()
def __len__(self):
return len(self.indices) // (self.batch_size)
def __getitem__(self, index):
index = self.index[index * self.batch_size:(index + 1) * self.batch_size]
batch = [self.indices[k] for k in index]
X, y = self.__data_generation_npy(batch)
return X, y
def on_epoch_end(self):
self.index = np.arange(len(self.indices))
if self.shuffle == True:
np.random.shuffle(self.index)
def __data_generation_npy(self, batch):
'''
Create a batch by using the CurrentNPY directory instead of the Current directory
'''
npydataset_path = "D:/dataset/RVL/NPY"
df = self.df
X = np.empty((self.batch_size, *self.dim, 3))
y = np.empty(self.batch_size, dtype=int)
for i, id in enumerate(batch):
doc_row = df.loc[id]
path = str(doc_row['PATH'])
path = os.path.join(npydataset_path, path)
typologie = str(doc_row['CLASS'])
path += ".npy"
with open(path, 'rb') as f:
npyfile = np.load(path, allow_pickle=True)
try:
npyfile = np.expand_dims(npyfile, axis=3)
X[i,]=npyfile[0]
except ValueError:
print('Value Error : npyfile=', npyfile.shape)
y[i] = self.map_classes[typologie]
X = np.array(X)
obj_to_file(X)
Y = np.array(y)
Y = tf.keras.utils.to_categorical(Y, num_classes = self.num_classes)
return X, Y
I would be thankful if someone could help me figure out what I did wrong with my DataGenerator, or what I could test in order to spot the problem.
I'm new to Neural Networks and I'm trying to train a CNN model on a custom dataset (cat and dog images in a single directory). So I'm doing the very usual stuff covered in most tutorials, but just in case I will give my full code here.
First I generate a .csv file to be processed:
import os
import pandas as pd
import torch
device = ("cuda" if torch.cuda.is_available() else "cpu")
train_df = pd.DataFrame(columns=["img_name","label"])
train_df["img_name"] = os.listdir("train/")
for idx, i in enumerate(os.listdir("train/")):
if "cat" in i:
train_df["label"][idx] = 0
if "dog" in i:
train_df["label"][idx] = 1
train_df.to_csv (r'train_csv.csv', index = False, header=True)
Then I prepare the dataset:
from torch.utils.data import Dataset
import pandas as pd
import os
from PIL import Image
import torch
class CatsAndDogsDataset(Dataset):
def __init__(self, root_dir, annotation_file, transform=None):
self.root_dir = root_dir
self.annotations = pd.read_csv(annotation_file)
self.transform = transform
def __len__(self):
return len(self.annotations)
def __getitem__(self, index):
img_id = self.annotations.iloc[index, 0]
img = Image.open(os.path.join(self.root_dir, img_id)).convert("RGB")
y_label = torch.tensor(float(self.annotations.iloc[index, 1]))
if self.transform is not None:
img = self.transform(img)
return (img, y_label)
This is my model:
import torch.nn as nn
import torchvision.models as models
class CNN(nn.Module):
def __init__(self, train_CNN=False, num_classes=1):
super(CNN, self).__init__()
self.train_CNN = train_CNN
self.inception = models.inception_v3(pretrained=True, aux_logits=False)
self.inception.fc = nn.Linear(self.inception.fc.in_features, num_classes)
self.relu = nn.ReLU()
self.dropout = nn.Dropout(0.5)
self.sigmoid = nn.Sigmoid()
def forward(self, images):
features = self.inception(images)
return self.sigmoid(self.dropout(self.relu(features))).squeeze(1)
These are my hyper-params, transformations, and dataloaders:
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
num_epochs = 10
learning_rate = 0.00001
train_CNN = False
batch_size = 32
shuffle = True
pin_memory = True
num_workers = 0
transform = transforms.Compose(
[
transforms.Resize((356, 356)),
transforms.RandomCrop((299, 299)),
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]
)
dataset = CatsAndDogsDataset("train","train_csv.csv",transform=transform)
print(len(dataset))
train_set, validation_set = torch.utils.data.random_split(dataset,[162,40])
train_loader = DataLoader(dataset=train_set, shuffle=shuffle, batch_size=batch_size,num_workers=num_workers,pin_memory=pin_memory)
validation_loader = DataLoader(dataset=validation_set, shuffle=shuffle, batch_size=batch_size,num_workers=num_workers, pin_memory=pin_memory)
model = CNN().to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for name, param in model.inception.named_parameters():
if "fc.weight" in name or "fc.bias" in name:
param.requires_grad = True
else:
param.requires_grad = train_CNN
and accuracy check:
def check_accuracy(loader, model):
if loader == train_loader:
print("Checking accuracy on training data")
else:
print("Checking accuracy on validation data")
num_correct = 0
num_samples = 0
model.eval()
with torch.no_grad():
for x, y in loader:
x = x.to(device=device)
y = y.to(device=device)
scores = model(x)
predictions = torch.tensor([1.0 if i >= 0.5 else 0.0 for i in scores]).to(device)
num_correct += (predictions == y).sum()
num_samples += predictions.size(0)
print(
f"Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}"
)
model.train()
return f"{float(num_correct)/float(num_samples)*100:.2f}"
And this is my training function:
from tqdm import tqdm
def train():
model.train()
for epoch in range(num_epochs):
loop = tqdm(train_loader, total = len(train_loader), leave = True)
if epoch % 2 == 0:
loop.set_postfix(val_acc = check_accuracy(validation_loader, model))
for imgs, labels in loop:
imgs = imgs.to(device)
labels = labels.to(device)
outputs = model(imgs)
loss = criterion(outputs, labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()
loop.set_description(f"Epoch [{epoch}/{num_epochs}]")
loop.set_postfix(loss = loss.item())
if __name__ == "__main__":
train()
0%| | 0/6 [00:00<?, ?it/s]Checking accuracy on validation data
0%| | 0/6 [01:13<?, ?it/s, val_acc=60.00]Got 24 / 40 with accuracy 60.00
Epoch [0/10]: 100%|██████████| 6/6 [06:02<00:00, 60.39s/it, loss=0.693]
Epoch [1/10]: 100%|██████████| 6/6 [04:49<00:00, 48.23s/it, loss=0.693]
...
Epoch [8/10]: 100%|██████████| 6/6 [06:07<00:00, 61.29s/it, loss=0.693]
Epoch [9/10]: 100%|██████████| 6/6 [04:55<00:00, 49.19s/it, loss=0.781]
The model trains fine, but when I try to use it for prediction I get different results each time I run this last piece in my Jupyter Notebook:
model.eval()
img = Image.open('train/cat.22.png').convert("RGB")
img_t = transform(img)
batch_t = torch.unsqueeze(img_t, 0)
out = model(batch_t)
print(out)
tensor([0.5276], grad_fn=)
tensor([0.5000], grad_fn=)
tensor([0.5064], grad_fn=)
etc. Each time a different result for the same image. Is this normal? Why is this happening?
I don't see you loading your trained model. This means that every time you initialize the CNN module, the inception.fc layer gets initialized with random weights; this is most probably why you are getting different results on each inference.
Edit: You also have a random transform in your transformation pipeline, namely RandomCrop, so even with fixed weights the model sees a different crop of the image on every run.
According to this answer on the use of model.eval(), you might also want to wrap the lower half of the code cell in a with torch.no_grad(): context; otherwise autograd keeps tracking the forward pass (hence the grad_fn in your outputs), even though no parameters are actually updated without an optimizer step.
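Putting those points together, here is a hedged sketch of deterministic inference; the checkpoint path is hypothetical and assumes you saved the weights after training with torch.save(model.state_dict(), 'model.pth'):
import torch
import torchvision.transforms as transforms
from PIL import Image

# deterministic pipeline: CenterCrop instead of RandomCrop
eval_transform = transforms.Compose([
    transforms.Resize((356, 356)),
    transforms.CenterCrop((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

model = CNN().to(device)
model.load_state_dict(torch.load('model.pth', map_location=device))  # hypothetical path
model.eval()

img = Image.open('train/cat.22.png').convert('RGB')
batch_t = eval_transform(img).unsqueeze(0).to(device)
with torch.no_grad():  # no autograd tracking, so no grad_fn on the output
    out = model(batch_t)
print(out)  # identical on every run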
I have trained my CNN model and stored it in a directory named model, which contains the following files:
\model
|--- checkpoint
|--- model.data-00000-of-00001
|--- model.index
|--- model.meta
I want to restore the model and calculate the test accuracy. For that I am using the following code:
import tensorflow as tf
import numpy as np
import cv2
import os
import glob
images = []
labels = []
img_names = []
cls = []
test_path = 'data\\cifar-10\\test'
image_size = 32
num_channels = 3
# Prepare input data
with open('data\\cifar-10\\wnids.txt') as f:
classes = f.readlines()
classes = [x.strip() for x in classes]
num_classes = len(classes)
for fields in classes:
index = classes.index(fields)
print('Read {} files (Index: {})'.format(fields, index))
path = os.path.join(test_path, fields, '*g')
files = glob.glob(path)
for fl in files:
image = cv2.imread(fl)
image = cv2.resize(image, (image_size, image_size),0,0, cv2.INTER_LINEAR)
image = image.astype(np.float32)
image = np.multiply(image, 1.0 / 255.0)
images.append(image)
label = np.zeros(len(classes))
label[index] = 1.0
labels.append(label)
flbase = os.path.basename(fl)
img_names.append(flbase)
cls.append(fields)
images = np.array(images)
labels = np.array(labels)
img_names = np.array(img_names)
cls = np.array(cls)
session = tf.Session()
tf_saver = tf.train.import_meta_graph('model\\model.meta')
tf_saver.restore(session, tf.train.latest_checkpoint('model'))
x = tf.placeholder(tf.float32, shape=[None, image_size, image_size, num_channels], name='x')
y_true = tf.placeholder(tf.float32, shape=[None, num_classes], name='y_true')
y_true_cls = tf.argmax(y_true, axis=1)
y_pred = tf.nn.softmax(layer_fc2, name='y_pred')
y_pred_cls = tf.argmax(y_pred, axis=1)
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
feed_dict_test = {x: images, y_true: labels}
test_acc = session.run(accuracy, feed_dict=feed_dict_test)
msg = "Test Accuracy: {1:>6.1%}"
print(msg.format(test_acc))
On running the above code I'm getting the error
NameError: name 'layer_fc2' is not defined
How can I properly restore the model and calculate the test accuracy?
layer_fc2 is a Python variable defined in your training script (where you build the graph), so it's not present here. What you need to do is look this layer up in the restored graph. Unfortunately, you didn't name it at training time. Change your create_fc_layer code to:
def create_fc_layer(input, num_inputs, num_outputs, name, use_relu=True):
weights = create_weights(shape=[num_inputs, num_outputs])
biases = create_biases(num_outputs)
layer = tf.matmul(input, weights) + biases
if use_relu:
layer = tf.nn.relu(layer)
return tf.identity(layer, name=name) # return a named layer
...
layer_fc2 = create_fc_layer(input=layer_fc1, num_inputs=fc_layer_size, num_outputs=num_classes, name='layer_fc2', use_relu=False)
After this, in your new script:
layer_fc2 = session.graph.get_tensor_by_name('layer_fc2:0') # get_tensor_by_name with the ':0' output suffix returns the tensor, not the operation
By the way, you also don't need to redefine y_pred, y_pred_cls, etc. Give them names at training time and simply fetch them from the restored graph.
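For completeness, a sketch of how the evaluation script could look once the layer is named. It assumes the training script also named its input placeholders 'x' and 'y_true'; note get_tensor_by_name with the ':0' suffix, which fetches the op's output tensor:
session = tf.Session()
tf_saver = tf.train.import_meta_graph('model\\model.meta')
tf_saver.restore(session, tf.train.latest_checkpoint('model'))
graph = session.graph

# fetch tensors from the restored graph instead of redefining them
x = graph.get_tensor_by_name('x:0')
y_true = graph.get_tensor_by_name('y_true:0')
layer_fc2 = graph.get_tensor_by_name('layer_fc2:0')

y_pred_cls = tf.argmax(tf.nn.softmax(layer_fc2), axis=1)
correct = tf.equal(y_pred_cls, tf.argmax(y_true, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

test_acc = session.run(accuracy, feed_dict={x: images, y_true: labels})
print('Test Accuracy: {0:>6.1%}'.format(test_acc))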