How to evaluate the model through Precision, recall and F1-measure - python

I am currently working on a CSRNet model. I have trained the model and evaluated it with MAE and MSE; now I would like to evaluate it with precision, recall and F1-measure, and visualize the results. Can someone help me with the code to add to train.py and model.py in order to calculate the statistical parameters mentioned above?
"Train.py"
import sys
import os
import warnings
from model import CSRNet
from utils import save_checkpoint
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.autograd import Variable
from torchvision import datasets, transforms
import numpy as np
import argparse
import json
import cv2
import dataset
import time
os.environ['CUDA_VISIBLE_DEVICES'] = '0'  # default GPU; overridden in main() from args.gpu
Train_loss_list = []  # per-epoch mean training loss, plotted at the end of main()
#Val_MAE_list = []
k=0
# Command-line interface: train/val json lists, optional pretrained
# checkpoint, GPU id and task id (used to name saved checkpoints).
parser = argparse.ArgumentParser(description='PyTorch CSRNet')
parser.add_argument('train_json', metavar='TRAIN',
                    help='path to train json')
parser.add_argument('test_json', metavar='TEST',
                    help='path to test json')
parser.add_argument('--pre', '-p', metavar='PRETRAINED', default=None, type=str,
                    help='path to the pretrained model')
parser.add_argument('gpu', metavar='GPU', type=str,
                    help='GPU id to use.')
parser.add_argument('task', metavar='TASK', type=str,
                    help='task id to use.')
def main():
    """Entry point: parse args, build CSRNet, optionally resume from a
    checkpoint, train/validate for args.epochs, and save a loss curve."""
    global args, best_prec1
    best_prec1 = 1e6  # best (lowest) validation MAE seen so far
    args = parser.parse_args()
    # Fixed hyper-parameters (CSRNet defaults).
    args.original_lr = 1e-7
    args.lr = 1e-7
    args.batch_size = 1
    args.momentum = 0.95
    args.decay = 5*1e-4
    args.start_epoch = 0
    args.epochs = 400
    args.steps = [-1, 1, 100, 150]  # epochs at which the lr is rescaled
    args.scales = [1, 1, 1, 1]      # per-step lr multipliers
    args.workers = 4
    args.seed = time.time()  # NOTE(review): float seed passed to manual_seed below
    args.print_freq = 30
    with open(args.train_json, 'r') as outfile:
        train_list = json.load(outfile)
    with open(args.test_json, 'r') as outfile:
        val_list = json.load(outfile)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    torch.cuda.manual_seed(args.seed)
    model = CSRNet()
    model = model.cuda()
    # Sum-reduced L1 loss over the density map (size_average=False is the
    # deprecated spelling of reduction='sum').
    criterion = nn.L1Loss(size_average=False).cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.decay)
    # Optionally resume from the checkpoint given via --pre.
    if args.pre:
        if os.path.isfile(args.pre):
            print("=> loading checkpoint '{}'".format(args.pre))
            checkpoint = torch.load(args.pre)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.pre, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.pre))
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        train(train_list, model, criterion, optimizer, epoch)
        prec1 = validate(val_list, model, criterion)  # validation MAE
        is_best = prec1 < best_prec1
        best_prec1 = min(prec1, best_prec1)
        print(' * best MAE {mae:.3f} '
              .format(mae=best_prec1))
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.pre,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
        }, is_best, args.task)
    # Plot the per-epoch training loss collected in Train_loss_list.
    x1 = range(0, args.epochs)
    x2 = range(0, args.epochs)
    y1 = Train_loss_list
    #y2 = Val_MAE_list
    plt.subplots_adjust(left=None, bottom=None, right=None, top=None,
                        wspace=None, hspace=0.8)
    plt.subplot(2, 1, 1)
    plt.plot(x1, y1, label="Train Loss")
    plt.title('Train Loss vs. Epochs')
    plt.xlabel('Epochs')
    plt.ylabel('Losses')
    #plt.subplot(2, 1, 2)
    #plt.plot(x1, y2, label="Val MSE")
    #plt.title('MSE vs. epoches')
    #plt.xlabel('epochs')
    #plt.ylabel('MSE')
    plt.savefig("C:/Users/Gigabyte pc/Desktop/COUNTING/CSRNet-pytorch-master/PartA2_train_curve.jpg")
def train(train_list, model, criterion, optimizer, epoch):
    """Run one training epoch over train_list and append the epoch's mean
    loss to the module-level Train_loss_list."""
    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_loader = torch.utils.data.DataLoader(
        dataset.listDataset(train_list,
                            shuffle=True,
                            transform=transforms.Compose([
                                transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                                            std=[0.229, 0.224, 0.225]),
                            ]),
                            train=True,
                            seen=model.seen,
                            batch_size=args.batch_size,
                            num_workers=args.workers),
        batch_size=args.batch_size)
    print('epoch %d, processed %d samples, lr %.10f' % (epoch, epoch * len(train_loader.dataset), args.lr))
    model.train()
    end = time.time()
    for i, (img, target) in enumerate(train_loader):
        data_time.update(time.time() - end)  # time spent loading this batch
        img = img.cuda()
        img = Variable(img)
        output = model(img)  # predicted density map
        # Ground-truth density map; unsqueeze adds the channel dimension.
        target = target.type(torch.FloatTensor).unsqueeze(0).cuda()
        target = Variable(target)
        loss = criterion(output, target)
        losses.update(loss.item(), img.size(0))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        batch_time.update(time.time() - end)
        end = time.time()
        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  .format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses))
    Train_loss_list.append(losses.avg)  # one entry per epoch, plotted in main()
def validate(val_list, model, criterion):
    """Evaluate on val_list and return the mean absolute error (MAE)
    between predicted and ground-truth crowd counts."""
    print ('begin test')
    test_loader = torch.utils.data.DataLoader(
        dataset.listDataset(val_list,
                            shuffle=False,
                            transform=transforms.Compose([
                                transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                                            std=[0.229, 0.224, 0.225]),
                            ]), train=False),
        batch_size=args.batch_size)
    model.eval()
    mae = 0
    # NOTE(review): no torch.no_grad() here, so autograd state is still
    # tracked during evaluation — wasteful but not incorrect.
    for i, (img, target) in enumerate(test_loader):
        img = img.cuda()
        img = Variable(img)
        output = model(img)
        # Count = sum over the density map; accumulate |pred - true|.
        mae += abs(output.data.sum() - target.sum().type(torch.FloatTensor).cuda())
    mae = mae / len(test_loader)
    print(' * MAE {mae:.3f} '
          .format(mae=mae))
    #Val_MAE_list.append(mae)
    return mae
def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    # Recompute args.lr from the original LR by applying each scale whose
    # step threshold this epoch has passed; stop at the first step not yet
    # reached, or immediately after the step that equals this epoch.
    args.lr = args.original_lr
    for i in range(len(args.steps)):
        scale = args.scales[i] if i < len(args.scales) else 1
        if epoch >= args.steps[i]:
            args.lr = args.lr * scale
            if epoch == args.steps[i]:
                break
        else:
            break
    # Push the recomputed lr into every parameter group.
    for param_group in optimizer.param_groups:
        param_group['lr'] = args.lr
class AverageMeter(object):
    """Tracks the most recent value plus a running weighted average."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero out all statistics."""
        self.val = 0    # last observed value
        self.sum = 0    # weighted sum of all values
        self.count = 0  # total weight observed so far
        self.avg = 0    # running average = sum / count

    def update(self, val, n=1):
        """Record `val` with weight `n` and refresh the running average."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
# Standard script entry point.
if __name__ == '__main__':
    main()
"Model.py"
import torch.nn as nn
import torch
from torchvision import models
from utils import save_net,load_net
class CSRNet(nn.Module):
    """CSRNet crowd-counting network: a VGG-16 front end followed by a
    dilated-convolution back end that regresses a 1-channel density map."""
    def __init__(self, load_weights=False):
        super(CSRNet, self).__init__()
        self.seen = 0  # sample counter, passed to the data loader by train.py
        # VGG-16 layer widths up to conv4_3 ('M' marks a max-pool).
        self.frontend_feat = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512]
        # Back end uses dilated convolutions (dilation=2), preserving resolution.
        self.backend_feat = [512, 512, 512, 256, 128, 64]
        self.frontend = make_layers(self.frontend_feat)
        self.backend = make_layers(self.backend_feat, in_channels=512, dilation=True)
        self.output_layer = nn.Conv2d(64, 1, kernel_size=1)  # density-map head
        if not load_weights:
            # Initialize everything, then overwrite front-end weights with
            # the ImageNet-pretrained VGG-16 weights, tensor by tensor.
            mod = models.vgg16(pretrained=True)
            self._initialize_weights()
            for i in range(len(self.frontend.state_dict().items())):
                list(self.frontend.state_dict().items())[i][1].data[:] = list(mod.state_dict().items())[i][1].data[:]
    def forward(self, x):
        """Return the predicted density map for a batch of RGB images."""
        x = self.frontend(x)
        x = self.backend(x)
        x = self.output_layer(x)
        return x
    def _initialize_weights(self):
        # Gaussian init for convolutions, constant init for batch norms.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, std=0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
def make_layers(cfg, in_channels = 3, batch_norm=False, dilation = False):
    """Build a VGG-style nn.Sequential from `cfg`.

    Each integer entry adds a 3x3 conv (followed optionally by BatchNorm)
    plus ReLU; the string 'M' adds a 2x2 max-pool. When `dilation` is True
    the convolutions use dilation (and padding) of 2 instead of 1.
    """
    rate = 2 if dilation else 1
    modules = []
    for item in cfg:
        if item == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        modules.append(nn.Conv2d(in_channels, item, kernel_size=3,
                                 padding=rate, dilation=rate))
        if batch_norm:
            modules.append(nn.BatchNorm2d(item))
        modules.append(nn.ReLU(inplace=True))
        in_channels = item
    return nn.Sequential(*modules)
"val.py"
# To test single_image.py
import h5py
import scipy.io as io
import PIL.Image as Image
import numpy as np
import os
import glob
from matplotlib import pyplot as plt
from scipy.ndimage.filters import gaussian_filter
import scipy
import json
import torchvision.transforms.functional as F
from matplotlib import cm as CM
from image import *
from model import CSRNet
import torch
from matplotlib import cm as c
from torchvision import datasets, transforms
# Inference demo: load a trained checkpoint, predict the density map for one
# test image, then display predicted count, ground-truth count and the image.
transform = transforms.Compose([
    transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                std=[0.229, 0.224, 0.225]),
])
model = CSRNet()
#defining the model
model = model.cuda()
#loading the trained weights
checkpoint = torch.load('0PartB_BEST.pth.tar')
model.load_state_dict(checkpoint['state_dict'])
img = transform(Image.open('Shanghai/part_B_final/test_data/images/IMG_30.jpg').convert('RGB')).cuda()
output = model(img.unsqueeze(0))
# Predicted crowd count = sum over the density map.
print("Predicted Count : ", int(output.detach().cpu().sum().numpy()))
# Reshape the (1, 1, H, W) output into a 2-D array for display.
temp = np.asarray(output.detach().cpu().reshape(output.detach().cpu().shape[2], output.detach().cpu().shape[3]))
plt.imshow(temp, cmap=c.jet)
plt.show()
# Ground-truth density map stored alongside the test images.
temp = h5py.File('Shanghai/part_B_final/test_data/ground_truth/IMG_30.h5', 'r')
temp_1 = np.asarray(temp['density'])
plt.imshow(temp_1, cmap=c.jet)
print("Original Count : ", int(np.sum(temp_1)) + 1)
plt.show()
print("Original Image")
plt.imshow(plt.imread('Shanghai/part_B_final/test_data/images/IMG_30.jpg'))
plt.show()

Related

Using a target size (torch.Size([400])) that is different to the input size (torch.Size([400, 1]))

I'm currently switching from TensorFlow to PyTorch and am facing the warning UserWarning: Using a target size (torch.Size([400])) that is different to the input size (torch.Size([400, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size
I came across the suggestion that calling unsqueeze(1) on my target could resolve the problem; however, doing so causes problems with the multi-target setup because of the shape my loss function (cross-entropy) expects.
Here is a minimal example to my code:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn.functional as F
X1 = torch.randn(400, 1, 9999)
X2 = torch.randn((400,1, 9999))
aux1 = torch.randn(400,1)
aux2 = torch.randn(400,1)
aux3 = torch.randn(400,1)
y1 = torch.rand(400,)
y2 = torch.rand(400,)
y3 = torch.rand(400,)
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn.functional as F
# In[18]:
class MultiTaskDataset:
    """Pairs five input modalities with three task targets.

    __getitem__ returns ([amplitude, phase, weight, temperature, humidity],
    [classification label (long), shelf-life target, thickness target]),
    each element converted to a torch tensor.
    """
    def __init__(self,
                 amplitude,
                 phase,
                 weight,
                 temperature,
                 humidity,
                 shelf_life_clf,
                 shelf_life_pred,
                 thickness_pred
                 ):
        self.amplitude = amplitude
        self.phase = phase
        self.weight = weight
        self.temperature = temperature
        self.humidity = humidity
        self.shelf_life_clf = shelf_life_clf
        self.shelf_life_pred = shelf_life_pred
        self.thickness_pred = thickness_pred

    def __len__(self):
        # Dataset length is the leading dimension of the amplitude array.
        return self.amplitude.shape[0]

    def __getitem__(self, idx):
        # Inputs: the five modalities as float32 tensors.
        inputs = [
            torch.tensor(self.amplitude[idx], dtype=torch.float32),
            torch.tensor(self.phase[idx], dtype=torch.float32),
            torch.tensor(self.weight[idx], dtype=torch.float32),
            torch.tensor(self.temperature[idx], dtype=torch.float32),
            torch.tensor(self.humidity[idx], dtype=torch.float32),
        ]
        # Targets: integer class label plus two float regression targets.
        targets = [
            torch.tensor(self.shelf_life_clf[idx], dtype=torch.long),
            torch.tensor(self.shelf_life_pred[idx], dtype=torch.float32),
            torch.tensor(self.thickness_pred[idx], dtype=torch.float32),
        ]
        return (inputs, targets)
# In[19]:
# train loader
dataset = MultiTaskDataset(X1, X2, aux1, aux2, aux3,
y1,y2,y3)
train_loader = DataLoader(dataset, batch_size=512, shuffle=True, num_workers=0)
# test loader
# In[20]:
class MyModel(nn.Module):
    """Multi-task model: per-signal Conv1d feature extractors (amplitude and
    phase), one shared conv backbone, and three heads (2-way classifier plus
    two scalar regressors).

    NOTE(review): backbone2 and backbone3 are defined but never used in
    forward(), and the debug print() calls fire on every forward pass."""
    def __init__(self):
        super(MyModel, self).__init__()
        self.features_amp = nn.Sequential(
            nn.LazyConv1d(1, 3, 1),
        )
        self.features_phase = nn.Sequential(
            nn.LazyConv1d(1, 3, 1),
        )
        self.backbone1 = nn.Sequential(
            nn.LazyConv1d(64, 3, 1),
            nn.LazyConv1d(64, 3, 1),
            nn.AvgPool1d(3),
            nn.Dropout(0.25),
        )
        self.backbone2 = nn.Sequential(
            nn.Conv1d(64, 32, 3, 1),
            nn.Conv1d(32, 32, 3, 1),
            nn.AvgPool1d(3),
            nn.Dropout(0.25),
        )
        self.backbone3 = nn.Sequential(
            nn.Conv1d(32, 16, 3, 1),
            nn.Conv1d(16, 16, 3, 1),
            nn.AvgPool1d(3),
            nn.Dropout(0.25),
        )
        # Output heads; Lazy layers infer in_features on first forward.
        self.classifier = nn.LazyLinear(2)
        self.shelf_life_reg = nn.LazyLinear(1)
        self.thickness_reg = nn.LazyLinear(1)
    def forward(self, x1, x2, aux1, aux2, aux3):
        """x1/x2: (batch, 1, length) signals; aux1..aux3: (batch, 1) scalars.
        Returns (class logits, shelf-life regression, thickness regression);
        the two regression outputs have shape (batch, 1)."""
        x1 = self.features_amp(x1)
        x2 = self.features_phase(x2)
        x1 = x1.view(x1.size(0), -1)
        x2 = x2.view(x2.size(0), -1)
        x = torch.cat((x1, x2), dim=-1)  # concatenate flattened features
        print(x.size())
        x = x.unsqueeze(1)  # re-add the channel dim for the Conv1d backbone
        print(x.size())
        x = self.backbone1(x)
        print(x.size())
        x = torch.flatten(x, start_dim=1, end_dim=-1)
        # Append the three auxiliary scalars before the heads.
        x = torch.cat([x, aux1, aux2, aux3], dim=-1)
        shelf_life_clf = self.classifier(x)
        shelf_life_reg = self.shelf_life_reg(x)
        thickness_reg = self.thickness_reg(x)
        return (shelf_life_clf,
                shelf_life_reg,
                thickness_reg)
# Model, optimiser and per-task losses. NOTE(review): the optimizer and the
# three criteria are re-created with different settings further below,
# before the training loop runs; these initial bindings are superseded.
model = MyModel()
optimizer = optim.Adam(model.parameters(), lr=0.003)
criterion1 = nn.CrossEntropyLoss()
criterion2 = nn.MSELoss()
criterion3 = nn.MSELoss()
# In[21]:
def train(epoch):
    """Run one epoch over the module-level train_loader, summing the three
    task losses (CE + 2x MSE); returns the list of per-batch total losses.

    NOTE(review): criterion2/criterion3 receive (batch, 1) model outputs
    against (batch,) targets — the broadcasting warning from the question.
    Unsqueeze the regression targets (or squeeze the outputs) to fix it."""
    model.train()
    #exp_lr_scheduler.step()
    arr_loss = []
    #first_batch = next(iter(train_loader))
    for batch_idx, (data, target) in enumerate(train_loader):
        #amp, phase = data
        clf, reg1, reg2 = target
        #print(amp.shape, phase.shape)
        #print(target[2].shape)
        if torch.cuda.is_available():
            device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
            data = [data[i].cuda() for i in range(len(data))]
            target = [target[i].cuda() for i in range(len(target))]
            model.to(device)
        optimizer.zero_grad()
        output1, output2, output3 = model(*data)
        #losses
        loss = criterion1(output1, target[0].long())    # classification
        loss1 = criterion2(output2, target[1].float())  # shelf-life regression
        loss2 = criterion3(output3, target[2].float())  # thickness regression
        loss = loss + loss1 + loss2
        #metrices
        loss.backward()
        optimizer.step()
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, (batch_idx + 1) * len(data), len(train_loader.dataset),
            100. * (batch_idx + 1) / len(train_loader), loss.data))
        arr_loss.append(loss.data)
    return arr_loss
def averaged_accuracy(outputs, targets):
    """Return the mean classification accuracy (in %) over parallel lists of
    logit tensors and integer label tensors.

    Each outputs[i] is a (batch, n_classes) logit tensor and targets[i] the
    matching (batch,) label tensor. Fixes from the original: the assert now
    checks length equality (it was inverted), the loop uses its own
    output/target pair instead of undefined names (output1, target, total,
    correct), and the mean is computed over plain floats.
    """
    assert len(outputs) == len(targets), "number of outputs should equal the number of targets"
    accuracy = []
    for output, target in zip(outputs, targets):
        # Predicted class = argmax over the class dimension.
        _, predicted = torch.max(output.data, 1)
        total = target.size(0)
        correct = (predicted == target).sum().item()
        accuracy.append(correct / total * 100)
    return sum(accuracy) / len(accuracy)
# In[22]:
# Re-bind the optimiser (smaller lr) and criteria actually used by train(),
# then run the training loop.
optimizer = optim.Adam(model.parameters(), lr=0.00003)
criterion1 = nn.CrossEntropyLoss()
criterion2 = nn.MSELoss()
criterion3 = nn.MSELoss()
n_epochs = 10
for epoch in range(n_epochs):
    train(epoch)
Can anybody provide guidance to resolve this problem?

Implementing a U-net for image segmentation and have the following error: NameError: name 'val_iou' is not defined

I'm trying to use a U-net to apply image segmentation to the RUGD dataset. I get the following error and I'm stuck.
'lrs':lrs}
print('Total time: {:.2f}m' .format((time.time()-fit_time)/60))
return history
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-17-112aada9db24> in <module>
98
99 #iou
--> 100 val_iou.append(val_iou_score/len(val_loader))
101 train_iou.append(iou_score/len(train_loader))
102 train_acc.append(accuracy/len(train_loader))
NameError: name 'val_iou' is not defined
Installing segmentation-models for pytorch
!pip install -q segmentation-models-pytorch
!pip install -q torchsummary
Installing Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T
import torchvision
import torch.nn.functional as F
from torch.autograd import Variable
from PIL import Image
import cv2
import albumentations as A
import os
import time
from tqdm.notebook import tqdm
from torchsummary import summary
import segmentation_models_pytorch as smp
# Use the GPU when available. The original line selected "cpu" in both
# branches of the conditional, silently disabling CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Name and direct paths
IMAGE_PATH = '/data/users/mypath/Rugd_dataset/images/train/'
MASK_PATH = '/data/users/mypath/Rugd_dataset/mask/train/'
Create dataframe
n_classes = 24
def create_df(image_path=None):
    """Collect image ids (filenames without extension) into a DataFrame.

    Walks `image_path` recursively; defaults to the module-level IMAGE_PATH
    so existing no-argument calls behave as before. Returns a DataFrame with
    a single 'id' column and a 0..n-1 integer index.
    """
    if image_path is None:
        image_path = IMAGE_PATH  # backward-compatible default
    name = []
    for dirname, _, filenames in os.walk(image_path):
        for filename in filenames:
            # 'img_001.png' -> 'img_001'
            name.append(filename.split('.')[0])
    return pd.DataFrame({'id': name}, index=np.arange(0, len(name)))
df = create_df()
print('Total Images: ',len(df))
Total Images: 7387
Split Test and Train
X_trainval, X_test = train_test_split(df['id'].values, test_size=0.1, random_state=19)
X_train, X_val = train_test_split(X_trainval, test_size=0.15, random_state=19)
print('Train Size : ', len(X_train))
print('Val Size : ', len(X_val))
print('Test Size : ', len(X_test))
# show original picture with overlay of mask
img = Image.open(IMAGE_PATH + df['id'][0] + '.png')
mask = Image.open(MASK_PATH + df['id'][0] + '.png')
print('Image size', np.asarray(img).shape)
print('Mask size', np.asarray(mask).shape)
plt.imshow(img)
plt.imshow(mask, alpha=0.6)
plt.title('Picture with Mask Applied')
plt.show()
Image size (550, 688, 3)
Mask size (550, 688, 3)
image rendered
Define U-net
class RugdDataset(Dataset):
    """RUGD segmentation dataset: loads an RGB image and its grayscale mask,
    optionally applies an albumentations transform, normalizes the image and
    returns (image tensor, long mask tensor).

    Fixes from the original paste: `return img, mask` had been swallowed by
    a triple-quoted comment block, so __getitem__ returned None; the tiles()
    helper had a `continuous` typo and assigned to the wrong name.
    """
    def __init__(self, img_path, mask_path, X, mean, std, transform=None, patch=False):
        self.img_path = img_path
        self.mask_path = mask_path
        self.X = X          # list of image ids (filenames without extension)
        self.transform = transform
        self.patches = patch
        self.mean = mean
        self.std = std

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        img = cv2.imread(self.img_path + self.X[idx] + '.png')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # cv2 loads BGR
        mask = cv2.imread(self.mask_path + self.X[idx] + '.png', cv2.IMREAD_GRAYSCALE)
        if self.transform is not None:
            aug = self.transform(image=img, mask=mask)
            img = Image.fromarray(aug['image'])
            mask = aug['mask']
        if self.transform is None:
            img = Image.fromarray(img)
        t = T.Compose([T.ToTensor(), T.Normalize(self.mean, self.std)])
        img = t(img)
        mask = torch.from_numpy(mask).long()
        if self.patches:
            img, mask = self.tiles(img, mask)
        return img, mask

    def tiles(self, img, mask):
        """Split a full image/mask pair into 512x768 patches."""
        img_patches = img.unfold(1, 512, 512).unfold(2, 768, 768)
        img_patches = img_patches.contiguous().view(3, -1, 512, 768)
        img_patches = img_patches.permute(1, 0, 2, 3)
        mask_patches = mask.unfold(0, 512, 512).unfold(1, 768, 768)
        mask_patches = mask_patches.contiguous().view(-1, 512, 768)
        return img_patches, mask_patches
Apply augmentation and initiate dataloader
mean=[0.485, 0.456, 0.406]
std=[0.229, 0.224, 0.225]
t_train = A.Compose([A.Resize(512, 512, interpolation=cv2.INTER_NEAREST), A.HorizontalFlip(), A.VerticalFlip(),
A.GridDistortion(p=0.2), A.RandomBrightnessContrast((0,0.5),(0,0.5)), A.GaussNoise()])
t_val = A.Compose([A.Resize(512, 512, interpolation=cv2.INTER_NEAREST), A.HorizontalFlip(), A.GridDistortion(p=0.2)])
# dataset
train_set = RugdDataset(IMAGE_PATH, MASK_PATH, X_train, mean, std, t_train, patch=False)
val_set = RugdDataset(IMAGE_PATH, MASK_PATH, X_val, mean, std, t_val, patch=False)
# dataloader
batch_size = 3
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=True)
build the model
model = smp.Unet('mobilenet_v2', encoder_weights='imagenet', classes=24, activation=None,
encoder_depth=5, decoder_channels=[256, 128, 64, 32, 16])
render the model
model
Def pixel accuracy
def pixel_accuracy(output, mask):
    """Fraction of pixels whose argmax class in `output` matches `mask`.

    output: (batch, n_classes, H, W) logits; mask: (batch, H, W) labels.
    """
    with torch.no_grad():
        predicted = F.softmax(output, dim=1).argmax(dim=1)
        matches = (predicted == mask).int()
        return float(matches.sum()) / float(matches.numel())
Define Miou
def mIoU(pred_mask, mask, smooth=1e-10, n_classes=24):
    """Mean intersection-over-union across classes present in `mask`.

    pred_mask: (batch, n_classes, H, W) logits; mask: (batch, H, W) labels.
    Classes absent from the ground truth contribute NaN and are skipped via
    np.nanmean; `smooth` guards against division by zero.
    """
    with torch.no_grad():
        flat_pred = torch.argmax(F.softmax(pred_mask, dim=1), dim=1).contiguous().view(-1)
        flat_true = mask.contiguous().view(-1)
        per_class = []
        for cls in range(0, n_classes):  # one IoU per pixel class
            pred_hits = flat_pred == cls
            true_hits = flat_true == cls
            if true_hits.long().sum().item() == 0:
                # Class not present in the ground truth: skip it.
                per_class.append(np.nan)
            else:
                inter = torch.logical_and(pred_hits, true_hits).sum().float().item()
                union = torch.logical_or(pred_hits, true_hits).sum().float().item()
                per_class.append((inter + smooth) / (union + smooth))
        return np.nanmean(per_class)
def get_lr(optimizer):
    """Return the learning rate of the optimizer's first param group
    (None when the optimizer has no param groups)."""
    return next((group['lr'] for group in optimizer.param_groups), None)
def fit(epochs, model, train_loader, val_loader, criterion, optimizer, scheduler, patch=False):
    """Train `model` for `epochs`, validating after every epoch.

    Returns a history dict with per-epoch train/val loss, mIoU, pixel
    accuracy and the per-batch learning rates. Saves the whole model every
    5th validation-loss improvement.

    Fix from the original paste: the inner training loop had been commented
    out with a triple-quoted string, which orphaned the following `else:`
    and pushed the metric bookkeeping (val_iou etc.) out of the function's
    scope — the cause of the question's `NameError: name 'val_iou' is not
    defined`. The loop is restored below.
    """
    torch.cuda.empty_cache()
    train_losses = []
    test_losses = []
    val_iou = []; val_acc = []
    train_iou = []; train_acc = []
    lrs = []
    min_loss = np.inf
    decrease = 1; not_improve = 0
    model.to(device)
    fit_time = time.time()
    for e in range(epochs):
        since = time.time()
        running_loss = 0
        iou_score = 0
        accuracy = 0
        # ---- training phase ----
        model.train()
        for i, data in enumerate(tqdm(train_loader)):
            image_tiles, mask_tiles = data
            if patch:
                # Flatten the patch dimension into the batch dimension.
                bs, n_tiles, c, h, w = image_tiles.size()
                image_tiles = image_tiles.view(-1, c, h, w)
                mask_tiles = mask_tiles.view(-1, h, w)
            image = image_tiles.to(device); mask = mask_tiles.to(device)
            output = model(image)
            loss = criterion(output, mask)
            # evaluation metrics
            iou_score += mIoU(output, mask)
            accuracy += pixel_accuracy(output, mask)
            # backward
            loss.backward()
            optimizer.step()       # update weights
            optimizer.zero_grad()  # reset gradients
            lrs.append(get_lr(optimizer))
            scheduler.step()       # OneCycleLR steps once per batch
            running_loss += loss.item()
        # ---- validation phase ----
        model.eval()
        test_loss = 0
        test_accuracy = 0
        val_iou_score = 0
        with torch.no_grad():
            for i, data in enumerate(tqdm(val_loader)):
                image, mask = data
                if patch:
                    bs, n, c, h, w = image.size()
                    image = image.view(-1, c, h, w)
                    mask = mask.view(-1, h, w)
                image = image.to(device); mask = mask.to(device)
                output = model(image)
                val_iou_score += mIoU(output, mask)
                test_accuracy += pixel_accuracy(output, mask)
                loss = criterion(output, mask)
                test_loss += loss.item()
        # ---- per-epoch bookkeeping ----
        train_losses.append(running_loss / len(train_loader))
        test_losses.append(test_loss / len(val_loader))
        if min_loss > (test_loss / len(val_loader)):
            print('Loss Decreasing.. {:.3f} >> {:.3f} '.format(min_loss, (test_loss / len(val_loader))))
            min_loss = (test_loss / len(val_loader))
            decrease += 1
            if decrease % 5 == 0:
                print('saving model...')
                torch.save(model, 'Unet-mobilnet_v2_mIoU-{:.3f}.pt'.format(val_iou_score / len(val_loader)))
        if (test_loss / len(val_loader)) > min_loss:
            not_improve += 1
            min_loss = (test_loss / len(val_loader))
            print(f'Loss Not Decrease for {not_improve} time')
            if not_improve == 7:
                print('Loss not decrease for 7 times, Stop Training')
                # break  # early stopping kept disabled, matching the original
        val_iou.append(val_iou_score / len(val_loader))
        train_iou.append(iou_score / len(train_loader))
        train_acc.append(accuracy / len(train_loader))
        val_acc.append(test_accuracy / len(val_loader))
        print("Epoch:{}/{}".format(e + 1, epochs),
              "Train Loss: {:.3f}..".format(running_loss / len(train_loader)),
              "Val Loss: {:.3f}..".format(test_loss / len(val_loader)),
              "Train mIoU:{:.3f}..".format(iou_score / len(train_loader)),
              "Val mIoU:{:.3f}..".format(val_iou_score / len(val_loader)),
              "Val ACC:{:.3f}..".format(test_accuracy / len(val_loader)),
              "Time: {:.2f}m".format((time.time() - since) / 60))
    history = {'train_loss': train_losses, 'val_loss': test_losses,
               'train_miou': train_iou, 'val_miou': val_iou,
               'train_acc': train_acc, 'val_acc': val_acc,
               'lrs': lrs}
    print('Total time: {:.2f}m'.format((time.time() - fit_time) / 60))
    return history
This the point where the model fails
'lrs':lrs}
print('Total time: {:.2f}m' .format((time.time()-fit_time)/60))
return history
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-17-112aada9db24> in <module>
98
99 #iou
--> 100 val_iou.append(val_iou_score/len(val_loader))
101 train_iou.append(iou_score/len(train_loader))
102 train_acc.append(accuracy/len(train_loader))
NameError: name 'val_iou' is not defined
Remainder of the code
# Training configuration. OneCycleLR is built with steps_per_epoch because
# it is stepped once per batch inside fit(), not once per epoch.
max_lr = 1e-3
epoch = 15
weight_decay = 1e-4
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=max_lr, weight_decay=weight_decay)
sched = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr, epochs=epoch, steps_per_epoch=len(train_loader))
history = fit(epoch, model, train_loader, val_loader, criterion, optimizer, sched)

RuntimeError: Given input size: (512x1x3). Calculated output size: (512x0x1). Output size is too small

I am working on 3D cardiac CT images (axial/sagittal/coronal) using the 2D pre-trained model VGG-13, but I am facing the following issues. Could somebody guide me by reviewing the error in my code?
Following are the parameters
Classifier
(
(layer): Sequential(
(0): Linear(in_features=45, out_features=5, bias=True)
)
)
torch.Size([50, 1, 62, 62])
this is a vgg.py file consisting of axial/sagittal/coronal model code using pre-trained vgg13 model. The input features are 45 and the output features are 5 with the torch.size(1,62,62)
import torch
import torch.nn as nn
from torchvision import models
__all__ = ['vggNet']
class vggNet(nn.Module):
    """Three-view VGG-13 classifier for cardiac CT patches: one backbone for
    the axial view and one shared backbone for sagittal/coronal. Each view
    yields 15 features; the concatenated 45 features feed a 5-class head."""
    def __init__(self, is_emr=False, mode='sum'):
        super().__init__()
        self.is_emr = is_emr
        self.mode = mode
        in_dim = 45  # 3 views x 15 features each
        # Axial backbone: VGG-13 with a 1-channel 7x7 stem conv and the
        # second pool disabled (MaxPool2d(1) is an identity-sized pool).
        self.axial_model = models.vgg13(pretrained=True)
        out_channels = self.axial_model.features[0].out_channels
        self.axial_model.features[0] = nn.Conv2d(1, out_channels, kernel_size=7, stride=1, padding=0, bias=False)
        self.axial_model.features[3] = nn.MaxPool2d(1)
        num_ftrs = self.axial_model.classifier[6].in_features
        self.axial_model.classifier[6] = nn.Linear(num_ftrs, 15)
        # Shared backbone for the sagittal and coronal views.
        self.sa_co_model = models.vgg13(pretrained=True)
        self.sa_co_model.features[0] = nn.Conv2d(1, out_channels, kernel_size=7, stride=1, padding=(3, 0), bias=False)
        self.sa_co_model.features[3] = nn.MaxPool2d(1)
        self.sa_co_model.classifier[6] = nn.Linear(num_ftrs, 15)
        if self.is_emr:
            self.emr_model = EMRModel()
        self.classifier = Classifier(in_dim)
        print(self.classifier)
    def forward(self, axial, sagittal, coronal, emr):
        """Crop each view by 3 px, extract per-view features, concatenate.

        NOTE(review): the 3 px crops shrink already-small inputs; with the
        question's input sizes this is where the feature maps become too
        small for VGG's pooling stack ('output size is too small')."""
        axial = axial[:, :, :-3, :-3]
        sagittal = sagittal[:, :, :, :-3]
        coronal = coronal[:, :, :, :-3]
        print(axial.shape)
        axial_feature = self.axial_model(axial)
        sagittal_feature = self.sa_co_model(sagittal)
        coronal_feature = self.sa_co_model(coronal)
        out = torch.cat([axial_feature, sagittal_feature, coronal_feature], dim=1)
        # NOTE(review): as pasted (indentation lost), the classifier only runs
        # on the EMR branch, so with is_emr=False the raw 45-dim concatenation
        # is returned — confirm intended scope of these three lines.
        if self.is_emr:
            emr_feature = self.emr_model(emr)
            out = self.classifier(out)
            out += emr_feature
        return out
class EMRModel(nn.Module):
    """MLP that embeds 7 EMR features into 5 output logits via two
    256-unit hidden layers (BatchNorm + LeakyReLU + Dropout each)."""
    def __init__(self):
        super().__init__()
        blocks = []
        for n_in, n_out in ((7, 256), (256, 256)):
            blocks.extend([
                nn.Linear(n_in, n_out),
                nn.BatchNorm1d(n_out),
                nn.LeakyReLU(negative_slope=0.2),
                nn.Dropout(p=0.2, inplace=True),
            ])
        blocks.append(nn.Linear(256, 5))
        self.layer = nn.Sequential(*blocks)

    def forward(self, x):
        """x: (batch, 7) -> (batch, 5) logits."""
        return self.layer(x)
class Classifier(nn.Module):
    """Single linear head mapping `in_dim` features to 5 class logits."""
    def __init__(self, in_dim):
        super().__init__()
        head = nn.Linear(in_dim, 5)
        self.layer = nn.Sequential(head)

    def forward(self, x):
        """x: (batch, in_dim) -> (batch, 5) logits."""
        return self.layer(x)
class ConvBN(nn.Module):
    """Conv2d (no bias) -> BatchNorm2d -> LeakyReLU(0.2) building block.

    Extra Conv2d keyword arguments (kernel_size, stride, padding, ...) are
    forwarded via **kwargs; bias is omitted because BatchNorm follows.
    """
    def __init__(self, in_dim, out_dim, **kwargs):
        super().__init__()
        conv = nn.Conv2d(in_dim, out_dim, bias=False, **kwargs)
        self.layer = nn.Sequential(
            conv,
            nn.BatchNorm2d(out_dim),
            nn.LeakyReLU(negative_slope=0.2),
        )

    def forward(self, x):
        return self.layer(x)
if __name__ == "__main__":
    # Smoke test for the model, then dump VGG-13's parameter names.
    # Fixes from the original: the class is named vggNet (`vgg()` was a
    # NameError), and forward() requires a fourth `emr` argument — with the
    # default is_emr=False it is never read, so None is passed.
    images = torch.randn(2, 1, 65, 65)
    model = vggNet()
    out = model(images, images, images, None)
    model = models.vgg13(pretrained=True)
    for k, v in model.state_dict().items():
        print(k)
this is ct-pretrained file that take heart_patches of size 512*512 and trained the model to calculate the confusion matrix and accuracy of the model using 5 targets
import os
import sys
import random
import time
import argparse
import numpy as np
from PIL import Image
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import torch
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.data import DataLoader
from torchvision import transforms
import models
def parse_args():
    """Parse the CLI flags: -m model name, -r use-EMR flag, -g gpu id,
    -k folds, -b batch size, -e epochs, -l learning rate, -i interval."""
    parser = argparse.ArgumentParser()
    option_specs = (
        ('-m', str, 'vggNet'),
        ('-r', int, 1),
        ('-g', int, 5),
        ('-k', int, 5),
        ('-b', int, 50),
        ('-e', int, 1000),
        ('-l', float, 0.0001),
        ('-i', int, 50),
    )
    for flag, flag_type, default in option_specs:
        parser.add_argument(flag, type=flag_type, default=default)
    return parser.parse_args()
def build_model(model_name=None, use_emr=None):
    """Instantiate the network selected by `model_name`.

    Both arguments default to the module-level globals `name` and `is_emr`
    set from the CLI, so the existing no-argument call keeps working.
    Fix from the original: an unknown name now raises ValueError instead of
    silently returning None (which would crash later at `.cuda()`).
    """
    if model_name is None:
        model_name = name
    if use_emr is None:
        use_emr = is_emr
    if model_name == 'resnet':
        return models.ResNet(is_emr=use_emr)
    elif model_name == 'hardnet':
        return models.HardNet(is_emr=use_emr)
    elif model_name == 'densenet':
        return models.DenseNet(is_emr=use_emr)
    elif model_name == 'vggNet':
        return models.vggNet(is_emr=use_emr)
    raise ValueError(f'unknown model name: {model_name!r}')
def train(model, loader, optimizer):
    """Run one training epoch over the three-view CT loader.

    Returns (mean cross-entropy loss, mean accuracy) averaged over batches.
    """
    model.train()
    running_loss = 0
    running_metric = 0
    for batch_idx, (axial, sagittal, coronal, emr, target) in enumerate(loader):
        axial, sagittal, coronal, emr, target = axial.cuda(), sagittal.cuda(), coronal.cuda(), emr.cuda(), target.cuda()
        optimizer.zero_grad()
        output = model(axial, sagittal, coronal, emr)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
        pred = output.argmax(dim=1)  # predicted class per sample
        running_loss += loss.item()
        running_metric += metric_fn(pred, target)
    # Average over the number of batches seen.
    running_loss /= (batch_idx + 1)
    running_metric /= (batch_idx + 1)
    return running_loss, running_metric
def validate(model, loader):
    """Evaluate on `loader` without gradient tracking.

    Returns (mean cross-entropy loss, mean accuracy) averaged over batches.
    """
    model.eval()
    running_loss = 0
    running_metric = 0
    with torch.no_grad():
        for batch_idx, (axial, sagittal, coronal, emr, target) in enumerate(loader):
            axial, sagittal, coronal, emr, target = axial.cuda(), sagittal.cuda(), coronal.cuda(), emr.cuda(), target.cuda()
            output = model(axial, sagittal, coronal, emr)
            loss = loss_fn(output, target)
            pred = output.argmax(dim=1)
            running_loss += loss.item()
            running_metric += metric_fn(pred, target)
    # Average over the number of batches seen.
    running_loss /= (batch_idx + 1)
    running_metric /= (batch_idx + 1)
    return running_loss, running_metric
def test(model, loader):
    """Evaluate on the test set and append to {out_path}/results.txt the
    confusion matrix, per-class tp/tn/fp/fn, and macro-averaged accuracy
    and F1 score over the 5 classes."""
    model.eval()
    outputs = []
    targets = []
    with torch.no_grad():
        for batch_idx, (axial, sagittal, coronal, emr, target) in enumerate(loader):
            axial, sagittal, coronal, emr = axial.cuda(), sagittal.cuda(), coronal.cuda(), emr.cuda()
            output = model(axial, sagittal, coronal, emr)
            pred = output.argmax(dim=1).cpu()
            # batch_idx == 0 initialises the tensors; later batches concatenate.
            outputs = torch.cat([outputs, pred], dim=0) if batch_idx else pred
            targets = torch.cat([targets, target], dim=0) if batch_idx else target
    # Confusion matrix whose i-th row and j-th column entry indicates the number of samples with true label being i-th class and predicted label being j-th class.
    cm = confusion_matrix(targets, outputs)
    num_class = 5
    sum_acc = 0
    sum_f1 = 0
    txt = ''
    for i in range(num_class):
        tp = 0
        tn = 0
        fp = 0
        fn = 0
        tp = cm[i, i]             # true positives: diagonal entry
        fn = cm[i, :].sum() - tp  # false negatives: rest of row i
        for j in range(num_class):
            if i == j:
                fp = cm[:, j].sum() - tp  # false positives: rest of column i
            else:
                # true negatives: samples in neither row i nor column i
                tn += cm[:, j].sum() - cm[i, j]
        txt += f'{class_name[i]} tp:{tp}, tn:{tn}, fp:{fp}, fn:{fn}\n'
        acc = (tp + tn) / (tp + tn + fp + fn)
        f1 = 2 * tp / (2 * tp + fp + fn)
        sum_acc += acc
        sum_f1 += f1
    with open(f'{out_path}/results.txt', 'a') as f:
        f.write(f'{str(cm)}\n')
        f.write(txt)
        f.write(f'Accuracy: {sum_acc/num_class:.4f}\n')
        f.write(f'F-1 score: {sum_f1/num_class:.4f}\n')
def loss_fn(output, target):
    """Cross-entropy between raw logits `output` and integer labels `target`."""
    return F.cross_entropy(output, target)
def metric_fn(output, target):
    """Accuracy of predicted labels `output` against `target`.

    `target` is reshaped to match `output` before comparison; returns the
    fraction of matching entries along the leading dimension.
    """
    batch = output.size(0)
    hits = output.eq(target.view_as(output)).sum().item()
    return hits / batch
# ---- experiment configuration pulled from the command line ----
args = parse_args()
name = args.m
gpu_num = str(args.g)
is_emr = bool(args.r)            # whether EMR (clinical) features are used
batch_size = args.b
num_epochs = args.e
min_epoch = 1                    # do not checkpoint before this epoch
interval = args.i                # early-stopping patience, in epochs
lr = args.l
k_fold = args.k
in_path = f'/data/knight/data/image/heart/patches'
out_path = f'/data/results/results_heart/{name}_9'
os.makedirs(out_path, exist_ok=True)
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = gpu_num
#SEED = 42
#os.environ["PYTHONHASHSEED"] = str(SEED)
#random.seed(SEED)
#np.random.seed(SEED)
#
#torch.manual_seed(SEED)
#torch.cuda.manual_seed(SEED)
#torch.backends.cudnn.deterministic = True
#torch.backends.cudnn.benchmark = False
class_name = ['0_BG', '1_LM', '2_CX', '3_LAD', '4_RCA']
# Fixed 5-fold assignment: which 1-based fold ids form the train/val/test
# split in each cross-validation round.
train_data_selection = np.array([[2, 3, 5], [1, 3, 4], [2, 4, 5], [1, 2, 5], [1, 3, 4]])
val_data_selection = np.array([[4], [5], [1], [3], [2]])
test_data_selection = np.array([[1], [2], [3], [4], [5]])
dataset = np.array([f'{in_path}/fold_{i+1}' for i in range(k_fold)])
transform = transforms.Compose([
    transforms.ToTensor()])
for k in range(k_fold):
    train_data = models.CTDataset(dataset[train_data_selection[k]-1], transform=transform)
    val_data = models.CTDataset(dataset[val_data_selection[k]-1], transform=transform)
    test_data = models.CTDataset(dataset[test_data_selection[k]-1], transform=transform)
    if is_emr:
        # Normalise EMR features with statistics from the train split only,
        # so no information leaks from val/test into training.
        mean_df, stddev_df = train_data.get_emr_mean_stddev()
        train_data.emr_normalize(mean_df, stddev_df)
        val_data.emr_normalize(mean_df, stddev_df)
        test_data.emr_normalize(mean_df, stddev_df)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
    model = build_model().cuda()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
    #scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)
    stop_epoch = 0
    best_loss = sys.maxsize
    for epoch in range(num_epochs):
        start_time = time.time()
        loss, metric = train(model, train_loader, optimizer)
        val_loss, val_metric = validate(model, val_loader)
        scheduler.step()
        # Checkpoint this fold whenever validation loss improves.
        if (epoch+1) >= min_epoch and (val_loss - best_loss) < 0:
            stop_epoch = epoch
            best_loss = val_loss
            torch.save(model.state_dict(), f'{out_path}/checkpoint_{k+1}.pth')
        if (epoch+1) % 1 == 0:
            print(f'Epoch: {epoch+1}/{num_epochs} - ', end='')
            print(f'ACC: {val_metric:.4f} - ', end='')
            print(f'Loss: {val_loss:.4f} - ', end='')
            print(f'took {time.time() - start_time:.2f}s')
        # Early stopping: no improvement in the last `interval` epochs.
        if stop_epoch + interval < epoch:
            break
    # Reload this fold's best checkpoint and evaluate on its test split.
    model = build_model().cuda()
    model.load_state_dict(torch.load(f'{out_path}/checkpoint_{k+1}.pth'))
    test(model, test_loader)

RuntimeError: The size of tensor a (64) must match the size of tensor b (128) at non-singleton dimension 3

I want to train the model. In this model, the first ten (10) layers of VGG16 have been used as the frontend. Now I want to change its network from 10 to 13 layers of VGG16, and the following error has occurred.
Runtime Error: The size of tensor a (64) must match the size of tensor b (128) at non-singleton dimension 3
Here is the error stacktrace :
G:\Saadain\Anaconda\envs\CSRNet\lib\site-packages\torch\nn\modules\loss.py:528: UserWarning: Using a target size (torch.Size([1, 1, 85, 128])) that is different to the input size (torch.Size([1, 1, 42, 64])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
return F.mse_loss(input, target, reduction=self.reduction)
Traceback (most recent call last):
File "train.py", line 230, in <module>
main()
File "train.py", line 92, in main
train(train_list, model, criterion, optimizer, epoch)
File "train.py", line 145, in train
loss = criterion(output, target)
File "G:\Saadain\Anaconda\envs\CSRNet\lib\site-packages\torch\nn\modules\module.py", line 889, in _call_impl
result = self.forward(*input, **kwargs)
File "G:\Saadain\Anaconda\envs\CSRNet\lib\site-packages\torch\nn\modules\loss.py", line 528, in forward
return F.mse_loss(input, target, reduction=self.reduction)
File "G:\Saadain\Anaconda\envs\CSRNet\lib\site-packages\torch\nn\functional.py", line 2928, in mse_loss
expanded_input, expanded_target = torch.broadcast_tensors(input, target)
File "G:\Saadain\Anaconda\envs\CSRNet\lib\site-packages\torch\functional.py", line 74, in broadcast_tensors
return _VF.broadcast_tensors(tensors) # type: ignore
RuntimeError: The size of tensor a (64) must match the size of tensor b (128) at non-singleton dimension 3
The model definition:
import torch.nn as nn
import torch
from torchvision import models
from utils import save_net,load_net
class CSRNet(nn.Module):
    """CSRNet: a VGG-16 convolutional frontend followed by a dilated-conv
    backend that regresses a single-channel crowd-density map."""
    def __init__(self, load_weights=False):
        super(CSRNet, self).__init__()
        # Counter for samples seen during training (read by the data loader).
        self.seen = 0
        # NOTE(review): this config lists all 13 VGG-16 convs with FOUR 'M'
        # max-pools, so the frontend downsamples the input by 16. The
        # original 10-layer CSRNet frontend stops after the third pool
        # (downsample 8); targets generated at 1/8 scale will be twice the
        # output's size — consistent with the reported MSE size mismatch
        # (target 85x128 vs output 42x64). Either drop the last 'M' or
        # regenerate targets at 1/16 scale.
        self.frontend_feat = [64,64,'M',128,128,'M',256,256,256,'M',512,512,512,'M',512,512,512,]
        self.backend_feat = [512, 512, 512,256,128,64]
        self.frontend = make_layers(self.frontend_feat)
        self.backend = make_layers(self.backend_feat,in_channels = 512,dilation = True)
        # 1x1 conv collapsing 64 channels to the single density-map channel.
        self.output_layer = nn.Conv2d(64, 1, kernel_size=1)
        if not load_weights:
            # Copy pretrained VGG-16 weights into the frontend parameter by
            # parameter; relies on both state_dicts sharing the same ordering.
            mod = models.vgg16(pretrained = True)
            self._initialize_weights()
            for i in range(len(self.frontend.state_dict().items())):
                list(self.frontend.state_dict().items())[i][1].data[:] = list(mod.state_dict().items())[i][1].data[:]
    def forward(self,x):
        """Map an image batch to a (downsampled) density-map batch."""
        x = self.frontend(x)
        x = self.backend(x)
        x = self.output_layer(x)
        return x
    def _initialize_weights(self):
        # Gaussian init for conv weights, constants for batch-norm layers.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, std=0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
def make_layers(cfg, in_channels = 3,batch_norm=False,dilation = False):
    """Build a Sequential of 3x3 conv(+BN)+ReLU stages described by `cfg`.

    Each integer in `cfg` adds a conv with that many output channels; the
    token 'M' adds a 2x2 stride-2 max-pool. With dilation=True the convs
    use dilation (and padding) 2 so spatial size is preserved.
    """
    d_rate = 2 if dilation else 1
    layers = []
    for v in cfg:
        if v == 'M':
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        conv = nn.Conv2d(in_channels, v, kernel_size=3, padding=d_rate, dilation=d_rate)
        if batch_norm:
            layers.extend([conv, nn.BatchNorm2d(v), nn.ReLU(inplace=True)])
        else:
            layers.extend([conv, nn.ReLU(inplace=True)])
        in_channels = v
    return nn.Sequential(*layers)
And the train function :
import sys
import os
import warnings
from model import CSRNet
from utils import save_checkpoint
import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import datasets, transforms
import numpy as np
import argparse
import json
import cv2
import dataset
import time
# Command-line interface: positional train/test json file lists, GPU id,
# task id, plus an optional pretrained checkpoint to resume from.
parser = argparse.ArgumentParser(description='PyTorch CSRNet')
parser.add_argument('train_json', metavar='TRAIN',
                    help='path to train json')
parser.add_argument('test_json', metavar='TEST',
                    help='path to test json')
parser.add_argument('--pre', '-p', metavar='PRETRAINED', default=None,type=str,
                    help='path to the pretrained model')
parser.add_argument('gpu',metavar='GPU', type=str,
                    help='GPU id to use.')
parser.add_argument('task',metavar='TASK', type=str,
                    help='task id to use.')
def main():
    """Entry point: parse args, build CSRNet, then run the train/validate
    loop, checkpointing whenever the validation MAE improves."""
    global args,best_prec1
    best_prec1 = 1e6
    args = parser.parse_args()
    # Hyper-parameters hard-coded on top of the CLI arguments.
    args.original_lr = 1e-7
    args.lr = 1e-7
    args.batch_size = 1
    args.momentum = 0.95
    args.decay = 5*1e-4
    args.start_epoch = 0
    args.epochs = 400
    args.steps = [-1,1,100,150]
    args.scales = [1,1,1,1]
    args.workers = 4
    args.seed = time.time()
    args.print_freq = 30
    with open(args.train_json, 'r') as outfile:
        train_list = json.load(outfile)
    with open(args.test_json, 'r') as outfile:
        val_list = json.load(outfile)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    torch.cuda.manual_seed(args.seed)
    model = CSRNet()
    model = model.cuda()
    # size_average=False: sum (not mean) of squared errors over the map.
    criterion = nn.MSELoss(size_average=False).cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.decay)
    # Optionally resume model/optimizer state from a checkpoint file.
    if args.pre:
        if os.path.isfile(args.pre):
            print("=> loading checkpoint '{}'".format(args.pre))
            checkpoint = torch.load(args.pre)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.pre, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.pre))
    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        train(train_list, model, criterion, optimizer, epoch)
        prec1 = validate(val_list, model, criterion)
        # Lower MAE is better.
        is_best = prec1 < best_prec1
        best_prec1 = min(prec1, best_prec1)
        print(' * best MAE {mae:.3f} '
              .format(mae=best_prec1))
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.pre,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer' : optimizer.state_dict(),
        }, is_best,args.task)
def train(train_list, model, criterion, optimizer, epoch):
    """Run one training epoch over `train_list`, printing timing and loss
    statistics every `args.print_freq` batches."""
    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_loader = torch.utils.data.DataLoader(
        dataset.listDataset(train_list,
                            shuffle=True,
                            transform=transforms.Compose([
                                transforms.ToTensor(),transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                                           std=[0.229, 0.224, 0.225]),
                            ]),
                            train=True,
                            seen=model.seen,
                            batch_size=args.batch_size,
                            num_workers=args.workers),
        batch_size=args.batch_size)
    print('epoch %d, processed %d samples, lr %.10f' % (epoch, epoch * len(train_loader.dataset), args.lr))
    model.train()
    end = time.time()
    for i,(img, target)in enumerate(train_loader):
        data_time.update(time.time() - end)
        img = img.cuda()
        img = Variable(img)
        output = model(img)
        # NOTE(review): unsqueeze(0) prepends a batch axis to the density
        # map; this only lines up with the model output because
        # args.batch_size is 1 — confirm before raising the batch size.
        target = target.type(torch.FloatTensor).unsqueeze(0).cuda()
        target = Variable(target)
        loss = criterion(output, target)
        losses.update(loss.item(), img.size(0))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        batch_time.update(time.time() - end)
        end = time.time()
        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  .format(
                   epoch, i, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses))
def validate(val_list, model, criterion):
    """Compute the mean absolute count error (MAE) of `model` over `val_list`.

    Returns the MAE as a plain float (lower is better). `criterion` is kept
    in the signature for interface compatibility but is not used here.
    """
    print ('begin test')
    test_loader = torch.utils.data.DataLoader(
        dataset.listDataset(val_list,
                            shuffle=False,
                            transform=transforms.Compose([
                                transforms.ToTensor(),transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                                           std=[0.229, 0.224, 0.225]),
                            ]), train=False),
        batch_size=args.batch_size)
    model.eval()
    mae = 0.0
    # no_grad: evaluation needs no autograd graph; without it every forward
    # pass retains activations for backward, wasting a lot of GPU memory.
    with torch.no_grad():
        for i, (img, target) in enumerate(test_loader):
            img = img.cuda()
            output = model(img)
            # |predicted count - ground-truth count| per image, as a float.
            mae += abs(output.data.sum().item() - target.sum().item())
    mae = mae/len(test_loader)
    print(' * MAE {mae:.3f} '
          .format(mae=mae))
    return mae
def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    # NOTE(review): the docstring above does not match the implementation —
    # the LR is actually rebuilt from args.original_lr by applying
    # args.scales[i] for each milestone in args.steps that `epoch` has
    # passed, stopping at the first milestone >= epoch.
    args.lr = args.original_lr
    for i in range(len(args.steps)):
        # Default scale of 1 if scales is shorter than steps.
        scale = args.scales[i] if i < len(args.scales) else 1
        if epoch >= args.steps[i]:
            args.lr = args.lr * scale
            if epoch == args.steps[i]:
                break
        else:
            break
    # Push the recomputed rate into every parameter group.
    for param_group in optimizer.param_groups:
        param_group['lr'] = args.lr
class AverageMeter(object):
    """Tracks the most recent value plus a running, count-weighted average."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero out all statistics."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record `val` observed `n` times and refresh the running average."""
        self.val = val
        self.sum = self.sum + val * n
        self.count = self.count + n
        self.avg = self.sum / self.count
# Script entry point.
if __name__ == '__main__':
    main()

softmax_cross_entropy_with_logits nan

I have extracted CNN features from a pretrain vgg19 with size 4096. Then I am using a shallower architecture to train a classifier with softmax and center losses. Unfortunately, the softmax loss function returns nan. There is detailed discussion available here, however I am not able to resolve the problem with clip because labels and logits are in two different data format (int64, float32). Furthermore, I also changed the learning rate but still got the same error.
Can someone please let me know how to resolve this situation.
from __future__ import division
from __future__ import print_function
import csv
import numpy as np
import tensorflow as tf
from retrieval_model import setup_train_model
# Placeholder for parsed tf.app flags (unused in this snippet).
FLAGS = None
import os
# Pin TensorFlow to the first GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
def get_name(read_file):
    """Parse a CSV whose rows are <float features..., identifier>.

    Returns (features, identifiers): a list of float vectors and the
    matching list of label strings, in file order.
    """
    feat_lst = []
    identifier_lst = []
    with open(read_file, 'r') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            feat_lst.append([float(value) for value in row[:-1]])
            identifier_lst.append(row[-1])
    return feat_lst, identifier_lst
def get_batch(batch_index, batch_size, labels, f_lst):
    """Return batch number `batch_index` as (features, labels) slices."""
    lo = batch_index * batch_size
    hi = lo + batch_size
    return f_lst[lo:hi], labels[lo:hi]
def creat_dict(orig_labels):
    """Map each distinct label to a dense integer id, in first-seen order."""
    mapping = {}
    for label in orig_labels:
        if label not in mapping:
            mapping[label] = len(mapping)
    return mapping
def main(_):
    """Train the retrieval model on pre-extracted 4096-d VGG features.

    Reads <features, label> rows from gtrain.csv, maps string labels to
    dense integer ids, and trains with softmax + center loss using Adam.
    """
    save_dir = 'model/one-branch-ckpt'
    train_file = 'gtrain.csv'
    img_feat, img_labels = get_name(train_file)
    map_dict = creat_dict(img_labels)
    img_labels = [map_dict.get(x) for x in img_labels]
    # Number of classes = number of distinct ids handed out by creat_dict.
    # (The original referenced an undefined name `classes` here — NameError.)
    num_classes = len(map_dict)
    im_feat_dim = 4096
    batch_size = 50
    max_num_epoch = 10
    steps_per_epoch = len(img_feat) // batch_size
    num_steps = steps_per_epoch * max_num_epoch
    # Setup placeholders for input variables.
    im_feat_plh = tf.placeholder(tf.float32, shape=[batch_size, im_feat_dim])
    label_plh = tf.placeholder(tf.int64, shape=(batch_size), name='labels')
    train_phase_plh = tf.placeholder(tf.bool)
    # Setup training operation.
    t_l = setup_train_model(im_feat_plh, train_phase_plh, label_plh, num_classes)
    # Setup optimizer.
    global_step = tf.Variable(0, trainable=False)
    init_learning_rate = 0.0001
    learning_rate = tf.train.exponential_decay(init_learning_rate, global_step,
                                               steps_per_epoch, 0.794, staircase=True)
    # Feed Adam the decayed schedule; the original computed `learning_rate`
    # but then passed the constant init_learning_rate, leaving it unused.
    optim = tf.train.AdamOptimizer(learning_rate)
    # Run batch-norm moving-average updates together with the train step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_step = optim.minimize(t_l, global_step=global_step)
    # Setup model saver.
    saver = tf.train.Saver(save_relative_paths=True, max_to_keep=1)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(num_steps):
            im_feats, labels = get_batch(
                i % steps_per_epoch, batch_size, img_labels, img_feat)
            feed_dict = {
                im_feat_plh: im_feats,
                label_plh: labels,
                train_phase_plh: True,
            }
            [_, loss_val] = sess.run([train_step, t_l], feed_dict=feed_dict)
            if i % 100 == 0:
                print('Epoch: %d Step: %d Loss: %f' % (i // steps_per_epoch, i, loss_val))
            if i % steps_per_epoch == 0 and i > 0:
                print('Saving checkpoint at step %d' % i)
                saver.save(sess, save_dir, global_step=global_step)
if __name__ == '__main__':
    # Fix RNG seeds for reproducibility, then hand control to tf.app.run.
    np.random.seed(0)
    tf.set_random_seed(0)
    tf.app.run(main=main)
**************************retrieval_model********************************
def setup_train_model(im_feats, train_phase, im_labels, nrof_classes):
    """Build the total training loss: softmax loss plus center loss on the
    image embedding.

    NOTE(review): softmax_loss treats the 512-d L2-normalised embedding
    itself as the logits, so only labels 0..511 are valid; if nrof_classes
    exceeds the embedding dimension, out-of-range labels can drive
    sparse_softmax_cross_entropy to nan — a likely cause of the reported
    nan loss. A dedicated logits layer of size nrof_classes is the usual
    structure; confirm intent.
    """
    alfa = 0.9
    # nrof_classes = 28783
    i_embed = embedding_model(im_feats, train_phase, im_labels)
    c_l = embedding_loss(i_embed, im_labels, alfa, nrof_classes)
    loss = softmax_loss(i_embed, im_labels)
    total_loss = loss + c_l
    return total_loss
def add_fc(inputs, outdim, train_phase, scope_in):
    """Fully-connected -> batch-norm -> ReLU -> dropout block.

    NOTE(review): `fully_connected` is not imported in this snippet —
    presumably tf.contrib.layers/slim; verify at the top of the file.
    """
    fc = fully_connected(inputs, outdim, activation_fn=None, scope=scope_in + '/fc')
    fc_bnorm = tf.layers.batch_normalization(fc, momentum=0.1, epsilon=1e-5,
                                             training=train_phase, name=scope_in + '/bnorm')
    fc_relu = tf.nn.relu(fc_bnorm, name=scope_in + '/relu')
    fc_out = tf.layers.dropout(fc_relu, seed=0, training=train_phase, name=scope_in + '/dropout')
    return fc_out
def embedding_loss(features, label, alfa, nrof_classes):
    """Center loss: mean squared distance between each feature vector and
    its class center.

    NOTE(review): the scatter_sub update of `centers` is commented out, so
    the centers stay at their zero initialisation and `diff` is computed
    but never used — as written this degenerates to mean ||features||^2
    rather than a true center loss.
    """
    nrof_features = features.get_shape()[1]
    # Non-trainable center table, updated (when enabled) outside the optimizer.
    centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32,
                              initializer=tf.constant_initializer(0), trainable=False)
    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(centers, label)
    diff = (1 - alfa) * (centers_batch - features)
    #centers = tf.scatter_sub(centers, label, diff)
    center_loss = tf.reduce_mean(tf.square(features - centers_batch))
    #softmax_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=features))
    #total_loss = softmax_loss + center_loss
    return center_loss
def embedding_model(im_feats, train_phase, im_labels,
                    fc_dim=2048, embed_dim=512):
    """Two-layer image branch: 4096-d VGG features -> fc_dim -> embed_dim,
    L2-normalised along the feature axis. `im_labels` is accepted but
    unused here."""
    # Image branch.
    im_fc1 = add_fc(im_feats, fc_dim, train_phase, 'im_embed_1')
    im_fc2 = fully_connected(im_fc1, embed_dim, activation_fn=None,
                             scope='im_embed_2')
    return tf.nn.l2_normalize(im_fc2, 1, epsilon=1e-10)
def softmax_loss(feat, im_labels):
    """Sparse softmax cross-entropy, treating `feat` itself as the logits.

    NOTE(review): `feat` is the 512-d L2-normalised embedding, so valid
    label ids are only 0..511; any label >= the embedding dimension makes
    sparse_softmax_cross_entropy_with_logits undefined (nan) — a likely
    source of the reported nan loss. The usual fix is a logits layer with
    one unit per class.
    """
    label = tf.reshape(im_labels, [-1])
    softmax = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=feat))
    return softmax

Categories

Resources