Related
I get this error when I run the command !python Models/KiU-Net/LiTS/train.py (training my neural network in Python on Google Colab Pro, from Windows 10).
I also tried the command !python Models/KiU-Net/LiTS/train.py -Xcompiler -fno-gnu-unique, but I still got the same error.
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/visdom/__init__.py", line 692, in _send
return self._handle_post(
File "/usr/local/lib/python3.8/dist-packages/visdom/__init__.py", line 656, in _handle_post
r = self.session.post(url, data=data)
File "/usr/local/lib/python3.8/dist-packages/requests/sessions.py", line 578, in post
return self.request('POST', url, data=data, json=json, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/requests/sessions.py", line 530, in request
resp = self.send(prep, **send_kwargs)
File "/usr/local/lib/python3.8/dist-packages/requests/sessions.py", line 643, in send
r = adapter.send(request, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/requests/adapters.py", line 516, in send
raise ConnectionError(e, request=request)
requests.exceptions.ConnectionError: HTTPConnectionPool(host='localhost', port=666): Max retries exceeded with url: /update (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f1fa000dee0>: Failed to establish a new connection: [Errno 111] Connection refused'))
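(Aside: this first traceback only means that no Visdom server is listening on localhost:666; the training itself continues, as the epoch log below shows. If the loss plots matter, a server can usually be started beforehand with the standard command below, using the same port that train.py passes to Visdom(port=666).)
python -m visdom.server -port 666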
epoch:0, step:0, loss1:1.171, loss2:1.417, loss3:1.201, loss4:1.182, loss:2.4318742752075195 time:0.077 min
../aten/src/ATen/native/cuda/NLLLoss2d.cu:104: nll_loss2d_forward_kernel: block: [15,0,0], thread: [303,0,0] Assertion `t >= 0 && t < n_classes` failed.
../aten/src/ATen/native/cuda/NLLLoss2d.cu:104: nll_loss2d_forward_kernel: block: [7,0,0], thread: [304,0,0] Assertion `t >= 0 && t < n_classes` failed.
../aten/src/ATen/native/cuda/NLLLoss2d.cu:104: nll_loss2d_forward_kernel: block: [7,0,0], thread: [817,0,0] Assertion `t >= 0 && t < n_classes` failed.
../aten/src/ATen/native/cuda/NLLLoss2d.cu:104: nll_loss2d_forward_kernel: block: [15,0,0], thread: [48,0,0] Assertion `t >= 0 && t < n_classes` failed.
../aten/src/ATen/native/cuda/NLLLoss2d.cu:104: nll_loss2d_forward_kernel: block: [22,0,0], thread: [47,0,0] Assertion `t >= 0 && t < n_classes` failed.
../aten/src/ATen/native/cuda/NLLLoss2d.cu:104: nll_loss2d_forward_kernel: block: [11,0,0], thread: [845,0,0] Assertion `t >= 0 && t < n_classes` failed.
../aten/src/ATen/native/cuda/NLLLoss2d.cu:104: nll_loss2d_forward_kernel: block: [8,0,0], thread: [562,0,0] Assertion `t >= 0 && t < n_classes` failed.
../aten/src/ATen/native/cuda/NLLLoss2d.cu:104: nll_loss2d_forward_kernel: block: [4,0,0], thread: [586,0,0] Assertion `t >= 0 && t < n_classes` failed.
This is the training file (train.py) where I train a convolutional neural network, calculating the loss (cross-entropy loss) and performing backpropagation to update the weights:
import os
from time import time
import numpy as np
import torch
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader
from visdom import Visdom
from dataset.dataset import Dataset
from loss.Dice import DiceLoss
from loss.ELDice import ELDiceLoss
from loss.WBCE import WCELoss
from loss.Jaccard import JaccardLoss
from loss.SS import SSLoss
from loss.Tversky import TverskyLoss
from loss.Hybrid import HybridLoss
from loss.BCE import BCELoss
from net.models import net
import parameter as para
viz = Visdom(port=666)
step_list = [0]
win = viz.line(X=np.array([0]), Y=np.array([1.0]), opts=dict(title='loss'))
os.environ['CUDA_VISIBLE_DEVICES'] = para.gpu
cudnn.benchmark = para.cudnn_benchmark
net = torch.nn.DataParallel(net).cuda()
net.train()
print(para.training_set_path)
train_ds = Dataset(os.path.join(para.train_ct_path, 'ct'), os.path.join(para.train_seg_path, 'seg'))
train_dl = DataLoader(train_ds, para.batch_size, True, num_workers=para.num_workers, pin_memory=para.pin_memory)
loss_func_list = [DiceLoss(), ELDiceLoss(), WCELoss(), JaccardLoss(), SSLoss(), TverskyLoss(), HybridLoss(), BCELoss()]
loss_func = loss_func_list[5]
opt = torch.optim.Adam(net.parameters(), lr=para.learning_rate, capturable=True)
#opt = torch.optim.Adam(net.parameters(), lr=para.learning_rate)
lr_decay = torch.optim.lr_scheduler.MultiStepLR(opt, para.learning_rate_decay)
alpha = para.alpha
start = time()
for epoch in range(para.Epoch):
lr_decay.step()
mean_loss = []
for step, (ct, seg) in enumerate(train_dl):
ct = ct.cuda()
seg = seg.cuda()
# print(f"seg unique: {torch.unique(seg)}")
outputs = net(ct)
# print(len(outputs))
loss1 = loss_func(outputs[0], seg)
loss2 = loss_func(outputs[1], seg)
loss3 = loss_func(outputs[2], seg)
loss4 = loss_func(outputs[3], seg)
loss = (loss1 + loss2 + loss3) * alpha + loss4
mean_loss.append(loss4.item())
opt.zero_grad()
loss.backward()
#print("Capturing:", torch.cuda.is_current_stream_capturing())
opt.step()
if step % 5 == 0:
step_list.append(step_list[-1] + 1)
viz.line(X=np.array([step_list[-1]]), Y=np.array([loss4.item()]), win=win, update='append')
print('epoch:{}, step:{}, loss1:{:.3f}, loss2:{:.3f}, loss3:{:.3f}, loss4:{:.3f}, loss:{} time:{:.3f} min'
.format(epoch, step, loss1.item(), loss2.item(), loss3.item(), loss4.item(), loss, (time() - start) / 60))
mean_loss = sum(mean_loss) / len(mean_loss)
if epoch % 50 == 0 :
torch.save(net.state_dict(), '/content/drive/MyDrive/CS/Models/KiU-Net/LiTS/saved_networks/net{}-{:.3f}-{:.3f}.pth'.format(epoch, loss, mean_loss))
if epoch % 40 == 0 and epoch != 0:
alpha *= 0.8
This is my model (net.py). It is a convolutional neural network (the input size equals the output size) that performs multiclass segmentation into 3 classes (background, liver, and tumor) on medical images (LiTS, the Liver Tumor Segmentation benchmark):
import os
import sys
sys.path.append(os.path.split(sys.path[0])[0])
import torch
import torch.nn as nn
import torch.nn.functional as F
import parameter as para
class segnet(nn.Module):
def __init__(self, training):
super(segnet, self).__init__()
self.training = training
self.encoder1 = nn.Conv3d(1, 32, 3, stride=1, padding=1) # b, 16, 10, 10
self.encoder2 = nn.Conv3d(32, 64, 3, stride=1, padding=1) # b, 8, 3, 3
self.encoder3 = nn.Conv3d(64, 128, 3, stride=1, padding=1)
self.encoder4 = nn.Conv3d(128, 256, 3, stride=1, padding=1)
self.encoder5 = nn.Conv3d(256, 512, 3, stride=1, padding=1)
self.decoder1 = nn.Conv3d(512, 256, 3, stride=1,padding=1) # b, 16, 5, 5
self.decoder2 = nn.Conv3d(256, 128, 3, stride=1, padding=1) # b, 8, 15, 1
self.decoder3 = nn.Conv3d(128, 64, 3, stride=1, padding=1) # b, 1, 28, 28
self.decoder4 = nn.Conv3d(64, 32, 3, stride=1, padding=1)
self.decoder5 = nn.Conv3d(32, 3, 3, stride=1, padding=1) # self.decoder5 = nn.Conv3d(32, 2, 3, stride=1, padding=1)
#(32, 3...
self.map4 = nn.Sequential(
nn.Conv3d(3, 3, 1, 1), #nn.Conv3d(2, 1, 1, 1) / nn.Conv3d(3, 3, 1, 1)
nn.Upsample(scale_factor=(1, 1, 1), mode='trilinear'), # nn.Upsample(scale_factor=(1, 2, 2), mode='trilinear')
nn.Sigmoid()
)
# mapping at the 128*128 scale
self.map3 = nn.Sequential(
nn.Conv3d(64, 3, 1, 1), #nn.Conv3d(64, 1, 1, 1) / nn.Conv3d(64, 3, 1, 1)
nn.Upsample(scale_factor=(4, 4, 4), mode='trilinear'), #nn.Upsample(scale_factor=(4, 8, 8), mode='trilinear')
nn.Sigmoid()
)
# mapping at the 64*64 scale
self.map2 = nn.Sequential(
nn.Conv3d(128, 3, 1, 1), #nn.Conv3d(128, 1, 1, 1) / nn.Conv3d(128, 3, 1, 1)
nn.Upsample(scale_factor=(8, 8, 8), mode='trilinear'), #nn.Upsample(scale_factor=(8, 16, 16), mode='trilinear')
nn.Sigmoid()
)
# mapping at the 32*32 scale
self.map1 = nn.Sequential(
nn.Conv3d(256, 3, 1, 1), #nn.Conv3d(256, 1, 1, 1) / nn.Conv3d(256, 3, 1, 1)
nn.Upsample(scale_factor=(16, 16, 16), mode='trilinear'), #nn.Upsample(scale_factor=(16, 32, 32), mode='trilinear'),
nn.Sigmoid()
)
self.soft = nn.Softmax(dim = 1) #self.soft = nn.Softmax(dim = 1)
def forward(self, x):
out = F.relu(F.max_pool3d(self.encoder1(x),2,2))
t1 = out
out = F.relu(F.max_pool3d(self.encoder2(out),2,2))
t2 = out
out = F.relu(F.max_pool3d(self.encoder3(out),2,2))
t3 = out
out = F.relu(F.max_pool3d(self.encoder4(out),2,2))
t4 = out
out = F.relu(F.max_pool3d(self.encoder5(out),2,2))
# t2 = out
out = F.relu(F.interpolate(self.decoder1(out),scale_factor=(2,2,2),mode ='trilinear'))
# print(out.shape,t4.shape)
out = torch.add(F.pad(out,[0,0,0,0,0,1]),t4)
output1 = self.map1(out)
out = F.relu(F.interpolate(self.decoder2(out),scale_factor=(2,2,2),mode ='trilinear'))
# out = torch.add(out,t3)
output2 = self.map2(out)
out = F.relu(F.interpolate(self.decoder3(out),scale_factor=(2,2,2),mode ='trilinear'))
# out = torch.add(out,t2)
output3 = self.map3(out)
out = F.relu(F.interpolate(self.decoder4(out),scale_factor=(2,2,2),mode ='trilinear'))
# out = torch.add(out,t1)
out = F.relu(F.interpolate(self.decoder5(out),scale_factor=(2,2,2),mode ='trilinear'))
output4 = self.map4(out)
# print(out.shape)
# print(output1.shape,output2.shape,output3.shape,output4.shape)
if self.training is True:
return output1, output2, output3, output4
else:
return output4
def init(module):
if isinstance(module, nn.Conv3d) or isinstance(module, nn.ConvTranspose3d):
nn.init.kaiming_normal_(module.weight.data, 0.25)
nn.init.constant_(module.bias.data, 0)
net = segnet(training=True)
# net = kiunet_org(training=True)
net.apply(init)
# count the network parameters
print('net total parameters:', sum(param.numel() for param in net.parameters()))
This is the file where I calculate cross-entropy loss (Tversky.py):
import torch
import torch.nn as nn
import torch.nn.functional as F
class TverskyLoss(nn.Module):
def __init__(self):
super().__init__()
self.ce_loss = nn.CrossEntropyLoss(reduction='mean')
def forward(self, pred, target):
pred = pred.squeeze(dim=1) # pred.size() == target.size()
output = self.ce_loss(pred, target.to(torch.int64))
return output
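Note on the log above: the `t >= 0 && t < n_classes` assertions come from exactly this kind of loss, since nn.CrossEntropyLoss (NLLLoss under the hood) requires every target value to be an integer class index in [0, n_classes). A minimal sanity check (a hypothetical helper, not part of the original code) that could be run on seg before calling the loss:
def check_targets(target, n_classes=3):
    # CrossEntropyLoss/NLLLoss expect class indices in [0, n_classes)
    t = target.to(torch.int64)
    assert t.min() >= 0 and t.max() < n_classes, \
        f"target labels out of range: min={t.min().item()}, max={t.max().item()}"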
I get the following error when I run !python train.py (KiU-Net 3D) on Google Colab Pro from Windows, using Python and the PyTorch library:
File "train.py", line 86, in <module>
opt.step()
File "/usr/local/lib/python3.8/dist-packages/torch/optim/lr_scheduler.py", line 68, in wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/optim/optimizer.py", line 140, in wrapper
out = func(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/optim/optimizer.py", line 23, in _use_grad
ret = func(self, *args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/optim/adam.py", line 178, in step
self._cuda_graph_capture_health_check()
File "/usr/local/lib/python3.8/dist-packages/torch/optim/optimizer.py", line 103, in _cuda_graph_capture_health_check
raise RuntimeError("Attempting CUDA graph capture of step() for an instance of " +
RuntimeError: Attempting CUDA graph capture of step() for an instance of Adam but this instance was constructed with capturable=False.
This is the train.py file where I get the error in opt.step():
import os
from time import time
import numpy as np
import torch
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader
from visdom import Visdom
from dataset.dataset import Dataset
from loss.Dice import DiceLoss
from loss.ELDice import ELDiceLoss
from loss.WBCE import WCELoss
from loss.Jaccard import JaccardLoss
from loss.SS import SSLoss
from loss.Tversky import TverskyLoss
from loss.Hybrid import HybridLoss
from loss.BCE import BCELoss
from net.models import net
import parameter as para
viz = Visdom(port=666)
step_list = [0]
win = viz.line(X=np.array([0]), Y=np.array([1.0]), opts=dict(title='loss'))
os.environ['CUDA_VISIBLE_DEVICES'] = para.gpu
cudnn.benchmark = para.cudnn_benchmark
net = torch.nn.DataParallel(net).cuda()
net.train()
print(para.training_set_path)
train_ds = Dataset(os.path.join(para.train_ct_path, 'ct'), os.path.join(para.train_seg_path, 'seg'))
train_dl = DataLoader(train_ds, para.batch_size, True, num_workers=para.num_workers, pin_memory=para.pin_memory)
loss_func_list = [DiceLoss(), ELDiceLoss(), WCELoss(), JaccardLoss(), SSLoss(), TverskyLoss(), HybridLoss(), BCELoss()]
loss_func = loss_func_list[5]
opt = torch.optim.Adam(net.parameters(), lr=para.learning_rate)
lr_decay = torch.optim.lr_scheduler.MultiStepLR(opt, para.learning_rate_decay)
alpha = para.alpha
start = time()
for epoch in range(para.Epoch):
lr_decay.step()
mean_loss = []
for step, (ct, seg) in enumerate(train_dl):
ct = ct.cuda()
seg = seg.cuda()
outputs = net(ct)
print(len(outputs))
loss1 = loss_func(outputs[0], seg)
loss2 = loss_func(outputs[1], seg)
loss3 = loss_func(outputs[2], seg)
loss4 = loss_func(outputs[3], seg)
loss = (loss1 + loss2 + loss3) * alpha + loss4
mean_loss.append(loss4.item())
opt.zero_grad()
loss.backward()
# opt.param_groups[0]['capturable'] = True #https://github.com/pytorch/pytorch/issues/80809
opt.step()
if step % 5 == 0:
step_list.append(step_list[-1] + 1)
viz.line(X=np.array([step_list[-1]]), Y=np.array([loss4.item()]), win=win, update='append')
print('epoch:{}, step:{}, loss1:{:.3f}, loss2:{:.3f}, loss3:{:.3f}, loss4:{:.3f}, time:{:.3f} min'
.format(epoch, step, loss1.item(), loss2.item(), loss3.item(), loss4.item(), (time() - start) / 60))
mean_loss = sum(mean_loss) / len(mean_loss)
if epoch % 50 == 0:
torch.save(net.state_dict(), '/content/drive/MyDrive/CS/Models/KiU-Net/LiTS/saved_networks/net{}-{:.3f}-{:.3f}.pth'.format(epoch, loss, mean_loss))
if epoch % 40 == 0 and epoch != 0:
alpha *= 0.8
I expect the neural network to train normally. This is the architecture of KiU-Net 3D, in case there is an error there (models.py):
import os
import sys
sys.path.append(os.path.split(sys.path[0])[0])
import torch
import torch.nn as nn
import torch.nn.functional as F
import parameter as para
class segnet(nn.Module):
def __init__(self, training):
super(segnet, self).__init__()
self.training = training
self.encoder1 = nn.Conv3d(1, 32, 3, stride=1, padding=1) # b, 16, 10, 10
self.encoder2= nn.Conv3d(32, 64, 3, stride=1, padding=1) # b, 8, 3, 3
self.encoder3= nn.Conv3d(64, 128, 3, stride=1, padding=1)
self.encoder4= nn.Conv3d(128, 256, 3, stride=1, padding=1)
self.encoder5= nn.Conv3d(256, 512, 3, stride=1, padding=1)
self.decoder1 = nn.Conv3d(512, 256, 3, stride=1,padding=1) # b, 16, 5, 5
self.decoder2 = nn.Conv3d(256, 128, 3, stride=1, padding=1) # b, 8, 15, 1
self.decoder3 = nn.Conv3d(128, 64, 3, stride=1, padding=1) # b, 1, 28, 28
self.decoder4 = nn.Conv3d(64, 32, 3, stride=1, padding=1)
self.decoder5 = nn.Conv3d(32, 3, 3, stride=1, padding=1) # self.decoder5 = nn.Conv3d(32, 2, 3, stride=1, padding=1)
self.map4 = nn.Sequential(
nn.Conv3d(3, 3, 1, 1), #nn.Conv3d(2, 1, 1, 1),
nn.Upsample(scale_factor=(1, 1, 1), mode='trilinear'), # nn.Upsample(scale_factor=(1, 2, 2), mode='trilinear')
nn.Sigmoid()
)
# mapping at the 128*128 scale
self.map3 = nn.Sequential(
nn.Conv3d(64, 3, 1, 1), #nn.Conv3d(64, 1, 1, 1)
nn.Upsample(scale_factor=(4, 4, 4), mode='trilinear'), #nn.Upsample(scale_factor=(4, 8, 8), mode='trilinear')
nn.Sigmoid()
)
# mapping at the 64*64 scale
self.map2 = nn.Sequential(
nn.Conv3d(128, 3, 1, 1), #nn.Conv3d(128, 1, 1, 1)
nn.Upsample(scale_factor=(8, 8, 8), mode='trilinear'), #nn.Upsample(scale_factor=(8, 16, 16), mode='trilinear')
nn.Sigmoid()
)
# mapping at the 32*32 scale
self.map1 = nn.Sequential(
nn.Conv3d(256, 3, 1, 1), #nn.Conv3d(256, 1, 1, 1)
nn.Upsample(scale_factor=(16, 16, 16), mode='trilinear'), #nn.Upsample(scale_factor=(16, 32, 32), mode='trilinear'),
nn.Sigmoid()
)
self.soft = nn.Softmax(dim =1)
def forward(self, x):
out = F.relu(F.max_pool3d(self.encoder1(x),2,2))
t1 = out
out = F.relu(F.max_pool3d(self.encoder2(out),2,2))
t2 = out
out = F.relu(F.max_pool3d(self.encoder3(out),2,2))
t3 = out
out = F.relu(F.max_pool3d(self.encoder4(out),2,2))
t4 = out
out = F.relu(F.max_pool3d(self.encoder5(out),2,2))
# t2 = out
out = F.relu(F.interpolate(self.decoder1(out),scale_factor=(2,2,2),mode ='trilinear'))
# print(out.shape,t4.shape)
out = torch.add(F.pad(out,[0,0,0,0,0,1]),t4)
output1 = self.map1(out)
out = F.relu(F.interpolate(self.decoder2(out),scale_factor=(2,2,2),mode ='trilinear'))
# out = torch.add(out,t3)
output2 = self.map2(out)
out = F.relu(F.interpolate(self.decoder3(out),scale_factor=(2,2,2),mode ='trilinear'))
# out = torch.add(out,t2)
output3 = self.map3(out)
out = F.relu(F.interpolate(self.decoder4(out),scale_factor=(2,2,2),mode ='trilinear'))
# out = torch.add(out,t1)
out = F.relu(F.interpolate(self.decoder5(out),scale_factor=(2,2,2),mode ='trilinear'))
output4 = self.map4(out)
# print(out.shape)
# print(output1.shape,output2.shape,output3.shape,output4.shape)
if self.training is True:
return output1, output2, output3, output4
else:
return output4
def init(module):
if isinstance(module, nn.Conv3d) or isinstance(module, nn.ConvTranspose3d):
nn.init.kaiming_normal_(module.weight.data, 0.25)
nn.init.constant_(module.bias.data, 0)
net = segnet(training=True)
net.apply(init)
# count the network parameters
print('net total parameters:', sum(param.numel() for param in net.parameters()))
And this is how I calculate the loss function (Tversky.py):
"""
Tversky loss
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
class TverskyLoss(nn.Module):
def __init__(self):
super().__init__()
def forward(self, pred, target):
smooth = 1e-7
pred = pred.squeeze()
target = target.squeeze()
pred = torch.argmax(pred, dim=0).flatten().to(torch.int64)
target = target.flatten().to(torch.int64)
# print(pred.shape)
# print(target.shape)
pred_f = F.one_hot(pred, num_classes=3)[...,1:]
target_f = F.one_hot(target, num_classes=3)[...,1:]
# print(pred_f.shape)
# print(target_f.shape)
intersection = torch.sum(pred_f * target_f, axis=1)
denominator = torch.sum(pred_f + target_f, axis=-1)
# print(intersection.shape)
# print(denom.shape)
dice = 1 - torch.mean((2. * intersection / (denominator + smooth)))
dice.requires_grad = True
return dice
Why?
PyTorch optimizers throw Attempting CUDA graph capture of step()... if they detect that the current CUDA stream (the queue of operations to be run on the GPU) is actively recording ("capturing") operations.
Since you're seeing this error during opt.step(), some code that runs prior to opt.step() is probably starting CUDA graph / stream capture for the current (default) stream, but never turning capture mode off.
You can verify this hypothesis with print("Capturing:", torch.cuda.is_current_stream_capturing()) before opt.step(). Based on the error message, this will print Capturing: True, but you want Capturing: False.
The same check, placed at several points in the loop, can also narrow down which part of the code is initiating the graph / stream capture.
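For example (a minimal sketch of the loop from train.py above, not from the original post), moving the check through the training loop would localize the first point at which capture is active:
for step, (ct, seg) in enumerate(train_dl):
    print("before forward:", torch.cuda.is_current_stream_capturing())
    outputs = net(ct.cuda())
    print("after forward:", torch.cuda.is_current_stream_capturing())
    loss = loss_func(outputs[3], seg.cuda())
    opt.zero_grad()
    loss.backward()
    print("before step:", torch.cuda.is_current_stream_capturing())
    opt.step()
The first print that flips from False to True brackets the code that started the capture.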
Fix?
Without knowing which part of the code is initiating the graph / stream capture, it's hard to suggest an elegant fix.
If the stream capture occurs before your training loop, you might be able to work around the issue by just wrapping your training loop in a dedicated stream (which won't be in capture mode):
training_loop_stream = torch.cuda.Stream()
torch.cuda.synchronize() # make sure model is on device
with torch.cuda.stream(training_loop_stream):
# training loop, now using this dedicated stream...
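Alternatively, as the first train.py and the commented-out line referencing pytorch/pytorch issue 80809 already hint, you can mark the Adam instance itself as capturable, so that step() is permitted while a stream is capturing. A sketch of that workaround (PyTorch 1.12+; the docs warn it may slow down ordinary, ungraphed steps):
# construct the optimizer as capturable from the start ...
opt = torch.optim.Adam(net.parameters(), lr=para.learning_rate, capturable=True)
# ... or flip the flag on an existing optimizer:
for group in opt.param_groups:
    group['capturable'] = True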
I need your help with my SegNet.
I cloned this repository from GitHub:
https://github.com/tkuanlun350/Tensorflow-SegNet
I changed the dataset to my custom data, which consists of 3 classes including the background.
But this error occurred when I executed the test command.
ValueError: Cannot feed value of shape (1, 360, 480, 3, 1) for Tensor Placeholder_1:0, which has shape (1, 360, 480, 1)
How can I solve this problem?
I'm not good at TensorFlow. Please help me...
Here is my code.
Python 3.7
TensorFlow 2.0
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
import os, sys
import numpy as np
import math
from datetime import datetime
import time
from PIL import Image
from math import ceil
from tensorflow.python.ops import gen_nn_ops
# modules
from Utils import _variable_with_weight_decay, _variable_on_cpu, _add_loss_summaries, _activation_summary, print_hist_summery, get_hist, per_class_acc, writeImage
from Inputs import *
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
os.environ["CUDA_VISIBLE_DEVICES"]="0, 1, 2, 3"
# gpus = tf.config.experimental.list_physical_devices('GPU')
gpus = tf.config.list_physical_devices('GPU')
for i in range(len(gpus)):
tf.config.experimental.set_memory_growth(gpus[i], True)
# Constants describing the training process.
MOVING_AVERAGE_DECAY = 0.9999 # The decay to use for the moving average.
NUM_EPOCHS_PER_DECAY = 350.0 # Epochs after which learning rate decays.
LEARNING_RATE_DECAY_FACTOR = 0.1 # Learning rate decay factor.
INITIAL_LEARNING_RATE = 0.001 # Initial learning rate.
EVAL_BATCH_SIZE = 8
BATCH_SIZE = 8
# for CamVid
IMAGE_HEIGHT = 360
IMAGE_WIDTH = 480
IMAGE_DEPTH = 3
NUM_CLASSES = 3
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 367
NUM_EXAMPLES_PER_EPOCH_FOR_TEST = 101
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 1
TEST_ITER = NUM_EXAMPLES_PER_EPOCH_FOR_TEST / BATCH_SIZE
def msra_initializer(kl, dl):
"""
kl for kernel size, dl for filter number
"""
stddev = math.sqrt(2. / (kl**2 * dl))
return tf.truncated_normal_initializer(stddev=stddev)
def orthogonal_initializer(scale = 1.1):
''' From Lasagne and Keras. Reference: Saxe et al., http://arxiv.org/abs/1312.6120
'''
def _initializer(shape, dtype=tf.float32, partition_info=None):
flat_shape = (shape[0], np.prod(shape[1:]))
a = np.random.normal(0.0, 1.0, flat_shape)
u, _, v = np.linalg.svd(a, full_matrices=False)
# pick the one with the correct shape
q = u if u.shape == flat_shape else v
q = q.reshape(shape) #this needs to be corrected to float32
return tf.constant(scale * q[:shape[0], :shape[1]], dtype=tf.float32)
return _initializer
def loss(logits, labels):
"""
loss func without re-weighting
"""
# Calculate the average cross entropy loss across the batch.
logits = tf.reshape(logits, (-1,NUM_CLASSES))
labels = tf.reshape(labels, [-1])
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits=logits, labels=labels, name='cross_entropy_per_example')
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
tf.add_to_collection('losses', cross_entropy_mean)
return tf.add_n(tf.get_collection('losses'), name='total_loss')
def weighted_loss(logits, labels, num_classes, head=None):
""" median-frequency re-weighting """
with tf.name_scope('loss'):
logits = tf.reshape(logits, (-1, num_classes))
epsilon = tf.constant(value=1e-10)
logits = logits + epsilon
# construct one-hot label array
label_flat = tf.reshape(labels, (-1, 1))
# should be [batch ,num_classes]
labels = tf.reshape(tf.one_hot(label_flat, depth=num_classes), (-1, num_classes))
softmax = tf.nn.softmax(logits)
cross_entropy = -tf.reduce_sum(tf.multiply(labels * tf.log(softmax + epsilon), head), axis=[1])
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
tf.add_to_collection('losses', cross_entropy_mean)
loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
return loss
def cal_loss(logits, labels):
loss_weight = np.array([
0.2595,
0.3826,
1.0974]) # class 0~2
labels = tf.cast(labels, tf.int32)
# return loss(logits, labels)
return weighted_loss(logits, labels, num_classes=NUM_CLASSES, head=loss_weight)
def conv_layer_with_bn(inputT, shape, train_phase, activation=True, name=None):
in_channel = shape[2]
out_channel = shape[3]
k_size = shape[0]
with tf.variable_scope(name) as scope:
kernel = _variable_with_weight_decay('ort_weights', shape=shape, initializer=orthogonal_initializer(), wd=None)
conv = tf.nn.conv2d(inputT, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [out_channel], tf.constant_initializer(0.0))
bias = tf.nn.bias_add(conv, biases)
if activation is True:
conv_out = tf.nn.relu(batch_norm_layer(bias, train_phase, scope.name))
else:
conv_out = batch_norm_layer(bias, train_phase, scope.name)
return conv_out
def get_deconv_filter(f_shape):
"""
reference: https://github.com/MarvinTeichmann/tensorflow-fcn
"""
width = f_shape[0]
heigh = f_shape[0]
f = ceil(width/2.0)
c = (2 * f - 1 - f % 2) / (2.0 * f)
bilinear = np.zeros([f_shape[0], f_shape[1]])
for x in range(width):
for y in range(heigh):
value = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
bilinear[x, y] = value
weights = np.zeros(f_shape)
for i in range(f_shape[2]):
weights[:, :, i, i] = bilinear
init = tf.constant_initializer(value=weights,
dtype=tf.float32)
return tf.get_variable(name="up_filter", initializer=init,
shape=weights.shape)
def deconv_layer(inputT, f_shape, output_shape, stride=2, name=None):
# output_shape = [b, w, h, c]
# sess_temp = tf.InteractiveSession()
sess_temp = tf.global_variables_initializer()
strides = [1, stride, stride, 1]
with tf.variable_scope(name):
weights = get_deconv_filter(f_shape)
deconv = tf.nn.conv2d_transpose(inputT, weights, output_shape,
strides=strides, padding='SAME')
return deconv
def batch_norm_layer(inputT, is_training, scope):
return tf.cond(is_training,
lambda: tf.keras.layers.BatchNormalization(center=False)(inputT),
lambda: tf.keras.layers.BatchNormalization(center=False)(inputT))
def inference(images, labels, batch_size, phase_train):
# norm1
norm1 = tf.nn.lrn(images, depth_radius=5, bias=1.0, alpha=0.0001, beta=0.75,
name='norm1')
# conv1
conv1 = conv_layer_with_bn(norm1, [7, 7, images.get_shape().as_list()[3], 64], phase_train, name="conv1")
print(conv1.shape)
# pool1
pool1, pool1_indices = tf.nn.max_pool_with_argmax(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
padding='SAME', name='pool1')
# conv2
conv2 = conv_layer_with_bn(pool1, [7, 7, 64, 64], phase_train, name="conv2")
# pool2
pool2, pool2_indices = tf.nn.max_pool_with_argmax(conv2, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME', name='pool2')
# conv3
conv3 = conv_layer_with_bn(pool2, [7, 7, 64, 64], phase_train, name="conv3")
# pool3
pool3, pool3_indices = tf.nn.max_pool_with_argmax(conv3, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME', name='pool3')
# conv4
conv4 = conv_layer_with_bn(pool3, [7, 7, 64, 64], phase_train, name="conv4")
# pool4
pool4, pool4_indices = tf.nn.max_pool_with_argmax(conv4, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME', name='pool4')
""" End of encoder """
""" start upsample """
# upsample4
# Need to change when using different dataset out_w, out_h
# upsample4 = upsample_with_pool_indices(pool4, pool4_indices, pool4.get_shape(), out_w=45, out_h=60, scale=2, name='upsample4')
upsample4 = deconv_layer(pool4, [2, 2, 64, 64], [batch_size, 45, 60, 64], 2, "up4")
# decode 4
conv_decode4 = conv_layer_with_bn(upsample4, [7, 7, 64, 64], phase_train, False, name="conv_decode4")
# upsample 3
# upsample3 = upsample_with_pool_indices(conv_decode4, pool3_indices, conv_decode4.get_shape(), scale=2, name='upsample3')
upsample3= deconv_layer(conv_decode4, [2, 2, 64, 64], [batch_size, 90, 120, 64], 2, "up3")
# decode 3
conv_decode3 = conv_layer_with_bn(upsample3, [7, 7, 64, 64], phase_train, False, name="conv_decode3")
# upsample2
# upsample2 = upsample_with_pool_indices(conv_decode3, pool2_indices, conv_decode3.get_shape(), scale=2, name='upsample2')
upsample2= deconv_layer(conv_decode3, [2, 2, 64, 64], [batch_size, 180, 240, 64], 2, "up2")
# decode 2
conv_decode2 = conv_layer_with_bn(upsample2, [7, 7, 64, 64], phase_train, False, name="conv_decode2")
# upsample1
# upsample1 = upsample_with_pool_indices(conv_decode2, pool1_indices, conv_decode2.get_shape(), scale=2, name='upsample1')
upsample1= deconv_layer(conv_decode2, [2, 2, 64, 64], [batch_size, 360, 480, 64], 2, "up1")
# decode4
conv_decode1 = conv_layer_with_bn(upsample1, [7, 7, 64, 64], phase_train, False, name="conv_decode1")
""" end of Decode """
""" Start Classify """
# output predicted class scores (NUM_CLASSES channels)
with tf.variable_scope('conv_classifier') as scope:
kernel = _variable_with_weight_decay('weights',
shape=[1, 1, 64, NUM_CLASSES],
initializer=msra_initializer(1, 64),
wd=0.0005)
conv = tf.nn.conv2d(conv_decode1, kernel, [1, 1, 1, 1], padding='SAME')
biases = _variable_on_cpu('biases', [NUM_CLASSES], tf.constant_initializer(0.0))
conv_classifier = tf.nn.bias_add(conv, biases, name=scope.name)
logit = conv_classifier
loss = cal_loss(conv_classifier, labels)
return loss, logit
def train(total_loss, global_step):
total_sample = 274
num_batches_per_epoch = 274/1
""" fix lr """
lr = INITIAL_LEARNING_RATE
loss_averages_op = _add_loss_summaries(total_loss)
# Compute gradients.
with tf.control_dependencies([loss_averages_op]):
opt = tf.train.AdamOptimizer(lr)
grads = opt.compute_gradients(total_loss)
apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
# Add histograms for trainable variables.
for var in tf.trainable_variables():
tf.summary.histogram(var.op.name, var)
# Add histograms for gradients.
for grad, var in grads:
if grad is not None:
tf.summary.histogram(var.op.name + '/gradients', grad)
# Track the moving averages of all trainable variables.
variable_averages = tf.train.ExponentialMovingAverage(
MOVING_AVERAGE_DECAY, global_step)
variables_averages_op = variable_averages.apply(tf.trainable_variables())
with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
train_op = tf.no_op(name='train')
return train_op
def test(FLAGS):
max_steps = FLAGS.max_steps
batch_size = FLAGS.batch_size
train_dir = FLAGS.log_dir # /tmp3/first350/TensorFlow/Logs
# test_dir = FLAGS.test_dir # /tmp3/first350/SegNet-Tutorial/CamVid/train.txt
test_dir = '/home/ml/song/Segnet_tensorflow_3/dataset/test.txt'
# test_ckpt = FLAGS.testing
test_ckpt = "/home/ml/song/Segnet_tensorflow_3/path_to_your_log/model.ckpt-19999.meta"
image_w = FLAGS.image_w
image_h = FLAGS.image_h
image_c = FLAGS.image_c
# testing should set BATCH_SIZE = 1
batch_size = 1
image_filenames, label_filenames = get_filename_list(test_dir)
test_data_node = tf.placeholder(
tf.float32,
shape=[batch_size, image_h, image_w, image_c])
test_labels_node = tf.placeholder(tf.int64, shape=[batch_size, 360, 480, 1])
phase_train = tf.placeholder(tf.bool, name='phase_train')
loss, logits = inference(test_data_node, test_labels_node, batch_size, phase_train)
pred = tf.argmax(logits, axis=3)
# get moving avg
variable_averages = tf.train.ExponentialMovingAverage(
MOVING_AVERAGE_DECAY)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
# gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.0001)
with tf.Session() as sess:
# Load checkpoint
# saver.restore(sess, "/home/ml/song/Segnet_tensorflow/path_to_your_log/model.ckpt-19999")
# saver = tf.train.import_meta_graph('/home/ml/song/Segnet_tensorflow/Logs/model.ckpt-19999.meta')
saver.restore(sess, '/home/ml/song/Segnet_tensorflow_3/path_to_your_log/model.ckpt-19999')
images, labels = get_all_test_data(image_filenames, label_filenames)
threads = tf.train.start_queue_runners(sess=sess)
hist = np.zeros((NUM_CLASSES, NUM_CLASSES))
for image_batch, label_batch in zip(images, labels):
feed_dict = {
test_data_node: image_batch,
test_labels_node: label_batch,
phase_train: False
}
print('*'*100)
print(type(feed_dict))
print(test_data_node.shape)
print(test_labels_node.shape)
print('*'*100)
print(' -- logits')
print(' ', logits.shape, type(logits))
print(logits[0])
print(' -- pred')
print(' ', pred.shape, type(pred))
print('')
dense_prediction, im = sess.run([logits, pred], feed_dict=feed_dict)
print(dense_prediction.shape)
print(im.shape)
print('*'*100)
# output_image to verify
if (FLAGS.save_image):
writeImage(im[0], 'testing_image.png')
# writeImage(im[0], 'out_image/'+str(image_filenames[count]).split('/')[-1])
hist += get_hist(dense_prediction, label_batch)
# count+=1
acc_total = np.diag(hist).sum() / hist.sum()
iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
print("acc: ", acc_total)
print("mean IU: ", np.nanmean(iu))
# ------------------------------------------------------------------------------------------------------
from tensorflow.python.client import device_lib
def get_available_gpus():
local_device_protos = device_lib.list_local_devices()
return [x.name for x in local_device_protos if x.device_type == 'GPU']
# ------------------------------------------------------------------------------------------------------
def training(FLAGS, is_finetune=False):
max_steps = FLAGS.max_steps
batch_size = FLAGS.batch_size
train_dir = FLAGS.log_dir # /tmp3/first350/TensorFlow/Logs
#image_dir = FLAGS.image_dir # /tmp3/first350/SegNet-Tutorial/CamVid/train.txt
image_dir = "/home/ml/song/Segnet_tensorflow_3/dataset/train.txt"
#val_dir = FLAGS.val_dir # /tmp3/first350/SegNet-Tutorial/CamVid/val.txt
val_dir = "/home/ml/song/Segnet_tensorflow_3/dataset/val.txt"
finetune_ckpt = FLAGS.finetune
image_w = FLAGS.image_w
image_h = FLAGS.image_h
image_c = FLAGS.image_c
startstep = 0 if not is_finetune else int(FLAGS.finetune.split('-')[-1])
image_filenames, label_filenames = get_filename_list(image_dir)
val_image_filenames, val_label_filenames = get_filename_list(val_dir)
with tf.Graph().as_default():
train_data_node = tf.compat.v1.placeholder( tf.float32, shape=[batch_size, image_h, image_w, image_c])
train_labels_node = tf.compat.v1.placeholder(tf.int64, shape=[batch_size, image_h, image_w, 1])
phase_train = tf.compat.v1.placeholder(tf.bool, name='phase_train')
global_step = tf.Variable(0, trainable=False)
# For CamVid
images, labels = CamVidInputs(image_filenames, label_filenames, batch_size)
val_images, val_labels = CamVidInputs(val_image_filenames, val_label_filenames, batch_size)
# Build a Graph that computes the logits predictions from the inference model.
loss, eval_prediction = inference(train_data_node, train_labels_node, batch_size, phase_train)
# Build a Graph that trains the model with one batch of examples and updates the model parameters.
train_op = train(loss, global_step)
saver = tf.train.Saver(tf.global_variables())
summary_op = tf.summary.merge_all()
# gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.0001)
with tf.Session() as sess:
# Build an initialization operation to run below.
if (is_finetune == True):
saver.restore(sess, finetune_ckpt )
else:
init = tf.global_variables_initializer()
sess.run(init)
# Start the queue runners.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
# Summary placeholders
summary_writer = tf.summary.FileWriter(train_dir, sess.graph)
average_pl = tf.compat.v1.placeholder(tf.float32)
acc_pl = tf.compat.v1.placeholder(tf.float32)
iu_pl = tf.compat.v1.placeholder(tf.float32)
average_summary = tf.summary.scalar("test_average_loss", average_pl)
acc_summary = tf.summary.scalar("test_accuracy", acc_pl)
iu_summary = tf.summary.scalar("Mean_IU", iu_pl)
for step in range(startstep, startstep + max_steps):
image_batch ,label_batch = sess.run([images, labels])
# since we still use mini-batches in validation, still set bn-layer phase_train = True
feed_dict = {
train_data_node: image_batch,
train_labels_node: label_batch,
phase_train: True
}
start_time = time.time()
_, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
duration = time.time() - start_time
assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
if step % 10 == 0:
num_examples_per_step = batch_size
examples_per_sec = num_examples_per_step / duration
sec_per_batch = float(duration)
format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
'sec/batch)')
print (format_str % (datetime.now(), step, loss_value,
examples_per_sec, sec_per_batch))
# eval current training batch pre-class accuracy
pred = sess.run(eval_prediction, feed_dict=feed_dict)
per_class_acc(pred, label_batch)
if step % 100 == 0:
print("start validating.....")
total_val_loss = 0.0
hist = np.zeros((NUM_CLASSES, NUM_CLASSES))
for test_step in range(int(TEST_ITER)):
val_images_batch, val_labels_batch = sess.run([val_images, val_labels])
_val_loss, _val_pred = sess.run([loss, eval_prediction], feed_dict={
train_data_node: val_images_batch,
train_labels_node: val_labels_batch,
phase_train: True
})
total_val_loss += _val_loss
hist += get_hist(_val_pred, val_labels_batch)
print("val loss: ", total_val_loss / TEST_ITER)
acc_total = np.diag(hist).sum() / hist.sum()
iu = np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
test_summary_str = sess.run(average_summary, feed_dict={average_pl: total_val_loss / TEST_ITER})
acc_summary_str = sess.run(acc_summary, feed_dict={acc_pl: acc_total})
iu_summary_str = sess.run(iu_summary, feed_dict={iu_pl: np.nanmean(iu)})
print_hist_summery(hist)
print(" end validating.... ")
summary_str = sess.run(summary_op, feed_dict=feed_dict)
summary_writer.add_summary(summary_str, step)
summary_writer.add_summary(test_summary_str, step)
summary_writer.add_summary(acc_summary_str, step)
summary_writer.add_summary(iu_summary_str, step)
# Save the model checkpoint periodically.
if step % 1000 == 0 or (step + 1) == max_steps:
checkpoint_path = os.path.join(train_dir, 'model.ckpt')
saver.save(sess, checkpoint_path, global_step=step)
coord.request_stop()
coord.join(threads)
I have a UNet network which takes in MRI images of the brain, where the goal is to segment the white matter in the brain. The images have the shape 256x256x183 (reshaped to 183x256x256) (FLAIR and T1 images). The problem I am having is that before sending the input to the UNet, my PyTorch tensor has requires_grad=True, but after one torch.nn.Conv2d operation requires_grad=False. This is a huge problem, since the gradient will not update and the network will not learn.
from collections import OrderedDict
import torch
import torch.nn as nn
class UNet(nn.Module):
def __init__(self, in_channels=3, out_channels=1, init_features=32):
super(UNet, self).__init__()
features = init_features
self.encoder1 = UNet._block(in_channels, features, name="enc1")
self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
self.encoder2 = UNet._block(features, features * 2, name="enc2")
self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
self.encoder3 = UNet._block(features * 2, features * 4, name="enc3")
self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
self.encoder4 = UNet._block(features * 4, features * 8, name="enc4")
self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)
self.bottleneck = UNet._block(features * 8, features * 16, name="bottleneck")
self.upconv4 = nn.ConvTranspose2d(
features * 16, features * 8, kernel_size=2, stride=2
)
self.decoder4 = UNet._block((features * 8) * 2, features * 8, name="dec4")
self.upconv3 = nn.ConvTranspose2d(
features * 8, features * 4, kernel_size=2, stride=2
)
self.decoder3 = UNet._block((features * 4) * 2, features * 4, name="dec3")
self.upconv2 = nn.ConvTranspose2d(
features * 4, features * 2, kernel_size=2, stride=2
)
self.decoder2 = UNet._block((features * 2) * 2, features * 2, name="dec2")
self.upconv1 = nn.ConvTranspose2d(
features * 2, features, kernel_size=2, stride=2
)
self.decoder1 = UNet._block(features * 2, features, name="dec1")
self.conv = nn.Conv2d(
in_channels=features, out_channels=out_channels, kernel_size=1
)
def forward(self, x):
print(x.requires_grad) #<---- here it is true
enc1 = self.encoder1(x)#<---- where the problem happens
print(enc1.requires_grad) #<---- here it is false
enc2 = self.encoder2(self.pool1(enc1))
print(enc2.requires_grad)
enc3 = self.encoder3(self.pool2(enc2))
print(enc3.requires_grad)
enc4 = self.encoder4(self.pool3(enc3))
print(enc4.requires_grad)
bottleneck = self.bottleneck(self.pool4(enc4))
print(bottleneck.requires_grad)
dec4 = self.upconv4(bottleneck)
print(dec4.requires_grad)
dec4 = torch.cat((dec4, enc4), dim=1)
print(dec4.requires_grad)
dec4 = self.decoder4(dec4)
print(dec4.requires_grad)
dec3 = self.upconv3(dec4)
print(dec3.requires_grad)
dec3 = torch.cat((dec3, enc3), dim=1)
print(dec3.requires_grad)
dec3 = self.decoder3(dec3)
print(dec3.requires_grad)
dec2 = self.upconv2(dec3)
print(dec2.requires_grad)
dec2 = torch.cat((dec2, enc2), dim=1)
print(dec2.requires_grad)
dec2 = self.decoder2(dec2)
print(dec2.requires_grad)
dec1 = self.upconv1(dec2)
print(dec1.requires_grad)
dec1 = torch.cat((dec1, enc1), dim=1)
print(dec1.requires_grad)
dec1 = self.decoder1(dec1)
print(dec1.requires_grad)
print("going out")
return torch.sigmoid(self.conv(dec1))
@staticmethod
def _block(in_channels, features, name):
return nn.Sequential(
OrderedDict(
[
(
name + "conv1",
nn.Conv2d(
in_channels=in_channels,
out_channels=features,
kernel_size=3,
padding=1,
bias=False,
),
),
(name + "norm1", nn.BatchNorm2d(num_features=features)),
(name + "relu1", nn.ReLU(inplace=True)),
(
name + "conv2",
nn.Conv2d(
in_channels=features,
out_channels=features,
kernel_size=3,
padding=1,
bias=False,
),
),
(name + "norm2", nn.BatchNorm2d(num_features=features)),
(name + "relu2", nn.ReLU(inplace=True)),
]
)
)
Edit:
This is the training code
class run_network:
def __init__(self, eta, epoch, batch_size, train_file_path, validation_file_path, shuffle_after_epoch = True):
self.eta = eta
self.epoch = epoch
self.batch_size = batch_size
self.train_file_path = train_file_path
self.validation_file_path = validation_file_path
self.shuffle_after_epoch = shuffle_after_epoch
def __call__(self, is_train = False):
device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")
unet = torch.hub.load('mateuszbuda/brain-segmentation-pytorch', 'unet',
in_channels=3, out_channels=1, init_features=32, pretrained=True)
unet.to(device)
unet = unet.double()
optimizer = optim.Adam(unet.parameters(), lr=self.eta)
dsc_loss = DiceLoss()
Load_training = NiftiLoader(self.train_file_path)
Load_validation = NiftiLoader(self.validation_file_path)
mean_flair, mean_t1, std_flair, std_t1 = Load_training.average_mean_and_std(20, 79,99)
total_mean = [mean_flair, mean_t1]
total_std = [std_flair, std_t1]
loss_train = []
loss_validation = []
for current_epoch in tqdm(range(self.epoch)):
for phase in ["train", "validation"]:
if phase == "train":
mini_batch = Load_training.create_batch(self.batch_size, self.shuffle_after_epoch)
unet.train()
print("her22")
if phase == "validation":
print("her")
mini_batch = Load_validation.create_batch(self.batch_size, self.shuffle_after_epoch)
unet.eval()
dim1, dim2, dim3 = mini_batch.shape
for iteration in range(1):
if phase == "train":
current_batch = Load_training.Load_Image_batch(mini_batch, iteration)
image_batch = Load_training.image_zero_mean_normalizer(current_batch)
if phase == "validation":
current_batch = Load_validation.Load_Image_batch(mini_batch, iteration)
image_batch = Load_training.image_zero_mean_normalizer(current_batch, False, mean_list, std_list)
image_dim0, image_dim1, image_dim2, image_dim3, image_dim4 = image_batch.shape
image_batch = image_batch.reshape((
image_dim0,
image_dim1*image_dim2,
image_dim3,
image_dim4
))
image_batch = np.swapaxes(image_batch, 0,1)
image_batch = torch.as_tensor(image_batch)#.requires_grad_(True) #, requires_grad=True)
image_batch = image_batch.to(device)
print(image_batch.requires_grad)
optimizer.zero_grad()
with torch.set_grad_enabled(is_train == "train"):
for j in range(0, 10, 1):
# [183*5, 3, 256, 256] -> [12, 3, 256, 256]
# NUMBER OF ITERATIONS: (183*5/12) -> one chunk
input_image = image_batch[j:j+2,0:3,:,:]
print(input_image.requires_grad)
print("går inn")
y_predicted = unet(input_image)
print(y_predicted.requires_grad)
print(image_batch[j:j+2,3,:,:].requires_grad)
loss = dsc_loss(y_predicted.squeeze(1), image_batch[j:j+2,3,:,:])
print(loss.requires_grad)
if phase == "train":
loss_train.append(loss.item())
loss.backward()
print(loss.item())
exit()
optimizer.step()
print(loss.item())
exit()
if phase == "validation":
loss_validation.append(loss.item())
The number of iterations and the print statements are there to experiment with what the cause could be.
It works fine for me.
'''
# I changed your code a little bit to track down the problem.
def forward(self, x):
print("encoder1", x.requires_grad) #<---- here it is true
enc1 = self.encoder1(x)#<---- where the problem happens
print("encoder2", enc1.requires_grad) #<---- here it is false
'''
a = torch.randn(32, 3, 255, 255, requires_grad=True)
# a.requires_grads = True
print(a)
UNet()(a)
# This is the result:
encoder1 True
encoder2 True
True
True
True
True
True
Can you show me your training source? I guess that's the problem. And why do you need to update the input data?
The training code is fine and the input doesn't need a gradient at all, if you just want to train and update the weights.
The real problem is this line here
with torch.set_grad_enabled(is_train == "train"):
So you want to disable the gradients if you are not training. The thing is, is_train is a bool (judging from this: def __call__(self, is_train=False):), so the comparison will always be false and gradients will never be enabled. Just change it to
with torch.set_grad_enabled(is_train):
and you will be fine.
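A quick standalone demonstration of why the original comparison can never enable gradients:
# a bool never compares equal to the string "train":
print(True == "train")    # False
print(False == "train")   # False
# so torch.set_grad_enabled(is_train == "train") always disables autograd,
# which is why every tensor produced inside the with-block has requires_grad=False.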
Introduction:
I am trying to get a CDCGAN (Conditional Deep Convolutional Generative Adversarial Network) to work on the MNIST dataset, which should be fairly easy considering that the library (PyTorch) I am using has a tutorial on its website.
But I can't seem to get it working: it just produces garbage, or the model collapses, or both.
What I tried:
making the model conditional (semi-supervised learning)
using batch norm
using dropout on each layer besides the input/output layer on the generator and discriminator
label smoothing to combat overconfidence
adding noise to the images (I guess you call this instance noise) to get a better data distribution
using leaky ReLU to avoid vanishing gradients
using a replay buffer to combat forgetting of learned stuff and overfitting
playing with hyperparameters
comparing it to the model from the PyTorch tutorial, which is basically what I did apart from some things like the Embedding layer etc.
Images my Model generated:
Hyperparameters:
batch_size=50, learning_rate_discriminator=0.0001, learning_rate_generator=0.0003, shuffle=True, ndf=64, ngf=64, dropout=0.5
batch_size=50, learning_rate_discriminator=0.0003, learning_rate_generator=0.0003, shuffle=True, ndf=64, ngf=64, dropout=0
Images the PyTorch tutorial model generated:
Code for the PyTorch tutorial DCGAN model
For comparison, here are the images from the DCGAN from the PyTorch tutorial:
My Code:
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms, datasets
import torch.nn.functional as F
from torch import optim as optim
from torch.utils.tensorboard import SummaryWriter
import numpy as np
import os
import time
class Discriminator(torch.nn.Module):
def __init__(self, ndf=16, dropout_value=0.5): # ndf feature map discriminator
super().__init__()
self.ndf = ndf
self.droupout_value = dropout_value
self.condi = nn.Sequential(
nn.Linear(in_features=10, out_features=64 * 64)
)
self.hidden0 = nn.Sequential(
nn.Conv2d(in_channels=2, out_channels=self.ndf, kernel_size=4, stride=2, padding=1, bias=False),
nn.LeakyReLU(0.2),
)
self.hidden1 = nn.Sequential(
nn.Conv2d(in_channels=self.ndf, out_channels=self.ndf * 2, kernel_size=4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(self.ndf * 2),
nn.LeakyReLU(0.2),
nn.Dropout(self.droupout_value)
)
self.hidden2 = nn.Sequential(
nn.Conv2d(in_channels=self.ndf * 2, out_channels=self.ndf * 4, kernel_size=4, stride=2, padding=1, bias=False),
#nn.BatchNorm2d(self.ndf * 4),
nn.LeakyReLU(0.2),
nn.Dropout(self.droupout_value)
)
self.hidden3 = nn.Sequential(
nn.Conv2d(in_channels=self.ndf * 4, out_channels=self.ndf * 8, kernel_size=4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(self.ndf * 8),
nn.LeakyReLU(0.2),
nn.Dropout(self.droupout_value)
)
self.out = nn.Sequential(
nn.Conv2d(in_channels=self.ndf * 8, out_channels=1, kernel_size=4, stride=1, padding=0, bias=False),
torch.nn.Sigmoid()
)
def forward(self, x, y):
y = self.condi(y.view(-1, 10))
y = y.view(-1, 1, 64, 64)
x = torch.cat((x, y), dim=1)
x = self.hidden0(x)
x = self.hidden1(x)
x = self.hidden2(x)
x = self.hidden3(x)
x = self.out(x)
return x
class Generator(torch.nn.Module):
def __init__(self, n_features=100, ngf=16, c_channels=1, dropout_value=0.5): # ngf feature map of generator
super().__init__()
self.ngf = ngf
self.n_features = n_features
self.c_channels = c_channels
self.droupout_value = dropout_value
self.hidden0 = nn.Sequential(
nn.ConvTranspose2d(in_channels=self.n_features + 10, out_channels=self.ngf * 8,
kernel_size=4, stride=1, padding=0, bias=False),
nn.BatchNorm2d(self.ngf * 8),
nn.LeakyReLU(0.2)
)
self.hidden1 = nn.Sequential(
nn.ConvTranspose2d(in_channels=self.ngf * 8, out_channels=self.ngf * 4,
kernel_size=4, stride=2, padding=1, bias=False),
#nn.BatchNorm2d(self.ngf * 4),
nn.LeakyReLU(0.2),
nn.Dropout(self.droupout_value)
)
self.hidden2 = nn.Sequential(
nn.ConvTranspose2d(in_channels=self.ngf * 4, out_channels=self.ngf * 2,
kernel_size=4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(self.ngf * 2),
nn.LeakyReLU(0.2),
nn.Dropout(self.droupout_value)
)
self.hidden3 = nn.Sequential(
nn.ConvTranspose2d(in_channels=self.ngf * 2, out_channels=self.ngf,
kernel_size=4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(self.ngf),
nn.LeakyReLU(0.2),
nn.Dropout(self.droupout_value)
)
self.out = nn.Sequential(
# "out_channels=1" because gray scale
nn.ConvTranspose2d(in_channels=self.ngf, out_channels=1, kernel_size=4,
stride=2, padding=1, bias=False),
nn.Tanh()
)
def forward(self, x, y):
x_cond = torch.cat((x, y), dim=1) # Combine the latent noise vector with the conditional input (class labels)
x = self.hidden0(x_cond) # The combined input goes into a "ConvTranspose2d" layer
x = self.hidden1(x)
x = self.hidden2(x)
x = self.hidden3(x)
x = self.out(x)
return x
class Logger:
def __init__(self, model_name, model1, model2, m1_optimizer, m2_optimizer, model_parameter, train_loader):
self.out_dir = "data"
self.model_name = model_name
self.train_loader = train_loader
self.model1 = model1
self.model2 = model2
self.model_parameter = model_parameter
self.m1_optimizer = m1_optimizer
self.m2_optimizer = m2_optimizer
# Exclude Epochs from the model name. This makes sense e.g. when we stop a training run and continue it later on.
self.experiment_name = '_'.join("{!s}={!r}".format(k, v) for (k, v) in model_parameter.items())\
.replace("Epochs" + "=" + str(model_parameter["Epochs"]), "")
self.d_error = 0
self.g_error = 0
self.tb = SummaryWriter(log_dir=str(self.out_dir + "/log/" + self.model_name + "/runs/" + self.experiment_name))
self.path_image = os.path.join(os.getcwd(), f'{self.out_dir}/log/{self.model_name}/images/{self.experiment_name}')
self.path_model = os.path.join(os.getcwd(), f'{self.out_dir}/log/{self.model_name}/model/{self.experiment_name}')
try:
os.makedirs(self.path_image)
except Exception as e:
print("WARNING: ", str(e))
try:
os.makedirs(self.path_model)
except Exception as e:
print("WARNING: ", str(e))
def log_graph(self, model1_input, model2_input, model1_label, model2_label):
self.tb.add_graph(self.model1, input_to_model=(model1_input, model1_label))
self.tb.add_graph(self.model2, input_to_model=(model2_input, model2_label))
def log(self, num_epoch, d_error, g_error):
self.d_error = d_error
self.g_error = g_error
self.tb.add_scalar("Discriminator Train Error", self.d_error, num_epoch)
self.tb.add_scalar("Generator Train Error", self.g_error, num_epoch)
def log_image(self, images, epoch, batch_num):
grid = torchvision.utils.make_grid(images)
torchvision.utils.save_image(grid, f'{self.path_image}\\Epoch_{epoch}_batch_{batch_num}.png')
self.tb.add_image("Generator Image", grid)
def log_histogramm(self):
for name, param in self.model2.named_parameters():
self.tb.add_histogram(name, param, self.model_parameter["Epochs"])
self.tb.add_histogram(f'gen_{name}.grad', param.grad, self.model_parameter["Epochs"])
for name, param in self.model1.named_parameters():
self.tb.add_histogram(name, param, self.model_parameter["Epochs"])
self.tb.add_histogram(f'dis_{name}.grad', param.grad, self.model_parameter["Epochs"])
def log_model(self, num_epoch):
torch.save({
"epoch": num_epoch,
"model_generator_state_dict": self.model1.state_dict(),
"model_discriminator_state_dict": self.model2.state_dict(),
"optimizer_generator_state_dict": self.m1_optimizer.state_dict(),
"optimizer_discriminator_state_dict": self.m2_optimizer.state_dict(),
}, str(self.path_model + f'\\{time.time()}_epoch{num_epoch}.pth'))
def close(self, logger, images, num_epoch, d_error, g_error):
logger.log_model(num_epoch)
logger.log_histogramm()
logger.log(num_epoch, d_error, g_error)
self.tb.close()
def display_stats(self, epoch, batch_num, dis_error, gen_error):
print(f'Epoch: [{epoch}/{self.model_parameter["Epochs"]}] '
f'Batch: [{batch_num}/{len(self.train_loader)}] '
f'Loss_D: {dis_error.data.cpu()}, '
f'Loss_G: {gen_error.data.cpu()}')
def get_MNIST_dataset(num_workers_loader, model_parameter, out_dir="data"):
compose = transforms.Compose([
transforms.Resize((64, 64)),
transforms.CenterCrop((64, 64)),
transforms.ToTensor(),
torchvision.transforms.Normalize(mean=[0.5], std=[0.5])
])
dataset = datasets.MNIST(
root=out_dir,
train=True,
download=True,
transform=compose
)
train_loader = torch.utils.data.DataLoader(dataset,
batch_size=model_parameter["batch_size"],
num_workers=num_workers_loader,
shuffle=model_parameter["shuffle"])
return dataset, train_loader
def train_discriminator(p_optimizer, p_noise, p_images, p_fake_target, p_real_target, p_images_labels, p_fake_labels, device):
p_optimizer.zero_grad()
# 1.1 Train on real data
pred_dis_real = discriminator(p_images, p_images_labels)
error_real = loss(pred_dis_real, p_real_target)
error_real.backward()
# 1.2 Train on fake data
fake_data = generator(p_noise, p_fake_labels).detach()
fake_data = add_noise_to_image(fake_data, device)
pred_dis_fake = discriminator(fake_data, p_fake_labels)
error_fake = loss(pred_dis_fake, p_fake_target)
error_fake.backward()
p_optimizer.step()
return error_fake + error_real
def train_generator(p_optimizer, p_noise, p_real_target, p_fake_labels, device):
p_optimizer.zero_grad()
fake_images = generator(p_noise, p_fake_labels)
fake_images = add_noise_to_image(fake_images, device)
pred_dis_fake = discriminator(fake_images, p_fake_labels)
error_fake = loss(pred_dis_fake, p_real_target) # because
"""
We use "p_real_target" instead of "p_fake_target" because we want to
maximize that the discriminator is wrong.
"""
error_fake.backward()
p_optimizer.step()
return fake_images, pred_dis_fake, error_fake
# TODO change to a Truncated normal distribution
def get_noise(batch_size, n_features=100):
return torch.FloatTensor(batch_size, n_features, 1, 1).uniform_(-1, 1)
# We flip the labels of real and fake data. I have been told this gives better gradient flow
def get_real_data_target(batch_size):
return torch.FloatTensor(batch_size, 1, 1, 1).uniform_(0.0, 0.2)
def get_fake_data_target(batch_size):
return torch.FloatTensor(batch_size, 1, 1, 1).uniform_(0.8, 1.1)
def image_to_vector(images):
return torch.flatten(images, start_dim=1, end_dim=-1)
def vector_to_image(images):
return images.view(images.size(0), 1, 28, 28)
def get_rand_labels(batch_size):
return torch.randint(low=0, high=9, size=(batch_size,))
def load_model(model_load_path):
if model_load_path:
checkpoint = torch.load(model_load_path)
discriminator.load_state_dict(checkpoint["model_discriminator_state_dict"])
generator.load_state_dict(checkpoint["model_generator_state_dict"])
dis_opti.load_state_dict(checkpoint["optimizer_discriminator_state_dict"])
gen_opti.load_state_dict(checkpoint["optimizer_generator_state_dict"])
return checkpoint["epoch"]
else:
return 0
def init_model_optimizer(model_parameter, device):
# Initialize the Models
discriminator = Discriminator(ndf=model_parameter["ndf"], dropout_value=model_parameter["dropout"]).to(device)
generator = Generator(ngf=model_parameter["ngf"], dropout_value=model_parameter["dropout"]).to(device)
# train
dis_opti = optim.Adam(discriminator.parameters(), lr=model_parameter["learning_rate_dis"], betas=(0.5, 0.999))
gen_opti = optim.Adam(generator.parameters(), lr=model_parameter["learning_rate_gen"], betas=(0.5, 0.999))
return discriminator, generator, dis_opti, gen_opti
def get_hot_vector_encode(labels, device):
return torch.eye(10)[labels].view(-1, 10, 1, 1).to(device)
def add_noise_to_image(images, device, level_of_noise=0.1):
return images[0].to(device) + (level_of_noise) * torch.randn(images.shape).to(device)
if __name__ == "__main__":
# Hyperparameter
model_parameter = {
"batch_size": 500,
"learning_rate_dis": 0.0002,
"learning_rate_gen": 0.0002,
"shuffle": False,
"Epochs": 10,
"ndf": 64,
"ngf": 64,
"dropout": 0.5
}
# Parameter
r_frequent = 10 # How many samples we save for replay per batch (batch_size / r_frequent).
model_name = "CDCGAN" # The name of your model, e.g. "Gan"
num_workers_loader = 1 # How many workers should load the data
sample_save_size = 16 # How many digits your saved images should show
device = "cuda" # Which device should be used to train the neural network
model_load_path = "" # If set load model instead of training from new
num_epoch_log = 1 # How frequently you want to log
torch.manual_seed(43) # Sets a seed for torch for reproducibility
dataset_train, train_loader = get_MNIST_dataset(num_workers_loader, model_parameter) # Get dataset
# Initialize the Models and optimizer
discriminator, generator, dis_opti, gen_opti = init_model_optimizer(model_parameter, device) # Init model/Optimizer
start_epoch = load_model(model_load_path) # when we want to load a model
# Init Logger
logger = Logger(model_name, generator, discriminator, gen_opti, dis_opti, model_parameter, train_loader)
loss = nn.BCELoss()
images, labels = next(iter(train_loader)) # For logging
# For testing
# pred = generator(get_noise(model_parameter["batch_size"]).to(device), get_hot_vector_encode(get_rand_labels(model_parameter["batch_size"]), device))
# dis = discriminator(images.to(device), get_hot_vector_encode(labels, device))
logger.log_graph(get_noise(model_parameter["batch_size"]).to(device), images.to(device),
get_hot_vector_encode(get_rand_labels(model_parameter["batch_size"]), device),
get_hot_vector_encode(labels, device))
# Array to store samples for experience replay
exp_replay = torch.tensor([]).to(device)
for num_epoch in range(start_epoch, model_parameter["Epochs"]):
for batch_num, data_loader in enumerate(train_loader):
images, labels = data_loader
images = add_noise_to_image(images, device) # Add noise to the images
# 1. Train Discriminator
dis_error = train_discriminator(
dis_opti,
get_noise(model_parameter["batch_size"]).to(device),
images.to(device),
get_fake_data_target(model_parameter["batch_size"]).to(device),
get_real_data_target(model_parameter["batch_size"]).to(device),
get_hot_vector_encode(labels, device),
get_hot_vector_encode(
get_rand_labels(model_parameter["batch_size"]), device),
device
)
# 2. Train Generator
fake_image, pred_dis_fake, gen_error = train_generator(
gen_opti,
get_noise(model_parameter["batch_size"]).to(device),
get_real_data_target(model_parameter["batch_size"]).to(device),
get_hot_vector_encode(
get_rand_labels(model_parameter["batch_size"]),
device),
device
)
# Store a random point for experience replay
perm = torch.randperm(fake_image.size(0))
r_idx = perm[:max(1, int(model_parameter["batch_size"] / r_frequent))]
r_samples = add_noise_to_image(fake_image[r_idx], device)
exp_replay = torch.cat((exp_replay, r_samples), 0).detach()
if exp_replay.size(0) >= model_parameter["batch_size"]:
# Train on experienced data
dis_opti.zero_grad()
r_label = get_hot_vector_encode(torch.zeros(exp_replay.size(0)).numpy(), device)
pred_dis_real = discriminator(exp_replay, r_label)
error_real = loss(pred_dis_real, get_fake_data_target(exp_replay.size(0)).to(device))
error_real.backward()
dis_opti.step()
print(f'Epoch: [{num_epoch}/{model_parameter["Epochs"]}] '
f'Batch: Replay/Experience batch '
f'Loss_D: {error_real.data.cpu()}, '
)
exp_replay = torch.tensor([]).to(device)
logger.display_stats(epoch=num_epoch, batch_num=batch_num, dis_error=dis_error, gen_error=gen_error)
if batch_num % 100 == 0:
logger.log_image(fake_image[:sample_save_size], num_epoch, batch_num)
logger.log(num_epoch, dis_error, gen_error)
if num_epoch % num_epoch_log == 0:
logger.log_model(num_epoch)
logger.log_histogramm()
logger.close(logger, fake_image[:sample_save_size], num_epoch, dis_error, gen_error)
First link to my Code (Pastebin)
Second link to my Code (0bin)
Conclusion:
I implemented all these things (e.g. label smoothing) that are considered beneficial to a GAN/DCGAN, yet my model still performs worse than the tutorial DCGAN from PyTorch, so I think I might have a bug in my code, but I can't seem to find it.
Reproducibility:
You should be able to just copy the code and run it, provided you have the imported libraries installed, to look for yourself whether you can find anything.
I appreciate any feedback.
I solved this issue a while ago but forgot to post an answer on Stack Overflow, so I will simply post my code here; it should work fairly well.
Some disclaimers:
I am not quite sure if it works, since I did this a year ago
It's for 128x128 px MNIST images
It's not a vanilla GAN; I used various optimization techniques
If you want to use it, you need to change various details, such as the training dataset
Resources:
Multi-Scale Gradients
Instance Noise
Various tricks I used
More tricks
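Before the full script, here is a minimal sketch of the instance-noise trick from the resources above, assuming the linear decay schedule used in the code below (function names here are illustrative, not part of the original script):
import torch

def instance_noise_std(epoch, total_epochs, start_std=0.1):
    # Linearly decay the noise std; it reaches zero halfway through training
    # and the max() keeps it clamped at zero afterwards.
    return max(0.0, start_std - (2 * start_std) * (epoch / total_epochs))

def add_instance_noise(images, std):
    # Add the same decaying Gaussian noise to real and generated batches alike.
    return images + std * torch.randn_like(images)
And here is the full script: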
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import pytorch_lightning as pl
from pytorch_lightning import loggers
from numpy.random import choice
import os
from pathlib import Path
import shutil
from collections import OrderedDict
# custom weights initialization called on netG and netD
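# (DCGAN-paper style: conv weights ~ N(0.0, 0.02), BatchNorm scale ~ N(1.0, 0.02), bias 0)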
def weights_init(m):
classname = m.__class__.__name__
if classname.find('Conv') != -1:
nn.init.normal_(m.weight.data, 0.0, 0.02)
elif classname.find('BatchNorm') != -1:
nn.init.normal_(m.weight.data, 1.0, 0.02)
nn.init.constant_(m.bias.data, 0)
# randomly flip some labels
def noisy_labels(y, p_flip=0.05): # flip labels with 5% probability
# determine the number of labels to flip
n_select = int(p_flip * y.shape[0])
# choose labels to flip
flip_ix = choice([i for i in range(y.shape[0])], size=n_select)
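# (note: numpy's choice samples with replacement by default, so some indices
# may repeat and slightly fewer than n_select labels get flipped)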
# invert the labels in place
y[flip_ix] = 1 - y[flip_ix]
return y
class AddGaussianNoise(object):
def __init__(self, mean=0.0, std=0.1):
self.std = std
self.mean = mean
def __call__(self, tensor):
tensor = tensor.cuda()
return tensor + (torch.randn(tensor.size()) * self.std + self.mean).cuda()
def __repr__(self):
return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)
def resize2d(img, size):
return (F.adaptive_avg_pool2d(img, size).data).cuda()
def get_valid_labels(img):
return ((0.8 - 1.1) * torch.rand(img.shape[0], 1, 1, 1) + 1.1).cuda() # soft labels
def get_unvalid_labels(img):
return (noisy_labels((0.0 - 0.3) * torch.rand(img.shape[0], 1, 1, 1) + 0.3)).cuda() # soft labels
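# Valid targets are drawn from (0.8, 1.1] and invalid ones from (0.0, 0.3],
# with ~5% of the invalid ones flipped: label smoothing combined with label noise.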
class Generator(pl.LightningModule):
def __init__(self, ngf, nc, latent_dim):
super(Generator, self).__init__()
self.ngf = ngf
self.latent_dim = latent_dim
self.nc = nc
self.fc0 = nn.Sequential(
# input is Z, going into a convolution
nn.utils.spectral_norm(nn.ConvTranspose2d(latent_dim, ngf * 16, 4, 1, 0, bias=False)),
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ngf * 16)
)
self.fc1 = nn.Sequential(
# state size. (ngf*8) x 4 x 4
nn.utils.spectral_norm(nn.ConvTranspose2d(ngf * 16, ngf * 8, 4, 2, 1, bias=False)),
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ngf * 8)
)
self.fc2 = nn.Sequential(
# state size. (ngf*4) x 8 x 8
nn.utils.spectral_norm(nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False)),
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ngf * 4)
)
self.fc3 = nn.Sequential(
# state size. (ngf*2) x 16 x 16
nn.utils.spectral_norm(nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False)),
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ngf * 2)
)
self.fc4 = nn.Sequential(
# state size. (ngf) x 32 x 32
nn.utils.spectral_norm(nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False)),
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ngf)
)
self.fc5 = nn.Sequential(
# state size. (nc) x 64 x 64
nn.utils.spectral_norm(nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False)),
nn.Tanh()
)
# state size. (nc) x 128 x 128
# For Multi-Scale Gradient
# Converting the intermediate layers into images
self.fc0_r = nn.Conv2d(ngf * 16, self.nc, 1)
self.fc1_r = nn.Conv2d(ngf * 8, self.nc, 1)
self.fc2_r = nn.Conv2d(ngf * 4, self.nc, 1)
self.fc3_r = nn.Conv2d(ngf * 2, self.nc, 1)
self.fc4_r = nn.Conv2d(ngf, self.nc, 1)
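# Each 1x1 conv projects an intermediate feature map to an nc-channel image,
# so the discriminator can inspect the generator's output at every scale
# (4x4 up to 64x64).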
def forward(self, input):
x_0 = self.fc0(input)
x_1 = self.fc1(x_0)
x_2 = self.fc2(x_1)
x_3 = self.fc3(x_2)
x_4 = self.fc4(x_3)
x_5 = self.fc5(x_4)
# For Multi-Scale Gradient
# Converting the intermediate layers into images
x_0_r = self.fc0_r(x_0)
x_1_r = self.fc1_r(x_1)
x_2_r = self.fc2_r(x_2)
x_3_r = self.fc3_r(x_3)
x_4_r = self.fc4_r(x_4)
return x_5, x_0_r, x_1_r, x_2_r, x_3_r, x_4_r
class Discriminator(pl.LightningModule):
def __init__(self, ndf, nc):
super(Discriminator, self).__init__()
self.nc = nc
self.ndf = ndf
self.fc0 = nn.Sequential(
# input is (nc) x 128 x 128
nn.utils.spectral_norm(nn.Conv2d(nc, ndf, 4, 2, 1, bias=False)),
nn.LeakyReLU(0.2, inplace=True)
)
self.fc1 = nn.Sequential(
# state size. (ndf) x 64 x 64
nn.utils.spectral_norm(nn.Conv2d(ndf + nc, ndf * 2, 4, 2, 1, bias=False)),
# "+ nc" because of multi scale gradient
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ndf * 2)
)
self.fc2 = nn.Sequential(
# state size. (ndf*2) x 32 x 32
nn.utils.spectral_norm(nn.Conv2d(ndf * 2 + nc, ndf * 4, 4, 2, 1, bias=False)),
# "+ nc" because of multi scale gradient
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ndf * 4)
)
self.fc3 = nn.Sequential(
# state size. (ndf*4) x 16 x 16
nn.utils.spectral_norm(nn.Conv2d(ndf * 4 + nc, ndf * 8, 4, 2, 1, bias=False)),
# "+ nc" because of multi scale gradient
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ndf * 8),
)
self.fc4 = nn.Sequential(
# state size. (ndf*8) x 8 x 8
nn.utils.spectral_norm(nn.Conv2d(ndf * 8 + nc, ndf * 16, 4, 2, 1, bias=False)),
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ndf * 16)
)
self.fc5 = nn.Sequential(
# state size. (ndf*8) x 4 x 4
nn.utils.spectral_norm(nn.Conv2d(ndf * 16 + nc, 1, 4, 1, 0, bias=False)),
nn.Sigmoid()
)
# state size. 1 x 1 x 1
def forward(self, input, detach_or_not):
# When we train in combination with the generator we use multi-scale gradient.
x, x_0_r, x_1_r, x_2_r, x_3_r, x_4_r = input
if detach_or_not:
x = x.detach()
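# Detaching the final image stops the discriminator update from
# backpropagating into the generator (note that the multi-scale outputs
# x_*_r are not detached here).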
x_0 = self.fc0(x)
x_0 = torch.cat((x_0, x_4_r), dim=1) # Concat Multi-Scale Gradient
x_1 = self.fc1(x_0)
x_1 = torch.cat((x_1, x_3_r), dim=1) # Concat Multi-Scale Gradient
x_2 = self.fc2(x_1)
x_2 = torch.cat((x_2, x_2_r), dim=1) # Concat Multi-Scale Gradient
x_3 = self.fc3(x_2)
x_3 = torch.cat((x_3, x_1_r), dim=1) # Concat Multi-Scale Gradient
x_4 = self.fc4(x_3)
x_4 = torch.cat((x_4, x_0_r), dim=1) # Concat Multi-Scale Gradient
x_5 = self.fc5(x_4)
return x_5
class DCGAN(pl.LightningModule):
def __init__(self, hparams, checkpoint_folder, experiment_name):
super().__init__()
self.hparams = hparams
self.checkpoint_folder = checkpoint_folder
self.experiment_name = experiment_name
# networks
self.generator = Generator(ngf=hparams.ngf, nc=hparams.nc, latent_dim=hparams.latent_dim)
self.discriminator = Discriminator(ndf=hparams.ndf, nc=hparams.nc)
self.generator.apply(weights_init)
self.discriminator.apply(weights_init)
# cache for generated images
self.generated_imgs = None
self.last_imgs = None
# For experience replay
self.exp_replay_dis = torch.tensor([])
def forward(self, z):
return self.generator(z)
def adversarial_loss(self, y_hat, y):
return F.binary_cross_entropy(y_hat, y)
def training_step(self, batch, batch_nb, optimizer_idx):
# Adding instance noise; for more, visit: https://www.inference.vc/instance-noise-a-trick-for-stabilising-gan-training/
std_gaussian = max(0, self.hparams.level_of_noise - (
(self.hparams.level_of_noise * 2) * (self.current_epoch / self.hparams.epochs)))
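# Linear decay: full noise at epoch 0, zero from the halfway point onward
# (the max() clamps it at zero).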
AddGaussianNoiseInst = AddGaussianNoise(std=std_gaussian) # the noise decays over time
imgs, _ = batch
imgs = AddGaussianNoiseInst(imgs) # Adding instance noise to real images
self.last_imgs = imgs
# train generator
if optimizer_idx == 0:
# sample noise
z = torch.randn(imgs.shape[0], self.hparams.latent_dim, 1, 1).cuda()
# generate images
self.generated_imgs = self(z)
# ground truth result (ie: all fake)
g_loss = self.adversarial_loss(self.discriminator(self.generated_imgs, False), get_valid_labels(self.generated_imgs[0])) # adversarial loss is binary cross-entropy; [0] is the image of the last layer
tqdm_dict = {'g_loss': g_loss}
log = {'g_loss': g_loss, "std_gaussian": std_gaussian}
output = OrderedDict({
'loss': g_loss,
'progress_bar': tqdm_dict,
'log': log
})
return output
# train discriminator
if optimizer_idx == 1:
# Measure discriminator's ability to classify real from generated samples
# how well can it label as real?
real_loss = self.adversarial_loss(
self.discriminator([imgs, resize2d(imgs, 4), resize2d(imgs, 8), resize2d(imgs, 16), resize2d(imgs, 32), resize2d(imgs, 64)],
False), get_valid_labels(imgs))
fake_loss = self.adversarial_loss(self.discriminator(self.generated_imgs, True), get_unvalid_labels(
self.generated_imgs[0])) # how well can it label as fake?; [0] is the image of the last layer
# discriminator loss is the average of these
d_loss = (real_loss + fake_loss) / 2
tqdm_dict = {'d_loss': d_loss}
log = {'d_loss': d_loss, "std_gaussian": std_gaussian}
output = OrderedDict({
'loss': d_loss,
'progress_bar': tqdm_dict,
'log': log
})
return output
def configure_optimizers(self):
lr_gen = self.hparams.lr_gen
lr_dis = self.hparams.lr_dis
b1 = self.hparams.b1
b2 = self.hparams.b2
opt_g = torch.optim.Adam(self.generator.parameters(), lr=lr_gen, betas=(b1, b2))
opt_d = torch.optim.Adam(self.discriminator.parameters(), lr=lr_dis, betas=(b1, b2))
return [opt_g, opt_d], []
def backward(self, trainer, loss, optimizer, optimizer_idx: int) -> None:
loss.backward(retain_graph=True)
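# retain_graph=True because the generated images' computation graph is reused
# by the discriminator step within the same batch.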
def train_dataloader(self):
# transform = transforms.Compose([transforms.Resize((self.hparams.image_size, self.hparams.image_size)),
# transforms.ToTensor(),
# transforms.Normalize([0.5], [0.5])])
# dataset = torchvision.datasets.MNIST(os.getcwd(), train=False, download=True, transform=transform)
# return DataLoader(dataset, batch_size=self.hparams.batch_size)
# transform = transforms.Compose([transforms.Resize((self.hparams.image_size, self.hparams.image_size)),
# transforms.ToTensor(),
# transforms.Normalize([0.5], [0.5])
# ])
# train_dataset = torchvision.datasets.ImageFolder(
# root="./drive/My Drive/datasets/flower_dataset/",
# # root="./drive/My Drive/datasets/ghibli_dataset_small_overfit/",
# transform=transform
# )
# return DataLoader(train_dataset, num_workers=self.hparams.num_workers, shuffle=True,
# batch_size=self.hparams.batch_size)
transform = transforms.Compose([transforms.Resize((self.hparams.image_size, self.hparams.image_size)),
transforms.ToTensor(),
transforms.Normalize([0.5], [0.5])
])
train_dataset = torchvision.datasets.ImageFolder(
root="ghibli_dataset_small_overfit/",
transform=transform
)
return DataLoader(train_dataset, num_workers=self.hparams.num_workers, shuffle=True,
batch_size=self.hparams.batch_size)
def on_epoch_end(self):
z = torch.randn(4, self.hparams.latent_dim, 1, 1).cuda()
# match gpu device (or keep as cpu)
if self.on_gpu:
z = z.cuda(self.last_imgs.device.index)
# log sampled images
sample_imgs = self.generator(z)[0]
torchvision.utils.save_image(sample_imgs, f'generated_images_epoch{self.current_epoch}.png')
# save model
if self.current_epoch % self.hparams.save_model_every_epoch == 0:
trainer.save_checkpoint(
self.checkpoint_folder + "/" + self.experiment_name + "_epoch_" + str(self.current_epoch) + ".ckpt")
from argparse import Namespace
args = {
'batch_size': 128, # batch size
'lr_gen': 0.0003, # TTUR; learning rate of both networks; tested value: 0.0002
'lr_dis': 0.0003, # TTUR; learning rate of both networks; tested value: 0.0002
'b1': 0.5, # Momentum for adam; tested value(dcgan paper): 0.5
'b2': 0.999, # Momentum for adam; tested value(dcgan paper): 0.999
'latent_dim': 256, # tested value which worked (in V4_1): 100
'nc': 3, # number of color channels
'ndf': 8, # number of discriminator features
'ngf': 8, # number of generator features
'epochs': 4, # the maximal amount of epochs the algorithm should run
'save_model_every_epoch': 1, # how often we save our model
'image_size': 128, # size of the image
'num_workers': 3,
'level_of_noise': 0.1, # how much instance noise we introduce (std); tested values: 0.15 and 0.1
'experience_save_per_batch': 1, # this value should be very low; tested value which works: 1
'experience_batch_size': 50 # this value shouldn't be too high; tested value which works: 50
}
hparams = Namespace(**args)
# Parameters
experiment_name = "DCGAN_6_2_MNIST_128px"
dataset_name = "mnist"
checkpoint_folder = "DCGAN/"
tags = ["DCGAN", "128x128"]
dirpath = Path(checkpoint_folder)
# defining net
net = DCGAN(hparams, checkpoint_folder, experiment_name)
torch.autograd.set_detect_anomaly(True)
trainer = pl.Trainer( # resume_from_checkpoint="DCGAN_V4_2_GHIBLI_epoch_999.ckpt",
max_epochs=args["epochs"],
gpus=1
)
trainer.fit(net)