Introduction:
I am trying to get a CDCGAN (Conditional Deep Convolutional Generative Adversarial Network) to work on the MNIST dataset, which should be fairly easy considering that the library (PyTorch) I am using has a tutorial on its website.
But I can't seem to get it working: it just produces garbage, or the model collapses, or both.
What I tried:
making the model conditional (a form of semi-supervised learning)
using batch norm
using dropout on each layer except the input/output layers of the generator and discriminator
label smoothing to combat overconfidence (see the sketch after this list)
adding noise to the images (I guess you call this instance noise) to get a better data distribution
using LeakyReLU to avoid vanishing gradients
using a replay buffer to combat forgetting of what was already learned and overfitting
playing with hyperparameters
comparing it to the model from the PyTorch tutorial
basically everything the tutorial model does, apart from some details like an Embedding layer etc.
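For reference, here is a minimal sketch of how two of these tricks (one-sided label smoothing and instance noise) are commonly formulated; my actual implementation below flips the labels and uses slightly different ranges, so treat this purely as an illustration (the helper names are made up):

import torch

def smooth_real_targets(batch_size):
    # one-sided label smoothing: real targets around 0.9 instead of 1.0
    return torch.full((batch_size, 1), 0.9)

def add_instance_noise(images, std=0.1):
    # instance noise: perturb inputs so the real and fake distributions overlap
    return images + std * torch.randn_like(images)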
Images my Model generated:
Hyperparameters:
batch_size=50, learning_rate_discriminator=0.0001, learning_rate_generator=0.0003, shuffle=True, ndf=64, ngf=64, dropout=0.5
batch_size=50, learning_rate_discriminator=0.0003, learning_rate_generator=0.0003, shuffle=True, ndf=64, ngf=64, dropout=0
Images the PyTorch tutorial model generated:
Code for the PyTorch tutorial DCGAN model
For comparison, here are the images from the DCGAN of the PyTorch tutorial:
My Code:
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms, datasets
import torch.nn.functional as F
from torch import optim as optim
from torch.utils.tensorboard import SummaryWriter
import numpy as np
import os
import time
class Discriminator(torch.nn.Module):
def __init__(self, ndf=16, dropout_value=0.5): # ndf feature map discriminator
super().__init__()
self.ndf = ndf
self.dropout_value = dropout_value
self.condi = nn.Sequential(
nn.Linear(in_features=10, out_features=64 * 64)
)
self.hidden0 = nn.Sequential(
nn.Conv2d(in_channels=2, out_channels=self.ndf, kernel_size=4, stride=2, padding=1, bias=False),
nn.LeakyReLU(0.2),
)
self.hidden1 = nn.Sequential(
nn.Conv2d(in_channels=self.ndf, out_channels=self.ndf * 2, kernel_size=4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(self.ndf * 2),
nn.LeakyReLU(0.2),
nn.Dropout(self.dropout_value)
)
self.hidden2 = nn.Sequential(
nn.Conv2d(in_channels=self.ndf * 2, out_channels=self.ndf * 4, kernel_size=4, stride=2, padding=1, bias=False),
#nn.BatchNorm2d(self.ndf * 4),
nn.LeakyReLU(0.2),
nn.Dropout(self.dropout_value)
)
self.hidden3 = nn.Sequential(
nn.Conv2d(in_channels=self.ndf * 4, out_channels=self.ndf * 8, kernel_size=4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(self.ndf * 8),
nn.LeakyReLU(0.2),
nn.Dropout(self.dropout_value)
)
self.out = nn.Sequential(
nn.Conv2d(in_channels=self.ndf * 8, out_channels=1, kernel_size=4, stride=1, padding=0, bias=False),
torch.nn.Sigmoid()
)
def forward(self, x, y):
y = self.condi(y.view(-1, 10))
y = y.view(-1, 1, 64, 64)
x = torch.cat((x, y), dim=1)
x = self.hidden0(x)
x = self.hidden1(x)
x = self.hidden2(x)
x = self.hidden3(x)
x = self.out(x)
return x
class Generator(torch.nn.Module):
def __init__(self, n_features=100, ngf=16, c_channels=1, dropout_value=0.5): # ngf feature map of generator
super().__init__()
self.ngf = ngf
self.n_features = n_features
self.c_channels = c_channels
self.dropout_value = dropout_value
self.hidden0 = nn.Sequential(
nn.ConvTranspose2d(in_channels=self.n_features + 10, out_channels=self.ngf * 8,
kernel_size=4, stride=1, padding=0, bias=False),
nn.BatchNorm2d(self.ngf * 8),
nn.LeakyReLU(0.2)
)
self.hidden1 = nn.Sequential(
nn.ConvTranspose2d(in_channels=self.ngf * 8, out_channels=self.ngf * 4,
kernel_size=4, stride=2, padding=1, bias=False),
#nn.BatchNorm2d(self.ngf * 4),
nn.LeakyReLU(0.2),
nn.Dropout(self.dropout_value)
)
self.hidden2 = nn.Sequential(
nn.ConvTranspose2d(in_channels=self.ngf * 4, out_channels=self.ngf * 2,
kernel_size=4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(self.ngf * 2),
nn.LeakyReLU(0.2),
nn.Dropout(self.dropout_value)
)
self.hidden3 = nn.Sequential(
nn.ConvTranspose2d(in_channels=self.ngf * 2, out_channels=self.ngf,
kernel_size=4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(self.ngf),
nn.LeakyReLU(0.2),
nn.Dropout(self.dropout_value)
)
self.out = nn.Sequential(
# "out_channels=1" because gray scale
nn.ConvTranspose2d(in_channels=self.ngf, out_channels=1, kernel_size=4,
stride=2, padding=1, bias=False),
nn.Tanh()
)
def forward(self, x, y):
x_cond = torch.cat((x, y), dim=1) # Combine the latent vector with the conditional input (class labels)
x = self.hidden0(x_cond) # The combined input goes into a "ConvTranspose2d" layer
x = self.hidden1(x)
x = self.hidden2(x)
x = self.hidden3(x)
x = self.out(x)
return x
class Logger:
def __init__(self, model_name, model1, model2, m1_optimizer, m2_optimizer, model_parameter, train_loader):
self.out_dir = "data"
self.model_name = model_name
self.train_loader = train_loader
self.model1 = model1
self.model2 = model2
self.model_parameter = model_parameter
self.m1_optimizer = m1_optimizer
self.m2_optimizer = m2_optimizer
# Exclude Epochs from the model name. This makes sense e.g. when we stop a training run and continue later on.
self.experiment_name = '_'.join("{!s}={!r}".format(k, v) for (k, v) in model_parameter.items())\
.replace("Epochs" + "=" + str(model_parameter["Epochs"]), "")
self.d_error = 0
self.g_error = 0
self.tb = SummaryWriter(log_dir=str(self.out_dir + "/log/" + self.model_name + "/runs/" + self.experiment_name))
self.path_image = os.path.join(os.getcwd(), f'{self.out_dir}/log/{self.model_name}/images/{self.experiment_name}')
self.path_model = os.path.join(os.getcwd(), f'{self.out_dir}/log/{self.model_name}/model/{self.experiment_name}')
try:
os.makedirs(self.path_image)
except Exception as e:
print("WARNING: ", str(e))
try:
os.makedirs(self.path_model)
except Exception as e:
print("WARNING: ", str(e))
def log_graph(self, model1_input, model2_input, model1_label, model2_label):
self.tb.add_graph(self.model1, input_to_model=(model1_input, model1_label))
self.tb.add_graph(self.model2, input_to_model=(model2_input, model2_label))
def log(self, num_epoch, d_error, g_error):
self.d_error = d_error
self.g_error = g_error
self.tb.add_scalar("Discriminator Train Error", self.d_error, num_epoch)
self.tb.add_scalar("Generator Train Error", self.g_error, num_epoch)
def log_image(self, images, epoch, batch_num):
grid = torchvision.utils.make_grid(images)
torchvision.utils.save_image(grid, os.path.join(self.path_image, f'Epoch_{epoch}_batch_{batch_num}.png'))
self.tb.add_image("Generator Image", grid)
def log_histogramm(self):
for name, param in self.model2.named_parameters():
self.tb.add_histogram(name, param, self.model_parameter["Epochs"])
self.tb.add_histogram(f'gen_{name}.grad', param.grad, self.model_parameter["Epochs"])
for name, param in self.model1.named_parameters():
self.tb.add_histogram(name, param, self.model_parameter["Epochs"])
self.tb.add_histogram(f'dis_{name}.grad', param.grad, self.model_parameter["Epochs"])
def log_model(self, num_epoch):
torch.save({
"epoch": num_epoch,
"model_generator_state_dict": self.model1.state_dict(),
"model_discriminator_state_dict": self.model2.state_dict(),
"optimizer_generator_state_dict": self.m1_optimizer.state_dict(),
"optimizer_discriminator_state_dict": self.m2_optimizer.state_dict(),
}, os.path.join(self.path_model, f'{time.time()}_epoch{num_epoch}.pth'))
def close(self, logger, images, num_epoch, d_error, g_error):
logger.log_model(num_epoch)
logger.log_histogramm()
logger.log(num_epoch, d_error, g_error)
self.tb.close()
def display_stats(self, epoch, batch_num, dis_error, gen_error):
print(f'Epoch: [{epoch}/{self.model_parameter["Epochs"]}] '
f'Batch: [{batch_num}/{len(self.train_loader)}] '
f'Loss_D: {dis_error.data.cpu()}, '
f'Loss_G: {gen_error.data.cpu()}')
def get_MNIST_dataset(num_workers_loader, model_parameter, out_dir="data"):
compose = transforms.Compose([
transforms.Resize((64, 64)),
transforms.CenterCrop((64, 64)),
transforms.ToTensor(),
torchvision.transforms.Normalize(mean=[0.5], std=[0.5])
])
dataset = datasets.MNIST(
root=out_dir,
train=True,
download=True,
transform=compose
)
train_loader = torch.utils.data.DataLoader(dataset,
batch_size=model_parameter["batch_size"],
num_workers=num_workers_loader,
shuffle=model_parameter["shuffle"])
return dataset, train_loader
def train_discriminator(p_optimizer, p_noise, p_images, p_fake_target, p_real_target, p_images_labels, p_fake_labels, device):
p_optimizer.zero_grad()
# 1.1 Train on real data
pred_dis_real = discriminator(p_images, p_images_labels)
error_real = loss(pred_dis_real, p_real_target)
error_real.backward()
# 1.2 Train on fake data
fake_data = generator(p_noise, p_fake_labels).detach()
fake_data = add_noise_to_image(fake_data, device)
pred_dis_fake = discriminator(fake_data, p_fake_labels)
error_fake = loss(pred_dis_fake, p_fake_target)
error_fake.backward()
p_optimizer.step()
return error_fake + error_real
def train_generator(p_optimizer, p_noise, p_real_target, p_fake_labels, device):
p_optimizer.zero_grad()
fake_images = generator(p_noise, p_fake_labels)
fake_images = add_noise_to_image(fake_images, device)
pred_dis_fake = discriminator(fake_images, p_fake_labels)
# We use "p_real_target" instead of "p_fake_target" because we want to
# maximize the chance that the discriminator is wrong.
error_fake = loss(pred_dis_fake, p_real_target)
error_fake.backward()
p_optimizer.step()
return fake_images, pred_dis_fake, error_fake
# TODO change to a Truncated normal distribution
def get_noise(batch_size, n_features=100):
return torch.FloatTensor(batch_size, n_features, 1, 1).uniform_(-1, 1)
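# A hedged sketch for the TODO above: torch.nn.init.trunc_normal_ (available in
# recent PyTorch versions) samples from a truncated normal distribution in place.
# "get_truncated_noise" is a hypothetical helper, not part of the original code.
def get_truncated_noise(batch_size, n_features=100):
    noise = torch.empty(batch_size, n_features, 1, 1)
    return torch.nn.init.trunc_normal_(noise, mean=0.0, std=1.0, a=-1.0, b=1.0)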
# We flip the labels of real and fake data; I was told this gives better gradient flow.
def get_real_data_target(batch_size):
return torch.FloatTensor(batch_size, 1, 1, 1).uniform_(0.0, 0.2)
def get_fake_data_target(batch_size):
return torch.FloatTensor(batch_size, 1, 1, 1).uniform_(0.8, 1.1)
def image_to_vector(images):
return torch.flatten(images, start_dim=1, end_dim=-1)
def vector_to_image(images):
return images.view(images.size(0), 1, 28, 28)
def get_rand_labels(batch_size):
return torch.randint(low=0, high=9, size=(batch_size,))
def load_model(model_load_path):
if model_load_path:
checkpoint = torch.load(model_load_path)
discriminator.load_state_dict(checkpoint["model_discriminator_state_dict"])
generator.load_state_dict(checkpoint["model_generator_state_dict"])
dis_opti.load_state_dict(checkpoint["optimizer_discriminator_state_dict"])
gen_opti.load_state_dict(checkpoint["optimizer_generator_state_dict"])
return checkpoint["epoch"]
else:
return 0
def init_model_optimizer(model_parameter, device):
# Initialize the Models
discriminator = Discriminator(ndf=model_parameter["ndf"], dropout_value=model_parameter["dropout"]).to(device)
generator = Generator(ngf=model_parameter["ngf"], dropout_value=model_parameter["dropout"]).to(device)
# train
dis_opti = optim.Adam(discriminator.parameters(), lr=model_parameter["learning_rate_dis"], betas=(0.5, 0.999))
gen_opti = optim.Adam(generator.parameters(), lr=model_parameter["learning_rate_gen"], betas=(0.5, 0.999))
return discriminator, generator, dis_opti, gen_opti
def get_hot_vector_encode(labels, device):
return torch.eye(10)[labels].view(-1, 10, 1, 1).to(device)
def add_noise_to_image(images, device, level_of_noise=0.1):
return images[0].to(device) + (level_of_noise) * torch.randn(images.shape).to(device)
if __name__ == "__main__":
# Hyperparameter
model_parameter = {
"batch_size": 500,
"learning_rate_dis": 0.0002,
"learning_rate_gen": 0.0002,
"shuffle": False,
"Epochs": 10,
"ndf": 64,
"ngf": 64,
"dropout": 0.5
}
# Parameter
r_frequent = 10 # How many samples we save for replay per batch (batch_size / r_frequent).
model_name = "CDCGAN" # The name of you model e.g. "Gan"
num_workers_loader = 1 # How many workers should load the data
sample_save_size = 16 # How many numbers your saved images should show
device = "cuda" # Which device should be used to train the neural network
model_load_path = "" # If set load model instead of training from new
num_epoch_log = 1 # How frequently you want to log
torch.manual_seed(43) # Sets a seed for torch for reproducibility
dataset_train, train_loader = get_MNIST_dataset(num_workers_loader, model_parameter) # Get dataset
# Initialize the Models and optimizer
discriminator, generator, dis_opti, gen_opti = init_model_optimizer(model_parameter, device) # Init model/Optimizer
start_epoch = load_model(model_load_path) # when we want to load a model
# Init Logger
logger = Logger(model_name, generator, discriminator, gen_opti, dis_opti, model_parameter, train_loader)
loss = nn.BCELoss()
images, labels = next(iter(train_loader)) # For logging
# For testing
# pred = generator(get_noise(model_parameter["batch_size"]).to(device), get_hot_vector_encode(get_rand_labels(model_parameter["batch_size"]), device))
# dis = discriminator(images.to(device), get_hot_vector_encode(labels, device))
logger.log_graph(get_noise(model_parameter["batch_size"]).to(device), images.to(device),
get_hot_vector_encode(get_rand_labels(model_parameter["batch_size"]), device),
get_hot_vector_encode(labels, device))
# Buffer to store samples for experience replay
exp_replay = torch.tensor([]).to(device)
for num_epoch in range(start_epoch, model_parameter["Epochs"]):
for batch_num, data_loader in enumerate(train_loader):
images, labels = data_loader
images = add_noise_to_image(images, device) # Add noise to the images
# 1. Train Discriminator
dis_error = train_discriminator(
dis_opti,
get_noise(model_parameter["batch_size"]).to(device),
images.to(device),
get_fake_data_target(model_parameter["batch_size"]).to(device),
get_real_data_target(model_parameter["batch_size"]).to(device),
get_hot_vector_encode(labels, device),
get_hot_vector_encode(
get_rand_labels(model_parameter["batch_size"]), device),
device
)
# 2. Train Generator
fake_image, pred_dis_fake, gen_error = train_generator(
gen_opti,
get_noise(model_parameter["batch_size"]).to(device),
get_real_data_target(model_parameter["batch_size"]).to(device),
get_hot_vector_encode(
get_rand_labels(model_parameter["batch_size"]),
device),
device
)
# Store a random point for experience replay
perm = torch.randperm(fake_image.size(0))
r_idx = perm[:max(1, int(model_parameter["batch_size"] / r_frequent))]
r_samples = add_noise_to_image(fake_image[r_idx], device)
exp_replay = torch.cat((exp_replay, r_samples), 0).detach()
if exp_replay.size(0) >= model_parameter["batch_size"]:
# Train on experienced data
dis_opti.zero_grad()
r_label = get_hot_vector_encode(torch.zeros(exp_replay.size(0)).numpy(), device)
pred_dis_real = discriminator(exp_replay, r_label)
error_real = loss(pred_dis_real, get_fake_data_target(exp_replay.size(0)).to(device))
error_real.backward()
dis_opti.step()
print(f'Epoch: [{num_epoch}/{model_parameter["Epochs"]}] '
f'Batch: Replay/Experience batch '
f'Loss_D: {error_real.data.cpu()}, '
)
exp_replay = torch.tensor([]).to(device)
logger.display_stats(epoch=num_epoch, batch_num=batch_num, dis_error=dis_error, gen_error=gen_error)
if batch_num % 100 == 0:
logger.log_image(fake_image[:sample_save_size], num_epoch, batch_num)
logger.log(num_epoch, dis_error, gen_error)
if num_epoch % num_epoch_log == 0:
logger.log_model(num_epoch)
logger.log_histogramm()
logger.close(logger, fake_image[:sample_save_size], num_epoch, dis_error, gen_error)
First link to my Code (Pastebin)
Second link to my Code (0bin)
Conclusion:
Since I implemented all these things (e.g. label smoothing) that are considered beneficial to a GAN/DCGAN, and my model still performs worse than the tutorial DCGAN from PyTorch, I think I might have a bug in my code, but I can't seem to find it.
Reproducibility:
You should be able to just copy the code and run it, if you have the libraries that I imported installed, to see for yourself whether you can find anything.
I appreciate any feedback.
I solved this issue a while ago, but forgot to post an answer on Stack Overflow, so I will simply post my code here, which should work reasonably well.
Some disclaimers:
I am not quite sure if it still works, since I did this a year ago
it's for 128x128px MNIST images
it's not a vanilla GAN; I used various optimization techniques
If you want to use it you need to change various details, such as the training dataset
Resources:
Multi-Scale Gradients
Instance Noise
Various tricks I used
More tricks
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import pytorch_lightning as pl
from pytorch_lightning import loggers
from numpy.random import choice
import os
from pathlib import Path
import shutil
from collections import OrderedDict
# custom weights initialization called on netG and netD
def weights_init(m):
classname = m.__class__.__name__
if classname.find('Conv') != -1:
nn.init.normal_(m.weight.data, 0.0, 0.02)
elif classname.find('BatchNorm') != -1:
nn.init.normal_(m.weight.data, 1.0, 0.02)
nn.init.constant_(m.bias.data, 0)
# randomly flip some labels
def noisy_labels(y, p_flip=0.05): # flip labels with 5% probability
# determine the number of labels to flip
n_select = int(p_flip * y.shape[0])
# choose labels to flip
flip_ix = choice([i for i in range(y.shape[0])], size=n_select)
# invert the labels in place
y[flip_ix] = 1 - y[flip_ix]
return y
class AddGaussianNoise(object):
def __init__(self, mean=0.0, std=0.1):
self.std = std
self.mean = mean
def __call__(self, tensor):
tensor = tensor.cuda()
return tensor + (torch.randn(tensor.size()) * self.std + self.mean).cuda()
def __repr__(self):
return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)
def resize2d(img, size):
return (F.adaptive_avg_pool2d(img, size).data).cuda()
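# resize2d is used in training_step below to build a pyramid of downscaled real
# images, so the discriminator gets a real counterpart at every scale used by
# the multi-scale gradient connections.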
def get_valid_labels(img):
return ((0.8 - 1.1) * torch.rand(img.shape[0], 1, 1, 1) + 1.1).cuda() # soft labels
def get_unvalid_labels(img):
return (noisy_labels((0.0 - 0.3) * torch.rand(img.shape[0], 1, 1, 1) + 0.3)).cuda() # soft labels
class Generator(pl.LightningModule):
def __init__(self, ngf, nc, latent_dim):
super(Generator, self).__init__()
self.ngf = ngf
self.latent_dim = latent_dim
self.nc = nc
self.fc0 = nn.Sequential(
# input is Z, going into a convolution
nn.utils.spectral_norm(nn.ConvTranspose2d(latent_dim, ngf * 16, 4, 1, 0, bias=False)),
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ngf * 16)
)
self.fc1 = nn.Sequential(
# state size. (ngf*8) x 4 x 4
nn.utils.spectral_norm(nn.ConvTranspose2d(ngf * 16, ngf * 8, 4, 2, 1, bias=False)),
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ngf * 8)
)
self.fc2 = nn.Sequential(
# state size. (ngf*4) x 8 x 8
nn.utils.spectral_norm(nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False)),
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ngf * 4)
)
self.fc3 = nn.Sequential(
# state size. (ngf*2) x 16 x 16
nn.utils.spectral_norm(nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False)),
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ngf * 2)
)
self.fc4 = nn.Sequential(
# state size. (ngf) x 32 x 32
nn.utils.spectral_norm(nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False)),
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ngf)
)
self.fc5 = nn.Sequential(
# state size. (nc) x 64 x 64
nn.utils.spectral_norm(nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False)),
nn.Tanh()
)
# state size. (nc) x 128 x 128
# For Multi-Scale Gradient
# Converting the intermediate layers into images
self.fc0_r = nn.Conv2d(ngf * 16, self.nc, 1)
self.fc1_r = nn.Conv2d(ngf * 8, self.nc, 1)
self.fc2_r = nn.Conv2d(ngf * 4, self.nc, 1)
self.fc3_r = nn.Conv2d(ngf * 2, self.nc, 1)
self.fc4_r = nn.Conv2d(ngf, self.nc, 1)
def forward(self, input):
x_0 = self.fc0(input)
x_1 = self.fc1(x_0)
x_2 = self.fc2(x_1)
x_3 = self.fc3(x_2)
x_4 = self.fc4(x_3)
x_5 = self.fc5(x_4)
# For Multi-Scale Gradient
# Converting the intermediate layers into images
x_0_r = self.fc0_r(x_0)
x_1_r = self.fc1_r(x_1)
x_2_r = self.fc2_r(x_2)
x_3_r = self.fc3_r(x_3)
x_4_r = self.fc4_r(x_4)
return x_5, x_0_r, x_1_r, x_2_r, x_3_r, x_4_r
class Discriminator(pl.LightningModule):
def __init__(self, ndf, nc):
super(Discriminator, self).__init__()
self.nc = nc
self.ndf = ndf
self.fc0 = nn.Sequential(
# input is (nc) x 128 x 128
nn.utils.spectral_norm(nn.Conv2d(nc, ndf, 4, 2, 1, bias=False)),
nn.LeakyReLU(0.2, inplace=True)
)
self.fc1 = nn.Sequential(
# state size. (ndf) x 64 x 64
nn.utils.spectral_norm(nn.Conv2d(ndf + nc, ndf * 2, 4, 2, 1, bias=False)),
# "+ nc" because of multi scale gradient
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ndf * 2)
)
self.fc2 = nn.Sequential(
# state size. (ndf*2) x 32 x 32
nn.utils.spectral_norm(nn.Conv2d(ndf * 2 + nc, ndf * 4, 4, 2, 1, bias=False)),
# "+ nc" because of multi scale gradient
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ndf * 4)
)
self.fc3 = nn.Sequential(
# state size. (ndf*4) x 16 x 16
nn.utils.spectral_norm(nn.Conv2d(ndf * 4 + nc, ndf * 8, 4, 2, 1, bias=False)),
# "+ nc" because of multi scale gradient
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ndf * 8),
)
self.fc4 = nn.Sequential(
# state size. (ndf*8) x 8 x 8
nn.utils.spectral_norm(nn.Conv2d(ndf * 8 + nc, ndf * 16, 4, 2, 1, bias=False)),
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ndf * 16)
)
self.fc5 = nn.Sequential(
# state size. (ndf*8) x 4 x 4
nn.utils.spectral_norm(nn.Conv2d(ndf * 16 + nc, 1, 4, 1, 0, bias=False)),
nn.Sigmoid()
)
# state size. 1 x 1 x 1
def forward(self, input, detach_or_not):
# When we train in combination with the generator, we use multi-scale gradients.
x, x_0_r, x_1_r, x_2_r, x_3_r, x_4_r = input
if detach_or_not:
x = x.detach()
x_0 = self.fc0(x)
x_0 = torch.cat((x_0, x_4_r), dim=1) # Concat Multi-Scale Gradient
x_1 = self.fc1(x_0)
x_1 = torch.cat((x_1, x_3_r), dim=1) # Concat Multi-Scale Gradient
x_2 = self.fc2(x_1)
x_2 = torch.cat((x_2, x_2_r), dim=1) # Concat Multi-Scale Gradient
x_3 = self.fc3(x_2)
x_3 = torch.cat((x_3, x_1_r), dim=1) # Concat Multi-Scale Gradient
x_4 = self.fc4(x_3)
x_4 = torch.cat((x_4, x_0_r), dim=1) # Concat Multi-Scale Gradient
x_5 = self.fc5(x_4)
return x_5
class DCGAN(pl.LightningModule):
def __init__(self, hparams, checkpoint_folder, experiment_name):
super().__init__()
self.hparams = hparams
self.checkpoint_folder = checkpoint_folder
self.experiment_name = experiment_name
# networks
self.generator = Generator(ngf=hparams.ngf, nc=hparams.nc, latent_dim=hparams.latent_dim)
self.discriminator = Discriminator(ndf=hparams.ndf, nc=hparams.nc)
self.generator.apply(weights_init)
self.discriminator.apply(weights_init)
# cache for generated images
self.generated_imgs = None
self.last_imgs = None
# For experience replay
self.exp_replay_dis = torch.tensor([])
def forward(self, z):
return self.generator(z)
def adversarial_loss(self, y_hat, y):
return F.binary_cross_entropy(y_hat, y)
def training_step(self, batch, batch_nb, optimizer_idx):
# Adding instance noise; for more, visit: https://www.inference.vc/instance-noise-a-trick-for-stabilising-gan-training/
std_gaussian = max(0, self.hparams.level_of_noise - (
(self.hparams.level_of_noise * 2) * (self.current_epoch / self.hparams.epochs)))
AddGaussianNoiseInst = AddGaussianNoise(std=std_gaussian) # the noise decays over time
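# Note: with this schedule the noise std decays linearly from level_of_noise and
# reaches 0 halfway through training (at current_epoch == epochs / 2); the
# max(0, ...) keeps it at 0 for the rest of the run.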
imgs, _ = batch
imgs = AddGaussianNoiseInst(imgs) # Adding instance noise to real images
self.last_imgs = imgs
# train generator
if optimizer_idx == 0:
# sample noise
z = torch.randn(imgs.shape[0], self.hparams.latent_dim, 1, 1).cuda()
# generate images
self.generated_imgs = self(z)
# ground truth result (ie: all fake)
g_loss = self.adversarial_loss(self.discriminator(self.generated_imgs, False), get_valid_labels(self.generated_imgs[0])) # adversarial loss is binary cross-entropy; [0] is the image of the last layer
tqdm_dict = {'g_loss': g_loss}
log = {'g_loss': g_loss, "std_gaussian": std_gaussian}
output = OrderedDict({
'loss': g_loss,
'progress_bar': tqdm_dict,
'log': log
})
return output
# train discriminator
if optimizer_idx == 1:
# Measure discriminator's ability to classify real from generated samples
# how well can it label as real?
real_loss = self.adversarial_loss(
self.discriminator([imgs, resize2d(imgs, 4), resize2d(imgs, 8), resize2d(imgs, 16), resize2d(imgs, 32), resize2d(imgs, 64)],
False), get_valid_labels(imgs))
fake_loss = self.adversarial_loss(self.discriminator(self.generated_imgs, True), get_unvalid_labels(
self.generated_imgs[0])) # how well can it label as fake?; [0] is the image of the last layer
# discriminator loss is the average of these
d_loss = (real_loss + fake_loss) / 2
tqdm_dict = {'d_loss': d_loss}
log = {'d_loss': d_loss, "std_gaussian": std_gaussian}
output = OrderedDict({
'loss': d_loss,
'progress_bar': tqdm_dict,
'log': log
})
return output
def configure_optimizers(self):
lr_gen = self.hparams.lr_gen
lr_dis = self.hparams.lr_dis
b1 = self.hparams.b1
b2 = self.hparams.b2
opt_g = torch.optim.Adam(self.generator.parameters(), lr=lr_gen, betas=(b1, b2))
opt_d = torch.optim.Adam(self.discriminator.parameters(), lr=lr_dis, betas=(b1, b2))
return [opt_g, opt_d], []
def backward(self, trainer, loss, optimizer, optimizer_idx: int) -> None:
loss.backward(retain_graph=True)
def train_dataloader(self):
# transform = transforms.Compose([transforms.Resize((self.hparams.image_size, self.hparams.image_size)),
# transforms.ToTensor(),
# transforms.Normalize([0.5], [0.5])])
# dataset = torchvision.datasets.MNIST(os.getcwd(), train=False, download=True, transform=transform)
# return DataLoader(dataset, batch_size=self.hparams.batch_size)
# transform = transforms.Compose([transforms.Resize((self.hparams.image_size, self.hparams.image_size)),
# transforms.ToTensor(),
# transforms.Normalize([0.5], [0.5])
# ])
# train_dataset = torchvision.datasets.ImageFolder(
# root="./drive/My Drive/datasets/flower_dataset/",
# # root="./drive/My Drive/datasets/ghibli_dataset_small_overfit/",
# transform=transform
# )
# return DataLoader(train_dataset, num_workers=self.hparams.num_workers, shuffle=True,
# batch_size=self.hparams.batch_size)
transform = transforms.Compose([transforms.Resize((self.hparams.image_size, self.hparams.image_size)),
transforms.ToTensor(),
transforms.Normalize([0.5], [0.5])
])
train_dataset = torchvision.datasets.ImageFolder(
root="ghibli_dataset_small_overfit/",
transform=transform
)
return DataLoader(train_dataset, num_workers=self.hparams.num_workers, shuffle=True,
batch_size=self.hparams.batch_size)
def on_epoch_end(self):
z = torch.randn(4, self.hparams.latent_dim, 1, 1).cuda()
# match gpu device (or keep as cpu)
if self.on_gpu:
z = z.cuda(self.last_imgs.device.index)
# log sampled images
sample_imgs = self.generator(z)[0]
torchvision.utils.save_image(sample_imgs, f'generated_images_epoch{self.current_epoch}.png')
# save model
if self.current_epoch % self.hparams.save_model_every_epoch == 0:
trainer.save_checkpoint(
self.checkpoint_folder + "/" + self.experiment_name + "_epoch_" + str(self.current_epoch) + ".ckpt")
from argparse import Namespace
args = {
'batch_size': 128, # batch size
'lr_gen': 0.0003, # TTUR; learning rate of both networks; tested value: 0.0002
'lr_dis': 0.0003, # TTUR; learning rate of both networks; tested value: 0.0002
'b1': 0.5, # Momentum for adam; tested value(dcgan paper): 0.5
'b2': 0.999, # Momentum for adam; tested value(dcgan paper): 0.999
'latent_dim': 256, # tested value which worked(in V4_1): 100
'nc': 3, # number of color channels
'ndf': 8, # number of discriminator features
'ngf': 8, # number of generator features
'epochs': 4, # the maximal amount of epochs the algorithm should run
'save_model_every_epoch': 1, # how often we save our model
'image_size': 128, # size of the image
'num_workers': 3,
'level_of_noise': 0.1, # how much instance noise we introduce (std); tested values: 0.15 and 0.1
'experience_save_per_batch': 1, # this value should be very low; tested value which works: 1
'experience_batch_size': 50 # this value shouldn't be too high; tested value which works: 50
}
hparams = Namespace(**args)
# Parameters
experiment_name = "DCGAN_6_2_MNIST_128px"
dataset_name = "mnist"
checkpoint_folder = "DCGAN/"
tags = ["DCGAN", "128x128"]
dirpath = Path(checkpoint_folder)
# defining net
net = DCGAN(hparams, checkpoint_folder, experiment_name)
torch.autograd.set_detect_anomaly(True)
trainer = pl.Trainer( # resume_from_checkpoint="DCGAN_V4_2_GHIBLI_epoch_999.ckpt",
max_epochs=args["epochs"],
gpus=1
)
trainer.fit(net)
Related
I have a trained PyTorch model and I want to get the confidence score of predictions in the range (0-100) or (0-1). The code below is giving me a score, but its range is undefined. I want the score in a defined range of (0-1) or (0-100). Any idea how to get this?
conf, classes = torch.max(output, 1)
My code:
import cv2
import numpy as np
import torch

model = torch.load(r'best.pt')
model.eval()
def preprocess(imgs):
im = torch.from_numpy(imgs)
im = im.float() # uint8 to fp16/32
im /= 255.0
return im
img_path = cv2.imread("/content/634282.jpg",0)
cropped = cv2.resize(img_path,(28,28))
imgs = preprocess(np.array([[cropped]]))
def predict_allCharacters(imgs):
output = model(imgs)
conf, classes = torch.max(output, 1)
class_names = '0123456789'
return conf, class_names[classes.item()]
Model definition:
class CNN(nn.Module):
def __init__(self):
super(CNN, self).__init__()
self.conv1 = nn.Sequential(
nn.Conv2d(
in_channels=1,
out_channels=16,
kernel_size=5,
stride=1,
padding=2,
),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2),
)
self.conv2 = nn.Sequential(
nn.Conv2d(16, 32, 5, 1, 2),
nn.ReLU(),
nn.MaxPool2d(2),
)
# fully connected layer, output 10 classes
self.out = nn.Linear(32 * 7 * 7, 37)
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
# flatten the output of conv2 to (batch_size, 32 * 7 * 7)
x = x.view(x.size(0), -1)
output = self.out(x)
return output # return x for visualization
In your case, output represents the logits. One way of getting a probability out of them is to use the Softmax function. As it seems that output contains the outputs from a batch, not a single sample, you can do something like this:
probs = torch.nn.functional.softmax(output, dim=1)
Then, in probs, each row would have the probability (i.e., in range [0, 1], sum=1) of each class for a given sample.
So, your predict_allCharacters could be modified to:
def predict_allCharacters(imgs):
output = model(imgs)
probs = torch.nn.functional.softmax(output, dim=1)
conf, classes = torch.max(probs, 1)
class_names = '0123456789'
return conf, class_names[classes.item()]
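For example (hypothetical usage, reusing the imgs tensor prepared above):

conf, pred = predict_allCharacters(imgs)
print(f'predicted: {pred}, confidence: {conf.item():.2f}')  # conf.item() is now in [0, 1]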
I have an image set which has transparency.
I'm trying to train a GAN (Generative Adversarial Network).
How can I preserve the transparency? I can see from the output images that all transparent areas are BLACK.
How can I avoid that?
I think this is called the "Alpha Channel".
Anyway, how can I keep my transparency?
Below is my code.
# Importing the libraries
from __future__ import print_function
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.autograd import Variable
from generator import G
from discriminator import D
import os
batchSize = 64 # We set the size of the batch.
imageSize = 64 # We set the size of the generated images (64x64).
input_vector = 100
nb_epochs = 500
# Creating the transformations
transform = transforms.Compose([transforms.Resize((imageSize, imageSize)), transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
# We create a list of transformations (scaling, tensor conversion, normalization) to apply to the input images.
# Loading the dataset
dataset = dset.ImageFolder(root='./data', transform=transform)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batchSize, shuffle=True,
num_workers=2) # We use dataLoader to get the images of the training set batch by batch.
# Defining the weights_init function that takes as input a neural network m and that will initialize all its weights.
def weights_init(m):
classname = m.__class__.__name__
if classname.find('Conv') != -1:
m.weight.data.normal_(0.0, 0.02)
elif classname.find('BatchNorm') != -1:
m.weight.data.normal_(1.0, 0.02)
m.bias.data.fill_(0)
def is_cuda_available():
return torch.cuda.is_available()
def is_gpu_available():
if is_cuda_available():
if int(torch.cuda.device_count()) > 0:
return True
return False
return False
# Create results directory
def create_dir(name):
if not os.path.exists(name):
os.makedirs(name)
# Creating the generator
netG = G(input_vector)
netG.apply(weights_init)
# Creating the discriminator
netD = D()
netD.apply(weights_init)
if is_gpu_available():
netG.cuda()
netD.cuda()
# Training the DCGANs
criterion = nn.BCELoss()
optimizerD = optim.Adam(netD.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=0.0002, betas=(0.5, 0.999))
generator_model = 'generator_model'
discriminator_model = 'discriminator_model'
def save_model(epoch, model, optimizer, error, filepath, noise=None):
if os.path.exists(filepath):
os.remove(filepath)
torch.save({
'epoch': epoch,
'model_state_dict': model.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'loss': error,
'noise': noise
}, filepath)
def load_checkpoint(filepath):
if os.path.exists(filepath):
return torch.load(filepath)
return None
def main():
print("Device name : " + torch.cuda.get_device_name(0))
for epoch in range(nb_epochs):
for i, data in enumerate(dataloader, 0):
checkpointG = load_checkpoint(generator_model)
checkpointD = load_checkpoint(discriminator_model)
if checkpointG:
netG.load_state_dict(checkpointG['model_state_dict'])
optimizerG.load_state_dict(checkpointG['optimizer_state_dict'])
if checkpointD:
netD.load_state_dict(checkpointD['model_state_dict'])
optimizerD.load_state_dict(checkpointD['optimizer_state_dict'])
# 1st Step: Updating the weights of the neural network of the discriminator
netD.zero_grad()
# Training the discriminator with a real image of the dataset
real, _ = data
if is_gpu_available():
input = Variable(real.cuda()).cuda()
target = Variable(torch.ones(input.size()[0]).cuda()).cuda()
else:
input = Variable(real)
target = Variable(torch.ones(input.size()[0]))
output = netD(input)
errD_real = criterion(output, target)
# Training the discriminator with a fake image generated by the generator
if is_gpu_available():
noise = Variable(torch.randn(input.size()[0], input_vector, 1, 1)).cuda()
target = Variable(torch.zeros(input.size()[0])).cuda()
else:
noise = Variable(torch.randn(input.size()[0], input_vector, 1, 1))
target = Variable(torch.zeros(input.size()[0]))
fake = netG(noise)
output = netD(fake.detach())
errD_fake = criterion(output, target)
# Backpropagating the total error
errD = errD_real + errD_fake
errD.backward()
optimizerD.step()
# 2nd Step: Updating the weights of the neural network of the generator
netG.zero_grad()
if is_gpu_available():
target = Variable(torch.ones(input.size()[0])).cuda()
else:
target = Variable(torch.ones(input.size()[0]))
output = netD(fake)
errG = criterion(output, target)
errG.backward()
optimizerG.step()
# 3rd Step: Printing the losses and saving the real images and the generated images of the minibatch every 100 steps
print('[%d/%d][%d/%d] Loss_D: %.4f Loss_G: %.4f' % (epoch, nb_epochs, i, len(dataloader), errD.data, errG.data))
save_model(epoch, netG, optimizerG, errG, generator_model, noise)
save_model(epoch, netD, optimizerD, errD, discriminator_model, noise)
if i % 100 == 0:
create_dir('results')
vutils.save_image(real, '%s/real_samples.png' % "./results", normalize=True)
fake = netG(noise)
vutils.save_image(fake.data, '%s/fake_samples_epoch_%03d.png' % ("./results", epoch), normalize=True)
if __name__ == "__main__":
main()
generator.py
import torch.nn as nn
class G(nn.Module):
feature_maps = 512
kernel_size = 4
stride = 2
padding = 1
bias = False
def __init__(self, input_vector):
super(G, self).__init__()
self.main = nn.Sequential(
nn.ConvTranspose2d(input_vector, self.feature_maps, self.kernel_size, 1, 0, bias=self.bias),
nn.BatchNorm2d(self.feature_maps), nn.ReLU(True),
nn.ConvTranspose2d(self.feature_maps, int(self.feature_maps // 2), self.kernel_size, self.stride, self.padding,
bias=self.bias),
nn.BatchNorm2d(int(self.feature_maps // 2)), nn.ReLU(True),
nn.ConvTranspose2d(int(self.feature_maps // 2), int((self.feature_maps // 2) // 2), self.kernel_size, self.stride,
self.padding,
bias=self.bias),
nn.BatchNorm2d(int((self.feature_maps // 2) // 2)), nn.ReLU(True),
nn.ConvTranspose2d((int((self.feature_maps // 2) // 2)), int(((self.feature_maps // 2) // 2) // 2), self.kernel_size,
self.stride, self.padding,
bias=self.bias),
nn.BatchNorm2d(int((self.feature_maps // 2) // 2) // 2), nn.ReLU(True),
nn.ConvTranspose2d(int(((self.feature_maps // 2) // 2) // 2), 4, self.kernel_size, self.stride, self.padding,
bias=self.bias),
nn.Tanh()
)
def forward(self, input):
output = self.main(input)
return output
discriminator.py
import torch.nn as nn
class D(nn.Module):
feature_maps = 64
kernel_size = 4
stride = 2
padding = 1
bias = False
inplace = True
def __init__(self):
super(D, self).__init__()
self.main = nn.Sequential(
nn.Conv2d(4, self.feature_maps, self.kernel_size, self.stride, self.padding, bias=self.bias),
nn.LeakyReLU(0.2, inplace=self.inplace),
nn.Conv2d(self.feature_maps, self.feature_maps * 2, self.kernel_size, self.stride, self.padding,
bias=self.bias),
nn.BatchNorm2d(self.feature_maps * 2), nn.LeakyReLU(0.2, inplace=self.inplace),
nn.Conv2d(self.feature_maps * 2, self.feature_maps * (2 * 2), self.kernel_size, self.stride, self.padding,
bias=self.bias),
nn.BatchNorm2d(self.feature_maps * (2 * 2)), nn.LeakyReLU(0.2, inplace=self.inplace),
nn.Conv2d(self.feature_maps * (2 * 2), self.feature_maps * (2 * 2 * 2), self.kernel_size, self.stride,
self.padding, bias=self.bias),
nn.BatchNorm2d(self.feature_maps * (2 * 2 * 2)), nn.LeakyReLU(0.2, inplace=self.inplace),
nn.Conv2d(self.feature_maps * (2 * 2 * 2), 1, self.kernel_size, 1, 0, bias=self.bias),
nn.Sigmoid()
)
def forward(self, input):
output = self.main(input)
return output.view(-1)
Using dset.ImageFolder without explicitly defining the function that reads the image (the loader) results in your dataset using the default pil_loader:
def pil_loader(path: str) -> Image.Image:
# open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
with open(path, 'rb') as f:
img = Image.open(f)
return img.convert('RGB')
As you can see, the default loader discards the alpha channel and forces the image to have only three color channels: RGB.
You can define your own loader:
def pil_loader_rgba(path: str) -> Image.Image:
with open(path, 'rb') as f:
img = Image.open(f)
return img.convert('RGBA') # force alpha channel
You can use this loader in your dataset:
dataset = dset.ImageFolder(root='./data', transform=transform, loader=pil_loader_rgba)
Now your images will have the alpha channel.
Note that the transparency ("alpha channel") is an additional channel and is not part of the RGB channels. You need to make sure your model knows how to handle 4-channel inputs, otherwise, you'll run into errors such as this.
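Putting it together, a minimal sketch of the data pipeline with the RGBA loader. Note that the Normalize in your transform uses three per-channel means/stds; with four channels you would presumably need four values each (the 0.5s here are assumptions, not tuned values):

import torchvision.datasets as dset
import torchvision.transforms as transforms

transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),  # an RGBA image becomes a 4-channel tensor in [0, 1]
    transforms.Normalize((0.5, 0.5, 0.5, 0.5), (0.5, 0.5, 0.5, 0.5)),  # 4 channels
])
dataset = dset.ImageFolder(root='./data', transform=transform, loader=pil_loader_rgba)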
I'm trying to train a GAN on some images. I followed the tutorial on PyTorch's page and got to the following code, but when the cross-entropy function is applied during training it returns the error below the code:
import random
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as torch_dataset
import torchvision.transforms as transforms
import torchvision.utils as vutils
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython.display import HTML
seed = 1
print("Random Seed: ", seed)
random.seed(seed)
torch.manual_seed(seed)
images_folder_path = "./images/"
batch_size = 128
image_size = 256
n_channels = 1
z_vector = 100
n_features_generator = 64
n_features_discriminator = 64
num_epochs = 5
lr = 0.0002
beta1 = 0.5
dataset = torch_dataset.ImageFolder(
root=images_folder_path, transform=transforms.Compose(
[
transforms.Grayscale(num_output_channels=1),
transforms.Resize(image_size),
transforms.CenterCrop(image_size),
transforms.ToTensor(),
transforms.Normalize(0.5, 0.5)
]
)
)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)
device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu")
def weights_init(m):
classname = m.__class__.__name__
if classname.find('Conv') != -1:
nn.init.normal_(m.weight.data, 0.0, 0.02)
elif classname.find('BatchNorm') != -1:
nn.init.normal_(m.weight.data, 1.0, 0.02)
nn.init.constant_(m.bias.data, 0)
class Generator(nn.Module):
def __init__(self):
super(Generator, self).__init__()
self.main = nn.Sequential(
nn.ConvTranspose2d(z_vector, n_features_generator * 8, 4, 1, 0, bias=False),
nn.BatchNorm2d(n_features_generator * 8),
nn.ReLU(True),
nn.ConvTranspose2d(n_features_generator * 8, n_features_generator * 4, 4, 2, 1, bias=False),
nn.BatchNorm2d(n_features_generator * 4),
nn.ReLU(True),
nn.ConvTranspose2d(n_features_generator * 4, n_features_generator * 2, 4, 2, 1, bias=False),
nn.BatchNorm2d(n_features_generator * 2),
nn.ReLU(True),
nn.ConvTranspose2d(n_features_generator * 2, n_features_generator, 4, 2, 1, bias=False),
nn.BatchNorm2d(n_features_generator),
nn.ReLU(True),
nn.ConvTranspose2d(n_features_generator, n_channels, 4, 2, 1, bias=False),
nn.Tanh()
)
def forward(self, inputs):
return self.main(inputs)
class Discriminator(nn.Module):
def __init__(self):
super(Discriminator, self).__init__()
self.main = nn.Sequential(
nn.Conv2d(n_channels, n_features_discriminator, 4, 2, 1, bias=False),
nn.LeakyReLU(0.2, inplace=True),
nn.Conv2d(n_features_discriminator, n_features_discriminator * 2, 4, 2, 1, bias=False),
nn.BatchNorm2d(n_features_discriminator * 2),
nn.LeakyReLU(0.2, inplace=True),
nn.Conv2d(n_features_discriminator * 2, n_features_discriminator * 4, 4, 2, 1, bias=False),
nn.BatchNorm2d(n_features_discriminator * 4),
nn.LeakyReLU(0.2, inplace=True),
nn.Conv2d(n_features_discriminator * 4, n_features_discriminator * 8, 4, 2, 1, bias=False),
nn.BatchNorm2d(n_features_discriminator * 8),
nn.LeakyReLU(0.2, inplace=True),
nn.Conv2d(n_features_discriminator * 8, 1, 4, 1, 0, bias=False),
nn.Sigmoid()
)
def forward(self, inputs):
return self.main(inputs)
netG = Generator().to(device)
if device.type == 'cuda':
netG = nn.DataParallel(netG)
netG.apply(weights_init)
print(netG)
netD = Discriminator().to(device)
if device.type == 'cuda':
netD = nn.DataParallel(netD)
netD.apply(weights_init)
print(netD)
criterion = nn.CrossEntropyLoss()
fixed_noise = torch.randn(64, z_vector, 1, 1, device=device)
real_label = 1.
fake_label = 0.
optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))
img_list = []
G_losses = []
D_losses = []
iters = 0
print("Starting Training Loop...")
for epoch in range(num_epochs):
for i, data in enumerate(dataloader, 0):
netD.zero_grad()
real_cpu = data[0].to(device)
b_size = real_cpu.size(0)
label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
output = netD(real_cpu).view(-1)
# ----------------------------------------------------------------------------------
errD_real = criterion(output, label) # ERROR HAPPENS HERE
# ----------------------------------------------------------------------------------
errD_real.backward()
D_x = output.mean().item()
noise = torch.randn(b_size, z_vector, 1, 1, device=device)
fake = netG(noise)
label.fill_(fake_label)
output = netD(fake.detach()).view(-1)
errD_fake = criterion(output, label)
errD_fake.backward()
D_G_z1 = output.mean().item()
errD = errD_real + errD_fake
optimizerD.step()
netG.zero_grad()
label.fill_(real_label)
output = netD(fake).view(-1)
errG = criterion(output, label)
errG.backward()
D_G_z2 = output.mean().item()
optimizerG.step()
if i % 50 == 0:
print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
% (epoch, num_epochs, i, len(dataloader),
errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))
G_losses.append(errG.item())
D_losses.append(errD.item())
if (iters % 500 == 0) or ((epoch == num_epochs-1) and (i == len(dataloader)-1)):
with torch.no_grad():
fake = netG(fixed_noise).detach().cpu()
img_list.append(vutils.make_grid(fake, padding=2, normalize=True))
iters += 1
Error:
Traceback (most recent call last):
File "G:/Pastas Estruturadas/Conhecimento/CEFET/IA/SpectroGAN/dcgan.py", line 137, in
errD_real = criterion(output, label)
File "C:\Users\Ramon\anaconda3\envs\vision\lib\site-packages\torch\nn\modules\module.py", line 722, in _call_impl
result = self.forward(*input, **kwargs)
File "C:\Users\Ramon\anaconda3\envs\vision\lib\site-packages\torch\nn\modules\loss.py", line 948, in forward
ignore_index=self.ignore_index, reduction=self.reduction)
File "C:\Users\Ramon\anaconda3\envs\vision\lib\site-packages\torch\nn\functional.py", line 2422, in cross_entropy
return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
File "C:\Users\Ramon\anaconda3\envs\vision\lib\site-packages\torch\nn\functional.py", line 1591, in log_softmax
ret = input.log_softmax(dim)
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
Process finished with exit code 1
Your model's output is not consistent with your criterion.
If you want to keep the model and change the criterion:
Use BCELoss instead of CrossEntropyLoss. Note: You will need to cast your labels to float before passing them in. Also consider removing the Sigmoid() from the model and using BCEWithLogitsLoss.
If you want to keep the criterion and change the model:
CrossEntropyLoss expects the shape (..., num_classes). So for your 2 class case (real & fake), you will have to predict 2 values for each image in the batch which means you will need to alter the output channels of the last layer in your model. It also expects the raw logits, so you should remove the Sigmoid().
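As a minimal sketch of the first option (keeping the model and swapping the criterion), assuming the rest of the training loop stays unchanged:

criterion = nn.BCELoss()  # instead of nn.CrossEntropyLoss()

# inside the training loop: netD's output is shape (b_size,) after .view(-1),
# and the targets must be float for BCELoss (they already are in your code)
label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
output = netD(real_cpu).view(-1)
errD_real = criterion(output, label)

# Alternatively, remove the final nn.Sigmoid() from the Discriminator and use
# nn.BCEWithLogitsLoss(), which is more numerically stable.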
I have a UNet network which takes in MRI images of the brain, where the goal is to segment the white matter in the brain. The images have the shape 256x256x183 (reshaped to 183x256x256) (FLAIR and T1 images). The problem I am having is that before sending the input to the UNet network, I have requires_grad=True on my PyTorch tensor, but after one torch.nn.Conv2d operation requires_grad is False. This is a huge problem since the gradient will not update, and the model will not learn.
from collections import OrderedDict
import torch
import torch.nn as nn
class UNet(nn.Module):
def __init__(self, in_channels=3, out_channels=1, init_features=32):
super(UNet, self).__init__()
features = init_features
self.encoder1 = UNet._block(in_channels, features, name="enc1")
self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
self.encoder2 = UNet._block(features, features * 2, name="enc2")
self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
self.encoder3 = UNet._block(features * 2, features * 4, name="enc3")
self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
self.encoder4 = UNet._block(features * 4, features * 8, name="enc4")
self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)
self.bottleneck = UNet._block(features * 8, features * 16, name="bottleneck")
self.upconv4 = nn.ConvTranspose2d(
features * 16, features * 8, kernel_size=2, stride=2
)
self.decoder4 = UNet._block((features * 8) * 2, features * 8, name="dec4")
self.upconv3 = nn.ConvTranspose2d(
features * 8, features * 4, kernel_size=2, stride=2
)
self.decoder3 = UNet._block((features * 4) * 2, features * 4, name="dec3")
self.upconv2 = nn.ConvTranspose2d(
features * 4, features * 2, kernel_size=2, stride=2
)
self.decoder2 = UNet._block((features * 2) * 2, features * 2, name="dec2")
self.upconv1 = nn.ConvTranspose2d(
features * 2, features, kernel_size=2, stride=2
)
self.decoder1 = UNet._block(features * 2, features, name="dec1")
self.conv = nn.Conv2d(
in_channels=features, out_channels=out_channels, kernel_size=1
)
def forward(self, x):
print(x.requires_grad) #<---- here it is true
enc1 = self.encoder1(x)#<---- where the problem happens
print(enc1.requires_grad) #<---- here it is false
enc2 = self.encoder2(self.pool1(enc1))
print(enc2.requires_grad)
enc3 = self.encoder3(self.pool2(enc2))
print(enc3.requires_grad)
enc4 = self.encoder4(self.pool3(enc3))
print(enc4.requires_grad)
bottleneck = self.bottleneck(self.pool4(enc4))
print(bottleneck.requires_grad)
dec4 = self.upconv4(bottleneck)
print(dec4.requires_grad)
dec4 = torch.cat((dec4, enc4), dim=1)
print(dec4.requires_grad)
dec4 = self.decoder4(dec4)
print(dec4.requires_grad)
dec3 = self.upconv3(dec4)
print(dec3.requires_grad)
dec3 = torch.cat((dec3, enc3), dim=1)
print(dec3.requires_grad)
dec3 = self.decoder3(dec3)
print(dec3.requires_grad)
dec2 = self.upconv2(dec3)
print(dec2.requires_grad)
dec2 = torch.cat((dec2, enc2), dim=1)
print(dec2.requires_grad)
dec2 = self.decoder2(dec2)
print(dec2.requires_grad)
dec1 = self.upconv1(dec2)
print(dec1.requires_grad)
dec1 = torch.cat((dec1, enc1), dim=1)
print(dec1.requires_grad)
dec1 = self.decoder1(dec1)
print(dec1.requires_grad)
print("going out")
return torch.sigmoid(self.conv(dec1))
@staticmethod
def _block(in_channels, features, name):
return nn.Sequential(
OrderedDict(
[
(
name + "conv1",
nn.Conv2d(
in_channels=in_channels,
out_channels=features,
kernel_size=3,
padding=1,
bias=False,
),
),
(name + "norm1", nn.BatchNorm2d(num_features=features)),
(name + "relu1", nn.ReLU(inplace=True)),
(
name + "conv2",
nn.Conv2d(
in_channels=features,
out_channels=features,
kernel_size=3,
padding=1,
bias=False,
),
),
(name + "norm2", nn.BatchNorm2d(num_features=features)),
(name + "relu2", nn.ReLU(inplace=True)),
]
)
)
Edit:
This is the training code
class run_network:
def __init__(self, eta, epoch, batch_size, train_file_path, validation_file_path, shuffle_after_epoch = True):
self.eta = eta
self.epoch = epoch
self.batch_size = batch_size
self.train_file_path = train_file_path
self.validation_file_path = validation_file_path
self.shuffle_after_epoch = shuffle_after_epoch
def __call__(self, is_train = False):
device = torch.device("cpu" if not torch.cuda.is_available() else torch.cuda())
unet = torch.hub.load('mateuszbuda/brain-segmentation-pytorch', 'unet',
in_channels=3, out_channels=1, init_features=32, pretrained=True)
unet.to(device)
unet = unet.double()
optimizer = optim.Adam(unet.parameters(), lr=self.eta)
dsc_loss = DiceLoss()
Load_training = NiftiLoader(self.train_file_path)
Load_validation = NiftiLoader(self.validation_file_path)
mean_flair, mean_t1, std_flair, std_t1 = Load_training.average_mean_and_std(20, 79,99)
total_mean = [mean_flair, mean_t1]
total_std = [std_flair, std_t1]
loss_train = []
loss_validation = []
for current_epoch in tqdm(range(self.epoch)):
for phase in ["train", "validation"]:
if phase == "train":
mini_batch = Load_training.create_batch(self.batch_size, self.shuffle_after_epoch)
unet.train()
print("her22")
if phase == "validation":
print("her")
mini_batch = Load_validation.create_batch(self.batch_size, self.shuffle_after_epoch)
unet.eval()
dim1, dim2, dim3 = mini_batch.shape
for iteration in range(1):
if phase == "train":
current_batch = Load_training.Load_Image_batch(mini_batch, iteration)
image_batch = Load_training.image_zero_mean_normalizer(current_batch)
if phase == "validation":
current_batch = Load_validation.Load_Image_batch(mini_batch, iteration)
image_batch = Load_training.image_zero_mean_normalizer(current_batch, False, mean_list, std_list)
image_dim0, image_dim1, image_dim2, image_dim3, image_dim4 = image_batch.shape
image_batch = image_batch.reshape((
image_dim0,
image_dim1*image_dim2,
image_dim3,
image_dim4
))
image_batch = np.swapaxes(image_batch, 0,1)
image_batch = torch.as_tensor(image_batch)#.requires_grad_(True) #, requires_grad=True)
image_batch = image_batch.to(device)
print(image_batch.requires_grad)
optimizer.zero_grad()
with torch.set_grad_enabled(is_train == "train"):
for j in range(0, 10, 1):
# [183*5, 3, 256, 256] -> [12, 3, 256, 256]
# NUMBER OF ITERATIONS: (183*5/12) -> one chunk
input_image = image_batch[j:j+2,0:3,:,:]
print(input_image.requires_grad)
print("går inn")
y_predicted = unet(input_image)
print(y_predicted.requires_grad)
print(image_batch[j:j+2,3,:,:].requires_grad)
loss = dsc_loss(y_predicted.squeeze(1), image_batch[j:j+2,3,:,:])
print(loss.requires_grad)
if phase == "train":
loss_train.append(loss.item())
loss.backward()
print(loss.item())
exit()
optimizer.step()
print(loss.item())
exit()
if phase == "validation":
loss_validation.append(loss.item())
The number of iterations and the print statements are there to experiment with what the cause could be.
It works fine for me.
'''
# I changed your code a little bit to catch the problem.
def forward(self, x):
print("encoder1", x.requires_grad) #<---- here it is true
enc1 = self.encoder1(x)#<---- where the problem happens
print("encoder2", enc1.requires_grad) #<---- here it is false
'''
a = torch.randn(32, 3, 255, 255, requires_grad=True)
# a.requires_grads = True
print(a)
UNet()(a)
# This is the result:
encoder1 True
encoder2 True
True
True
True
True
True
Can you show me your training source? I guess that's the problem. And why do you need to update the input data?
The training code is fine and the input doesn't need a gradient at all, if you just want to train and update the weights.
The real problem is this line here
with torch.set_grad_enabled(is_train == "train"):
So you want to disable the gradients if you are not training. The thing is, is_train is a bool (judging from this: def __call__(self, is_train=False):), so the comparison will always be false and no gradients will be set. Just change it to
with torch.set_grad_enabled(is_train):
and you will be fine.
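To see why, note that a bool never compares equal to the string "train":

is_train = False
print(is_train == "train")  # False
is_train = True
print(is_train == "train")  # False too, so gradients were always disabled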
This is my model:
import torch
import torch.nn as nn

# basic LeNet5 network
class LeNet5_mode0 (nn.Module) :
# constructor
def __init__(self):
super(LeNet5_mode0, self).__init__() # call to super constructor
# define layers
# 6 # 28x28
self.conv1 = nn.Sequential(
# Lenet's first conv layer is 3x32x32, squeeze color channels into 1 and pad 2
nn.Conv2d(in_channels = 1, out_channels = 6, kernel_size = 5, stride = 1, padding = 2),
nn.ReLU(),
nn.MaxPool2d(kernel_size = 2, stride = 2)
)
# 16 # 10x10
self.conv2 = nn.Sequential(
nn.Conv2d(in_channels = 6, out_channels = 16, kernel_size = 5, stride = 1, padding = 0),
nn.ReLU(),
nn.MaxPool2d(kernel_size =2, stride = 2)
)
self.fc1 = nn.Sequential(
nn.Linear(in_features = 16*5*5, out_features = 120),
nn.ReLU()
)
self.fc2 = nn.Sequential(
nn.Linear(in_features = 120, out_features = 84),
nn.ReLU()
)
self.classifier = nn.Sequential(
nn.Linear(in_features = 84,out_features = 10),
nn.Softmax(dim = 1) # dim=1 means apply softmax across the 10 class scores of each row
)
# define forward function
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = x.view(-1, 16*5*5) # reshape the tensor to [-1,16*5*5]
x = self.fc1(x)
x = self.fc2(x)
x = self.classifier(x)
return x
and I train this model once with:
criterion = nn.CrossEntropyLoss() # aka, LogLoss
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[5,10,15], gamma=0.5)
and then save it with
torch.save(model.state_dict(), savepath)
and load it with
model.load_state_dict(torch.load(loadpath))
So far no problem. But when I change the optimizer a little to
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay = 0.0005)
and use the same save & load method
I receive the following error:
RuntimeError: Error(s) in loading state_dict for LeNet5_mode0:
Unexpected key(s) in state_dict: "conv1.1.weight", "conv1.1.bias", "conv1.1.running_mean", "conv1.1.running_var", "conv1.1.num_batches_tracked", "conv2.1.weight", "conv2.1.bias", "conv2.1.running_mean", "conv2.1.running_var", "conv2.1.num_batches_tracked".
How can this be fixed? Why does a different optimizer have that effect on saving the trained network?