saving model in pytorch with weight decay

saving model in pytorch with weight decay - python

this is my model :
# basic LeNet5 network
class LeNet5_mode0 (nn.Module) :
# constructor
def __init__(self):
super(LeNet5_mode0, self).__init__() # call to super constructor
# define layers
# 6 # 28x28
self.conv1 = nn.Sequential(
# Lenet's first conv layer is 3x32x32, squeeze color channels into 1 and pad 2
nn.Conv2d(in_channels = 1, out_channels = 6, kernel_size = 5, stride = 1, padding = 2),
nn.ReLU(),
nn.MaxPool2d(kernel_size = 2, stride = 2)
)
# 16 # 10x10
self.conv2 = nn.Sequential(
nn.Conv2d(in_channels = 6, out_channels = 16, kernel_size = 5, stride = 1, padding = 0),
nn.ReLU(),
nn.MaxPool2d(kernel_size =2, stride = 2)
)
self.fc1 = nn.Sequential(
nn.Linear(in_features = 16*5*5, out_features = 120),
nn.ReLU()
)
self.fc2 = nn.Sequential(
nn.Linear(in_features = 120, out_features = 84),
nn.ReLU()
)
self.classifier = nn.Sequential(
nn.Linear(in_features = 84,out_features = 10),
nn.Softmax(dim = 1) # dim =1 meaning do softmax on the colums of 84x10
)
# define forward function
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = x.view(-1, 16*5*5) # reshape the tensor to [-1,16*5*5]
x = self.fc1(x)
x = self.fc2(x)
x = self.classifier(x)
return x
and I train this model once with :
criterion = nn.CrossEntropyLoss() # aka, LogLoss
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[5,10,15], gamma=0.5)
and then save with with
torch.save(model.state_dict(), savepath)
and load it with
model.load_state_dict(torch.load(loadpath))
so far no problem . but when i change the optimizer a little to
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay = 0.0005)
and use the same save & load method
I receive the following error:
in loading state_dict for LeNet5_mode0:
Unexpected key(s) in state_dict: "conv1.1.weight", "conv1.1.bias", "conv1.1.running_mean", "conv1.1.running_var", "conv1.1.num_batches_tracked", "conv2.1.weight", "conv2.1.bias", "conv2.1.running_mean", "conv2.1.running_var", "conv2.1.num_batches_tracked".
how can it be fixed? why different optimizer have that effect on the saving of the trained network?

Related

Pytorch optimizer returns empty

I already check relevant questions but can not figure out what is wrong with my code. I build a cnn and add residual value but, it returns
"ValueError: optimizer got an empty parameter list"
In below you can see some of my code
def _init_(self):
super()._init_()
self.conv1 = nn.Conv2d(3, 6, 3, 1, 1)
self.pool1 = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(6, 10, 3, 1, 1)
self.conv3 = nn.Conv2d(10, 5, 3, 1, 1)
self.conv4 = nn.Conv2d(5, 10, 3, 1, 1)
self.conv5 = nn.Conv2d(10, 10, 3, 1, 1)
self.fc1 = nn.Linear(40960, 15)
self.dropout = nn.Dropout(0.15)
def forward(self, x):
x = (F.relu(self.conv1(x)))
x = (F.relu(self.conv2(x)))
x = (F.relu(self.conv3(x)))
residual = x.clone()
x = (F.relu(self.conv4(x)))
x += residual
x = F.relu(x)
x = (F.relu(self.conv5(x)))
x = torch.flatten(x, 1)
x = F.relu(self.fc1(x))
return x
and it returns error in here:
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=learn_rate, momentum=0.9)
thank you for your helps in advance

What is the type of net.parameters()? optim.SGD() is expecting an "iterable of parameters...". Try this:
net = Net()
criterion = nn.CrossEntropyLoss()
parameters = list(net.parameters())
optimizer = optim.SGD(parameters, lr=learn_rate, momentum=0.9)

Your Net class needs to extend torch.nn.Module
class Net(nn.Module):
def __init__(self):
...
def forward(self, x):
...

About the memory usage of Mobilenet

I'm building MobileNetV1 with Pytorch and had my memory ran out every time I train the model. (The pytorch log "Killed!" and suddenly crashed).
This is my code
Config file: (yaml)
n_gpu: 0
arch:
type: MobileNet
args:
in_channels: 3
num_classes: 26
data_loader:
type: BallDataLoader
args:
data_dir: data/balls/
batch_size: 64
shuffle: true
validation_split: 0.2
num_workers: 0
resize:
- 224
- 224
optimizer:
type: Adam
args:
lr: 1.0e-2
weight_decay: 0
amsgrad: true
loss: nll_loss
metrics:
- accuracy
- top_k_acc
lr_scheduler:
type: StepLR
args:
step_size: 50
gamma: 0.1
trainer:
epochs: 50
save_dir: saved/
save_period: 2
verbosity: 2
monitor: min val_loss
early_stop: 10
tensorboard: true
modules.py:
class DepthwiseSeparableConv(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size = 3, stride = 1, padding = None):
super().__init__()
if padding == None:
padding = kernel_size // 2
self.depth_wise_conv = nn.Conv2d(in_channels, in_channels, kernel_size, stride, padding, groups= in_channels)
self.bn1 = nn.BatchNorm2d(in_channels)
self.point_wise_conv = nn.Conv2d(in_channels, out_channels, (1,1), 1, 0)
self.bn2 = nn.BatchNorm2d(out_channels)
self.in_channels = in_channels
self.out_channels = out_channels
def forward(self, x):
x = self.depth_wise_conv(x)
x = self.bn1(x)
x = F.relu(x)
x = self.point_wise_conv(x)
x = self.bn2(x)
x = F.relu(x)
return x
model.py
class MobileNet(ImageNet):
def __init__(self, in_channels = 3, num_classes = 1000):
super().__init__()
self.convs = nn.Sequential(
nn.Conv2d(in_channels, 32, kernel_size= 3, padding= 1, stride = 1 ),
nn.BatchNorm2d(32),
nn.ReLU(inplace = True),
DepthwiseSeparableConv(32, 64),
DepthwiseSeparableConv(64, 128, stride = 2),
DepthwiseSeparableConv(128, 128),
DepthwiseSeparableConv(128, 256),
DepthwiseSeparableConv(256, 256),
DepthwiseSeparableConv(256, 512, stride = 2),
DepthwiseSeparableConv(512, 512),
DepthwiseSeparableConv(512, 512),
DepthwiseSeparableConv(512, 512),
DepthwiseSeparableConv(512, 512),
DepthwiseSeparableConv(512, 512),
DepthwiseSeparableConv(512, 1024, stride = 1),
DepthwiseSeparableConv(1024, 1024, stride= 2),
nn.AdaptiveAvgPool2d(1)
)
self.fc = nn.Linear(1024, num_classes)
def forward(self, x):
x = self.convs(x)
x = x.view(-1, 1024)
x = self.fc(x)
x = F.log_softmax(x, dim = 1)
return x
So I found a model from https://github.com/jmjeon94/MobileNet-Pytorch, and it worked. After hours I still can't find out why this happened as the models are nearly identical, and since the architect of mobilenet is farely light, this shouldn't take much space to run I supposed. Is there any chance that this is because of the python interpreter or there are actually something wrong with my code?

I think it's because of your batch size. Try using smaller batch size like 32,16,8,4,2.

I delete the line nn.Conv2d(in_channels, 32, kernel_size= 3, padding= 1, stride = 1 ) and rewrite the same and the code ran. Still don't know why but it seem to be the interpreter or the text editor which cause the error. Thank you for attending.
And special thanks to Mr. #Anmol Narang for your efford, I'm very appreciated.

PyTorch NotImplementedError

i have been working on a code to train and test the images dataset. But i am getting this error at every instance of output = model(images)
class ConvNeuralNet(nn.Module):
# Determine what layers and their order in CNN object
def __init__(self, num_classes):
super(ConvNeuralNet, self).__init__()
self.conv_layer1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
self.conv_layer2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)
self.max_pool1 = nn.MaxPool2d(kernel_size = 2, stride = 2)
self.conv_layer3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
self.conv_layer4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3)
self.max_pool2 = nn.MaxPool2d(kernel_size = 2, stride = 2)
self.fc1 = nn.Linear(1600, 128)
self.relu1 = nn.ReLU()
self.fc2 = nn.Linear(128, num_classes)
for i, (images, labels) in enumerate(train_loader):
# Move tensors to the configured device
device = torch.device('cpu')
images = images.to(device)
labels = labels.to(device)
# Forward pass
outputs = model(images)
loss = criterion(outputs, labels)
# Backward and optimize
optimizer.zero_grad()
loss.backward()
optimizer.step()
print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _forward_unimplemented(self, *input)
199 registered hooks while the latter silently ignores them.
200 """
--> 201 raise NotImplementedError
202
203
NotImplementedError:
I have checked that there is no indentation error so i don't understand what's wrong here.

When you subclass nn.Module, you need to implement a forward() method.
Here's an update to your ConvNeuralNet class:
class ConvNeuralNet(nn.Module):
# Determine what layers and their order in CNN object
def __init__(self, num_classes):
super(ConvNeuralNet, self).__init__()
self.conv_layer1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
self.conv_layer2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)
self.max_pool1 = nn.MaxPool2d(kernel_size = 2, stride = 2)
self.conv_layer3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
self.conv_layer4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3)
self.max_pool2 = nn.MaxPool2d(kernel_size = 2, stride = 2)
self.fc1 = nn.Linear(1600, 128)
self.relu1 = nn.ReLU()
self.fc2 = nn.Linear(128, num_classes)
## UPDATE: Implement forward() method
def forward(self, x):
# First three layers (from above)
x = self.conv_layer1(x)
x = self.conv_layer2(x)
x = self.max_pool1(x)
# Next three layers
x = self.conv_layer3(x)
x = self.conv_layer4(x)
x = self.max_pool2(x)
# Final three layers
x = self.fc1(x)
x = self.relu1(x)
x = self.fc2(x)
return x
Notice how x (the input data) moves through each layer you defined and is eventually returned.
From the documentation for nn.Module:
forward(*input)
Defines the computation performed at every call.
Should be overridden by all subclasses.

How to get confidence score from a trained pytorch model

I have a trained PyTorch model and I want to get the confidence score of predictions in range (0-100) or (0-1). The code below is giving me a score but its range is undefined. I want the score in a defined range of (0-1) or (0-100). Any idea how to get this?
conf, classes = torch.max(output, 1)
My code:
model = torch.load(r'best.pt')
model.eval()
def preprocess(imgs):
im = torch.from_numpy(imgs)
im = im.float() # uint8 to fp16/32
im /= 255.0
return im
img_path = cv2.imread("/content/634282.jpg",0)
cropped = cv2.resize(img_path,(28,28))
imgs = preprocess(np.array([[cropped]]))
def predict_allCharacters(imgs):
output = model(imgs)
conf, classes = torch.max(output, 1)
class_names = '0123456789'
return conf, class_names[classes.item()]
Model definition:
class CNN(nn.Module):
def __init__(self):
super(CNN, self).__init__()
self.conv1 = nn.Sequential(
nn.Conv2d(
in_channels=1,
out_channels=16,
kernel_size=5,
stride=1,
padding=2,
),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2),
)
self.conv2 = nn.Sequential(
nn.Conv2d(16, 32, 5, 1, 2),
nn.ReLU(),
nn.MaxPool2d(2),
)
# fully connected layer, output 10 classes
self.out = nn.Linear(32 * 7 * 7, 37)
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
# flatten the output of conv2 to (batch_size, 32 * 7 * 7)
x = x.view(x.size(0), -1)
output = self.out(x)
return output # return x for visualization

In your case, output represents the logits. One way of getting a probability out of them is to use the Softmax function. As it seems that output contains the outputs from a batch, not a single sample, you can do something like this:
probs = torch.nn.functional.softmax(output, dim=1)
Then, in probs, each row would have the probability (i.e., in range [0, 1], sum=1) of each class for a given sample.
So, your predict_allCharacters could be modified to:
def predict_allCharacters(imgs):
output = model(imgs)
probs = torch.nn.functional.softmax(output, dim=1)
conf, classes = torch.max(probs, 1)
class_names = '0123456789'
return conf, class_names[classes.item()]

DCGAN debugging. Getting just garbage

Introduction:
I am trying to get a CDCGAN (Conditional Deep Convolutional Generative Adversarial Network) to work on the MNIST dataset which should be fairly easy considering that the library (PyTorch) I am using has a tutorial on its website.
But I can't seem to get It working it just produces garbage or the model collapses or both.
What I tried:
making the model Conditional semi-supervised learning
using batch norm
using dropout on each layer besides the input/output layer on the generator and discriminator
label smoothing to combat overconfidence
adding noise to the images (I guess you call this instance noise) to get a better data distribution
use leaky relu to avoid vanishing gradients
using a replay buffer to combat forgetting of learned stuff and overfitting
playing with hyperparameters
comparing it to the model from PyTorch tutorial
basically what I did besides some things like Embedding layer ect.
Images my Model generated:
Hyperparameters:
batch_size=50, learning_rate_discrimiantor=0.0001, learning_rate_generator=0.0003, shuffle=True, ndf=64, ngf=64, droupout=0.5
batch_size=50, learning_rate_discriminator=0.0003, learning_rate_generator=0.0003, shuffle=True, ndf=64, ngf=64, dropout=0
Images Pytorch tutorial Model generated:
Code for the pytorch tutorial dcgan model
As comparison here are the images from the DCGAN from the pytorch turoial:
My Code:
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms, datasets
import torch.nn.functional as F
from torch import optim as optim
from torch.utils.tensorboard import SummaryWriter
import numpy as np
import os
import time
class Discriminator(torch.nn.Module):
def __init__(self, ndf=16, dropout_value=0.5): # ndf feature map discriminator
super().__init__()
self.ndf = ndf
self.droupout_value = dropout_value
self.condi = nn.Sequential(
nn.Linear(in_features=10, out_features=64 * 64)
)
self.hidden0 = nn.Sequential(
nn.Conv2d(in_channels=2, out_channels=self.ndf, kernel_size=4, stride=2, padding=1, bias=False),
nn.LeakyReLU(0.2),
)
self.hidden1 = nn.Sequential(
nn.Conv2d(in_channels=self.ndf, out_channels=self.ndf * 2, kernel_size=4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(self.ndf * 2),
nn.LeakyReLU(0.2),
nn.Dropout(self.droupout_value)
)
self.hidden2 = nn.Sequential(
nn.Conv2d(in_channels=self.ndf * 2, out_channels=self.ndf * 4, kernel_size=4, stride=2, padding=1, bias=False),
#nn.BatchNorm2d(self.ndf * 4),
nn.LeakyReLU(0.2),
nn.Dropout(self.droupout_value)
)
self.hidden3 = nn.Sequential(
nn.Conv2d(in_channels=self.ndf * 4, out_channels=self.ndf * 8, kernel_size=4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(self.ndf * 8),
nn.LeakyReLU(0.2),
nn.Dropout(self.droupout_value)
)
self.out = nn.Sequential(
nn.Conv2d(in_channels=self.ndf * 8, out_channels=1, kernel_size=4, stride=1, padding=0, bias=False),
torch.nn.Sigmoid()
)
def forward(self, x, y):
y = self.condi(y.view(-1, 10))
y = y.view(-1, 1, 64, 64)
x = torch.cat((x, y), dim=1)
x = self.hidden0(x)
x = self.hidden1(x)
x = self.hidden2(x)
x = self.hidden3(x)
x = self.out(x)
return x
class Generator(torch.nn.Module):
def __init__(self, n_features=100, ngf=16, c_channels=1, dropout_value=0.5): # ngf feature map of generator
super().__init__()
self.ngf = ngf
self.n_features = n_features
self.c_channels = c_channels
self.droupout_value = dropout_value
self.hidden0 = nn.Sequential(
nn.ConvTranspose2d(in_channels=self.n_features + 10, out_channels=self.ngf * 8,
kernel_size=4, stride=1, padding=0, bias=False),
nn.BatchNorm2d(self.ngf * 8),
nn.LeakyReLU(0.2)
)
self.hidden1 = nn.Sequential(
nn.ConvTranspose2d(in_channels=self.ngf * 8, out_channels=self.ngf * 4,
kernel_size=4, stride=2, padding=1, bias=False),
#nn.BatchNorm2d(self.ngf * 4),
nn.LeakyReLU(0.2),
nn.Dropout(self.droupout_value)
)
self.hidden2 = nn.Sequential(
nn.ConvTranspose2d(in_channels=self.ngf * 4, out_channels=self.ngf * 2,
kernel_size=4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(self.ngf * 2),
nn.LeakyReLU(0.2),
nn.Dropout(self.droupout_value)
)
self.hidden3 = nn.Sequential(
nn.ConvTranspose2d(in_channels=self.ngf * 2, out_channels=self.ngf,
kernel_size=4, stride=2, padding=1, bias=False),
nn.BatchNorm2d(self.ngf),
nn.LeakyReLU(0.2),
nn.Dropout(self.droupout_value)
)
self.out = nn.Sequential(
# "out_channels=1" because gray scale
nn.ConvTranspose2d(in_channels=self.ngf, out_channels=1, kernel_size=4,
stride=2, padding=1, bias=False),
nn.Tanh()
)
def forward(self, x, y):
x_cond = torch.cat((x, y), dim=1) # Combine flatten image with conditional input (class labels)
x = self.hidden0(x_cond) # Image goes into a "ConvTranspose2d" layer
x = self.hidden1(x)
x = self.hidden2(x)
x = self.hidden3(x)
x = self.out(x)
return x
class Logger:
def __init__(self, model_name, model1, model2, m1_optimizer, m2_optimizer, model_parameter, train_loader):
self.out_dir = "data"
self.model_name = model_name
self.train_loader = train_loader
self.model1 = model1
self.model2 = model2
self.model_parameter = model_parameter
self.m1_optimizer = m1_optimizer
self.m2_optimizer = m2_optimizer
# Exclude Epochs of the model name. This make sense e.g. when we stop a training progress and continue later on.
self.experiment_name = '_'.join("{!s}={!r}".format(k, v) for (k, v) in model_parameter.items())\
.replace("Epochs" + "=" + str(model_parameter["Epochs"]), "")
self.d_error = 0
self.g_error = 0
self.tb = SummaryWriter(log_dir=str(self.out_dir + "/log/" + self.model_name + "/runs/" + self.experiment_name))
self.path_image = os.path.join(os.getcwd(), f'{self.out_dir}/log/{self.model_name}/images/{self.experiment_name}')
self.path_model = os.path.join(os.getcwd(), f'{self.out_dir}/log/{self.model_name}/model/{self.experiment_name}')
try:
os.makedirs(self.path_image)
except Exception as e:
print("WARNING: ", str(e))
try:
os.makedirs(self.path_model)
except Exception as e:
print("WARNING: ", str(e))
def log_graph(self, model1_input, model2_input, model1_label, model2_label):
self.tb.add_graph(self.model1, input_to_model=(model1_input, model1_label))
self.tb.add_graph(self.model2, input_to_model=(model2_input, model2_label))
def log(self, num_epoch, d_error, g_error):
self.d_error = d_error
self.g_error = g_error
self.tb.add_scalar("Discriminator Train Error", self.d_error, num_epoch)
self.tb.add_scalar("Generator Train Error", self.g_error, num_epoch)
def log_image(self, images, epoch, batch_num):
grid = torchvision.utils.make_grid(images)
torchvision.utils.save_image(grid, f'{self.path_image}\\Epoch_{epoch}_batch_{batch_num}.png')
self.tb.add_image("Generator Image", grid)
def log_histogramm(self):
for name, param in self.model2.named_parameters():
self.tb.add_histogram(name, param, self.model_parameter["Epochs"])
self.tb.add_histogram(f'gen_{name}.grad', param.grad, self.model_parameter["Epochs"])
for name, param in self.model1.named_parameters():
self.tb.add_histogram(name, param, self.model_parameter["Epochs"])
self.tb.add_histogram(f'dis_{name}.grad', param.grad, self.model_parameter["Epochs"])
def log_model(self, num_epoch):
torch.save({
"epoch": num_epoch,
"model_generator_state_dict": self.model1.state_dict(),
"model_discriminator_state_dict": self.model2.state_dict(),
"optimizer_generator_state_dict": self.m1_optimizer.state_dict(),
"optimizer_discriminator_state_dict": self.m2_optimizer.state_dict(),
}, str(self.path_model + f'\\{time.time()}_epoch{num_epoch}.pth'))
def close(self, logger, images, num_epoch, d_error, g_error):
logger.log_model(num_epoch)
logger.log_histogramm()
logger.log(num_epoch, d_error, g_error)
self.tb.close()
def display_stats(self, epoch, batch_num, dis_error, gen_error):
print(f'Epoch: [{epoch}/{self.model_parameter["Epochs"]}] '
f'Batch: [{batch_num}/{len(self.train_loader)}] '
f'Loss_D: {dis_error.data.cpu()}, '
f'Loss_G: {gen_error.data.cpu()}')
def get_MNIST_dataset(num_workers_loader, model_parameter, out_dir="data"):
compose = transforms.Compose([
transforms.Resize((64, 64)),
transforms.CenterCrop((64, 64)),
transforms.ToTensor(),
torchvision.transforms.Normalize(mean=[0.5], std=[0.5])
])
dataset = datasets.MNIST(
root=out_dir,
train=True,
download=True,
transform=compose
)
train_loader = torch.utils.data.DataLoader(dataset,
batch_size=model_parameter["batch_size"],
num_workers=num_workers_loader,
shuffle=model_parameter["shuffle"])
return dataset, train_loader
def train_discriminator(p_optimizer, p_noise, p_images, p_fake_target, p_real_target, p_images_labels, p_fake_labels, device):
p_optimizer.zero_grad()
# 1.1 Train on real data
pred_dis_real = discriminator(p_images, p_images_labels)
error_real = loss(pred_dis_real, p_real_target)
error_real.backward()
# 1.2 Train on fake data
fake_data = generator(p_noise, p_fake_labels).detach()
fake_data = add_noise_to_image(fake_data, device)
pred_dis_fake = discriminator(fake_data, p_fake_labels)
error_fake = loss(pred_dis_fake, p_fake_target)
error_fake.backward()
p_optimizer.step()
return error_fake + error_real
def train_generator(p_optimizer, p_noise, p_real_target, p_fake_labels, device):
p_optimizer.zero_grad()
fake_images = generator(p_noise, p_fake_labels)
fake_images = add_noise_to_image(fake_images, device)
pred_dis_fake = discriminator(fake_images, p_fake_labels)
error_fake = loss(pred_dis_fake, p_real_target) # because
"""
We use "p_real_target" instead of "p_fake_target" because we want to
maximize that the discriminator is wrong.
"""
error_fake.backward()
p_optimizer.step()
return fake_images, pred_dis_fake, error_fake
# TODO change to a Truncated normal distribution
def get_noise(batch_size, n_features=100):
return torch.FloatTensor(batch_size, n_features, 1, 1).uniform_(-1, 1)
# We flip label of real and fate data. Better gradient flow I have told
def get_real_data_target(batch_size):
return torch.FloatTensor(batch_size, 1, 1, 1).uniform_(0.0, 0.2)
def get_fake_data_target(batch_size):
return torch.FloatTensor(batch_size, 1, 1, 1).uniform_(0.8, 1.1)
def image_to_vector(images):
return torch.flatten(images, start_dim=1, end_dim=-1)
def vector_to_image(images):
return images.view(images.size(0), 1, 28, 28)
def get_rand_labels(batch_size):
return torch.randint(low=0, high=9, size=(batch_size,))
def load_model(model_load_path):
if model_load_path:
checkpoint = torch.load(model_load_path)
discriminator.load_state_dict(checkpoint["model_discriminator_state_dict"])
generator.load_state_dict(checkpoint["model_generator_state_dict"])
dis_opti.load_state_dict(checkpoint["optimizer_discriminator_state_dict"])
gen_opti.load_state_dict(checkpoint["optimizer_generator_state_dict"])
return checkpoint["epoch"]
else:
return 0
def init_model_optimizer(model_parameter, device):
# Initialize the Models
discriminator = Discriminator(ndf=model_parameter["ndf"], dropout_value=model_parameter["dropout"]).to(device)
generator = Generator(ngf=model_parameter["ngf"], dropout_value=model_parameter["dropout"]).to(device)
# train
dis_opti = optim.Adam(discriminator.parameters(), lr=model_parameter["learning_rate_dis"], betas=(0.5, 0.999))
gen_opti = optim.Adam(generator.parameters(), lr=model_parameter["learning_rate_gen"], betas=(0.5, 0.999))
return discriminator, generator, dis_opti, gen_opti
def get_hot_vector_encode(labels, device):
return torch.eye(10)[labels].view(-1, 10, 1, 1).to(device)
def add_noise_to_image(images, device, level_of_noise=0.1):
return images[0].to(device) + (level_of_noise) * torch.randn(images.shape).to(device)
if __name__ == "__main__":
# Hyperparameter
model_parameter = {
"batch_size": 500,
"learning_rate_dis": 0.0002,
"learning_rate_gen": 0.0002,
"shuffle": False,
"Epochs": 10,
"ndf": 64,
"ngf": 64,
"dropout": 0.5
}
# Parameter
r_frequent = 10 # How many samples we save for replay per batch (batch_size / r_frequent).
model_name = "CDCGAN" # The name of you model e.g. "Gan"
num_workers_loader = 1 # How many workers should load the data
sample_save_size = 16 # How many numbers your saved imaged should show
device = "cuda" # Which device should be used to train the neural network
model_load_path = "" # If set load model instead of training from new
num_epoch_log = 1 # How frequent you want to log/
torch.manual_seed(43) # Sets a seed for torch for reproducibility
dataset_train, train_loader = get_MNIST_dataset(num_workers_loader, model_parameter) # Get dataset
# Initialize the Models and optimizer
discriminator, generator, dis_opti, gen_opti = init_model_optimizer(model_parameter, device) # Init model/Optimizer
start_epoch = load_model(model_load_path) # when we want to load a model
# Init Logger
logger = Logger(model_name, generator, discriminator, gen_opti, dis_opti, model_parameter, train_loader)
loss = nn.BCELoss()
images, labels = next(iter(train_loader)) # For logging
# For testing
# pred = generator(get_noise(model_parameter["batch_size"]).to(device), get_hot_vector_encode(get_rand_labels(model_parameter["batch_size"]), device))
# dis = discriminator(images.to(device), get_hot_vector_encode(labels, device))
logger.log_graph(get_noise(model_parameter["batch_size"]).to(device), images.to(device),
get_hot_vector_encode(get_rand_labels(model_parameter["batch_size"]), device),
get_hot_vector_encode(labels, device))
# Array to store
exp_replay = torch.tensor([]).to(device)
for num_epoch in range(start_epoch, model_parameter["Epochs"]):
for batch_num, data_loader in enumerate(train_loader):
images, labels = data_loader
images = add_noise_to_image(images, device) # Add noise to the images
# 1. Train Discriminator
dis_error = train_discriminator(
dis_opti,
get_noise(model_parameter["batch_size"]).to(device),
images.to(device),
get_fake_data_target(model_parameter["batch_size"]).to(device),
get_real_data_target(model_parameter["batch_size"]).to(device),
get_hot_vector_encode(labels, device),
get_hot_vector_encode(
get_rand_labels(model_parameter["batch_size"]), device),
device
)
# 2. Train Generator
fake_image, pred_dis_fake, gen_error = train_generator(
gen_opti,
get_noise(model_parameter["batch_size"]).to(device),
get_real_data_target(model_parameter["batch_size"]).to(device),
get_hot_vector_encode(
get_rand_labels(model_parameter["batch_size"]),
device),
device
)
# Store a random point for experience replay
perm = torch.randperm(fake_image.size(0))
r_idx = perm[:max(1, int(model_parameter["batch_size"] / r_frequent))]
r_samples = add_noise_to_image(fake_image[r_idx], device)
exp_replay = torch.cat((exp_replay, r_samples), 0).detach()
if exp_replay.size(0) >= model_parameter["batch_size"]:
# Train on experienced data
dis_opti.zero_grad()
r_label = get_hot_vector_encode(torch.zeros(exp_replay.size(0)).numpy(), device)
pred_dis_real = discriminator(exp_replay, r_label)
error_real = loss(pred_dis_real, get_fake_data_target(exp_replay.size(0)).to(device))
error_real.backward()
dis_opti.step()
print(f'Epoch: [{num_epoch}/{model_parameter["Epochs"]}] '
f'Batch: Replay/Experience batch '
f'Loss_D: {error_real.data.cpu()}, '
)
exp_replay = torch.tensor([]).to(device)
logger.display_stats(epoch=num_epoch, batch_num=batch_num, dis_error=dis_error, gen_error=gen_error)
if batch_num % 100 == 0:
logger.log_image(fake_image[:sample_save_size], num_epoch, batch_num)
logger.log(num_epoch, dis_error, gen_error)
if num_epoch % num_epoch_log == 0:
logger.log_model(num_epoch)
logger.log_histogramm()
logger.close(logger, fake_image[:sample_save_size], num_epoch, dis_error, gen_error)
First link to my Code (Pastebin)
Second link to my Code (0bin)
Conclusion:
Since I implemented all these things (e.g. label smoothing) which are considered beneficial to a GAN/DCGAN.
And my Model still performs worse than the Tutorial DCGAN from PyTorch I think I might have a bug in my code but I can't seem to find it.
Reproducibility:
You should be able to just copy the code and run it if you have the libraries that I imported installed to look for yourself if you can find anything.
I appreciate any feedback.

So I solved this issue a while ago, but forgot to post an answer on stack overflow. So I will simply post my code here which should work probably pretty good.
Some disclaimer:
I am not quite sure if it works since I did this a year ago
its for 128x128px Images MNIST
It's not a vanilla GAN I used various optimization techniques
If you want to use it you need to change various details, such as the training dataset
Resources:
Multi-Scale Gradients
Instance Noise
Various tricks I used
More tricks
``
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import pytorch_lightning as pl
from pytorch_lightning import loggers
from numpy.random import choice
import os
from pathlib import Path
import shutil
from collections import OrderedDict
# custom weights initialization called on netG and netD
def weights_init(m):
classname = m.__class__.__name__
if classname.find('Conv') != -1:
nn.init.normal_(m.weight.data, 0.0, 0.02)
elif classname.find('BatchNorm') != -1:
nn.init.normal_(m.weight.data, 1.0, 0.02)
nn.init.constant_(m.bias.data, 0)
# randomly flip some labels
def noisy_labels(y, p_flip=0.05): # # flip labels with 5% probability
# determine the number of labels to flip
n_select = int(p_flip * y.shape[0])
# choose labels to flip
flip_ix = choice([i for i in range(y.shape[0])], size=n_select)
# invert the labels in place
y[flip_ix] = 1 - y[flip_ix]
return y
class AddGaussianNoise(object):
def __init__(self, mean=0.0, std=0.1):
self.std = std
self.mean = mean
def __call__(self, tensor):
tensor = tensor.cuda()
return tensor + (torch.randn(tensor.size()) * self.std + self.mean).cuda()
def __repr__(self):
return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)
def resize2d(img, size):
return (F.adaptive_avg_pool2d(img, size).data).cuda()
def get_valid_labels(img):
return ((0.8 - 1.1) * torch.rand(img.shape[0], 1, 1, 1) + 1.1).cuda() # soft labels
def get_unvalid_labels(img):
return (noisy_labels((0.0 - 0.3) * torch.rand(img.shape[0], 1, 1, 1) + 0.3)).cuda() # soft labels
class Generator(pl.LightningModule):
def __init__(self, ngf, nc, latent_dim):
super(Generator, self).__init__()
self.ngf = ngf
self.latent_dim = latent_dim
self.nc = nc
self.fc0 = nn.Sequential(
# input is Z, going into a convolution
nn.utils.spectral_norm(nn.ConvTranspose2d(latent_dim, ngf * 16, 4, 1, 0, bias=False)),
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ngf * 16)
)
self.fc1 = nn.Sequential(
# state size. (ngf*8) x 4 x 4
nn.utils.spectral_norm(nn.ConvTranspose2d(ngf * 16, ngf * 8, 4, 2, 1, bias=False)),
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ngf * 8)
)
self.fc2 = nn.Sequential(
# state size. (ngf*4) x 8 x 8
nn.utils.spectral_norm(nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False)),
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ngf * 4)
)
self.fc3 = nn.Sequential(
# state size. (ngf*2) x 16 x 16
nn.utils.spectral_norm(nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False)),
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ngf * 2)
)
self.fc4 = nn.Sequential(
# state size. (ngf) x 32 x 32
nn.utils.spectral_norm(nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False)),
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ngf)
)
self.fc5 = nn.Sequential(
# state size. (nc) x 64 x 64
nn.utils.spectral_norm(nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False)),
nn.Tanh()
)
# state size. (nc) x 128 x 128
# For Multi-Scale Gradient
# Converting the intermediate layers into images
self.fc0_r = nn.Conv2d(ngf * 16, self.nc, 1)
self.fc1_r = nn.Conv2d(ngf * 8, self.nc, 1)
self.fc2_r = nn.Conv2d(ngf * 4, self.nc, 1)
self.fc3_r = nn.Conv2d(ngf * 2, self.nc, 1)
self.fc4_r = nn.Conv2d(ngf, self.nc, 1)
def forward(self, input):
x_0 = self.fc0(input)
x_1 = self.fc1(x_0)
x_2 = self.fc2(x_1)
x_3 = self.fc3(x_2)
x_4 = self.fc4(x_3)
x_5 = self.fc5(x_4)
# For Multi-Scale Gradient
# Converting the intermediate layers into images
x_0_r = self.fc0_r(x_0)
x_1_r = self.fc1_r(x_1)
x_2_r = self.fc2_r(x_2)
x_3_r = self.fc3_r(x_3)
x_4_r = self.fc4_r(x_4)
return x_5, x_0_r, x_1_r, x_2_r, x_3_r, x_4_r
class Discriminator(pl.LightningModule):
def __init__(self, ndf, nc):
super(Discriminator, self).__init__()
self.nc = nc
self.ndf = ndf
self.fc0 = nn.Sequential(
# input is (nc) x 128 x 128
nn.utils.spectral_norm(nn.Conv2d(nc, ndf, 4, 2, 1, bias=False)),
nn.LeakyReLU(0.2, inplace=True)
)
self.fc1 = nn.Sequential(
# state size. (ndf) x 64 x 64
nn.utils.spectral_norm(nn.Conv2d(ndf + nc, ndf * 2, 4, 2, 1, bias=False)),
# "+ nc" because of multi scale gradient
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ndf * 2)
)
self.fc2 = nn.Sequential(
# state size. (ndf*2) x 32 x 32
nn.utils.spectral_norm(nn.Conv2d(ndf * 2 + nc, ndf * 4, 4, 2, 1, bias=False)),
# "+ nc" because of multi scale gradient
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ndf * 4)
)
self.fc3 = nn.Sequential(
# state size. (ndf*4) x 16 x 16e
nn.utils.spectral_norm(nn.Conv2d(ndf * 4 + nc, ndf * 8, 4, 2, 1, bias=False)),
# "+ nc" because of multi scale gradient
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ndf * 8),
)
self.fc4 = nn.Sequential(
# state size. (ndf*8) x 8 x 8
nn.utils.spectral_norm(nn.Conv2d(ndf * 8 + nc, ndf * 16, 4, 2, 1, bias=False)),
nn.LeakyReLU(0.2, inplace=True),
nn.BatchNorm2d(ndf * 16)
)
self.fc5 = nn.Sequential(
# state size. (ndf*8) x 4 x 4
nn.utils.spectral_norm(nn.Conv2d(ndf * 16 + nc, 1, 4, 1, 0, bias=False)),
nn.Sigmoid()
)
# state size. 1 x 1 x 1
def forward(self, input, detach_or_not):
# When we train i ncombination with generator we use multi scale gradient.
x, x_0_r, x_1_r, x_2_r, x_3_r, x_4_r = input
if detach_or_not:
x = x.detach()
x_0 = self.fc0(x)
x_0 = torch.cat((x_0, x_4_r), dim=1) # Concat Multi-Scale Gradient
x_1 = self.fc1(x_0)
x_1 = torch.cat((x_1, x_3_r), dim=1) # Concat Multi-Scale Gradient
x_2 = self.fc2(x_1)
x_2 = torch.cat((x_2, x_2_r), dim=1) # Concat Multi-Scale Gradient
x_3 = self.fc3(x_2)
x_3 = torch.cat((x_3, x_1_r), dim=1) # Concat Multi-Scale Gradient
x_4 = self.fc4(x_3)
x_4 = torch.cat((x_4, x_0_r), dim=1) # Concat Multi-Scale Gradient
x_5 = self.fc5(x_4)
return x_5
class DCGAN(pl.LightningModule):
def __init__(self, hparams, checkpoint_folder, experiment_name):
super().__init__()
self.hparams = hparams
self.checkpoint_folder = checkpoint_folder
self.experiment_name = experiment_name
# networks
self.generator = Generator(ngf=hparams.ngf, nc=hparams.nc, latent_dim=hparams.latent_dim)
self.discriminator = Discriminator(ndf=hparams.ndf, nc=hparams.nc)
self.generator.apply(weights_init)
self.discriminator.apply(weights_init)
# cache for generated images
self.generated_imgs = None
self.last_imgs = None
# For experience replay
self.exp_replay_dis = torch.tensor([])
def forward(self, z):
return self.generator(z)
def adversarial_loss(self, y_hat, y):
return F.binary_cross_entropy(y_hat, y)
def training_step(self, batch, batch_nb, optimizer_idx):
# For adding Instance noise for more visit: https://www.inference.vc/instance-noise-a-trick-for-stabilising-gan-training/
std_gaussian = max(0, self.hparams.level_of_noise - (
(self.hparams.level_of_noise * 2) * (self.current_epoch / self.hparams.epochs)))
AddGaussianNoiseInst = AddGaussianNoise(std=std_gaussian) # the noise decays over time
imgs, _ = batch
imgs = AddGaussianNoiseInst(imgs) # Adding instance noise to real images
self.last_imgs = imgs
# train generator
if optimizer_idx == 0:
# sample noise
z = torch.randn(imgs.shape[0], self.hparams.latent_dim, 1, 1).cuda()
# generate images
self.generated_imgs = self(z)
# ground truth result (ie: all fake)
g_loss = self.adversarial_loss(self.discriminator(self.generated_imgs, False), get_valid_labels(self.generated_imgs[0])) # adversarial loss is binary cross-entropy; [0] is the image of the last layer
tqdm_dict = {'g_loss': g_loss}
log = {'g_loss': g_loss, "std_gaussian": std_gaussian}
output = OrderedDict({
'loss': g_loss,
'progress_bar': tqdm_dict,
'log': log
})
return output
# train discriminator
if optimizer_idx == 1:
# Measure discriminator's ability to classify real from generated samples
# how well can it label as real?
real_loss = self.adversarial_loss(
self.discriminator([imgs, resize2d(imgs, 4), resize2d(imgs, 8), resize2d(imgs, 16), resize2d(imgs, 32), resize2d(imgs, 64)],
False), get_valid_labels(imgs))
fake_loss = self.adversarial_loss(self.discriminator(self.generated_imgs, True), get_unvalid_labels(
self.generated_imgs[0])) # how well can it label as fake?; [0] is the image of the last layer
# discriminator loss is the average of these
d_loss = (real_loss + fake_loss) / 2
tqdm_dict = {'d_loss': d_loss}
log = {'d_loss': d_loss, "std_gaussian": std_gaussian}
output = OrderedDict({
'loss': d_loss,
'progress_bar': tqdm_dict,
'log': log
})
return output
def configure_optimizers(self):
lr_gen = self.hparams.lr_gen
lr_dis = self.hparams.lr_dis
b1 = self.hparams.b1
b2 = self.hparams.b2
opt_g = torch.optim.Adam(self.generator.parameters(), lr=lr_gen, betas=(b1, b2))
opt_d = torch.optim.Adam(self.discriminator.parameters(), lr=lr_dis, betas=(b1, b2))
return [opt_g, opt_d], []
def backward(self, trainer, loss, optimizer, optimizer_idx: int) -> None:
loss.backward(retain_graph=True)
def train_dataloader(self):
# transform = transforms.Compose([transforms.Resize((self.hparams.image_size, self.hparams.image_size)),
# transforms.ToTensor(),
# transforms.Normalize([0.5], [0.5])])
# dataset = torchvision.datasets.MNIST(os.getcwd(), train=False, download=True, transform=transform)
# return DataLoader(dataset, batch_size=self.hparams.batch_size)
# transform = transforms.Compose([transforms.Resize((self.hparams.image_size, self.hparams.image_size)),
# transforms.ToTensor(),
# transforms.Normalize([0.5], [0.5])
# ])
# train_dataset = torchvision.datasets.ImageFolder(
# root="./drive/My Drive/datasets/flower_dataset/",
# # root="./drive/My Drive/datasets/ghibli_dataset_small_overfit/",
# transform=transform
# )
# return DataLoader(train_dataset, num_workers=self.hparams.num_workers, shuffle=True,
# batch_size=self.hparams.batch_size)
transform = transforms.Compose([transforms.Resize((self.hparams.image_size, self.hparams.image_size)),
transforms.ToTensor(),
transforms.Normalize([0.5], [0.5])
])
train_dataset = torchvision.datasets.ImageFolder(
root="ghibli_dataset_small_overfit/",
transform=transform
)
return DataLoader(train_dataset, num_workers=self.hparams.num_workers, shuffle=True,
batch_size=self.hparams.batch_size)
def on_epoch_end(self):
z = torch.randn(4, self.hparams.latent_dim, 1, 1).cuda()
# match gpu device (or keep as cpu)
if self.on_gpu:
z = z.cuda(self.last_imgs.device.index)
# log sampled images
sample_imgs = self.generator(z)[0]
torchvision.utils.save_image(sample_imgs, f'generated_images_epoch{self.current_epoch}.png')
# save model
if self.current_epoch % self.hparams.save_model_every_epoch == 0:
trainer.save_checkpoint(
self.checkpoint_folder + "/" + self.experiment_name + "_epoch_" + str(self.current_epoch) + ".ckpt")
from argparse import Namespace
args = {
'batch_size': 128, # batch size
'lr_gen': 0.0003, # TTUR;learnin rate of both networks; tested value: 0.0002
'lr_dis': 0.0003, # TTUR;learnin rate of both networks; tested value: 0.0002
'b1': 0.5, # Momentum for adam; tested value(dcgan paper): 0.5
'b2': 0.999, # Momentum for adam; tested value(dcgan paper): 0.999
'latent_dim': 256, # tested value which worked(in V4_1): 100
'nc': 3, # number of color channels
'ndf': 8, # number of discriminator features
'ngf': 8, # number of generator features
'epochs': 4, # the maxima lamount of epochs the algorith should run
'save_model_every_epoch': 1, # how often we save our model
'image_size': 128, # size of the image
'num_workers': 3,
'level_of_noise': 0.1, # how much instance noise we introduce(std; tested value: 0.15 and 0.1
'experience_save_per_batch': 1, # this value should be very low; tested value which works: 1
'experience_batch_size': 50 # this value shouldnt be too high; tested value which works: 50
}
hparams = Namespace(**args)
# Parameters
experiment_name = "DCGAN_6_2_MNIST_128px"
dataset_name = "mnist"
checkpoint_folder = "DCGAN/"
tags = ["DCGAN", "128x128"]
dirpath = Path(checkpoint_folder)
# defining net
net = DCGAN(hparams, checkpoint_folder, experiment_name)
torch.autograd.set_detect_anomaly(True)
trainer = pl.Trainer( # resume_from_checkpoint="DCGAN_V4_2_GHIBLI_epoch_999.ckpt",
max_epochs=args["epochs"],
gpus=1
)
trainer.fit(net)
``

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

saving model in pytorch with weight decay - python

Related

Pytorch optimizer returns empty

About the memory usage of Mobilenet

PyTorch NotImplementedError

How to get confidence score from a trained pytorch model

DCGAN debugging. Getting just garbage

Categories

Resources