I’m dealing with a strange issue where the gradients after the backward pass have different shapes depending on whether CUDA or the CPU is used. The model is relatively simple:
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(3, 6, 5)
self.pool1 = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(6, 16, 5)
self.pool2 = nn.MaxPool2d(2, 2)
self.fc1 = nn.Linear(16 * 5 * 5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 10)
self.relu1 = nn.ReLU()
self.relu2 = nn.ReLU()
self.relu3 = nn.ReLU()
self.relu4 = nn.ReLU()
def forward(self, x):
x = self.pool1(self.relu1(self.conv1(x)))
x = self.pool2(self.relu2(self.conv2(x)))
x = x.view(-1, 16 * 5 * 5)
x = self.relu3(self.fc1(x))
x = self.relu4(self.fc2(x))
x = self.fc3(x)
return x
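For reference, with a (1, 3, 32, 32) input the two conv/pool stages reduce the feature map to 16 channels of 5x5, which is why the flatten uses 16 * 5 * 5. A quick sanity check (just a sketch, assuming the Net class above):
net = Net()
out = net(torch.randn(1, 3, 32, 32))
print(out.shape)  # torch.Size([1, 10])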
The input tensor has shape (1, 3, 32, 32), and the relevant section of code is as follows, with the method generate_gradients being of particular importance:
class VanillaBackprop():
"""
Produces gradients generated with vanilla back propagation from the image
"""
def __init__(self, model):
self.model = model
self.gradients = None
# Put model in evaluation mode
self.model.eval()
# Hook the first layer to get the gradient
self.hook_layers()
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.model.to(self.device)
def hook_layers(self):
def hook_function(module, grad_in, grad_out):
self.gradients = grad_in[0]
# Register hook to the first layer
try:
first_layer = list(self.model.features._modules.items())[0][1]
except:
first_layer = list(self.model._modules.items())[0][1]
first_layer.register_backward_hook(hook_function)
def generate_gradients(self, input_image, target_class):
# Forward
model_output = self.model(input_image.to(self.device))
# Zero grads
self.model.zero_grad()
# Target for backprop
one_hot_output = torch.FloatTensor(1, model_output.size()[-1]).zero_()
one_hot_output[0][target_class] = 1
# Backward pass
model_output.backward(gradient=one_hot_output.to(self.device))
# Convert Pytorch variable to numpy array
gradients_as_arr = self.gradients.data.cpu().numpy()[0]
return gradients_as_arr
When on CPU, self.gradients has shape (1, 3, 32, 32), while on CUDA it has shape (1, 6, 28, 28). How is that possible, and how do I fix this? Any help is much appreciated.
It looks like the issue stems from the register_backward_hook() function. As pointed out in the PyTorch forums:
You might want to double check the register_backward_hook() doc. But
it is known to be kind of broken at the moment and can have this
behavior.
I would recommend you use autograd.grad() for this though. That will
make it simpler than backward+access to the .grad field.
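For reference, a minimal sketch of the suggested autograd.grad() approach (assuming model, input_image, and target_class are defined as in the question, and that input_image has requires_grad set) could look like this:
import torch
# The input must require gradients for autograd.grad() to differentiate w.r.t. it
input_image.requires_grad_(True)
output = model(input_image)
one_hot = torch.zeros_like(output)
one_hot[0, target_class] = 1
# grads has the same shape as input_image, e.g. (1, 3, 32, 32)
grads, = torch.autograd.grad(outputs=output, inputs=input_image, grad_outputs=one_hot)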
I, however, opted to use register_hook() instead of register_backward_hook() (as opposed to autograd.grad() as suggested), which seems to work as well:
class VanillaBackprop():
"""
Produces gradients generated with vanilla back propagation from the image
"""
def __init__(self, model):
self.model = model
self.gradients = None
# Put model in evaluation mode
self.model.eval()
    # The gradient hook is registered on the input tensor in generate_gradients()
def hook_input(self, input_tensor):
def hook_function(grad_in):
self.gradients = grad_in
input_tensor.register_hook(hook_function)
def generate_gradients(self, input_image, target_class):
# Register input hook
self.hook_input(input_image)
# Forward
model_output = self.model(input_image)
# Zero grads
self.model.zero_grad()
# Target for backprop
device = next(self.model.parameters()).device
one_hot_output = torch.FloatTensor(1, model_output.size()[-1]).zero_()
one_hot_output[0][target_class] = 1
one_hot_output = one_hot_output.to(device)
        # Backward pass
        model_output.backward(gradient=one_hot_output)
        # Convert PyTorch tensor to a numpy array
        # [0] drops the batch dimension, e.g. (1, 3, 32, 32) -> (3, 32, 32)
gradients_as_arr = self.gradients.data.cpu().numpy()[0]
return gradients_as_arr
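For completeness, a quick usage sketch with the setup from the question (the Net model and a (1, 3, 32, 32) input; note that the input tensor must require gradients for register_hook() to fire):
model = Net()
vbp = VanillaBackprop(model)
input_image = torch.randn(1, 3, 32, 32, requires_grad=True)
grads = vbp.generate_gradients(input_image, target_class=3)
print(grads.shape)  # expected: (3, 32, 32)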
Related
I want to add a linear layer after the encoder in a VAE to get a smaller latent space for a group of data, but the loss becomes NaN.
This is the simple linear layer I want to add between the encoder and the decoder of the VAE.
class FC_en(nn.Module):
def __init__(self):
super().__init__()
self.fc1 = nn.Linear(2429*32, 64)
self.BN1 = nn.BatchNorm1d(64)
def forward(self, x):
z_loc = self.BN1(self.fc1(x))
return z_loc
class FC_de(nn.Module):
def __init__(self):
super().__init__()
self.fc1 = nn.Linear(64,2429*32)
self.BN1 = nn.BatchNorm1d(2429*32)
def forward(self, z):
x = self.BN1(self.fc1(z))
return x
The code that follows is the VAE model:
class VAE(nn.Module):
def __init__(self, z_dim=16, hidden_dim=1000, use_cuda=True):
super().__init__()
# create the encoder and decoder networks
self.encoder = Encoder(z_dim, hidden_dim)
self.decoder = Decoder(z_dim, hidden_dim)
self.fc3 = FC_en()
self.fc4 = FC_de()
if use_cuda:
# calling cuda() here will put all the parameters of
# the encoder and decoder networks into gpu memory
self.cuda()
self.use_cuda = use_cuda
self.z_dim = z_dim
# define the model p(x|z)p(z)
def model(self, x):
# register PyTorch module `decoder` with Pyro
pyro.module("decoder", self.decoder)
with pyro.plate("data", x.shape[0]):
# setup hyperparameters for prior p(z)
z_loc = x.new_zeros(torch.Size((x.shape[0], self.z_dim)))
z_scale = x.new_ones(torch.Size((x.shape[0], self.z_dim)))
# sample from prior (value will be sampled by guide when computing the ELBO)
z = pyro.sample("latent", dist.Normal(z_loc, z_scale).to_event(1))
# decode the latent code z
loc_img = self.decoder(z)
loc_img = loc_img.reshape(-1,200*200)
pyro.sample("obs", dist.Bernoulli(loc_img).to_event(1), obs=x.reshape(-1, 200*200))
# define the guide (i.e. variational distribution) q(z|x)
def guide(self, x):
# register PyTorch module `encoder` with Pyro
pyro.module("encoder", self.encoder)
with pyro.plate("data", x.shape[0]):
# use the encoder to get the parameters used to define q(z|x)
z_loc, z_scale = self.encoder(x)
z_sum = torch.cat((z_loc,z_scale),1)
z_sum = z_sum.view(2, 2429*32)
z_sum_z = self.fc3(z_sum)
loc_img = self.fc4(z_sum_z)
loc_img = loc_img.reshape(2429*2, 32)
z_loc = loc_img[:, 0:16]
z_scale = loc_img[:, 16:32]
# sample the latent code z
pyro.sample("latent", dist.Normal(z_loc, z_scale).to_event(1))
Does my idea have any problems? How can I avoid the NaN loss in the VAE model?
I'm trying to modify a pretrained Inception v3 in PyTorch to have multiple outputs (4 outputs, precisely).
I get this error:
Expected 4-dimensional input for 4-dimensional weight [192, 768, 1, 1], but got 2-dimensional input of size [50, 1000] instead
My input shape is torch.Size([50, 3, 299, 299]).
This is the code for my model:
class CNN1(nn.Module):
def __init__(self, pretrained):
super(CNN1, self).__init__()
if pretrained is True:
self.model = models.inception_v3(pretrained=True)
modules = list(self.model.children())[:-1] # delete the last fc layer.
self.features = nn.Sequential(*modules)
self.fc0 = nn.Linear(2048, 10) #digit 0
self.fc1 = nn.Linear(2048, 10) #digit 1
self.fc2 = nn.Linear(2048, 10) #digit 2
self.fc3 = nn.Linear(2048, 10) #digit 3
def forward(self, x):
bs, _, _, _ = x.shape
x = self.features(x)
x = F.adaptive_avg_pool2d(x, 1).reshape(bs, -1)
label0 = self.fc0(x)
label1 = self.fc1(x)
label2= self.fc2(x)
label3= self.fc3(x)
return {'label0': label0, 'label1': label1,'label2':label2, 'label3': label3}
and this is a piece of the training loop:
for batch_idx, sample_batched in enumerate(train_dataloader):
# importing data and moving to GPU
image,label0, label1, label2, label3 = sample_batched['image'].to(device),\
sample_batched['label0'].to(device),\
sample_batched['label1'].to(device),\
sample_batched['label2'].to(device) ,\
sample_batched['label3'].to(device)
# zero the parameter gradients
optimizer.zero_grad()
output=model(image.float())
Does anyone have a suggestion?
One way to remove layers from a PyTorch model is to replace them with an nn.Identity() layer. I think you want to remove the last fully connected layer. If so, check this:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
class CNN1(nn.Module):
def __init__(self, pretrained):
super(CNN1, self).__init__()
if pretrained is True:
self.model = models.inception_v3(pretrained=True)
else:
self.model = models.inception_v3(pretrained=False)
# modules = list(self.model.children())[:-1]
# delete the last fc layer.
self.model.fc = nn.Identity()
# # to freeze training of inception weights
# for param in self.model.parameters():
# param.requires_grad = False
self.fc0 = nn.Linear(2048, 10)
self.fc1 = nn.Linear(2048, 10)
self.fc2 = nn.Linear(2048, 10)
self.fc3 = nn.Linear(2048, 10)
def forward(self, x):
bs, _, _, _ = x.shape
x, aux_x = self.model(x)
# x = F.adaptive_avg_pool2d(x, 1).reshape(bs, -1)
label0 = self.fc0(x)
label1 = self.fc1(x)
label2= self.fc2(x)
label3= self.fc3(x)
return {'label0': label0, 'label1': label1,'label2':label2, 'label3': label3}
if __name__ == '__main__':
net = CNN1(True)
print(net)
inp = torch.randn(50, 3, 299, 299)
out = net(inp)
print('label0 shape:', out['label0'].shape)
Note: if you want to freeze the training of the Inception layers, set requires_grad = False for each of their parameters, as sketched below.
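A rough sketch of that freezing step (assuming the CNN1 instance above is called net and only the four new heads should remain trainable; the learning rate is arbitrary):
net = CNN1(True)
for param in net.model.parameters():  # freeze only the inception backbone
    param.requires_grad = False
optimizer = torch.optim.Adam((p for p in net.parameters() if p.requires_grad), lr=1e-3)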
In your code you are assuming all the layer connections are sequential by using the nn.Sequential(*modules) line; that may be what is causing the error.
I have a trained PyTorch model and I want to get the confidence score of its predictions in the range (0-1) or (0-100). The code below gives me a score, but its range is undefined. I want the score in a defined range of (0-1) or (0-100). Any idea how to get this?
conf, classes = torch.max(output, 1)
My code:
model = torch.load(r'best.pt')
model.eval()
def preprocess(imgs):
im = torch.from_numpy(imgs)
im = im.float() # uint8 to fp16/32
im /= 255.0
return im
img_path = cv2.imread("/content/634282.jpg",0)
cropped = cv2.resize(img_path,(28,28))
imgs = preprocess(np.array([[cropped]]))
def predict_allCharacters(imgs):
output = model(imgs)
conf, classes = torch.max(output, 1)
class_names = '0123456789'
return conf, class_names[classes.item()]
Model definition:
class CNN(nn.Module):
def __init__(self):
super(CNN, self).__init__()
self.conv1 = nn.Sequential(
nn.Conv2d(
in_channels=1,
out_channels=16,
kernel_size=5,
stride=1,
padding=2,
),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2),
)
self.conv2 = nn.Sequential(
nn.Conv2d(16, 32, 5, 1, 2),
nn.ReLU(),
nn.MaxPool2d(2),
)
# fully connected layer, output 10 classes
self.out = nn.Linear(32 * 7 * 7, 37)
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
# flatten the output of conv2 to (batch_size, 32 * 7 * 7)
x = x.view(x.size(0), -1)
output = self.out(x)
return output # return x for visualization
In your case, output represents the logits. One way of getting a probability out of them is to use the Softmax function. As it seems that output contains the outputs from a batch, not a single sample, you can do something like this:
probs = torch.nn.functional.softmax(output, dim=1)
Then, in probs, each row would have the probability (i.e., in range [0, 1], sum=1) of each class for a given sample.
So, your predict_allCharacters could be modified to:
def predict_allCharacters(imgs):
output = model(imgs)
probs = torch.nn.functional.softmax(output, dim=1)
conf, classes = torch.max(probs, 1)
class_names = '0123456789'
return conf, class_names[classes.item()]
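As a small usage sketch (building on the preprocessing from the question), conf is then in [0, 1]; multiply by 100 if you want a percentage:
conf, pred = predict_allCharacters(imgs)
print(f"predicted '{pred}' with confidence {conf.item() * 100:.1f}%")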
I have a GAN which I am using to generate plasma image data. My mentor has asked me to make the number of layers in my GAN configurable via a command-line argument. However, any time I use more than 4 or 5 blocks, my generated images are all almost identical, with some batches just having more noise than others. With larger numbers of layers the images are more likely to just come out wrong (sometimes with completely inverted colors). When this happens the model isn’t changing at all, even though there is a loss. How can I prevent this mode collapse, and why is it happening? My best guess is that all the data has an area with similar values and the GAN is just guessing that image over and over.
Helpful details: I am using 16,000 images for training, have tried adding noise to the input data, have been training for 100 or 200 epochs, and have also tried a Wasserstein loss function, which seemed to have the same issue (though it's possible I implemented it wrong).
class Block(nn.Module):
def __init__(self, in_feat, out_feat, normalize=True):
super(Block, self).__init__()
self.layers = nn.ModuleList()
self.layers.append(nn.Linear(in_feat, out_feat))
if normalize:
self.layers.append(nn.BatchNorm1d(out_feat, 0.8))
self.layers.append(nn.LeakyReLU(0.2, inplace=True))
def forward(self, z):
img = z
for layer in self.layers:
img = layer(img)
return img
class Generator(nn.Module):
def __init__(self):
super(Generator, self).__init__()
spacing = (1024 - 128)/opt.layers
self.block_modules = nn.ModuleList()
self.block_modules.append(Block(opt.latent_dim, 128, normalize=False))
current_size = 128
prev_size = current_size
for i in range(1, opt.layers):
current_size = 128 + (i * spacing)
self.block_modules.append(Block(int(prev_size),int(current_size)))
prev_size = current_size
self.block_modules.append(nn.Linear(int(current_size), int(np.prod(img_shape))))
self.block_modules.append(nn.Tanh())
def forward(self, z):
img = z
for layer in self.block_modules:
img = layer(img)
img = img.view(img.size(0), *img_shape)
return img
class Discriminator(nn.Module):
def __init__(self):
super(Discriminator, self).__init__()
self.model = nn.Sequential(
nn.Linear(int(np.prod(img_shape)), 512),
nn.LeakyReLU(0.2, inplace=True),
nn.Linear(512, 256),
nn.LeakyReLU(0.2, inplace=True),
nn.Linear(256, 128),
nn.LeakyReLU(0.2, inplace=True),
nn.Linear(128, 64),
nn.LeakyReLU(0.2, inplace=True),
nn.Linear(64, 1),
nn.Sigmoid(),
)
def forward(self, img):
img_flat = img.view(img.size(0), -1)
validity = self.model(img_flat)
return validity
adversarial_loss = torch.nn.BCELoss()
generator = Generator()
discriminator = Discriminator()
optimizer_G = torch.optim.Adam(generator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))
optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
for epoch in range(opt.n_epochs):
for i, (imgs, _) in enumerate(dataloader):
if opt.noise is not None:
ns = torch.normal(mean=0.0, std=imgs.detach()*opt.noise)
imgs = imgs + ns
valid = Variable(Tensor(imgs.size(0), 1).fill_(1.0), requires_grad=False)
fake = Variable(Tensor(imgs.size(0), 1).fill_(0.0), requires_grad=False)
# Configure input
real_imgs = Variable(imgs.type(Tensor))
optimizer_G.zero_grad()
# Sample noise as generator input
z = Variable(Tensor(np.random.normal(0, 1, (imgs.shape[0], opt.latent_dim))))
# Generate a batch of images
gen_imgs = generator(z)
g_loss = adversarial_loss(discriminator(gen_imgs), valid)
g_loss.backward()
optimizer_G.step()
# Train Discriminator
optimizer_D.zero_grad()
# Measure discriminator's ability to classify real from generated samples
real_loss = adversarial_loss(discriminator(real_imgs), valid)
fake_loss = adversarial_loss(discriminator(gen_imgs.detach()), fake)
d_loss = (real_loss + fake_loss) / 2
d_loss.backward()
optimizer_D.step()
Input: a set of ten vowels, a set of ten consonants, and an image dataset where both one vowel and one consonant are written in every image.
Task: identify the vowel and the consonant in a given image.
Approach: first apply CNN hidden layers to the image, then apply two parallel fully connected/dense layers, where one classifies the vowel in the image and the other classifies the consonant.
Problem: I am taking a pretrained model like VGG or GoogLeNet. How do I modify that pretrained model to apply two parallel dense layers and return two outputs?
I have tried two different models, but my question is: can we modify a pretrained model for this task?
Right now my model has only one "fc" layer. I have modified the number of neurons in the final "fc" layer, like this:
final_in_features = googlenet.fc.in_features
googlenet.fc = nn.Linear(final_in_features, 10)
But I need to add one more fc layer so that both "fc" layers are connected to the hidden layers in parallel.
Right now the model returns only one output.
outputs1 = googlenet(inputs)
The task is to return two outputs from both "fc" layers, so that it looks like this:
outputs1, outputs2 = googlenet(inputs)
Here is the source for the Linear layer in PyTorch:
class Linear(Module):
r"""Applies a linear transformation to the incoming data: :math:`y = xA^T + b`
Args:
in_features: size of each input sample
out_features: size of each output sample
bias: If set to ``False``, the layer will not learn an additive bias.
Default: ``True``
Shape:
- Input: :math:`(N, *, H_{in})` where :math:`*` means any number of
additional dimensions and :math:`H_{in} = \text{in\_features}`
- Output: :math:`(N, *, H_{out})` where all but the last dimension
are the same shape as the input and :math:`H_{out} = \text{out\_features}`.
Attributes:
weight: the learnable weights of the module of shape
:math:`(\text{out\_features}, \text{in\_features})`. The values are
initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`, where
:math:`k = \frac{1}{\text{in\_features}}`
bias: the learnable bias of the module of shape :math:`(\text{out\_features})`.
If :attr:`bias` is ``True``, the values are initialized from
:math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
:math:`k = \frac{1}{\text{in\_features}}`
Examples::
>>> m = nn.Linear(20, 30)
>>> input = torch.randn(128, 20)
>>> output = m(input)
>>> print(output.size())
torch.Size([128, 30])
"""
__constants__ = ['bias']
def __init__(self, in_features, out_features, bias=True):
super(Linear, self).__init__()
self.in_features = in_features
self.out_features = out_features
self.weight = Parameter(torch.Tensor(out_features, in_features))
if bias:
self.bias = Parameter(torch.Tensor(out_features))
else:
self.register_parameter('bias', None)
self.reset_parameters()
def reset_parameters(self):
init.kaiming_uniform_(self.weight, a=math.sqrt(5))
if self.bias is not None:
fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
bound = 1 / math.sqrt(fan_in)
init.uniform_(self.bias, -bound, bound)
    @weak_script_method
def forward(self, input):
return F.linear(input, self.weight, self.bias)
def extra_repr(self):
return 'in_features={}, out_features={}, bias={}'.format(
self.in_features, self.out_features, self.bias is not None
)
You can create a class DoubleLinear like this:
class DoubleLinear(Module):
    def __init__(self, Linear1, Linear2):
        super(DoubleLinear, self).__init__()
        self.Linear1 = Linear1
        self.Linear2 = Linear2
    def forward(self, input):
        return self.Linear1(input), self.Linear2(input)
Then, create your two Linear layers:
Linear_vow = nn.Linear(final_in_features, 10)
Linear_con = nn.Linear(final_in_features, 10)
final_layer = DoubleLinear(Linear_vow, Linear_con)
Now, after setting googlenet.fc = final_layer, outputs1, outputs2 = googlenet(inputs) will work as expected.
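A rough end-to-end sketch of wiring this in (identifiers such as googlenet and final_in_features are taken from the question; the batch and image sizes are just examples):
import torch
import torch.nn as nn
from torchvision import models
googlenet = models.googlenet(pretrained=True)
final_in_features = googlenet.fc.in_features
Linear_vow = nn.Linear(final_in_features, 10)  # vowel head
Linear_con = nn.Linear(final_in_features, 10)  # consonant head
googlenet.fc = DoubleLinear(Linear_vow, Linear_con)
inputs = torch.randn(8, 3, 224, 224)  # dummy batch
outputs1, outputs2 = googlenet(inputs)  # vowel logits, consonant logits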
class DoubleLinear(torch.nn.Module):
def __init__(self, Linear1, Linear2):
super(DoubleLinear, self).__init__()
self.Linear1 = Linear1
self.Linear2 = Linear2
def forward(self, input):
return self.Linear1(input), self.Linear2(input)
in_features = model._fc.in_features
Linear_first = nn.Linear(in_features, 10)
Linear_second = nn.Linear(in_features, 5)
model._fc = DoubleLinear(Linear_first, Linear_second)
I have used ResNet as my pretrained model from torchvision.models.
I am nullifying the fc layer using nn.Identity():
class MyModel(nn.Module):
def __init__(self, num_classes1, num_classes2):
super(MyModel, self).__init__()
self.model_resnet = models.resnet18(pretrained=True)
num_ftrs = self.model_resnet.fc.in_features
self.model_resnet.fc = nn.Identity()
self.fc1 = nn.Linear(num_ftrs, num_classes1)
self.fc2 = nn.Linear(num_ftrs, num_classes2)
def forward(self, x):
x = self.model_resnet(x)
out1 = self.fc1(x)
out2 = self.fc2(x)
return out1, out2
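A quick sanity check of this two-headed model could look like the following (the class counts and input size are assumed):
import torch
model = MyModel(num_classes1=10, num_classes2=5)
x = torch.randn(2, 3, 224, 224)
out1, out2 = model(x)
print(out1.shape, out2.shape)  # torch.Size([2, 10]) torch.Size([2, 5])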