Inplace operation error in control problem - python

I'm new to pytorch and I'm having a problem with some code to train a a neural network to solve a control problem. I use the following code to solve a toy version of my problem:
# SOME IMPORTS
import torch
import torch.autograd as autograd
from torch import Tensor
import torch.nn as nn
import torch.optim as optim
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# PARAMETERS OF THE PROBLEM
layers = [4, 32, 32, 4] # Layers of the NN
steps = 10000 # Simulation steps
train_step = 1 # I train the NN for 1 epoch every train_step steps
lr = 1e-3 # Learning rate
After this I define a very simple network:
# DEFINITION OF THE NETWORK (A SIMPLE FEED FORWARD)
class FCN(nn.Module):
def __init__(self,layers):
super(FCN, self).__init__() #call __init__ from parent class
self.linears = []
for i in range(len(layers)-2):
self.linears.append(
nn.Linear(layers[i], layers[i+1])
)
self.linears.append(
nn.ReLU()
)
self.linears.append(
nn.Linear(layers[-2], layers[-1])
)
self.linear_stack = nn.Sequential(*self.linears)
'forward pass'
def forward(self,x):
out = self.linear_stack(x)
return out
I then use the defined class to create my model:
model = FCN(layers)
model.to(device)
params = list(model.parameters())
optimizer = torch.optim.Adam(model.parameters(),lr=lr,amsgrad=False)
Then I define the loss function and the simulation function, i.e. the function that updates the state of my problem.
def simulate(state_old, model):
state_new = model(state_old)
return state_new
def lossNN(state_old,state_new, model):
error = torch.sum( (state_old-state_new)**2 )
return error
And finally I train my model:
torch.autograd.set_detect_anomaly(True)
state_old = torch.Tensor([0.01, 0.01, 0.5, 0.1]).to(device)
for i in range(steps):
state_new = simulate(state_old, model)
if i%train_step == 0:
optimizer.zero_grad()
loss = lossNN(state_old, state_new, model)
loss.backward(retain_graph=True)
optimizer.step()
state_old = state_new
if (i%1000)==0:
print(loss)
print(state_new)
I then get the following error. Here you can find the backtrace :
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [32, 4]], which is output 0 of AsStridedBackward0, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!

You need to use detach to remove the gradient created in the previous state.
state_old = state_new
state_old = state_new.detach()
Then your training code changes to:
torch.autograd.set_detect_anomaly(True)
state_old = torch.Tensor([0.01, 0.01, 0.5, 0.1]).to(device)
for i in range(steps):
state_new = simulate(state_old, model)
if i%train_step == 0:
optimizer.zero_grad()
loss = lossNN(state_old, state_new, model)
loss.backward(retain_graph=True)
optimizer.step()
state_old = state_new.detach()
if (i%1000)==0:
print(loss)
print(state_new)

Related

I want to use `backward` multiple times in pytorch, but I get an error and cannot run the neural network successfully

I want to use backward multiple times in pytorch, but I get an error and cannot run the neural network successfully.
The error below is the reason why it does not work.
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation:
[torch.FloatTensor [32, 2]], which is output 0 of AsStridedBackward0, is at version 2; expected version 1 instead.
Hint: the backtrace further above shows the operation that failed to compute its gradient.
The variable in question was changed in there or anywhere later. Good luck!
I have tried to resolve this error by checking the following website to try and resolve the error, but without success. What should I do?
discuss.pytorch
discuss.pytorch
discuss.pytorch
discuss.pytorch
https://nieznanm.medium.com/runtimeerror-one-of-the-variables-needed-for-gradient-computation-has-been-modified-by-an-inplace-85d0d207623
from sklearn.utils.extmath import cartesian
import torch
import numpy as np
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from tqdm.notebook import trange
from torchvision import transforms
x_1 = np.arange(0, 2 + 1e-2, 1e-5)
x_2 = np.arange(0, 1 + 5e-2, 5e-2)
product_train = cartesian((x_1,x_2))
product_initial_condition0 = cartesian((np.array([0.]), x_2))
product_initial_condition1 = cartesian((np.array([2e-6]), x_2))
device = "cuda" if torch.cuda.is_available() else "cpu"
x1 = torch.tensor(product_train[:,0].astype(np.float32), requires_grad=True).to(device)
x2 = torch.tensor(product_train[:,1].astype(np.float32)).to(device)
initial_condition0 = torch.tensor(product_initial_condition0.astype(np.float32)).to(device)
initial_condition0_output = torch.tensor(product_initial_condition0[:,1].astype(np.float32)).to(device)
class Model(nn.Module):
def __init__(self):
super(Model, self).__init__()
self.linear1 = nn.Linear(2, 32)
self.linear2 = nn.Linear(32, 2)
def forward(self, x1, x2):
inputs = torch.stack([x1,x2], dim=1)
x = self.linear1(inputs)
x = torch.sigmoid(x)
x = self.linear2(x)
return x
NN = Model()
device = "cuda" if torch.cuda.is_available() else "cpu"
NN.to(device)
def f(x1, x2, NN):
initial_conditon_Phi0 = NN(initial_condition0[:,0], initial_condition0[:,1])
loss1 = ms_erorr(initial_conditon_Phi0[:,0], initial_condition0_output)
loss2 = ms_erorr(initial_conditon_Phi0[:,1], initial_condition0_output)
return loss1, loss2
def train(NN, optimizer, iteration):
with trange(iteration) as t:
torch.autograd.set_detect_anomaly(True)
for i in t:
optimizer.zero_grad()
a1, a2 = f(x1, x2, NN)
a1.backward(retain_graph=True)
optimizer.step()
optimizer.zero_grad()
a2.backward()
optimizer.step()
t.set_postfix(a1=a1.item(), a2=a2.item())
return NN
iteration = 1000
optimizer = optim.Adam(NN.parameters(), lr=0.001)
ms_erorr = nn.MSELoss()
NN= train(NN, optimizer, iteration)
This neural network model has two inputs and two outputs; for each of the two outputs, I want to create a model that outputs an arbitrary value. However, there is a situation where I want to learn the values of the two outputs separately, instead of learning the values of the two outputs at the same time, so I want to be able to train the neural network successfully using backward() twice. How can I do this?
As far as I understand, you are updating the network (optimizer.step) before finishing back propagation. You need first backpropagate all the errors you've got on the previous step, and then do the update. This can be the source of an error.

How to implement batch normalization merging in python?

I have defined the model as in the code below, and I used batch normalization merging to make 3 layers into 1 linear layer.
The first layer of the model is a linear layer and there is no bias.
The second layer of the model is a batch normalization and there is no weight and bias ( affine is false )
The third layer of the model is a linear layer.
The variables named new_weight and new_bias are the weight and bias of the newly created linear layer, respectively.
My question is: Why is the output of the following two print functions different? And where is the wrong part in the code below the batch merge comment?
import torch
import torch.nn as nn
import torch.optim as optim
learning_rate = 0.01
in_nodes = 20
internal_nodes = 8
out_nodes = 9
batch_size = 100
# model define
class M(nn.Module):
def __init__(self):
super(M, self).__init__()
self.layer1 = nn.Linear(in_nodes, internal_nodes, bias=False)
self.layer2 = nn.BatchNorm1d(internal_nodes, affine=False)
self.layer3 = nn.Linear(internal_nodes, out_nodes)
def forward(self, x):
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
return x
# optimizer and criterion
model = M()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()
# training
for batch_num in range(1000):
model.train()
optimizer.zero_grad()
input = torch.randn(batch_size, in_nodes)
target = torch.ones(batch_size, out_nodes)
output = model(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()
# batch merge
divider = torch.sqrt(model.layer2.eps + model.layer2.running_var)
w_bn = torch.diag(torch.ones(internal_nodes) / divider)
new_weight = torch.mm(w_bn, model.layer1.weight)
new_weight = torch.mm(model.layer3.weight, new_weight)
b_bn = - model.layer2.running_mean / divider
new_bias = model.layer3.bias + torch.squeeze(torch.mm(model.layer3.weight, b_bn.reshape(-1, 1)))
input = torch.randn(batch_size, in_nodes)
print(model(input))
print(torch.t(torch.mm(new_weight, torch.t(input))) + new_bias)
Short Answer: As far as I can tell you need a model.eval() before the line
input = torch.randn(batch_size, in_nodes)
such that the end looks like this
...
model.eval()
input = torch.randn(batch_size, in_nodes)
test_input = torch.ones(batch_size,internal_nodes)/100
print(model(input))
print(torch.t(torch.mm(new_weight, torch.t(input))) + new_bias)
with that (I tested it) the two print-statements should output the same. It fixed the weights.
Long Answer:
When using Batch-Normalization according to PyTorch documentation a default momentum of 0.1 is used to compute the running_mean and running_var. The momentum defines how much the estimated statistics and how much the new observed value influence the value.
Now when you don't set a model.eval() statement the batch_normalization computes an updated running_mean and running_var due to the momentum in line
print(model(input))
For further details and or confirmation: Related Question, PyTorch-Documentation

Training a tensorflow model with an intermediate function call in training loop

I am trying to train a simple neural network where the input data is taken from a matlab simulink simulation and the output is then fed back into a different matlab simulink simulation. My code is as follows:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
def get_pid_values():
# call simulink model that just produces PID values
return random.random()
def get_plant(intermediate_val):
# get plant output.
return random.random()
class CustomDataGen(tf.keras.utils.Sequence):
def __init__(self, df, X_col,
batch_size,
input_size=(1,),
shuffle=True):
self.df = df.copy()
self.X_col = X_col
self.batch_size = batch_size
self.input_size = input_size
self.shuffle = shuffle
self.n = len(self.df)
def __get_input(self, index):
# Need to adjust this to support retrieving ref voltage.
return self.df[self.X_col].iloc[index]
def on_epoch_end(self):
if self.shuffle:
self.df = self.df.sample(frac=1).reset_index(drop=True)
def __getitem__(self, index):
X = self.__get_input(index)
return X
def __len__(self):
return self.n // self.batch_size
def get_model(input_shape, hidden, output_shape):
inputs = keras.layers.Input(shape=input_shape)
x = layers.Dense(hidden, activation="relu")(inputs)
x = layers.Dense(hidden, activation='relu')(x)
outputs = layers.Dense(output_shape)(x)
model = keras.Model(inputs=inputs, outputs=outputs, name="pid-modifier")
return model
loss_object = tf.keras.losses.MeanSquaredError()
def loss(y_ref, y_plant):
y_ = y_plant
y = y_ref
return loss_object(y_true=y, y_pred=y_)
if __name__ == "__main__":
# Hyperparameters
lr = 0.01
num_epochs = 1
hidden_size = 4
net_input_size = 1
net_output_size = 1
batch_size = 1
reference_fpath = "Run2_rThrottleTarget.csv"
references = pd.read_csv(reference_fpath)
data_generator = CustomDataGen(df=references, X_col='Throttle', batch_size=1)
# Keep results for plotting
train_loss_results = []
# Initialize optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
# error intitial condition
err = 0
# instantiate model
model = get_model(input_shape=(2,), hidden=hidden_size, output_shape=net_output_size)
for epoch in range(num_epochs):
for ref in data_generator:
with tf.GradientTape() as tape:
tape.watch(model.trainable_variables)
# Get pid values
pid = get_pid_values()
# Group ref with pid voltage for input
input = tf.constant([[ref, pid]])
# Get the adjusted voltage from the network
intermediate_val = model(input)
# Get the plant output based on the adjusted value.
plant = get_plant(intermediate_val)
plant = tf.constant([plant], dtype=tf.float64)
ref = tf.constant([ref], dtype=tf.float64)
# Calculate loss
loss_value = loss(ref, plant)
grads = tape.gradient(loss_value, model.trainable_weights)
optimizer.apply_gradients(zip(grads, model.trainable_weights))
err = ref - plant
if epoch % 50 == 0:
print("Epoch {:03d}: Loss: {:.3f}".format(epoch, loss_value))
fig, axes = plt.subplots(1, figsize=(12, 8))
fig.suptitle('Training Metrics')
axes[0].set_ylabel("Loss", fontsize=14)
axes[0].plot(train_loss_results)
plt.show()
For the moment I am just mocking the calls to simulink by returning a random number. My problem is that when I take the model output and then call the function that mocks a call to simulink and calculate my loss:
# Get the adjusted voltage from the network
intermediate_val = model(input)
# Get the plant output based on the adjusted value.
plant = get_plant(intermediate_val)
plant = tf.constant([plant], dtype=tf.float64)
ref = tf.constant([ref], dtype=tf.float64)
# Calculate loss
loss_value = loss(ref, plant)
I get the error ValueError: No gradients provided for any variable. I've figured out that if I pass the model's output directly to the loss function everything works fine. My question is how can I have the intermediate step of passing my model's output to another function and using the returned value to calculate loss?
a gradient exists between intermediate_val and model.trainable_variables as it is calculated by back propagation, the tape however cannot perform back-propagation on plant because it wasn't calculated by tensorflow, it's just a constant to it, it has no gradient.
since the model knows nothing about the relation between the loss and how it is generated, this becomes a case of reinforcement learning, which can be done using the tensorflow-agents module.
this is a tutorial about it on youtube Everything You Need To Master Actor Critic Methods | Tensorflow 2 Tutorial , it's about a certain network architecture but its gradient calculation method is exactly the same as your case, the code is easily adaptabe.

Fully Connected Neural Network not predicting correctly

I am a newbie in ML and DL but i decide to try out something but i found out that my network is not predicting correctly.
I have a fully connected neural network with just one dense (linear) layer and i used SGD as the optimizer and it predicted 9.9 instead of 10 but when i use Adam it predicted 10. expected result is 10, i'm confused can someone explain to me why is this so?
!pip install -Uqq tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm as tqdm
My training data as a sample
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)
My model or network for forward pass and neural net
class SimpleNeuralNetwork(nn.Module) :
def __init__(self, num_input, num_output):
super(SimpleNeuralNetwork, self).__init__()
self.fc = nn.Linear(num_input, num_output)
def forward(self, x):
x = self.fc(x)
return x
In features and batch
in_samples, in_features = X.shape
Defining and Initialising my loss function
criterion = nn.MSELoss()
Parameters for the training process
learning_rate = 0.01
ePoch = 1000
Initialising my model
sNN = SimpleNeuralNetwork(in_features, in_features)
Initialising my optimizer
optimiser = optim.SGD(sNN.parameters(), lr=learning_rate)
Training my Network
for i in tqdm(list(range(ePoch))):
# prediction - forward pass in the model
y_pred = sNN(X)
# loss - check how well or how far our model did with the prediction
loss = criterion(Y, y_pred)
# gradient - do a backward propagation (backward pass)
loss.backward()
# update weight - readjust the weight using our learning rate as a proximity
optimiser.step()
# zero gradient - reinitialize our memory to zero so that the neural network will not cram
optimiser.zero_grad()
# if i % 10 == 0:
# [w, b] = sNN.parameters()
# print(f'epoch: {i + 1}, weight: {w[0][0].item()}, bias: {b[0].item()}, pred: {y_pred}')
Actual prediction
predict = sNN(torch.tensor([5], dtype=torch.float32))
print(f'prediction for 5: {predict[0].item()}')

How to implement contractive autoencoder in Pytorch?

I'm trying to create a contractive autoencoder in Pytorch. I found this thread and tried according to that. This is the snippet I wrote based on the mentioned thread:
import datetime
import numpy as np
import torch
import torchvision
from torchvision import datasets, transforms
from torchvision.utils import save_image, make_grid
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
%matplotlib inline
dataset_train = datasets.MNIST(root='MNIST',
train=True,
transform = transforms.ToTensor(),
download=True)
dataset_test = datasets.MNIST(root='MNIST',
train=False,
transform = transforms.ToTensor(),
download=True)
batch_size = 128
num_workers = 2
dataloader_train = torch.utils.data.DataLoader(dataset_train,
batch_size = batch_size,
shuffle=True,
num_workers = num_workers,
pin_memory=True)
dataloader_test = torch.utils.data.DataLoader(dataset_test,
batch_size = batch_size,
num_workers = num_workers,
pin_memory=True)
def view_images(imgs, labels, rows = 4, cols =11):
imgs = imgs.detach().cpu().numpy().transpose(0,2,3,1)
fig = plt.figure(figsize=(8,4))
for i in range(imgs.shape[0]):
ax = fig.add_subplot(rows, cols, i+1, xticks=[], yticks=[])
ax.imshow(imgs[i].squeeze(), cmap='Greys_r')
ax.set_title(labels[i].item())
# now let's view some
imgs, labels = next(iter(dataloader_train))
view_images(imgs, labels,13,10)
class Contractive_AutoEncoder(nn.Module):
def __init__(self):
super().__init__()
self.encoder = nn.Linear(784, 512)
self.decoder = nn.Linear(512, 784)
def forward(self, input):
# flatten the input
shape = input.shape
input = input.view(input.size(0), -1)
output_e = F.relu(self.encoder(input))
output = F.sigmoid(self.decoder(output_e))
output = output.view(*shape)
return output_e, output
def loss_function(output_e, outputs, imgs, device):
output_e.backward(torch.ones(output_e.size()).to(device), retain_graph=True)
criterion = nn.MSELoss()
assert outputs.shape == imgs.shape ,f'outputs.shape : {outputs.shape} != imgs.shape : {imgs.shape}'
imgs.grad.requires_grad = True
loss1 = criterion(outputs, imgs)
print(imgs.grad)
loss2 = torch.mean(pow(imgs.grad,2))
loss = loss1 + loss2
return loss
epochs = 50
interval = 2000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Contractive_AutoEncoder().to(device)
optimizer = optim.Adam(model.parameters(), lr =0.001)
for e in range(epochs):
for i, (imgs, labels) in enumerate(dataloader_train):
imgs = imgs.to(device)
labels = labels.to(device)
outputs_e, outputs = model(imgs)
loss = loss_function(outputs_e, outputs, imgs,device)
optimizer.zero_grad()
loss.backward()
optimizer.step()
if i%interval:
print('')
print(f'epoch/epoechs: {e}/{epochs} loss : {loss.item():.4f} ')
For the sake of brevity I just used one layer for the encoder and the decoder. It should work regardless of number of layers in either of them obviously!
But the catch here is, aside from the fact that I don't know if this is the correct way of doing this, (calculating gradients with respect to the input), I get an error which makes the former solution wrong/not applicable.
That is:
imgs.grad.requires_grad = True
produces the error :
AttributeError : 'NoneType' object has no attribute 'requires_grad'
I also tried the second method suggested in that thread which is as follows:
class Contractive_Encoder(nn.Module):
def __init__(self):
super().__init__()
self.encoder = nn.Linear(784, 512)
def forward(self, input):
# flatten the input
input = input.view(input.size(0), -1)
output_e = F.relu(self.encoder(input))
return output_e
class Contractive_Decoder(nn.Module):
def __init__(self):
super().__init__()
self.decoder = nn.Linear(512, 784)
def forward(self, input):
# flatten the input
output = F.sigmoid(self.decoder(input))
output = output.view(-1,1,28,28)
return output
epochs = 50
interval = 2000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_enc = Contractive_Encoder().to(device)
model_dec = Contractive_Decoder().to(device)
optimizer = optim.Adam([{"params":model_enc.parameters()},
{"params":model_dec.parameters()}], lr =0.001)
optimizer_cond = optim.Adam(model_enc.parameters(), lr = 0.001)
criterion = nn.MSELoss()
for e in range(epochs):
for i, (imgs, labels) in enumerate(dataloader_train):
imgs = imgs.to(device)
labels = labels.to(device)
outputs_e = model_enc(imgs)
outputs = model_dec(outputs_e)
loss_rec = criterion(outputs, imgs)
optimizer.zero_grad()
loss_rec.backward()
optimizer.step()
imgs.requires_grad_(True)
y = model_enc(imgs)
optimizer_cond.zero_grad()
y.backward(torch.ones(imgs.view(-1,28*28).size()))
imgs.grad.requires_grad = True
loss = torch.mean([pow(imgs.grad,2)])
optimizer_cond.zero_grad()
loss.backward()
optimizer_cond.step()
if i%interval:
print('')
print(f'epoch/epoechs: {e}/{epochs} loss : {loss.item():.4f} ')
but I face the error :
RuntimeError: invalid gradient at index 0 - got [128, 784] but expected shape compatible with [128, 512]
How should I go about this in Pytorch?
Summary
The final implementation for contractive loss that I wrote is as follows:
def loss_function(output_e, outputs, imgs, lamda = 1e-4, device=torch.device('cuda')):
criterion = nn.MSELoss()
assert outputs.shape == imgs.shape ,f'outputs.shape : {outputs.shape} != imgs.shape : {imgs.shape}'
loss1 = criterion(outputs, imgs)
output_e.backward(torch.ones(outputs_e.size()).to(device), retain_graph=True)
# Frobenious norm, the square root of sum of all elements (square value)
# in a jacobian matrix
loss2 = torch.sqrt(torch.sum(torch.pow(imgs.grad,2)))
imgs.grad.data.zero_()
loss = loss1 + (lamda*loss2)
return loss
and inside training loop you need to do:
for e in range(epochs):
for i, (imgs, labels) in enumerate(dataloader_train):
imgs = imgs.to(device)
labels = labels.to(device)
imgs.retain_grad()
imgs.requires_grad_(True)
outputs_e, outputs = model(imgs)
loss = loss_function(outputs_e, outputs, imgs, lam,device)
imgs.requires_grad_(False)
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(f'epoch/epochs: {e}/{epochs} loss: {loss.item():.4f}')
Full explanation
As it turns out and rightfully #akshayk07 pointed out in the comments, the implementation found in Pytorch forum was wrong in multiple places. The notable thing, being it wasn't implementing the actual contractive loss that was introduced in Contractive Auto-Encoders:Explicit Invariance During Feature Extraction paper! and also aside from that, the implementation wouldn't work at all for obvious reasons that will be explained in a moment.
The changes are obvious so I try to explain what's going on here. First of all note that imgs is not a leaf node, so the gradients would not be retained in the image .grad attribute.
In order to retain gradients for non leaf nodes, you should use retain_graph(). grad is only populated for leaf Tensors. Also imgs.retain_grad() should be called before doing forward() as it will instruct the autograd to store grads into non-leaf nodes.
Update
Thanks to #Michael for pointing out that the correct calculation of Frobenius Norm is actually (from ScienceDirect):
the square root of the sum of the squares of all the matrix entries
and not
the the square root of the sum of the absolute values of all the
matrix entries as explained here
In PyTorch 1.5.0, a high level torch.autograd.functional.jacobian API is added. This should make the contractive objective easier to implement for an arbitrary encoder. For torch>=v1.5.0, the contractive loss would look like this:
contractive_loss = torch.norm(torch.autograd.functional.jacobian(self.encoder, imgs, create_graph=True))
The create_graph argument makes the jacobian differentiable.
The main challenge in implementing the contractive autoencoder is in calculating the Frobenius norm of the Jacobian, which is the gradient of the code or bottleneck layer (vector) with respect to the input layer (vector). This is the regularization term in the loss function. Fortunately, you have done the hard work in solving this for me. Thank you! You are using MSE loss for the first term. Cross entropy loss is sometimes used instead. It's worth considering. I think you are almost there with the Frobenius norm, except that you need to take the square root of the sum of the squares of the Jacobian, where you are calculating the square root of the sum of the absolute values. Here's how I'd define the loss function (sorry I changed notation a little to keep myself straight):
def cae_loss_fcn(code, img_out, img_in, lamda=1e-4, device=torch.device('cuda')):
# First term in the loss function, for ensuring representational fidelity
criterion=nn.MSELoss()
assert img_out.shape == img_in.shape, f'img_out.shape : {img_out.shape} != img_in.shape : {img_in.shape}'
loss1 = criterion(img_out, img_in)
# Second term in the loss function, for enforcing contraction of representation
code.backward(torch.ones(code.size()).to(device), retain_graph=True)
# Frobenius norm of Jacobian of code with respect to input image
loss2 = torch.sqrt(torch.sum(torch.pow(img_in.grad, 2))) # THE CORRECTION
img_in.grad.data.zero_()
# Total loss, the sum of the two loss terms, with weight applied to second term
loss = loss1 + (lamda*loss2)
return loss

Categories

Resources