Pytorch optimizer returns empty - python

I have already checked the relevant questions but cannot figure out what is wrong with my code. I built a CNN and added a residual connection, but it returns
"ValueError: optimizer got an empty parameter list"
Below you can see some of my code:
def _init_(self):
    super()._init_()
    self.conv1 = nn.Conv2d(3, 6, 3, 1, 1)
    self.pool1 = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(6, 10, 3, 1, 1)
    self.conv3 = nn.Conv2d(10, 5, 3, 1, 1)
    self.conv4 = nn.Conv2d(5, 10, 3, 1, 1)
    self.conv5 = nn.Conv2d(10, 10, 3, 1, 1)
    self.fc1 = nn.Linear(40960, 15)
    self.dropout = nn.Dropout(0.15)

def forward(self, x):
    x = F.relu(self.conv1(x))
    x = F.relu(self.conv2(x))
    x = F.relu(self.conv3(x))
    residual = x.clone()
    x = F.relu(self.conv4(x))
    x += residual
    x = F.relu(x)
    x = F.relu(self.conv5(x))
    x = torch.flatten(x, 1)
    x = F.relu(self.fc1(x))
    return x
and the error is raised here:
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=learn_rate, momentum=0.9)
Thank you for your help in advance.

What is the type of net.parameters()? optim.SGD() is expecting an "iterable of parameters...". Try this:
net = Net()
criterion = nn.CrossEntropyLoss()
parameters = list(net.parameters())
optimizer = optim.SGD(parameters, lr=learn_rate, momentum=0.9)
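As a quick standalone check (assuming the Net from the question), net.parameters() is a generator, and converting it to a list makes the emptiness visible:

net = Net()
params = net.parameters()
print(type(params))   # <class 'generator'>
print(list(params))   # [] here, which is exactly what the error complains about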

Your Net class needs to extend torch.nn.Module
class Net(nn.Module):
    def __init__(self):
        ...

    def forward(self, x):
        ...
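For completeness, a minimal runnable sketch of the fixed class (the layer sizes are reused from the question where possible; the 32x32 input size and lr=0.01 are assumptions of mine), after which net.parameters() is no longer empty:

import torch
import torch.nn as nn
import torch.optim as optim

class Net(nn.Module):                     # extends torch.nn.Module
    def __init__(self):                   # double underscores on __init__
        super().__init__()                # registers submodules and parameters
        self.conv1 = nn.Conv2d(3, 6, 3, 1, 1)
        self.fc1 = nn.Linear(6 * 32 * 32, 15)  # assumes 32x32 inputs

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = torch.flatten(x, 1)
        return self.fc1(x)

net = Net()
print(sum(p.numel() for p in net.parameters()))  # > 0, so no more ValueError
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)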


How to plot a graph-neural-network model graph when using the TensorFlow Model Subclass API with Spektral layers?

I am unable to plot my graph neural network. I have seen a few related questions (1, 2, 3) on this topic, but their answers do not apply to graph neural networks.
What makes this different is that the input includes objects of different dimensions: e.g., the node-properties matrix has dimension [n_nodes, n_node_features], the adjacency matrix has dimension [n_nodes, n_nodes], etc. Here is an example of my model:
class GIN0(Model):
    def __init__(self, channels, n_layers):
        super().__init__()
        self.conv1 = GINConv(channels, epsilon=0, mlp_hidden=[channels, channels])
        self.convs = []
        for _ in range(1, n_layers):
            self.convs.append(
                GINConv(channels, epsilon=0, mlp_hidden=[channels, channels])
            )
        self.pool = GlobalAvgPool()
        self.dense1 = Dense(channels, activation="relu")
        self.dropout = Dropout(0.5)
        self.dense2 = Dense(channels, activation="relu")

    def call(self, inputs):
        x, a, i = inputs
        x = self.conv1([x, a])
        for conv in self.convs:
            x = conv([x, a])
        x = self.pool([x, i])
        x = self.dense1(x)
        x = self.dropout(x)
        return self.dense2(x)
One of the answers in 2 suggested adding a build_graph function, as follows:
class my_model(Model):
    def __init__(self, dim):
        super(my_model, self).__init__()
        self.Base = VGG16(input_shape=(dim), include_top=False, weights='imagenet')
        self.GAP = L.GlobalAveragePooling2D()
        self.BAT = L.BatchNormalization()
        self.DROP = L.Dropout(rate=0.1)
        self.DENS = L.Dense(256, activation='relu', name='dense_A')
        self.OUT = L.Dense(1, activation='sigmoid')

    def call(self, inputs):
        x = self.Base(inputs)
        g = self.GAP(x)
        b = self.BAT(g)
        d = self.DROP(b)
        d = self.DENS(d)
        return self.OUT(d)

    # AFAIK: the most convenient method to print model.summary(),
    # similar to the Sequential or Functional API.
    def build_graph(self):
        x = Input(shape=(dim))
        return Model(inputs=[x], outputs=self.call(x))

dim = (124, 124, 3)
model = my_model((dim))
model.build((None, *dim))
model.build_graph().summary()
However, I am not sure how to define dim or the Input layer using tf.keras.layers.Input for such a hybrid data structure as described above.
Any suggestions?
Here is minimal code to plot such a subclassed multi-input model. Note that, as stated in the comment above, there are some issues with your GINConv (which comes from Spektral) that are not related to the main question, so I will give a general solution for such multi-input modeling scenarios. To make it work with Spektral, please reach out to the package author for further discussion.
From the Spektral repo, here, I got the shapes of the input tensors:
x, y = next(iter(loader_tr))
bs_x = list(x[0].shape)
bs_y = list(x[1].shape)
bs_z = list(x[2].shape)
bs_x, bs_y, bs_z
# ([1067, 4], [1067, 1067], [1067])
Here is a similar model: it takes the same number of inputs with the same shapes, but without GINConv.
class GIN0(Model):
    def __init__(self, channels, n_layers):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv1D(channels, 3, activation='relu')
        self.conv2 = tf.keras.layers.Conv1D(channels, 3, activation='relu')
        self.dense1 = Dense(channels, activation="relu")
        self.dropout = Dropout(0.5)
        self.dense2 = Dense(n_out, activation="softmax")

    def call(self, inputs):
        x, a, i = inputs
        x = self.conv1(x)
        x = tf.keras.layers.GlobalAveragePooling1D()(x)
        a = self.conv2(a)
        a = tf.keras.layers.GlobalAveragePooling1D()(a)
        x = tf.keras.layers.Concatenate(axis=1)([a, x, i])
        x = self.dense1(x)
        x = self.dropout(x)
        return self.dense2(x)

    def build_graph(self):
        x = tf.keras.Input(shape=bs_x)
        y = tf.keras.Input(shape=bs_y)
        z = tf.keras.Input(shape=bs_z)
        return tf.keras.Model(
            inputs=[x, y, z],
            outputs=self.call([x, y, z])
        )
model = GIN0(channels, layers)
model.build(
    [
        (None, *bs_x),
        (None, *bs_y),
        (None, *bs_z)
    ]
)

# OK
model.build_graph().summary()

# OK
tf.keras.utils.plot_model(
    model.build_graph(), show_shapes=True
)

How to get a confidence score from a trained PyTorch model

I have a trained PyTorch model and I want to get the confidence score of its predictions in the range (0-100) or (0-1). The code below gives me a score, but its range is undefined. I want the score in a defined range of (0-1) or (0-100). Any idea how to get this?
conf, classes = torch.max(output, 1)
My code:
import cv2
import numpy as np
import torch

model = torch.load(r'best.pt')
model.eval()

def preprocess(imgs):
    im = torch.from_numpy(imgs)
    im = im.float()  # uint8 to fp16/32
    im /= 255.0
    return im

img_path = cv2.imread("/content/634282.jpg", 0)
cropped = cv2.resize(img_path, (28, 28))
imgs = preprocess(np.array([[cropped]]))

def predict_allCharacters(imgs):
    output = model(imgs)
    conf, classes = torch.max(output, 1)
    class_names = '0123456789'
    return conf, class_names[classes.item()]
Model definition:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        # fully connected layer, output 10 classes
        self.out = nn.Linear(32 * 7 * 7, 37)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        # flatten the output of conv2 to (batch_size, 32 * 7 * 7)
        x = x.view(x.size(0), -1)
        output = self.out(x)
        return output  # return x for visualization
In your case, output represents the logits. One way of getting a probability out of them is to use the Softmax function. As it seems that output contains the outputs from a batch, not a single sample, you can do something like this:
probs = torch.nn.functional.softmax(output, dim=1)
Then, in probs, each row would have the probability (i.e., in range [0, 1], sum=1) of each class for a given sample.
So, your predict_allCharacters could be modified to:
def predict_allCharacters(imgs):
    output = model(imgs)
    probs = torch.nn.functional.softmax(output, dim=1)
    conf, classes = torch.max(probs, 1)
    class_names = '0123456789'
    return conf, class_names[classes.item()]
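As a quick sanity check, here is a standalone sketch with made-up logits (not the question's model) showing the resulting range:

import torch

logits = torch.tensor([[2.0, 0.5, -1.0]])           # fake batch of one sample
probs = torch.nn.functional.softmax(logits, dim=1)  # each row now sums to 1
conf, cls = torch.max(probs, 1)
print(probs.sum().item())   # ~1.0
print(conf.item())          # confidence in [0, 1]
print(conf.item() * 100)    # or as a percentage in [0, 100]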

Why does requires_grad turn from True to False when doing a torch.nn.Conv2d operation?

I have a U-Net network which takes in MRI images of the brain, where the goal is to segment white matter in the brain. The images have the shape 256x256x183 (reshaped to 183x256x256) (FLAIR and T1 images). The problem I am having is that before sending the input to the U-Net, my PyTorch tensor has requires_grad=True, but after one torch.nn.Conv2d operation it has requires_grad=False. This is a huge problem, since without gradients the network will not update and learn.
from collections import OrderedDict
import torch
import torch.nn as nn

class UNet(nn.Module):
    def __init__(self, in_channels=3, out_channels=1, init_features=32):
        super(UNet, self).__init__()
        features = init_features
        self.encoder1 = UNet._block(in_channels, features, name="enc1")
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder2 = UNet._block(features, features * 2, name="enc2")
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder3 = UNet._block(features * 2, features * 4, name="enc3")
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder4 = UNet._block(features * 4, features * 8, name="enc4")
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.bottleneck = UNet._block(features * 8, features * 16, name="bottleneck")
        self.upconv4 = nn.ConvTranspose2d(
            features * 16, features * 8, kernel_size=2, stride=2
        )
        self.decoder4 = UNet._block((features * 8) * 2, features * 8, name="dec4")
        self.upconv3 = nn.ConvTranspose2d(
            features * 8, features * 4, kernel_size=2, stride=2
        )
        self.decoder3 = UNet._block((features * 4) * 2, features * 4, name="dec3")
        self.upconv2 = nn.ConvTranspose2d(
            features * 4, features * 2, kernel_size=2, stride=2
        )
        self.decoder2 = UNet._block((features * 2) * 2, features * 2, name="dec2")
        self.upconv1 = nn.ConvTranspose2d(
            features * 2, features, kernel_size=2, stride=2
        )
        self.decoder1 = UNet._block(features * 2, features, name="dec1")
        self.conv = nn.Conv2d(
            in_channels=features, out_channels=out_channels, kernel_size=1
        )

    def forward(self, x):
        print(x.requires_grad)  # <---- here it is true
        enc1 = self.encoder1(x)  # <---- where the problem happens
        print(enc1.requires_grad)  # <---- here it is false
        enc2 = self.encoder2(self.pool1(enc1))
        print(enc2.requires_grad)
        enc3 = self.encoder3(self.pool2(enc2))
        print(enc3.requires_grad)
        enc4 = self.encoder4(self.pool3(enc3))
        print(enc4.requires_grad)
        bottleneck = self.bottleneck(self.pool4(enc4))
        print(bottleneck.requires_grad)
        dec4 = self.upconv4(bottleneck)
        print(dec4.requires_grad)
        dec4 = torch.cat((dec4, enc4), dim=1)
        print(dec4.requires_grad)
        dec4 = self.decoder4(dec4)
        print(dec4.requires_grad)
        dec3 = self.upconv3(dec4)
        print(dec3.requires_grad)
        dec3 = torch.cat((dec3, enc3), dim=1)
        print(dec3.requires_grad)
        dec3 = self.decoder3(dec3)
        print(dec3.requires_grad)
        dec2 = self.upconv2(dec3)
        print(dec2.requires_grad)
        dec2 = torch.cat((dec2, enc2), dim=1)
        print(dec2.requires_grad)
        dec2 = self.decoder2(dec2)
        print(dec2.requires_grad)
        dec1 = self.upconv1(dec2)
        print(dec1.requires_grad)
        dec1 = torch.cat((dec1, enc1), dim=1)
        print(dec1.requires_grad)
        dec1 = self.decoder1(dec1)
        print(dec1.requires_grad)
        print("going out")
        return torch.sigmoid(self.conv(dec1))

    @staticmethod
    def _block(in_channels, features, name):
        return nn.Sequential(
            OrderedDict(
                [
                    (
                        name + "conv1",
                        nn.Conv2d(
                            in_channels=in_channels,
                            out_channels=features,
                            kernel_size=3,
                            padding=1,
                            bias=False,
                        ),
                    ),
                    (name + "norm1", nn.BatchNorm2d(num_features=features)),
                    (name + "relu1", nn.ReLU(inplace=True)),
                    (
                        name + "conv2",
                        nn.Conv2d(
                            in_channels=features,
                            out_channels=features,
                            kernel_size=3,
                            padding=1,
                            bias=False,
                        ),
                    ),
                    (name + "norm2", nn.BatchNorm2d(num_features=features)),
                    (name + "relu2", nn.ReLU(inplace=True)),
                ]
            )
        )
Edit:
This is the training code
class run_network:
    def __init__(self, eta, epoch, batch_size, train_file_path, validation_file_path, shuffle_after_epoch=True):
        self.eta = eta
        self.epoch = epoch
        self.batch_size = batch_size
        self.train_file_path = train_file_path
        self.validation_file_path = validation_file_path
        self.shuffle_after_epoch = shuffle_after_epoch

    def __call__(self, is_train=False):
        device = torch.device("cpu" if not torch.cuda.is_available() else torch.cuda())
        unet = torch.hub.load('mateuszbuda/brain-segmentation-pytorch', 'unet',
                              in_channels=3, out_channels=1, init_features=32, pretrained=True)
        unet.to(device)
        unet = unet.double()
        optimizer = optim.Adam(unet.parameters(), lr=self.eta)
        dsc_loss = DiceLoss()
        Load_training = NiftiLoader(self.train_file_path)
        Load_validation = NiftiLoader(self.validation_file_path)
        mean_flair, mean_t1, std_flair, std_t1 = Load_training.average_mean_and_std(20, 79, 99)
        total_mean = [mean_flair, mean_t1]
        total_std = [std_flair, std_t1]
        loss_train = []
        loss_validation = []
        for current_epoch in tqdm(range(self.epoch)):
            for phase in ["train", "validation"]:
                if phase == "train":
                    mini_batch = Load_training.create_batch(self.batch_size, self.shuffle_after_epoch)
                    unet.train()
                    print("her22")
                if phase == "validation":
                    print("her")
                    mini_batch = Load_validation.create_batch(self.batch_size, self.shuffle_after_epoch)
                    unet.eval()
                dim1, dim2, dim3 = mini_batch.shape
                for iteration in range(1):
                    if phase == "train":
                        current_batch = Load_training.Load_Image_batch(mini_batch, iteration)
                        image_batch = Load_training.image_zero_mean_normalizer(current_batch)
                    if phase == "validation":
                        current_batch = Load_validation.Load_Image_batch(mini_batch, iteration)
                        image_batch = Load_training.image_zero_mean_normalizer(current_batch, False, mean_list, std_list)
                    image_dim0, image_dim1, image_dim2, image_dim3, image_dim4 = image_batch.shape
                    image_batch = image_batch.reshape((
                        image_dim0,
                        image_dim1 * image_dim2,
                        image_dim3,
                        image_dim4
                    ))
                    image_batch = np.swapaxes(image_batch, 0, 1)
                    image_batch = torch.as_tensor(image_batch)  # .requires_grad_(True)  # , requires_grad=True)
                    image_batch = image_batch.to(device)
                    print(image_batch.requires_grad)
                    optimizer.zero_grad()
                    with torch.set_grad_enabled(is_train == "train"):
                        for j in range(0, 10, 1):
                            # [183*5, 3, 256, 256] -> [12, 3, 256, 256]
                            # NUMBER OF ITERATIONS: (183*5/12) -> one chunk
                            input_image = image_batch[j:j+2, 0:3, :, :]
                            print(input_image.requires_grad)
                            print("going in")
                            y_predicted = unet(input_image)
                            print(y_predicted.requires_grad)
                            print(image_batch[j:j+2, 3, :, :].requires_grad)
                            loss = dsc_loss(y_predicted.squeeze(1), image_batch[j:j+2, 3, :, :])
                            print(loss.requires_grad)
                            if phase == "train":
                                loss_train.append(loss.item())
                                loss.backward()
                                print(loss.item())
                                exit()
                                optimizer.step()
                                print(loss.item())
                                exit()
                            if phase == "validation":
                                loss_validation.append(loss.item())
The number of iterations and the print statements are there to experiment with what the cause could be.
It works fine for me.

# I changed your code a little bit to catch the problem.
def forward(self, x):
    print("encoder1", x.requires_grad)  # <---- here it is true
    enc1 = self.encoder1(x)  # <---- where the problem happens
    print("encoder2", enc1.requires_grad)  # <---- here it is false

a = torch.randn(32, 3, 255, 255, requires_grad=True)
# a.requires_grads = True
print(a)
UNet()(a)

# This is the result:
encoder1 True
encoder2 True
True
True
True
True
True

Can you show me your training code? I guess that is where the problem is. And why do you need to update the input data?
The training code is fine, and the input doesn't need a gradient at all if you just want to train and update the weights.
The real problem is this line here:
with torch.set_grad_enabled(is_train == "train"):
You want to disable the gradients if you are not training. The thing is, is_train is a bool (judging from this: def __call__(self, is_train=False):), so the comparison will always be False and gradients will never be enabled. Just change it to
with torch.set_grad_enabled(is_train):
and you will be fine.
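Here is a small standalone sketch (my own toy tensors, not the question's U-Net) that demonstrates the pitfall and the fix:

import torch

is_train = True                      # a bool, as in the question
print(is_train == "train")           # False: a bool never equals a string

x = torch.randn(2, 2)
w = torch.randn(2, 2, requires_grad=True)

with torch.set_grad_enabled(is_train == "train"):
    y = x @ w
print(y.requires_grad)               # False: grad tracking was silently disabled

with torch.set_grad_enabled(is_train):
    y = x @ w
print(y.requires_grad)               # True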

saving model in pytorch with weight decay

This is my model:
# basic LeNet5 network
class LeNet5_mode0(nn.Module):
    # constructor
    def __init__(self):
        super(LeNet5_mode0, self).__init__()  # call to super constructor
        # define layers
        # 6 # 28x28
        self.conv1 = nn.Sequential(
            # LeNet's first conv layer is 3x32x32; squeeze color channels into 1 and pad 2
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # 16 # 10x10
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.fc1 = nn.Sequential(
            nn.Linear(in_features=16*5*5, out_features=120),
            nn.ReLU()
        )
        self.fc2 = nn.Sequential(
            nn.Linear(in_features=120, out_features=84),
            nn.ReLU()
        )
        self.classifier = nn.Sequential(
            nn.Linear(in_features=84, out_features=10),
            nn.Softmax(dim=1)  # dim=1 means softmax over the columns of 84x10
        )

    # define forward function
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(-1, 16*5*5)  # reshape the tensor to [-1, 16*5*5]
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.classifier(x)
        return x
and I train this model once with:
criterion = nn.CrossEntropyLoss() # aka, LogLoss
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[5,10,15], gamma=0.5)
and then save it with
torch.save(model.state_dict(), savepath)
and load it with
model.load_state_dict(torch.load(loadpath))
So far, no problem. But when I change the optimizer slightly to
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay = 0.0005)
and use the same save-and-load method, I receive the following error:
in loading state_dict for LeNet5_mode0:
Unexpected key(s) in state_dict: "conv1.1.weight", "conv1.1.bias", "conv1.1.running_mean", "conv1.1.running_var", "conv1.1.num_batches_tracked", "conv2.1.weight", "conv2.1.bias", "conv2.1.running_mean", "conv2.1.running_var", "conv2.1.num_batches_tracked".
How can this be fixed? Why does a different optimizer have that effect on the saving of the trained network?

Why does the PyTorch model perform poorly after setting eval()?

I used PyTorch to build a segmentation model that uses BatchNormalization layers. I found that when I set model.eval() at test time, the test result is 0. If I don't set model.eval(), it performs well.
I searched related questions and the conclusion I found is that model.eval() freezes the BN statistics, but I am still confused about how to solve this problem.
My batch size is 1, and this is my model:
import torch
import torch.nn as nn

class Encode_Block(nn.Module):
    def __init__(self, in_feat, out_feat):
        super(Encode_Block, self).__init__()
        self.conv1 = Res_Block(in_feat, out_feat)
        self.conv2 = Res_Block_identity(out_feat, out_feat)

    def forward(self, inputs):
        outputs = self.conv1(inputs)
        outputs = self.conv2(outputs)
        return outputs

class Decode_Block(nn.Module):
    def __init__(self, in_feat, out_feat):
        super(Decode_Block, self).__init__()
        self.conv1 = Res_Block(in_feat, out_feat)
        self.conv2 = Res_Block_identity(out_feat, out_feat)

    def forward(self, inputs):
        outputs = self.conv1(inputs)
        outputs = self.conv2(outputs)
        return outputs

class Conv_Block(nn.Module):
    def __init__(self, in_feat, out_feat):
        super(Conv_Block, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_feat, out_feat, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(),
        )

    def forward(self, inputs):
        outputs = self.conv1(inputs)
        return outputs

def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(
        in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False
    )

def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)

class Res_Block(nn.Module):
    def __init__(self, inplanes, planes, stride=1):
        super(Res_Block, self).__init__()
        self.conv_input = conv1x1(inplanes, planes)
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn = nn.BatchNorm2d(planes)
        self.relu = nn.LeakyReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.conv3 = conv1x1(planes, planes)
        self.stride = stride

    def forward(self, x):
        residual = self.conv_input(x)
        out = self.conv1(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn(out)
        out += residual
        out = self.relu(out)
        return out

class Res_Block_identity(nn.Module):
    def __init__(self, inplanes, planes, stride=1):
        super(Res_Block_identity, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn = nn.BatchNorm2d(planes)
        self.relu = nn.LeakyReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.conv3 = conv1x1(planes, planes)
        self.stride = stride

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn(out)
        out += residual
        out = self.relu(out)
        return out

class UpConcat(nn.Module):
    def __init__(self, in_feat, out_feat):
        super(UpConcat, self).__init__()
        self.de_conv = nn.ConvTranspose2d(in_feat, out_feat, kernel_size=2, stride=2)

    def forward(self, inputs, down_outputs):
        outputs = self.de_conv(inputs)
        out = torch.cat([down_outputs, outputs], 1)
        return out

class Res_UNet(nn.Module):
    def __init__(self, num_channels=1, num_classes=1):
        super(Res_UNet, self).__init__()
        flt = 64
        self.down1 = Encode_Block(num_channels, flt)
        self.down2 = Encode_Block(flt, flt * 2)
        self.down3 = Encode_Block(flt * 2, flt * 4)
        self.down4 = Encode_Block(flt * 4, flt * 8)
        self.down_pool = nn.MaxPool2d(kernel_size=2)
        self.bottom = Encode_Block(flt * 8, flt * 16)
        self.up_cat1 = UpConcat(flt * 16, flt * 8)
        self.up_conv1 = Decode_Block(flt * 16, flt * 8)
        self.up_cat2 = UpConcat(flt * 8, flt * 4)
        self.up_conv2 = Decode_Block(flt * 8, flt * 4)
        self.up_cat3 = UpConcat(flt * 4, flt * 2)
        self.up_conv3 = Decode_Block(flt * 4, flt * 2)
        self.up_cat4 = UpConcat(flt * 2, flt)
        self.up_conv4 = Decode_Block(flt * 2, flt)
        self.final = nn.Sequential(
            nn.Conv2d(flt, num_classes, kernel_size=1), nn.Sigmoid()
        )

    def forward(self, inputs):
        down1_feat = self.down1(inputs)
        pool1_feat = self.down_pool(down1_feat)
        down2_feat = self.down2(pool1_feat)
        pool2_feat = self.down_pool(down2_feat)
        down3_feat = self.down3(pool2_feat)
        pool3_feat = self.down_pool(down3_feat)
        down4_feat = self.down4(pool3_feat)
        pool4_feat = self.down_pool(down4_feat)
        bottom_feat = self.bottom(pool4_feat)
        up1_feat = self.up_cat1(bottom_feat, down4_feat)
        up1_feat = self.up_conv1(up1_feat)
        up2_feat = self.up_cat2(up1_feat, down3_feat)
        up2_feat = self.up_conv2(up2_feat)
        up3_feat = self.up_cat3(up2_feat, down2_feat)
        up3_feat = self.up_conv3(up3_feat)
        up4_feat = self.up_cat4(up3_feat, down1_feat)
        up4_feat = self.up_conv4(up4_feat)
        outputs = self.final(up4_feat)
        return outputs
The model completely fails at segmentation after setting model.eval(), but it performs well when model.eval() is removed. I am confused about this. Is model.eval() necessary at test time?
BatchNorm layers keep running estimates of their computed mean and variance during training (model.train()), which are then used for normalization during evaluation (model.eval()).
Each layer has its own statistics for the mean and variance of its outputs/activations.
Since you are reusing your BatchNorm layer (self.bn = nn.BatchNorm2d(planes)) multiple times, the statistics get mixed up and don't represent the actual mean and variance.
So you should create a new BatchNorm layer for every place you use it.
EDIT: I just read that your batch_size is 1, which could also be the core of your problem: see Tensorflow and Batch Normalization with Batch Size==1 => Outputs all zeros
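Here is a minimal sketch of that fix for the question's Res_Block (reusing the conv1x1/conv3x3 helpers from above; the names bn1/bn2/bn3 are my own):

import torch.nn as nn

class Res_Block(nn.Module):
    def __init__(self, inplanes, planes, stride=1):
        super(Res_Block, self).__init__()
        self.conv_input = conv1x1(inplanes, planes)
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)  # one BatchNorm instance per use site,
        self.bn2 = nn.BatchNorm2d(planes)  # so each keeps its own running
        self.bn3 = nn.BatchNorm2d(planes)  # mean/variance statistics
        self.relu = nn.LeakyReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.conv3 = conv1x1(planes, planes)

    def forward(self, x):
        residual = self.conv_input(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += residual
        return self.relu(out)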
