I have a GAN that aims to reproduce the CycleGAN paper. Even though everything is implemented correctly (it works on some basic datasets), on the Facades dataset the discriminator predicts the following:
print(f"Segm: {tf.reduce_mean(discriminator_segm(segmented_test[0:10])).numpy()}, {tf.reduce_mean(discriminator_segm(generator_real_to_segm(real_test[0:10]))).numpy()}")
print(f"Real: {tf.reduce_mean(discriminator_real(real_test[0:10])).numpy()}, {tf.reduce_mean(discriminator_real(generator_segm_to_real(segmented_test[0:10]))).numpy()}")
# Segm: 0.9463781714439392, 0.9564124941825867
# Real: 0.9635561108589172, 0.8240727782249451
In other words, it rates the generated output as highly as the ground truth.
This makes no sense: even if the generated images were identical to the ground truth (they are not; they are far from similar), the optimal discriminator is D*(x) = p_data(x) / (p_data(x) + p_generated(x)), which equals 0.5 when the two distributions match, so the best it could do is predict 0.5.
At this point I tried training the discriminator many more times than the generator, with no difference (the accuracy stays near 90%).
If you need it, even though it's a bit long, this is the training loop:
def train(EPOCHS = 400, batch_size = 4):
    real_train_batched = real_train.reshape((-1, batch_size, *real_train.shape[1:]))
    segmented_train_batched = segmented_train.reshape((-1, batch_size, *segmented_train.shape[1:]))
    for e in range(EPOCHS):
        print(f"Epoch: {e+1}/{EPOCHS}")
        gen_perm = lambda: np.random.permutation(range(len(real_train_batched)))
        perm = gen_perm()
        real_train_batched_gen = real_train_batched[perm]
        segmented_train_batched_gen = segmented_train_batched[perm]
        real_train_batched_disc = real_train_batched[perm]
        segmented_train_batched_disc = segmented_train_batched[perm]
        pack = zip(real_train_batched_gen, segmented_train_batched_gen, real_train_batched_disc, segmented_train_batched_disc)
        for i, (b_real_gen, b_segm_gen, b_real_disc, b_segm_disc) in enumerate(pack):
            print(".", end="")
            let_the_magic_happen(b_segm_gen, b_real_gen, b_segm_disc, b_real_disc, alpha=tf.constant(0.00000005))
@tf.function
def let_the_magic_happen_discriminator(batch_segmented_gen, batch_real_gen, batch_segmented_disc, batch_real_disc):
    fake_real_images = generator_segm_to_real(batch_segmented_gen, training=False)
    fake_segmented_images = generator_real_to_segm(batch_real_gen, training=False)
    with tf.GradientTape(persistent=True) as disc_tape:
        real_real_pred = discriminator_real(batch_real_disc, training=True)
        real_fake_pred = discriminator_real(fake_real_images, training=True)
        segm_real_pred = discriminator_segm(batch_segmented_disc, training=True)
        segm_fake_pred = discriminator_segm(fake_segmented_images, training=True)
        segm_disc_loss = discriminator_loss(segm_real_pred, segm_fake_pred)
        real_disc_loss = discriminator_loss(real_real_pred, real_fake_pred)
    segm_disc_grad = disc_tape.gradient(segm_disc_loss, discriminator_segm.trainable_weights)
    real_disc_grad = disc_tape.gradient(real_disc_loss, discriminator_real.trainable_weights)
    segm_disc_optimizer.apply_gradients(zip(segm_disc_grad, discriminator_segm.trainable_weights))
    real_disc_optimizer.apply_gradients(zip(real_disc_grad, discriminator_real.trainable_weights))
@tf.function
def let_the_magic_happen_generator(batch_segmented_gen, batch_real_gen, alpha):
    with tf.GradientTape(persistent=True) as gen_tape:
        fake_real_images = generator_segm_to_real(batch_segmented_gen, training=True)
        fake_segmented_images = generator_real_to_segm(batch_real_gen, training=True)
        fake_real_images_pred = discriminator_real(fake_real_images, training=False)
        fake_segmented_images_pred = discriminator_segm(fake_segmented_images, training=False)
        batch_real_gen_reconstruction = generator_segm_to_real(fake_segmented_images, training=True)
        batch_segmented_gen_reconstruction = generator_real_to_segm(fake_real_images, training=True)
        reconstruction_loss = alpha * (
            tf.keras.losses.MeanAbsoluteError()(batch_segmented_gen, batch_segmented_gen_reconstruction) +
            tf.keras.losses.MeanAbsoluteError()(batch_real_gen, batch_real_gen_reconstruction)
        )
        # tf.print(reconstruction_loss)
        # tf.print(generator_loss(fake_real_images_pred))
        # tf.print(generator_loss(fake_segmented_images_pred))
        segm_to_real_gen_loss = generator_loss(fake_real_images_pred) + reconstruction_loss
        real_to_segm_gen_loss = generator_loss(fake_segmented_images_pred) + reconstruction_loss
    real_to_segm_gen_grad = gen_tape.gradient(real_to_segm_gen_loss, generator_real_to_segm.trainable_weights)
    segm_to_real_gen_grad = gen_tape.gradient(segm_to_real_gen_loss, generator_segm_to_real.trainable_weights)
    real_to_segm_gen_optimizer.apply_gradients(zip(real_to_segm_gen_grad, generator_real_to_segm.trainable_weights))
    segm_to_real_gen_optimizer.apply_gradients(zip(segm_to_real_gen_grad, generator_segm_to_real.trainable_weights))
def let_the_magic_happen(batch_segmented_gen, batch_real_gen, batch_segmented_disc, batch_real_disc, alpha):
    let_the_magic_happen_discriminator(batch_segmented_gen, batch_real_gen, batch_segmented_disc, batch_real_disc)
    if tf.random.uniform([1]) < 0.2:
        let_the_magic_happen_generator(batch_segmented_gen, batch_real_gen, alpha=alpha)
        let_the_magic_happen_discriminator(batch_segmented_gen, batch_real_gen, batch_segmented_disc, batch_real_disc)
@tf.function
def generator_loss(fake_pred):
    return tf.keras.losses.BinaryCrossentropy()(tf.ones_like(fake_pred), fake_pred)

@tf.function
def discriminator_loss(real_pred, fake_pred):
    # one-sided label smoothing: real targets are 0.95 instead of 1.0
    return tf.keras.losses.BinaryCrossentropy()(
        tf.concat((tf.ones_like(real_pred) - 5e-2, tf.zeros_like(fake_pred)), axis=0),
        tf.concat((real_pred, fake_pred), axis=0),
    )
The generators are two U-Nets and the discriminators are two LeNet-style networks (I also tried PatchGAN discriminators; the same thing happens).
I've also already tried MAE instead of BCE.
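For reference, a minimal sketch (not part of the original code) of how the ~90% figure above can be measured as an explicit accuracy, thresholding the sigmoid outputs at 0.5 with the same models and test arrays used in the prints:
real_pred = discriminator_real(real_test[0:10])
fake_pred = discriminator_real(generator_segm_to_real(segmented_test[0:10]))
disc_acc = 0.5 * (tf.reduce_mean(tf.cast(real_pred > 0.5, tf.float32)) +
                  tf.reduce_mean(tf.cast(fake_pred < 0.5, tf.float32)))
print(f"Real discriminator accuracy: {disc_acc.numpy():.2%}")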
I am trying to get the gradient of the loss with respect to specific images, which are loaded in adv_loader. I tried reusing the code that computes the gradient in the backprop step, but the YOLOv3 network object doesn't seem to have a grad attribute. Any ideas on how to get that?
import mxnet as mx
import gluoncv as gcv
import numpy as np
import matplotlib.pyplot as plt
from mxnet import gluon
from gluoncv.loss import YOLOV3Loss
import time
ctx = [mx.gpu(i) for i in range(mx.context.num_gpus())] if mx.context.num_gpus()>0 else [mx.cpu()]
#Datasets
train = gcv.data.RecordFileDetection('./data/train.rec',coord_normalized=False)
val = gcv.data.RecordFileDetection('./data/val.rec',coord_normalized=False)
classes = ['Passenger Vehicle','Small Car','Bus']
net = gcv.model_zoo.yolo3_mobilenet0_25_custom(pretrained_base=False,classes=classes,ctx=ctx)
net.collect_params().initialize(force_reinit=True,ctx=ctx)
import multiprocessing as mp
### The following looks nasty, and I apologize for the difficulty but basically what we are doing is
# transforming our data so it is in the correct format for yolo3
batch_size = 32 #This can be changed, but it determines how often we update our model.
num_workers = mp.cpu_count()//2
### We will import the following to reduce what I need to type (I am not sure about you, but I like being lazy)
from mxnet import autograd
### the width/height we want the network to train on (416 is typical for YOLOv3; 328 is used here)
sizes = 328
train_transform = gcv.data.transforms.presets.yolo.YOLO3DefaultTrainTransform(sizes,sizes, net)
# return stacked images, center_targets, scale_targets, gradient weights, objectness_targets, class_targets
# additionally, return padded ground truth bboxes, so there are 7 components returned by dataloader
batchify_fn = gcv.data.batchify.Tuple(*([gcv.data.batchify.Stack() for _ in range(6)] + [gcv.data.batchify.Pad(axis=0, pad_val=-1) for _ in range(1)]))
train_loader = mx.gluon.data.DataLoader(train.transform(train_transform), batch_size, shuffle=True,
batchify_fn=batchify_fn, last_batch='rollover',num_workers=num_workers,prefetch=num_workers + num_workers//2)
val_batchify_fn = gcv.data.batchify.Tuple(gcv.data.batchify.Stack(), gcv.data.batchify.Pad(pad_val=-1))
val_transform = gcv.data.transforms.presets.yolo.YOLO3DefaultValTransform(sizes,sizes)
val_loader = mx.gluon.data.DataLoader(
val.transform(val_transform),
batch_size, False, batchify_fn=val_batchify_fn, last_batch='keep',num_workers=num_workers,prefetch=num_workers + num_workers//2)
adv_loader = mx.gluon.data.DataLoader(train.transform(train_transform), batch_size, shuffle=True,
    batchify_fn=batchify_fn, last_batch='rollover', num_workers=num_workers, prefetch=num_workers + num_workers//2)  # placeholder for data that will generate adversarial examples
#How we will validate our model
def validate(net, val_data, ctx, eval_metric):
    """Test on validation dataset."""
    eval_metric.reset()
    # set nms threshold and topk constraint
    net.set_nms(nms_thresh=0.45, nms_topk=455)
    mx.nd.waitall()
    net.hybridize()
    for batch in val_data:
        data = mx.gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
        label = mx.gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
        det_bboxes = []
        det_ids = []
        det_scores = []
        gt_bboxes = []
        gt_ids = []
        gt_difficults = []
        for x, y in zip(data, label):
            # get prediction results
            ids, scores, bboxes = net(x)
            det_ids.append(ids)
            det_scores.append(scores)
            # clip to image size
            det_bboxes.append(bboxes.clip(0, batch[0].shape[2]))
            # split ground truths
            gt_ids.append(y.slice_axis(axis=-1, begin=4, end=5))
            gt_bboxes.append(y.slice_axis(axis=-1, begin=0, end=4))
            gt_difficults.append(y.slice_axis(axis=-1, begin=5, end=6) if y.shape[-1] > 5 else None)
        # update metric
        eval_metric.update(det_bboxes, det_ids, det_scores, gt_bboxes, gt_ids, gt_difficults)
    return eval_metric.get()
eval_metric = gcv.utils.metrics.voc_detection.VOCMApMetric(class_names=classes)
nepochs=11
net.initialize(force_reinit=True)
net.collect_params().reset_ctx(ctx)
#Grab a trainer or optimizer to perform the optimization
trainer = mx.gluon.Trainer(net.collect_params(),'adam',
{'learning_rate':0.001},
kvstore='device')
for i in range(nepochs):
    now = time.time()
    mx.nd.waitall()
    net.hybridize(static_alloc=True, static_shape=True)
    for ixl, batch in enumerate(train_loader):
        data = mx.gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
        # objectness, center_targets, scale_targets, weights, class_targets
        fixed_targets = [mx.gluon.utils.split_and_load(batch[it], ctx_list=ctx, batch_axis=0) for it in range(1, 6)]
        gt_boxes = mx.gluon.utils.split_and_load(batch[6], ctx_list=ctx, batch_axis=0)
        sum_losses = []
        with autograd.record():
            for ix, x in enumerate(data):
                obj_loss, center_loss, scale_loss, cls_loss = net(x, gt_boxes[ix], *[ft[ix] for ft in fixed_targets])
                sum_losses.append(obj_loss + center_loss + scale_loss + cls_loss)
            autograd.backward(sum_losses)
        trainer.step(batch_size)
    for ixl, batch in enumerate(adv_loader):
        data = mx.gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
        # objectness, center_targets, scale_targets, weights, class_targets
        fixed_targets = [mx.gluon.utils.split_and_load(batch[it], ctx_list=ctx, batch_axis=0) for it in range(1, 6)]
        gt_boxes = mx.gluon.utils.split_and_load(batch[6], ctx_list=ctx, batch_axis=0)
        sum_losses = []
        with autograd.record():
            for ix, x in enumerate(data):
                obj_loss, center_loss, scale_loss, cls_loss = net(x, gt_boxes[ix], *[ft[ix] for ft in fixed_targets])
                sum_losses.append(obj_loss + center_loss + scale_loss + cls_loss)
            autograd.backward(sum_losses, retain_graph=True)
        print(net.grad.asnumpy())  # this line raises the AttributeError described below
This gives AttributeError: 'YOLOV3' object has no attribute 'grad'. I am following this documentation: https://mxnet.apache.org/versions/1.6/api/python/docs/tutorials/packages/autograd/index.html
Any idea how to pass an image through the network and get the gradient of the YOLO loss with respect to it?
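For what it's worth, in MXNet's autograd the gradient is stored on the NDArray rather than on the network object, so a sketch along the lines of the linked tutorial (reusing the loader and target unpacking from the question, not the author's code) would attach a gradient buffer to the input image and read x.grad after backward:
for batch in adv_loader:
    data = mx.gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
    fixed_targets = [mx.gluon.utils.split_and_load(batch[it], ctx_list=ctx, batch_axis=0) for it in range(1, 6)]
    gt_boxes = mx.gluon.utils.split_and_load(batch[6], ctx_list=ctx, batch_axis=0)
    for ix, x in enumerate(data):
        x.attach_grad()  # allocate a gradient buffer on the input image
        with autograd.record():
            obj_loss, center_loss, scale_loss, cls_loss = net(x, gt_boxes[ix], *[ft[ix] for ft in fixed_targets])
            loss = obj_loss + center_loss + scale_loss + cls_loss
        loss.backward()
        print(x.grad.asnumpy())  # gradient of the YOLO loss w.r.t. the image
    break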
I installed the module "art" yesterday, but when I try to run my code I get the following error:
Traceback (most recent call last):
File "D:/Desktop/captcha/src1/adv_ex.py", line 10, in <module>
from art.attacks.evasion import FastGradientMethod
ModuleNotFoundError: No module named 'art.attacks'
I searched for it on the Internet, but there is little information about this error.
I am sure that the module "art" (version 5.1) is installed.
I hope you can help me. Many thanks.
Here is the full code:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
import torch
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from art.attacks.evasion import FastGradientMethod
from art.attacks.evasion import BasicIterativeMethod
from art.estimators.classification import PyTorchClassifier
from art.utils import load_mnist
from model import Net
from model import CaptchaData
from model import DataLoader
from model import vec2text
# Step1: Load the original dataset
transform = transforms.Compose([transforms.ToTensor()])  # no data augmentation or normalization
test_data = CaptchaData('./testset/', transform=transform)
test_data_loader = DataLoader(test_data, batch_size=128, num_workers=0, shuffle=True, drop_last=True)
#Step 2: Load the model
net = Net()
# switch device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('current device:' , device)
print("right")
net.to(device)
#Define the loss function and optimizer
criterion = nn.MultiLabelSoftMarginLoss()
criterion.requires_grad = True #loss function
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
#Load the model
model_path = './module_build/model.pth'
checkpoint = torch.load(model_path)
net.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
net.eval()
# Step 3: Create the ART classifier for attacking
classifier = PyTorchClassifier(
model=net,
clip_values=(0.0,255.0),
loss=criterion,
optimizer=optimizer,
input_shape=(3,140,44),
nb_classes=222,
)
# Step 4: Define the eval function for evaluating the result
captcha_list = list('0123456789abcdefghijklmnopqrstuvwxyz_')  # 37 symbols; 6 * 37 = 222 = nb_classes
captcha_length = 6
def calculat_acc(output, target):
    output, target = output.view(-1, len(captcha_list)), target.view(-1, len(captcha_list))
    output = nn.functional.softmax(output, dim=1)
    output = torch.argmax(output, dim=1)
    target = torch.argmax(target, dim=1)
    output, target = output.view(-1, captcha_length), target.view(-1, captcha_length)
    c = 0
    for i, j in zip(target, output):
        if torch.equal(i, j):
            c += 1
    acc = c / output.size()[0] * 100
    return acc
# Step 6: craft attack with FGSM and show the adversarial picture
acc, i = 0, 0
# with torch.no_grad():
for inputs, labels in test_data_loader:
    # Before
    plt.figure()
    plt.imshow(inputs[124].permute(1, 2, 0))
    attack = FastGradientMethod(estimator=classifier, eps=0.1)
    inputs_adv = attack.generate(x=inputs)
    inputs_adv = torch.as_tensor(inputs_adv)
    print(vec2text(labels[124].view(6, -1)))
    # After
    plt.figure()
    plt.imshow(inputs_adv[124].permute(1, 2, 0))
    for k in range(128):  # use k here so the batch counter i is not overwritten
        result = transforms.ToPILImage()(inputs_adv[k])
        result.save("./input_adv/" + vec2text(labels[k].view(6, -1)) + ".jpg")
    outputs = net(inputs_adv)
    print(vec2text(outputs[124].view(6, -1)))
    acc += calculat_acc(outputs, labels)
    i += 1
    break
print('Accuracy: %.3f %%' % (acc/i))
'''
# Step 6: craft attack with BIM and show the adversarial picture
acc, i = 0, 0
# with torch.no_grad():
for inputs, labels in test_data_loader:
    # Before
    plt.figure()
    plt.imshow(inputs[124].permute(1, 2, 0))
    attack = BasicIterativeMethod(estimator=classifier, eps=0.1, eps_step=0.01)
    inputs_adv = attack.generate(x=inputs)
    inputs_adv = torch.as_tensor(inputs_adv)
    # After
    plt.figure()
    plt.imshow(inputs_adv[124].permute(1, 2, 0))
    outputs = net(inputs_adv)
    acc += calculat_acc(outputs, labels)
    i += 1
    break
print('Accuracy: %.3f %%' % (acc/i))
'''
# Step 7: crop the image
for inputs, labels in test_data_loader:
    # Before
    plt.figure()
    plt.imshow(inputs[124].permute(1, 2, 0))
    # Transform the tensor to an image for further operation
    image = transforms.ToPILImage()(inputs[124])
    # crop the left part
    image_left = image.crop((28, 0, 140, 44))
    plt.figure()
    plt.imshow(np.asarray(image_left))
    attack = FastGradientMethod(estimator=classifier, eps=0.1)
    inputs_adv = attack.generate(x=inputs)
    inputs_adv = torch.as_tensor(inputs_adv)
    # After
    plt.figure()
    plt.imshow(inputs_adv[124].permute(1, 2, 0))
    # Crop one part of the image.
    image = transforms.ToPILImage()(inputs_adv[124])
    (left, upper, right, lower) = (0, 0, 28, 44)
    image_crop = image.crop((left, upper, right, lower))
    plt.figure()
    plt.imshow(np.asarray(image_crop))
    # combine the final image.
    dst = Image.new('RGB', (image_crop.width + image_left.width, image_crop.height))
    dst.paste(image_crop, (0, 0))
    dst.paste(image_left, (image_crop.width, 0))
    dst.save("./input_adv/" + str(i) + ".jpg")
    # Show the final result
    plt.figure()
    plt.imshow(np.asarray(dst))
    # Test it on the pre-trained model
    outputs = net(inputs_adv)
    break
# save cropped images
for inputs, labels in test_data_loader:
    for i in range(128):
        # Transform the tensor to an image for further operation
        image = transforms.ToPILImage()(inputs[i])
        # crop the left part
        image_left = image.crop((28, 8, 140, 44))
        attack = FastGradientMethod(estimator=classifier, eps=0.1)
        inputs_adv = attack.generate(x=inputs)
        inputs_adv = torch.as_tensor(inputs_adv)
        # crop one part of the image.
        image = transforms.ToPILImage()(inputs_adv[i])
        (left, upper, right, lower) = (0, 8, 28, 44)
        image_crop = image.crop((left, upper, right, lower))
        # combine the final image.
        dst = Image.new('RGB', (image_crop.width + image_left.width, image_crop.height))
        dst.paste(image_crop, (0, 0))
        dst.paste(image_left, (image_crop.width, 0))
        # Save the final results
        dst.save("./input_adv_cropped/" + vec2text(labels[i].view(6, -1)) + ".jpg")
    break
# Test accuracy on cropped images
test_data_cropped = CaptchaData('./input_adv_cropped/', transform=transform)
test_data_loader_cropped = DataLoader(test_data_cropped, batch_size=8, num_workers=0, shuffle=True, drop_last=True)
acc, i = 0, 0
with torch.no_grad():
    for inputs, labels in test_data_loader_cropped:
        outputs = net(inputs)
        for j in range(8):
            plt.figure()
            print(vec2text(labels[j].view(6, -1)))
            print(vec2text(outputs[j].view(6, -1)))
            plt.imshow(inputs[j].permute(1, 2, 0))
            print("\n")
        acc += calculat_acc(outputs, labels)
        i += 1
        break
print('Accuracy: %.3f %%' % (acc/i))
I have solved this problem.
It was because I mixed up the art package with adversarial-robustness-toolbox.
After I used
pip install adversarial-robustness-toolbox
to install it, my code ran normally.
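For anyone hitting the same thing: on PyPI, art is an unrelated ASCII-art package (its 5.x version numbers match the "version 5.1" mentioned above), while the Adversarial Robustness Toolbox installs under the same art import name. A sketch of the full fix, assuming that mix-up:
pip uninstall art
pip install adversarial-robustness-toolbox
python -c "from art.attacks.evasion import FastGradientMethod; print('ok')"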
We are using the TensorFlow library for face recognition. Our code works fine for single images, but when we run it as an API, the prediction time increases with every subsequent request. This happens because it also goes through previously predicted images, which ideally should not happen. Please find the code I am using below.
def train():
    with tf.Session(config=tf.ConfigProto(log_device_placement=False)) as sess:
        test_set = _get_test_data(input_directory)
        images, labels = _load_images_and_labels(test_set, image_size=160, batch_size=batch_size,
                                                 num_threads=4, num_epochs=1)
        _load_model(model_filepath=model_path)
        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        sess.run(init_op)
        images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
        embedding_layer = tf.get_default_graph().get_tensor_by_name("embeddings:0")
        phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        emb_array, label_array = _create_embeddings(embedding_layer, images, labels, images_placeholder,
                                                    phase_train_placeholder, sess)
        classifier_filename = classifier_output_path
        class_name, prob = _evaluate_classifier(emb_array, label_array, classifier_filename)
        coord.request_stop()
        coord.join(threads)
def _create_embeddings(embedding_layer, images, labels, images_placeholder, phase_train_placeholder, sess):
    emb_array = None
    label_array = None
    try:
        i = 0
        while True:
            print("batch images")
            batch_images, batch_labels = sess.run([images, labels])
            print('Processing iteration {} batch of size: {}'.format(i, len(batch_labels)))
            emb = sess.run(embedding_layer,
                           feed_dict={images_placeholder: batch_images, phase_train_placeholder: False})
            emb_array = np.concatenate([emb_array, emb]) if emb_array is not None else emb
            label_array = np.concatenate([label_array, batch_labels]) if label_array is not None else batch_labels
            i += 1
    except tf.errors.OutOfRangeError:
        pass
    return emb_array, label_array
It picks up previously predicted images at
`batch_images, batch_labels = sess.run([images, labels])`
in the _create_embeddings function. I think the problem is some unclosed threads, because of which sess.run processes all queued threads. Can anyone help me with this?
During debugging I found that information about previously predicted images lives in the default graph, which is picked up when the following lines execute:
images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
embedding_layer = tf.get_default_graph().get_tensor_by_name("embeddings:0")
So resetting the default graph before starting the session solves the problem of rescanning previously predicted images:
tf.reset_default_graph()
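As a sketch (assuming TF 1.x, as in the code above), the reset goes at the top of the request handler, before any ops are rebuilt:
def train():
    tf.reset_default_graph()  # drop tensors left over from previous requests
    with tf.Session(config=tf.ConfigProto(log_device_placement=False)) as sess:
        # ... build inputs, load the model, and create embeddings as before
        pass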
This is part of my current Python code for NN training using the CNTK module:
batch_axis = C.Axis.default_batch_axis()
input_seq_axis = C.Axis.default_dynamic_axis()
input_dynamic_axes = [batch_axis, input_seq_axis]
input_dynamic_axes2 = [batch_axis, input_seq_axis]
input = C.input_variable(n_ins, dynamic_axes=input_dynamic_axes, dtype=numpy.float32)
output = C.input_variable(n_outs, dynamic_axes=input_dynamic_axes2, dtype=numpy.float32)
dnn_model = cntk_model.create_model(input, hidden_layer_type, hidden_layer_size, n_outs)
loss = C.squared_error(dnn_model, output)
error = C.squared_error(dnn_model, output)
lr_schedule = C.learning_rate_schedule(current_finetune_lr, C.UnitType.minibatch)
momentum_schedule = C.momentum_schedule(current_momentum)
learner = C.adam(dnn_model.parameters, lr_schedule, momentum_schedule, unit_gain = False, l1_regularization_weight=l1_reg, l2_regularization_weight= l2_reg)
trainer = C.Trainer(dnn_model, (loss, error), [learner])
And here is the code for creating the NN model:
def create_model(features, hidden_layer_type, hidden_layer_size, n_out):
    logger.debug('Creating cntk model')
    assert len(hidden_layer_size) == len(hidden_layer_type)
    n_layers = len(hidden_layer_size)
    my_layers = list()
    for i in xrange(n_layers):
        if hidden_layer_type[i] == 'TANH':
            my_layers.append(C.layers.Dense(hidden_layer_size[i], activation=C.tanh, init=C.layers.glorot_uniform()))
        elif hidden_layer_type[i] == 'LSTM':
            my_layers.append(C.layers.Recurrence(C.layers.LSTM(hidden_layer_size[i])))
        else:
            raise Exception('Unknown hidden layer type')
    my_layers.append(C.layers.Dense(n_out, activation=None))
    my_model = C.layers.Sequential(my_layers)
    my_model = my_model(features)
    return my_model
Now I would like to change the backpropagation so that the error is calculated not on the direct network output but on the output after some additional calculation. I tried to define something like this:
def create_error_function(self, prediction, target):
    prediction_denorm = C.element_times(prediction, self.std_vector)
    prediction_denorm = C.plus(prediction_denorm, self.mean_vector)
    prediction_denorm_rounded = C.round(C.element_times(prediction_denorm[0:5], C.round(prediction_denorm[5])))
    prediction_denorm_rounded = C.element_divide(prediction_denorm_rounded, C.round(prediction_denorm[5]))
    prediction_norm = C.minus(prediction_denorm_rounded, self.mean_vector[0:5])
    prediction_norm = C.element_divide(prediction_norm, self.std_vector[0:5])
    first = C.squared_error(prediction_norm, target[0:5])
    second = C.minus(C.round(prediction_denorm[5]), self.mean_vector[5])
    second = C.element_divide(second, self.std_vector[5])
    return C.plus(first, C.squared_error(second, target[5]))
and use it instead of the standard squared_error.
And here is the part for NN training:
dnn_model = cntk_model.create_model(input, hidden_layer_type, hidden_layer_size, n_outs)
error_function = cntk_model.ErrorFunction(cmp_mean_vector, cmp_std_vector)
loss = error_function.create_error_function(dnn_model, output)
error = error_function.create_error_function(dnn_model, output)
lr_schedule = C.learning_rate_schedule(current_finetune_lr, C.UnitType.minibatch)
momentum_schedule = C.momentum_schedule(current_momentum)
learner = C.adam(dnn_model.parameters, lr_schedule, momentum_schedule, unit_gain = False, l1_regularization_weight=l1_reg,
l2_regularization_weight= l2_reg)
trainer = C.Trainer(dnn_model, (loss, error), [learner])
trainer.train_minibatch({input: temp_train_x, output: temp_train_y})
But after two epochs I start getting the same average loss every time, as if my network is not learning.
Every time you want to change how backprop works, you need to use stop_gradient. It is the only function whose gradient differs from the gradient of its forward-pass operation: in the forward pass stop_gradient acts as the identity, while in the backward pass it blocks the gradient from propagating.
To apply an operation f(x) to some x in the forward pass and pretend it never happened in the backward pass, you write C.stop_gradient(f(x) - x) + x. In your case that would be
norm_features = C.stop_gradient(features/normalization - features) + features
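Applied to the error function above, where the C.round calls kill the gradient (my reading of the problem, not something stated explicitly in this answer), a straight-through rounding helper would look like:
def straight_through_round(x):
    # forward pass: C.round(x); backward pass: gradient of the identity
    return C.stop_gradient(C.round(x) - x) + x
Each C.round(...) in create_error_function can then be replaced with straight_through_round(...), so the loss is computed on rounded values while gradients still flow to the network.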