I'm currently trying to adapt a PyTorch Wave-U-Net implementation (https://github.com/f90/Wave-U-Net-Pytorch) so that it works for audio mixing rather than source separation. I'm still quite new to PyTorch, so I'm not sure how to structure a forward pass for multiple inputs (8 audio tracks, each corresponding to a separate stem or instrument) and a single output (a mixture track produced from the inputs).
The network is below; I don't think the functions in the middle are relevant or need changing. The output is a dictionary with a key for each instrument mapping to the estimated audio source. That is obviously unnecessary for my task; I think the only changes needed are in __init__ and forward, but I'm unclear on what changes are necessary and how to implement them.
class Waveunet(nn.Module):
def __init__(self, num_inputs, num_channels, num_outputs, instruments, kernel_size, target_output_size, conv_type, res, separate=False, depth=1, strides=2):
super(Waveunet, self).__init__()
self.num_levels = len(num_channels)
self.strides = strides
self.kernel_size = kernel_size
self.num_inputs = num_inputs
self.num_outputs = num_outputs
self.depth = depth
self.instruments = instruments
self.separate = separate
# Only odd filter kernels allowed
assert(kernel_size % 2 == 1)
self.waveunets = nn.ModuleDict()
model_list = instruments if separate else ["ALL"]
# Create a model for each source if we separate sources separately, otherwise only one (model_list=["ALL"])
for instrument in model_list:
module = nn.Module()
module.downsampling_blocks = nn.ModuleList()
module.upsampling_blocks = nn.ModuleList()
for i in range(self.num_levels - 1):
in_ch = num_inputs if i == 0 else num_channels[i]
module.downsampling_blocks.append(
DownsamplingBlock(in_ch, num_channels[i], num_channels[i+1], kernel_size, strides, depth, conv_type, res))
for i in range(0, self.num_levels - 1):
module.upsampling_blocks.append(
UpsamplingBlock(num_channels[-1-i], num_channels[-2-i], num_channels[-2-i], kernel_size, strides, depth, conv_type, res))
module.bottlenecks = nn.ModuleList(
[ConvLayer(num_channels[-1], num_channels[-1], kernel_size, 1, conv_type) for _ in range(depth)])
# Output conv
outputs = num_outputs if separate else num_outputs * len(instruments)
module.output_conv = nn.Conv1d(num_channels[0], outputs, 1)
self.waveunets[instrument] = module
self.set_output_size(target_output_size)
def set_output_size(self, target_output_size):
self.target_output_size = target_output_size
self.input_size, self.output_size = self.check_padding(target_output_size)
print("Using valid convolutions with " + str(self.input_size) + " inputs and " + str(self.output_size) + " outputs")
assert((self.input_size - self.output_size) % 2 == 0)
self.shapes = {"output_start_frame" : (self.input_size - self.output_size) // 2,
"output_end_frame" : (self.input_size - self.output_size) // 2 + self.output_size,
"output_frames" : self.output_size,
"input_frames" : self.input_size}
def check_padding(self, target_output_size):
# Ensure number of outputs covers a whole number of cycles so each output in the cycle is weighted equally during training
bottleneck = 1
while True:
out = self.check_padding_for_bottleneck(bottleneck, target_output_size)
if out is not False:
return out
bottleneck += 1
def check_padding_for_bottleneck(self, bottleneck, target_output_size):
module = self.waveunets[[k for k in self.waveunets.keys()][0]]
try:
curr_size = bottleneck
for idx, block in enumerate(module.upsampling_blocks):
curr_size = block.get_output_size(curr_size)
output_size = curr_size
# Bottleneck-Conv
curr_size = bottleneck
for block in reversed(module.bottlenecks):
curr_size = block.get_input_size(curr_size)
for idx, block in enumerate(reversed(module.downsampling_blocks)):
curr_size = block.get_input_size(curr_size)
assert(output_size >= target_output_size)
return curr_size, output_size
except AssertionError as e:
return False
def forward_module(self, x, module):
'''
A forward pass through a single Wave-U-Net (multiple Wave-U-Nets might be used, one for each source)
:param x: Input mix
:param module: Network module to be used for prediction
:return: Source estimates
'''
shortcuts = []
out = x
# DOWNSAMPLING BLOCKS
for block in module.downsampling_blocks:
out, short = block(out)
shortcuts.append(short)
# BOTTLENECK CONVOLUTION
for conv in module.bottlenecks:
out = conv(out)
# UPSAMPLING BLOCKS
for idx, block in enumerate(module.upsampling_blocks):
out = block(out, shortcuts[-1 - idx])
# OUTPUT CONV
out = module.output_conv(out)
if not self.training: # At test time clip predictions to valid amplitude range
out = out.clamp(min=-1.0, max=1.0)
return out
def forward(self, x, inst=None):
curr_input_size = x.shape[-1]
assert(curr_input_size == self.input_size) # User promises to feed the proper input himself, to get the pre-calculated (NOT the originally desired) output size
if self.separate:
return {inst : self.forward_module(x, self.waveunets[inst])}
else:
assert(len(self.waveunets) == 1)
out = self.forward_module(x, self.waveunets["ALL"])
out_dict = {}
for idx, inst in enumerate(self.instruments):
out_dict[inst] = out[:, idx * self.num_outputs:(idx + 1) * self.num_outputs]
return out_dict
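One possible direction (a hedged sketch, not from the original thread): treat the 8 stems as channels of a single input tensor, so forward can stay single-input. With mono stems, num_inputs becomes 8 and a single "mix" head replaces the per-instrument outputs; the hyperparameter values below are purely illustrative:
import torch
model = Waveunet(num_inputs=8,                   # 8 mono stems stacked as channels
                 num_channels=[32, 64, 128, 256],
                 num_outputs=1,                  # one mono mixture channel
                 instruments=["mix"],            # a single output head
                 kernel_size=5,
                 target_output_size=44100,
                 conv_type="gn",
                 res="fixed")
stems = torch.randn(4, 8, model.input_size)      # batch of 4, padded to the valid input size
mix = model(stems)["mix"]                        # forward() still returns a dict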
The following training curves were all generated by the same TensorFlow + Keras script, written in Python:
RED line uses five features.
GREEN line uses seven features.
BLUE line uses nine features.
Can anyone tell me the probable cause of the oscillation of the GREEN line so that I can troubleshoot my script?
Source code:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
#os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0" # Use both gpus for training.
import sys, random
import time
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
import numpy as np
from lxml import etree, objectify
# <editor-fold desc="GPU">
# resolve GPU related issues.
try:
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_instance in physical_devices:
tf.config.experimental.set_memory_growth(gpu_instance, True)
except Exception as e:
pass
# END of try
# </editor-fold>
# <editor-fold desc="Lxml helper">
class LxmlHelper:
@classmethod
def objectify_xml(cls, input_path_dir):
file_dom = etree.parse(input_path_dir) # parse xml and convert it into DOM
file_xml_bin = etree.tostring(file_dom, pretty_print=False, encoding="ascii") # encode DOM into ASCII object
file_xml_text = file_xml_bin.decode() # convert binary ASCII object into ASCII text
objectified_xml = objectify.fromstring(file_xml_text) # convert text into an lxml objectified element
return objectified_xml
# </editor-fold>
# <editor-fold desc="def encode(letter)">
def encode(letter: str):
if letter == 'H':
return [1.0, 0.0, 0.0]
elif letter == 'E':
return [0.0, 1.0, 0.0]
elif letter == 'C':
return [0.0, 0.0, 1.0]
elif letter == '-':
return [0.0, 0.0, 0.0]
# END of function
def encode_string_1(pattern_str: str):
# Iterate over the string
one_hot_binary_str = []
for ch in pattern_str:
try:
one_hot_binary_str = one_hot_binary_str + encode(ch)
except Exception as e:
print(pattern_str, one_hot_binary_str, ch)
# END of for loop
return one_hot_binary_str
# END of function
def encode_string_2(pattern_str: str):
# Iterate over the string
one_hot_binary_str = []
for ch in pattern_str:
temp_encoded_vect = [encode(ch)]
one_hot_binary_str = one_hot_binary_str + temp_encoded_vect
# END of for loop
return one_hot_binary_str
# END of function
# </editor-fold>
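# NOTE (illustrative, assuming the encoders above): the two functions differ
# only in output shape, e.g.
#   encode_string_1("HE-") -> [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]   (flat)
#   encode_string_2("HE-") -> [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]]  (nested)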
# <editor-fold desc="def load_data()">
def load_data_k(fname: str, class_index: int, feature_start_index: int, **selection):
"""Loads data for training and validation
:param fname: (``string``) - name of the file with the data
:param selection: (``kwargs``) - see below
:return: four tensorflow tensors: training input, training output, validation input and validation output
:Keyword Arguments:
* *top_n_lines* (``number``) --
take top N lines of the input and disregard the rest
* *random_n_lines* (``number``) --
take random N lines of the input and disregard the rest
* *validation_part* (``float``) --
separate N_lines * given_fraction of the input lines from the training set and use
them for validation. When the given_fraction = 1.0, then the same input set of
N_lines is used both for training and validation (this is the default)
"""
i = 0
file = open(fname)
if "top_n_lines" in selection:
lines = [next(file) for _ in range(int(selection["top_n_lines"]))]
elif "random_n_lines" in selection:
tmp_lines = file.readlines()
lines = random.sample(tmp_lines, int(selection["random_n_lines"]))
else:
lines = file.readlines()
data_x, data_y, data_z = [], [], []
for l in lines:
row = l.strip().split() # return a list of words from the line.
x = [float(ix) for ix in row[feature_start_index:]] # convert the features (from feature_start_index onward) into a vector of floats.
y = encode(row[class_index]) # one-hot encode the class letter.
z = encode_string_1(row[class_index+1])
data_x.append(x) # append the vector into 'data_x'
data_y.append(y) # append the vector into 'data_y'
data_z.append(z) # append the vector into 'data_z'
# END for l in lines
num_rows = len(data_x)
given_fraction = selection.get("validation_part", 1.0)
if given_fraction > 0.9999:
valid_x, valid_y, valid_z = data_x, data_y, data_z
else:
n = int(num_rows * given_fraction)
valid_x, valid_y, valid_z = data_x[:n], data_y[:n], data_z[:n] # hold out the first n rows for validation
data_x, data_y, data_z = data_x[n:], data_y[n:], data_z[n:] # train on the remaining rows
# END of if-else block
tx = tf.convert_to_tensor(data_x, np.float32)
ty = tf.convert_to_tensor(data_y, np.float32)
tz = tf.convert_to_tensor(data_z, np.float32)
vx = tf.convert_to_tensor(valid_x, np.float32)
vy = tf.convert_to_tensor(valid_y, np.float32)
vz = tf.convert_to_tensor(valid_z, np.float32)
return tx, ty, tz, vx, vy, vz
# END of the function
# </editor-fold>
# <editor-fold desc="def create_model()">
def create_model(n_hidden_1, n_hidden_2, num_classes, num_features):
# create the model
model = Sequential()
model.add(tf.keras.layers.InputLayer(input_shape=(num_features,)))
model.add(tf.keras.layers.Dense(n_hidden_1, activation='sigmoid'))
model.add(tf.keras.layers.Dense(n_hidden_2, activation='sigmoid'))
###model.add(tf.keras.layers.Dense(n_hidden_3, activation='sigmoid'))
model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))
# instantiate the optimizer
opt = keras.optimizers.SGD(learning_rate=LEARNING_RATE)
# compile the model
model.compile(
optimizer=opt,
loss="categorical_crossentropy",
metrics="categorical_accuracy"
)
# return model
return model
# </editor-fold>
if __name__ == "__main__":
# <editor-fold desc="(input/output parameters)">
my_project_routine = LxmlHelper.objectify_xml("my_project_evaluate.xml")
# input data
INPUT_DATA_FILE = str(my_project_routine.input.input_data_file)
INPUT_PATH = str(my_project_routine.input.input_path)
CLASS_INDEX = int(my_project_routine.input.class_index)
FEATURE_INDEX = int(my_project_routine.input.feature_index)
# output data
OUTPUT_PATH = str(my_project_routine.output.output_path)
MODEL_FILE = str(my_project_routine.output.model_file)
TRAINING_PROGRESS_FILE = str(my_project_routine.output.training_progress_file)
# Learning parameters
LEARNING_RATE = float(my_project_routine.training_params.learning_rate)
EPOCH_SIZE = int(my_project_routine.training_params.epoch_size)
BATCH_SIZE = int(my_project_routine.training_params.batch_size)
INPUT_LINES_COUNT = int(my_project_routine.input.input_lines_count)
VALIDATION_PART = float(my_project_routine.training_params.validation_part)
SAVE_PERIOD = str(my_project_routine.output.save_period)
# NN parameters
HIDDEN_LAYER_1_NEURON_COUNT = int(my_project_routine.hidden_layers.one)
HIDDEN_LAYER_2_NEURON_COUNT = int(my_project_routine.hidden_layers.two)
###HIDDEN_LAYER_3_NEURON_COUNT = int(my_project_routine.hidden_layers.three)
CLASS_COUNT = int(my_project_routine.class_count)
FEATURES_COUNT = int(my_project_routine.features_count)
input_file_path_str = os.path.join(INPUT_PATH, INPUT_DATA_FILE)
training_progress_file_path_str = os.path.join(OUTPUT_PATH, TRAINING_PROGRESS_FILE)
model_file_path = os.path.join(OUTPUT_PATH, MODEL_FILE)
# command-line arg processing
input_file_name_str = None
if len(sys.argv) > 1:
input_file_name_str = sys.argv[1]
else:
input_file_name_str = input_file_path_str
# END of if-else
# </editor-fold>
# <editor-fold desc="(load data from file)">
# load training data from the disk
train_x, train_y, _, validate_x, validate_y, _ = \
load_data_k(
fname=input_file_name_str,
class_index=CLASS_INDEX,
feature_start_index=FEATURE_INDEX,
random_n_lines=INPUT_LINES_COUNT,
validation_part=VALIDATION_PART
)
print("training data size : ", len(train_x))
print("validation data size : ", len(validate_x))
# </editor-fold>
### STEPS_PER_EPOCH = len(train_x) // BATCH_SIZE
### VALIDATION_STEPS = len(validate_x) // BATCH_SIZE
# <editor-fold desc="(model creation)">
# load previously saved NN model
model = None
try:
model = keras.models.load_model(model_file_path)
print("Loading NN model from file.")
model.summary()
except Exception as ex:
print("No NN model found for loading.")
# END of try-except
# </editor-fold>
# <editor-fold desc="(model run)">
# if there is no model loaded, create a new model
if model is None:
csv_logger = keras.callbacks.CSVLogger(training_progress_file_path_str)
checkpoint = ModelCheckpoint(
model_file_path,
monitor='loss',
verbose=1,
save_best_only=True,
mode='auto',
save_freq='epoch'
)
callbacks_vector = [
csv_logger,
checkpoint
]
# Set mirror strategy
#strategy = tf.distribute.MirroredStrategy(devices=["/device:GPU:0","/device:GPU:1"])
#with strategy.scope():
print("New NN model created.")
# create sequential NN model
model = create_model(
n_hidden_1=HIDDEN_LAYER_1_NEURON_COUNT,
n_hidden_2=HIDDEN_LAYER_2_NEURON_COUNT,
##n_hidden_3=HIDDEN_LAYER_3_NEURON_COUNT,
num_classes=CLASS_COUNT,
num_features=FEATURES_COUNT
)
# Train the model with the new callback
history = model.fit(
train_x, train_y,
validation_data=(validate_x, validate_y),
batch_size=BATCH_SIZE,
epochs=EPOCH_SIZE,
callbacks=callbacks_vector,
shuffle=True,
verbose=2
)
print(history.history.keys())
# END of ... with
# END of ... if
# </editor-fold>
Plotting Script
import os
from argparse import ArgumentParser
import random
from typing import List
import matplotlib.pyplot as plt
import numpy as np
import math
import sys
import datetime
class Quad:
def __init__(self, x_vector, y_vector, color_char, label_str):
self.__x_vector = x_vector
self.__y_vector = y_vector
self.__color_char = color_char
self.__label_str = label_str
def get_x_vector(self):
return self.__x_vector
def get_y_vector(self):
return self.__y_vector
def get_color_char(self):
return self.__color_char
def get_label_str(self):
return self.__label_str
class HecaPlotClass:
def __init__(self):
self.__x_label_str: str = None
self.__y_label_str: str = None
self.__title_str: str = None
self.__trio_vector: List[Quad] = []
self.__plotter = plt
@property
def x_label_str(self):
return self.__x_label_str
@x_label_str.setter
def x_label_str(self, t):
self.__x_label_str = t
@property
def y_label_str(self):
return self.__y_label_str
@y_label_str.setter
def y_label_str(self, t):
self.__y_label_str = t
@property
def title_str(self):
return self.__title_str
@title_str.setter
def title_str(self, t):
self.__title_str = t
def add_y_axes(self, trio_obj: Quad):
self.__trio_vector.append(trio_obj)
def generate_plot(self):
for obj in self.__trio_vector:
x_vector = obj.get_x_vector()
y_vector = obj.get_y_vector()
label_str = obj.get_label_str()
# print(label_str)
# print(len(x_vector))
# print(len(y_vector))
self.__plotter.plot(
x_vector,
y_vector,
color=obj.get_color_char(),
label=label_str
)
# END of ... for loop
# Naming the x-axis, y_1_vector-axis and the whole graph
self.__plotter.xlabel(self.__x_label_str)
self.__plotter.ylabel(self.__y_label_str)
self.__plotter.title(self.__title_str)
# Adding a legend, which helps us recognize each curve by its color
self.__plotter.legend()
# To load the display window
#self.__plotter.show()
def save_png(self, output_directory_str):
output_file_str = os.path.join(output_directory_str, self.__title_str + '.png')
self.__plotter.savefig(output_file_str)
def save_pdf(self, output_directory_str):
output_file_str = os.path.join(output_directory_str, self.__title_str + '.pdf')
self.__plotter.savefig(output_file_str)
class MainClass(object):
__colors_vector = ['red', 'green', 'blue', 'cyan', 'magenta', 'yellow', 'orange', 'lightgreen', 'crimson']
__working_dir = r"."
__file_names_vector = ["training_progress-32.txt", "training_progress-64.txt", "training_progress-128.txt"]
__input_files_vector = []
__output_directory = None
__column_no_int = 0
__split_percentage_at_tail_int = 100
__is_pdf_output = False
__is_png_output = False
# <editor-fold desc="def load_data()">
@classmethod
def __load_data(cls, fname: str, percentage_int: int, column_no_int: int):
np_array = np.loadtxt(
fname,
# usecols=range(1,11),
dtype=np.float32,
skiprows=1,
delimiter=","
)
size_vector = np_array.shape
array_len_int = size_vector[0]
rows_count_int = int(percentage_int * array_len_int / 100)
np_array = np_array[-rows_count_int:]
x = np_array[:, 0]
y = np_array[:, column_no_int]
return x, y
# END of the function
# </editor-fold>
# <editor-fold desc="(__parse_args())">
@classmethod
def __parse_args(cls):
# initialize argument parser
my_parser = ArgumentParser()
my_parser.add_argument("-c", help="column no.", type=int)
my_parser.add_argument('-i', nargs='+', help='a list of input files', required=True)
my_parser.add_argument("-o", help="output directory", type=str)
my_parser.add_argument("-n", help="percentage of data to split from tail", type=float)
my_parser.add_argument("--pdf", help="PDF output", action='store_true')
my_parser.add_argument("--png", help="PNG output", action='store_true')
# parse the argument
args = my_parser.parse_args()
cls.__input_files_vector = args.i
cls.__output_directory = args.o
cls.__split_percentage_at_tail_int = args.n
cls.__column_no_int = args.c
cls.__is_pdf_output = args.pdf
cls.__is_png_output = args.png
# </editor-fold>
@classmethod
def main(cls):
cls.__parse_args()
if cls.__input_files_vector is None:
cls.__input_files_vector = cls.__file_names_vector
if cls.__output_directory is None:
cls.__output_directory = cls.__working_dir
if cls.__split_percentage_at_tail_int is None:
cls.__split_percentage_at_tail_int = 100
if cls.__column_no_int is None:
cls.__column_no_int = 1
my_project_plot_obj = HecaPlotClass()
i = 0
for file_path_str in cls.__input_files_vector:
print(file_path_str)
x_vector, y_vector = cls.__load_data(os.path.join(cls.__working_dir, file_path_str), cls.__split_percentage_at_tail_int, cls.__column_no_int)
my_project_plot_obj.x_label_str = "Epoch"
my_project_plot_obj.y_label_str = "Accuracy"
my_project_plot_obj.title_str = "training_plot-{date:%Y-%m-%d_%H:%M:%S}".format(date=datetime.datetime.now())
my_project_plot_obj.x_axis_vector = x_vector
if i == 0:
random_int = 0
else:
random_int = i % (len(cls.__colors_vector)-1)
# END of ... if
print("random_int : ", random_int)
my_project_plot_obj.add_y_axes(Quad(x_vector, y_vector, cls.__colors_vector[random_int], file_path_str))
i = i + 1
# END of ... for loop
my_project_plot_obj.generate_plot()
my_project_plot_obj.save_png(cls.__output_directory)
my_project_plot_obj.save_pdf(cls.__output_directory)
if __name__ == "__main__":
MainClass.main()
The primary reason could be an improper (non-random, i.e. ordered) distribution of the data.
If you look at the accuracy beyond epoch 180, there is an orderly switching between roughly 0.43 and 0.33, and occasionally roughly 0.23. The more important thing to notice is that the accuracy is decreasing (there is no improvement in validation accuracy) as the epochs increase.
The accuracy can increase in such cases if you (1) reduce the batch size, or (2) use a better optimizer such as Adam; also check the learning rate. These changes can help with the shift and the oscillation as well.
Additionally, a running average of the accuracy can be plotted to smooth out the oscillation, as sketched below. This is again a mitigation scheme rather than a correction scheme, but it removes the ordering (partitioning of the data) by mixing nearby data points.
Lastly, I would also reshuffle the data and normalize after each layer, and see if that helps.
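For instance, a running average can be computed from the logged accuracy column before plotting (a small numpy sketch; accuracy here stands for the column loaded from the CSV log):
import numpy as np
def running_average(y, window=10):
    # simple moving average; the output is shorter by window - 1 points
    kernel = np.ones(window) / window
    return np.convolve(y, kernel, mode="valid")
# smoothed = running_average(accuracy, window=10)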
Generally, sharp jumps and flat lines in the accuracy usually mean that a group of examples is being classified as a given class at the same time. If your dataset contains, say, 50 examples with the same combination of 7 features, then they all switch class at the same time. This is what probably causes the sharp jumps: identical or similar examples clustered together.
So for example, if you have 50 men aged 64, and a decision boundary to classify them as more prone to an illness shifts from >65 to >63, then accuracy changes rapidly as all of them change classification at the same time.
Regarding the oscillation of the curve: because of the effect above, the oscillation is amplified by small changes in learning. Your network learns based on cross entropy, which means that it minimizes the difference between the target and your predictions. It therefore operates on the difference between probability and target (say, 0.3 vs. class 0), whereas accuracy compares the predicted class with the target (so, 0 vs. 0) for the same example. Cross entropy is much smoother because it is not affected by the issue outlined above.
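To make this concrete, a toy calculation (hypothetical numbers): if 50 identical examples sit right at the decision boundary, a 0.02 shift in predicted probability flips the accuracy for all of them at once, while the cross entropy barely moves:
import numpy as np
n = 50                          # identical clustered examples
p_before, p_after = 0.49, 0.51  # predicted probability of the true class
acc_before, acc_after = 0.0, 1.0   # argmax flips for all 50 examples at once
ce_before = -n * np.log(p_before)  # ~35.7
ce_after = -n * np.log(p_after)    # ~33.7, a small, smooth change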
I am trying to get the gradient with respect to specific images (sitting in adv_loader). I tried reusing the code that computes the gradient in the backprop step, but the YOLOv3 model doesn't seem to have a grad attribute. Any ideas on how to get that?
import mxnet as mx
import gluoncv as gcv
import numpy as np
import matplotlib.pyplot as plt
from mxnet import gluon
from gluoncv.loss import YOLOV3Loss
import time
ctx = [mx.gpu(i) for i in range(mx.context.num_gpus())] if mx.context.num_gpus()>0 else [mx.cpu()]
#Datasets
train = gcv.data.RecordFileDetection('./data/train.rec',coord_normalized=False)
val = gcv.data.RecordFileDetection('./data/val.rec',coord_normalized=False)
classes = ['Passenger Vehicle','Small Car','Bus']
net = gcv.model_zoo.yolo3_mobilenet0_25_custom(pretrained_base=False,classes=classes,ctx=ctx)
net.collect_params().initialize(force_reinit=True,ctx=ctx)
import multiprocessing as mp
### The following looks nasty, and I apologize for the difficulty but basically what we are doing is
# transforming our data so it is in the correct format for yolo3
batch_size = 32 #This can be changed, but it determines how often we update our model.
num_workers = mp.cpu_count()//2
### We will import the following to reduce what I need to type (I am not sure about you, but I like being lazy)
from mxnet import autograd
### sizes is the width/height we want the network to train on
sizes = 328
train_transform = gcv.data.transforms.presets.yolo.YOLO3DefaultTrainTransform(sizes,sizes, net)
# return stacked images, center_targets, scale_targets, gradient weights, objectness_targets, class_targets
# additionally, return padded ground truth bboxes, so there are 7 components returned by dataloader
batchify_fn = gcv.data.batchify.Tuple(*([gcv.data.batchify.Stack() for _ in range(6)] + [gcv.data.batchify.Pad(axis=0, pad_val=-1) for _ in range(1)]))
train_loader = mx.gluon.data.DataLoader(train.transform(train_transform), batch_size, shuffle=True,
batchify_fn=batchify_fn, last_batch='rollover',num_workers=num_workers,prefetch=num_workers + num_workers//2)
val_batchify_fn = gcv.data.batchify.Tuple(gcv.data.batchify.Stack(), gcv.data.batchify.Pad(pad_val=-1))
val_transform = gcv.data.transforms.presets.yolo.YOLO3DefaultValTransform(sizes,sizes)
val_loader = mx.gluon.data.DataLoader(
val.transform(val_transform),
batch_size, False, batchify_fn=val_batchify_fn, last_batch='keep',num_workers=num_workers,prefetch=num_workers + num_workers//2)
adv_loader = mx.gluon.data.DataLoader(train.transform(train_transform), batch_size, shuffle=True,
batchify_fn=batchify_fn, last_batch='rollover',num_workers=num_workers,prefetch=num_workers + num_workers//2) # placeholder for the data that will be used to generate adversarial examples
#How we will validate our model
def validate(net, val_data, ctx, eval_metric):
"""Test on validation dataset."""
eval_metric.reset()
# set nms threshold and topk constraint
net.set_nms(nms_thresh=0.45, nms_topk=455)
mx.nd.waitall()
net.hybridize()
for batch in val_data:
data = mx.gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
label = mx.gluon.utils.split_and_load(batch[1], ctx_list=ctx,batch_axis=0, even_split=False)
det_bboxes = []
det_ids = []
det_scores = []
gt_bboxes = []
gt_ids = []
gt_difficults = []
for x, y in zip(data, label):
# get prediction results
ids, scores, bboxes = net(x)
det_ids.append(ids)
det_scores.append(scores)
# clip to image size
det_bboxes.append(bboxes.clip(0, batch[0].shape[2]))
# split ground truths
gt_ids.append(y.slice_axis(axis=-1, begin=4, end=5))
gt_bboxes.append(y.slice_axis(axis=-1, begin=0, end=4))
gt_difficults.append(y.slice_axis(axis=-1, begin=5, end=6) if y.shape[-1] > 5 else None)
# update metric
eval_metric.update(det_bboxes, det_ids, det_scores, gt_bboxes, gt_ids, gt_difficults)
return eval_metric.get()
eval_metric = gcv.utils.metrics.voc_detection.VOCMApMetric(class_names=classes)
nepochs=11
net.initialize(force_reinit=True)
net.collect_params().reset_ctx(ctx)
#Grab a trainer or optimizer to perform the optimization
trainer = mx.gluon.Trainer(net.collect_params(),'adam',
{'learning_rate':0.001},
kvstore='device')
for i in range(nepochs):
now = time.time()
mx.nd.waitall()
net.hybridize(static_alloc=True,static_shape=True)
for ixl,batch in enumerate(train_loader):
data = mx.gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
# objectness, center_targets, scale_targets, weights, class_targets
fixed_targets = [mx.gluon.utils.split_and_load(batch[it], ctx_list=ctx, batch_axis=0) for it in range(1, 6)]
gt_boxes = mx.gluon.utils.split_and_load(batch[6], ctx_list=ctx, batch_axis=0)
sum_losses = []
with autograd.record():
for ix, x in enumerate(data):
obj_loss, center_loss, scale_loss, cls_loss = net(x, gt_boxes[ix], *[ft[ix] for ft in fixed_targets])
sum_losses.append(obj_loss + center_loss + scale_loss + cls_loss)
autograd.backward(sum_losses)
trainer.step(batch_size)
for ixl,batch in enumerate(adv_loader):
data = mx.gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
# objectness, center_targets, scale_targets, weights, class_targets
fixed_targets = [mx.gluon.utils.split_and_load(batch[it], ctx_list=ctx, batch_axis=0) for it in range(1, 6)]
gt_boxes = mx.gluon.utils.split_and_load(batch[6], ctx_list=ctx, batch_axis=0)
sum_losses = []
with autograd.record():
for ix, x in enumerate(data):
obj_loss, center_loss, scale_loss, cls_loss = net(x, gt_boxes[ix], *[ft[ix] for ft in fixed_targets])
sum_losses.append(obj_loss + center_loss + scale_loss + cls_loss)
autograd.backward(sum_losses,retain_graph=True)
print(net.grad.asnumpy())
I am getting AttributeError: 'YOLOV3' object has no attribute 'grad'. I am using this documentation: https://mxnet.apache.org/versions/1.6/api/python/docs/tutorials/packages/autograd/index.html
Any idea how to pass an image through the network and get the gradient of the YOLO loss function with respect to it?
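For what it's worth, the usual MXNet pattern is to attach the gradient buffer to the input array itself rather than to the network. A hedged sketch reusing the loss computation above (not a verified fix):
for batch in adv_loader:
    data = mx.gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
    fixed_targets = [mx.gluon.utils.split_and_load(batch[it], ctx_list=ctx, batch_axis=0) for it in range(1, 6)]
    gt_boxes = mx.gluon.utils.split_and_load(batch[6], ctx_list=ctx, batch_axis=0)
    for ix, x in enumerate(data):
        x.attach_grad()  # allocate gradient storage on the input image batch
        with autograd.record():
            obj_loss, center_loss, scale_loss, cls_loss = net(x, gt_boxes[ix], *[ft[ix] for ft in fixed_targets])
            loss = obj_loss + center_loss + scale_loss + cls_loss
        loss.backward()
        print(x.grad.asnumpy())  # d(loss) / d(input images)
    break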
I want to include a sparse Gaussian process model (from the GPflow library) in another project. The problem is that I can't call the prediction function for several inputs at once; I have to call it sequentially. I checked the predictive function predict_f in the SGPR class (https://github.com/GPflow/GPflow/blob/master/gpflow/models/sgpr.py) and found that I could precompute a lot of things in advance. So I made a child class of SGPR, wrote a precompute method, and modified the predictive function:
@params_as_tensors
def precompute(self):
p_num_inducing = len(self.feature)
p_err = self.Y - self.mean_function(self.X)
p_Kuf = self.feature.Kuf(self.kern, self.X)
p_Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
p_sigma = tf.sqrt(self.likelihood.variance)
self.p_L = tf.cholesky(p_Kuu)
p_A = tf.matrix_triangular_solve(self.p_L, p_Kuf, lower=True) / p_sigma
p_B = tf.matmul(p_A, p_A, transpose_b=True) + tf.eye(p_num_inducing, dtype=settings.tf_float)
self.p_LB = tf.cholesky(p_B)
p_Aerr = tf.matmul(p_A, p_err)
self.p_c = tf.matrix_triangular_solve(self.p_LB, p_Aerr, lower=True) / p_sigma
@params_as_tensors
def _build_predict(self, Xnew, full_cov=False):
"""
Compute the mean and variance of the latent function at some new points
Xnew. For a derivation of the terms in here, see the associated SGPR
notebook.
"""
Kus = self.feature.Kuf(self.kern, Xnew)
tmp1 = tf.matrix_triangular_solve(self.p_L, Kus, lower=True)
tmp2 = tf.matrix_triangular_solve(self.p_LB, tmp1, lower=True)
mean = tf.matmul(tmp2, self.p_c, transpose_a=True)
if full_cov:
var = self.kern.K(Xnew) + tf.matmul(tmp2, tmp2, transpose_a=True) \
- tf.matmul(tmp1, tmp1, transpose_a=True)
shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
var = tf.tile(tf.expand_dims(var, 2), shape)
else:
var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) \
- tf.reduce_sum(tf.square(tmp1), 0)
shape = tf.stack([1, tf.shape(self.Y)[1]])
var = tf.tile(tf.expand_dims(var, 1), shape)
return mean + self.mean_function(Xnew), var
But when I run the code, there is no difference in speed. I suppose TensorFlow executes all the expressions only when I call predict_f, but I have no idea how to explicitly precompute some tensors. I hope the TensorFlow gurus can help me; thanks in advance!
I've found a silly but straightforward solution to this problem. Here is a wrapper for the SGPR class that precomputes some common matrices and then uses them for predictions.
from gpflow.models import GPModel, SGPR
from gpflow.decors import params_as_tensors, autoflow
from gpflow import settings
from gpflow.params import Parameter, DataHolder
import tensorflow as tf
class fastSGPR(SGPR, GPModel):
def __init__(self,X_tr, Y_tr, kernel, Zp):
SGPR.__init__(self, X_tr, Y_tr, kern=kernel, Z=Zp)
print("Model has been initialized")
@autoflow()
@params_as_tensors
def precompute(self):
print("Precomputing required tensors...")
p_num_inducing = len(self.feature)
p_err = self.Y - self.mean_function(self.X)
p_Kuf = self.feature.Kuf(self.kern, self.X)
p_Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
p_sigma = tf.sqrt(self.likelihood.variance)
self.p_L = tf.cholesky(p_Kuu)
p_A = tf.matrix_triangular_solve(self.p_L, p_Kuf, lower=True) / p_sigma
p_B = tf.matmul(p_A, p_A, transpose_b=True) + tf.eye(p_num_inducing, dtype=settings.tf_float)
self.p_LB = tf.cholesky(p_B)
p_Aerr = tf.matmul(p_A, p_err)
self.p_c = tf.matrix_triangular_solve(self.p_LB, p_Aerr, lower=True) / p_sigma
print("Tensors have been precomputed")
return self.p_L, self.p_LB, self.p_c
@autoflow((settings.float_type, [None, None]), (settings.float_type, [None, None]), (settings.float_type, [None, None]), (settings.float_type, [None, None]))
def predict_f(self, Xnew, L, LB, c):
"""
Compute the mean and variance of the latent function(s) at the points
Xnew.
"""
return self._build_predict(Xnew, L, LB, c)
@params_as_tensors
def _build_predict(self, Xnew, L, LB, c, full_cov=False):
"""
Compute the mean and variance of the latent function at some new points
Xnew. For a derivation of the terms in here, see the associated SGPR
notebook.
"""
Kus = self.feature.Kuf(self.kern, Xnew)
tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
mean = tf.matmul(tmp2, c, transpose_a=True)
if full_cov:
var = self.kern.K(Xnew) + tf.matmul(tmp2, tmp2, transpose_a=True) \
- tf.matmul(tmp1, tmp1, transpose_a=True)
shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
var = tf.tile(tf.expand_dims(var, 2), shape)
else:
var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) \
- tf.reduce_sum(tf.square(tmp1), 0)
shape = tf.stack([1, tf.shape(self.Y)[1]])
var = tf.tile(tf.expand_dims(var, 1), shape)
return mean + self.mean_function(Xnew), var
def assign(self, params):
params1 = dict(params)
params1["fastSGPR/kern/variance"] = params1.pop("SGPR/kern/variance")
params1["fastSGPR/kern/lengthscales"] = params1.pop("SGPR/kern/lengthscales")
params1["fastSGPR/likelihood/variance"] = params1.pop("SGPR/likelihood/variance")
params1["fastSGPR/feature/Z"] = params1.pop("SGPR/feature/Z")
SGPR.assign(self,params1)
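Usage then looks roughly like this (a sketch; X_tr, Y_tr, kernel, Zp and Xnew are placeholders for your own data):
model = fastSGPR(X_tr, Y_tr, kernel, Zp)
L, LB, c = model.precompute()                # the heavy linear algebra runs once
mean, var = model.predict_f(Xnew, L, LB, c)  # each subsequent prediction is cheap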
I'm working on a seq2seq RNN that generates an output sequence of labels given a seed label. During the inference step I'd like to generate sequences containing only unique labels (i.e., skip labels that have already been added to the output sequence). To do this I created a sampler object that tries to remember the labels that have already been added to the output and reduce their logit values to -np.inf.
Here is the sampler code:
class InferenceSampler(object):
def __init__(self, out_weights, out_biases):
self._out_weights = tf.transpose(out_weights)
self._out_biases = out_biases
self._n_tracks = out_weights.shape[0]
self.ids_mask = tf.zeros([self._n_tracks], name="playlist_mask")
def __call__(self, decoder_outputs):
_logits = tf.matmul(decoder_outputs, self._out_weights)
_logits = tf.nn.bias_add(_logits, self._out_biases)
# apply mask
_logits = _logits + self.ids_mask
_sample_ids = tf.cast(tf.argmax(_logits, axis=-1), tf.int32)
# update mask
step_ids_mask = tf.sparse_to_dense(_sample_ids, [self._n_tracks], -np.inf)
self.ids_mask = self.ids_mask + step_ids_mask
return _sample_ids
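To illustrate the masking idea in isolation, a toy numpy sketch (not part of the graph): adding -inf to a logit removes that label from the argmax.
import numpy as np
logits = np.array([2.0, 5.0, 3.0])
mask = np.array([0.0, -np.inf, 0.0])  # label 1 was already emitted
print(np.argmax(logits + mask))       # -> 2, the best label not yet used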
The code of the inference graph looks like this:
self._max_playlist_len = tf.placeholder(tf.int32, ())
self._start_tokens = tf.placeholder(tf.int32, [None])
sample_fn = InferenceSampler(out_weights, out_biases)
with tf.name_scope("inf_decoder"):
def _end_fn(sample_ids):
return tf.equal(sample_ids, PAD_ITEM_ID)
def _next_inputs_fn(sample_ids):
return tf.nn.embedding_lookup(
track_embs,
sample_ids
)
_start_inputs = tf.nn.embedding_lookup(
track_embs,
self._start_tokens
)
helper = tf.contrib.seq2seq.InferenceHelper(
sample_fn=sample_fn,
sample_shape=[],
sample_dtype=tf.int32,
start_inputs=_start_inputs,
end_fn=_end_fn,
next_inputs_fn=_next_inputs_fn
)
decoder = tf.contrib.seq2seq.BasicDecoder(
rnn_cell,
helper,
rnn_cell.zero_state(tf.shape(self._start_tokens)[0], tf.float32),
output_layer=projection_layer
)
outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
decoder,
maximum_iterations=self._max_playlist_len
)
self.playlists = outputs.sample_id
Unfortunately, the results still contain duplicated labels. Moreover, when I try to access sample_fn.ids_mask I receive an error: ValueError: Operation 'inf_decoder/decoder/while/BasicDecoderStep/add_1' has been marked as not fetchable.
What am I doing wrong? And how legal is it to create such a sample_fn?
Trying to overcome the problem, I updated the inference so that at each RNN step I output the vector of embeddings instead of an item_id. After the inference is finished, I convert the embeddings to item_ids.
First of all, this solution minimizes the number of operations. Secondly, since I use LSTM/GRU cells, the probability of observing two absolutely identical outputs at different steps of the RNN's inference is minimal.
The new code looks like this:
with tf.name_scope("inf_decoder"):
def _sample_fn(decoder_outputs):
return decoder_outputs
def _end_fn(sample_ids):
# infinite
return tf.tile([False], [n_seeds])
_start_inputs = tf.nn.embedding_lookup(
track_embs,
self._seed_items
)
helper = tf.contrib.seq2seq.InferenceHelper(
sample_fn=_sample_fn,
sample_shape=[self.emb_size],
sample_dtype=tf.float32,
start_inputs=_start_inputs,
end_fn=_end_fn,
)
decoder = tf.contrib.seq2seq.BasicDecoder(
rnn_cell,
helper,
rnn_cell.zero_state(n_seeds, tf.float32),
output_layer=projection_layer
)
outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
decoder,
maximum_iterations=self._max_playlist_len
)
flat_rnn_output = tf.reshape(outputs.rnn_output, [-1, self.emb_size])
flat_logits = tf.matmul(flat_rnn_output, out_weights, transpose_b=True)
flat_logits = tf.nn.bias_add(flat_logits, out_biases)
item_ids = tf.cast(tf.argmax(flat_logits, axis=-1), tf.int32)
playlists = tf.reshape(item_ids, [n_seeds, -1])
self.playlists = playlists
So, after some investigation I found answers to all my questions related to this thread. The main question was: why does self.ids_mask in InferenceSampler not get updated? The reason lies in the internals of dynamic_decode. According to this answer in TensorFlow's issue tracker:
... only tensors defined inside the loop will be evaluated
every loop iteration. All tensors defined outside a loop will be
evaluated exactly once.
In my case, self.ids_mask is defined outside the loop. That means I need to rewrite dynamic_decode to get what I want. The code below is a slightly modified version of the initial task, but it does almost the same thing.
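A minimal sketch of that behaviour (TF1 graph mode, illustrative only): a tensor merely captured from outside tf.while_loop is evaluated once, whereas state threaded through loop_vars is updated on every iteration.
import tensorflow as tf
mask = tf.zeros([5])  # defined outside the loop: frozen if only captured
def body(i, mask_inside):
    # mask_inside is a loop variable, so its updates persist across iterations
    return i + 1, mask_inside + tf.one_hot(i, 5)
_, final_mask = tf.while_loop(
    cond=lambda i, m: i < 5,
    body=body,
    loop_vars=[tf.constant(0), mask])
with tf.Session() as sess:
    print(sess.run(final_mask))  # [1. 1. 1. 1. 1.]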
Let's start with a new dynamic_decode, which should create and update a mask collecting the sample_ids that have already been predicted. I've removed the code that I didn't modify; follow the initial_mask and mask variables.
New dynamic_decode:
def dynamic_decode(decoder,
output_time_major=False,
impute_finished=False,
maximum_iterations=None,
parallel_iterations=32,
swap_memory=False,
scope=None):
...
initial_finished, initial_inputs, initial_mask, initial_state = decoder.initialize()
...
def body(time, outputs_ta, state, inputs, finished, sequence_lengths, mask):
"""Internal while_loop body.
Args:
time: scalar int32 tensor.
outputs_ta: structure of TensorArray.
state: (structure of) state tensors and TensorArrays.
inputs: (structure of) input tensors.
finished: bool tensor (keeping track of what's finished).
sequence_lengths: int32 tensor (keeping track of time of finish).
mask: SparseTensor to remove already predicted items
Returns:
`(time + 1, outputs_ta, next_state, next_inputs, next_finished,
next_sequence_lengths, next_mask)`.
"""
(next_outputs, decoder_state, next_inputs, next_mask,
decoder_finished) = decoder.step(time, inputs, state, mask)
...
nest.assert_same_structure(state, decoder_state)
nest.assert_same_structure(outputs_ta, next_outputs)
nest.assert_same_structure(inputs, next_inputs)
nest.assert_same_structure(mask, next_mask)
...
return (time + 1, outputs_ta, next_state, next_inputs, next_finished,
next_sequence_lengths, next_mask)
res = control_flow_ops.while_loop(
condition,
body,
loop_vars=[
initial_time, initial_outputs_ta, initial_state, initial_inputs,
initial_finished, initial_sequence_lengths, initial_mask,
],
parallel_iterations=parallel_iterations,
swap_memory=swap_memory)
...
return final_outputs, final_state, final_sequence_lengths
At the next step mask should be passed to Decoder and Helper. Here are the updated versions of BasicDecoder and InferenceHelper:
MaskedDecoder:
class MaskedDecoder(BasicDecoder):
def step(self, time, inputs, state, mask, name=None):
with ops.name_scope(name, "MaskedDecoderStep", (time, inputs, state, mask)):
cell_outputs, cell_state = self._cell(inputs, state)
if self._output_layer is not None:
cell_outputs = self._output_layer(cell_outputs)
sample_ids = self._helper.sample(
time=time,
outputs=cell_outputs,
state=cell_state,
mask=mask)
(finished, next_inputs, next_state, next_mask) = self._helper.next_inputs(
time=time,
outputs=cell_outputs,
state=cell_state,
mask=mask,
sample_ids=sample_ids)
outputs = BasicDecoderOutput(cell_outputs, sample_ids)
return (outputs, next_state, next_inputs, next_mask, finished)
MaskedInferenceHelper:
class MaskedInferenceHelper(Helper):
"""A helper to use during inference with a custom sampling function."""
def __init__(self, norm_track_embs, features, start_sample_ids):
self._norm_track_embs = norm_track_embs
self._batch_size = tf.shape(start_sample_ids)[0]
self._n_tracks = tf.shape(norm_track_embs)[0]
self._start_sample_ids = start_sample_ids
self._sample_shape = tf.TensorShape([])
self._sample_dtype = tf.int32
self._features = features
def _get_sparse_mask(self, sample_ids):
_mask_shape = tf.convert_to_tensor([
tf.cast(self._batch_size, dtype=tf.int64),
tf.cast(self._n_tracks, dtype=tf.int64)
])
_st_rows = tf.range(0, self._batch_size)
_st_cols = sample_ids
_st_indices = tf.cast(tf.stack([_st_rows, _st_cols], axis=1), dtype=tf.int64)
_st_values = tf.fill([self._batch_size], np.inf)
return tf.SparseTensor(_st_indices, _st_values, _mask_shape)
...
def initialize(self, name=None):
finished = tf.tile([False], [self._batch_size])
start_embs = tf.nn.embedding_lookup(self._norm_track_embs, self._start_sample_ids)
start_inputs = tf.concat([start_embs, self._features], axis=1)
mask = self._get_sparse_mask(self._start_sample_ids)
return finished, start_inputs, mask
def sample(self, time, outputs, state, mask, name=None):
del time, state # unused by sample
outputs = tf.nn.l2_normalize(outputs, axis=-1)
cos_sims = tf.matmul(outputs, self._norm_track_embs, transpose_b=True)
cos_sims = cos_sims - tf.sparse_tensor_to_dense(mask)
sample_ids = tf.cast(tf.argmax(cos_sims, axis=-1), tf.int32)
return sample_ids
def next_inputs(self, time, outputs, state, sample_ids, mask, name=None):
del time, outputs # unused by next_inputs
finished = tf.tile([False], [self._batch_size])
next_embs = tf.nn.embedding_lookup(self._norm_track_embs, sample_ids)
next_inputs = tf.concat([next_embs, self._features], axis=1)
next_mask = tf.sparse_add(mask, self._get_sparse_mask(sample_ids))
return finished, next_inputs, state, next_mask
So, now I can generate inferences without repetition of already-predicted items.
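Putting the pieces together then looks roughly like this (a sketch; norm_track_embs, features, seed_ids, rnn_cell, projection_layer and max_len are placeholders from the surrounding model):
helper = MaskedInferenceHelper(norm_track_embs, features, seed_ids)
decoder = MaskedDecoder(
    rnn_cell,
    helper,
    rnn_cell.zero_state(tf.shape(seed_ids)[0], tf.float32),
    output_layer=projection_layer)
# the modified dynamic_decode above threads the mask through the while_loop
outputs, _, _ = dynamic_decode(decoder, maximum_iterations=max_len)
playlists = outputs.sample_id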