Here, I have LSTM Autoencoder written in Keras. I want to convert the code to Chainer.
import numpy as np
from keras.layers import Input, GRU
from keras.models import Model
input_feat = Input(shape=(30, 2000))
l = GRU( 100, return_sequences=True, activation="tanh", recurrent_activation="hard_sigmoid")(input_feat)
l = GRU(2000, return_sequences=True, activation="tanh", recurrent_activation="hard_sigmoid")(l)
model = Model(input_feat, l)
model.compile(optimizer="RMSprop", loss="mean_squared_error")
feat = np.load("feat.npy")
model.fit(feat, feat[:, ::-1, :], epochs=200, batch_size=250)
feat is numpy whose dimension is (269, 30, 2000). I could run above code and the result was reasonable. I had written below Chainer code.
import numpy as np
from chainer import Chain, Variable, optimizers
import chainer.functions as F
import chainer.links as L
class GRUAutoEncoder(Chain):
def __init__(self):
super().__init__()
with self.init_scope():
self.encode = L.GRU(2000, 100)
self.decode = L.GRU(100, 2000)
def __call__(self, h, mode):
if mode == "encode":
h = F.tanh(self.encode(h))
return h
if mode == "decode":
h = F.tanh(self.decode(h))
return h
def reset(self):
self.encode.reset_state()
self.decode.reset_state()
def main():
feat = np.load("feat.npy") #(269, 30, 2000)
gru_autoencoder = GRUAutoEncoder()
optimizer = optimizers.RMSprop(lr=0.01).setup(gru_autoencoder)
N = len(feat)
batch_size = 250
for epoch in range(200):
index = np.random.randint(0, N-batch_size+1)
input_splices = feat[index:index+batch_size] #(250, 30, 2000)
#Encoding
input_vector = np.zeros((30, batch_size, 2000), dtype="float32")
h = []
for i in range(frame_rate):
input_vector[i] = input_splices[:, i, :] #(250, 1, 2000)
tmp = Variable(input_vector[i])
h.append(gru_autoencoder(tmp, "encode")) #(250, 100)
#Decoding
output_vector = []
for i in range(frame_rate):
tmp = h[i]
output_vector.append(gru_autoencoder(tmp, "decode"))
x = input_vector[0]
t = output_vector[0]
for i in range(len(output_vector)):
x = F.concat((x,input_vector[i]), axis=1)
t = F.concat((t,output_vector[i]), axis=1)
loss = F.mean_squared_error(x, t)
gru_autoencoder.cleargrads()
loss.backward()
optimizer.update()
gru_autoencoder.reset()
if __name__ == "__main__":
main()
But the result of above code was not reasonable. I think the Chainer code has something wrong but I cannot find where it is.
In Keras code,
model.fit(feat, feat[:, ::-1, :])
So, I tried to reverse output_vector in Chainer code,
output_vector.reverse()
but the result was not still reasonable.
.. note: This answer is a translation of [Japanese SO].(https://ja.stackoverflow.com/questions/52162/keras%E3%81%AE%E3%82%B3%E3%83%BC%E3%83%89%E3%82%92chainer%E3%81%AB%E6%9B%B8%E3%81%8D%E6%8F%9B%E3%81%88%E3%81%9F%E3%81%84lstm-autoencoder%E3%81%AE%E5%AE%9F%E8%A3%85/52213#52213)
You should avoid using L.GRU and should use L.NStepGRU, because for L.GRU you have to write "recurrence-aware" code. In other words, you have to apply L.GRU multiple times to one timeseries, therefore "batch" must be treated with great care. L.NStepGRU (with n_layers=1) wraps the batch-processing, so it would be user-friendly.
An instance of L.StepGRU takes two input arguments: one is initial state, and the other is a list of timeserieses, which composes a batch. Conventionally, the initial state is None.
Therefore, the whole answer for your question is as follows.
### dataset.py
from chainer.dataset import DatasetMixin
import numpy as np
class MyDataset(DatasetMixin):
N_SAMPLES = 269
N_TIMESERIES = 30
N_DIMS = 2000
def __init__(self):
super().__init__()
self.data = np.random.randn(self.N_SAMPLES, self.N_TIMESERIES, self.N_DIMS) \
.astype(np.float32)
def __len__(self):
return self.N_SAMPLES
def get_example(self, i):
return self.data[i, :, :]
### model.py
import chainer
from chainer import links as L
from chainer import functions as F
from chainer.link import Chain
class MyModel(Chain):
N_IN_CHANNEL = 2000
N_HIDDEN_CHANNEL = 100
N_OUT_CHANNEL = 2000
def __init__(self):
super().__init__()
self.encoder = L.NStepGRU(n_layers=1, in_size=self.N_IN_CHANNEL, out_size=self.N_HIDDEN_CHANNEL, dropout=0)
self.decoder = L.NStepGRU(n_layers=1, in_size=self.N_HIDDEN_CHANNEL, out_size=self.N_OUT_CHANNEL, dropout=0)
def to_gpu(self, device=None):
self.encoder.to_gpu(device)
self.decoder.to_gpu(device)
def to_cpu(self):
self.encoder.to_cpu()
self.decoder.to_cpu()
#staticmethod
def flip_list(source_list):
return [F.flip(source, axis=1) for source in source_list]
def __call__(self, source_list):
"""
.. note:
This implementation makes use of "auto-encoding"
by avoiding redundant copy in GPU device.
In the typical implementation, this function should receive
both of ``source_list`` and ``target_list``.
"""
target_list = self.flip_list(source_list)
_, h_list = self.encoder(hx=None, xs=source_list)
_, predicted_list = self.decoder(hx=None, xs=h_list)
diff_list = [F.mean_squared_error(target, predicted).reshape((1,)) for target, predicted in zip(target_list, predicted_list)]
loss = F.sum(F.concat(diff_list, axis=0))
chainer.report({'loss': loss}, self)
return loss
### converter.py (referring examples/seq2seq/seq2seq.py)
from chainer.dataset import to_device
def convert(batch, device):
"""
.. note:
batch must be list(batch_size) of array
"""
if device is None:
return batch
else:
return [to_device(device, x) for x in batch]
### train.py
from chainer.iterators import SerialIterator
from chainer.optimizers import RMSprop
from chainer.training.updaters import StandardUpdater
from chainer.training.trainer import Trainer
dataset = MyDataset()
BATCH_SIZE = 32
iterator = SerialIterator(dataset, BATCH_SIZE)
model = MyModel()
optimizer = RMSprop()
optimizer.setup(model)
updater = StandardUpdater(iterator, optimizer, convert, device=0)
trainer = Trainer(updater, (100, 'iteration'))
from chainer.training.extensions import snapshot_object
trainer.extend(snapshot_object(model, "model_iter_{.updater.iteration}"), trigger=(10, 'iteration'))
from chainer.training.extensions import LogReport, PrintReport, ProgressBar
trainer.extend(LogReport(['epoch', 'iteration', 'main/loss'], (1, 'iteration')))
trainer.extend(PrintReport(['epoch', 'iteration', 'main/loss']), trigger=(1, 'iteration'))
trainer.extend(ProgressBar(update_interval=1))
trainer.run()
Related
Iam trying to implement DDPG algorithm that take a state of 8 values and output action of size=4.
The actions are lower bounded by [5,5,0,0] and upper bounded by [40,40,15,15].
When I train my DDPG it always choose one of the boundaries for example [5,40,0,15] or [40,40,0,0].
I implemented SAC algorithm after that and it works, knowing that I tried my DDPG agent on a gym game and it works. Maybe the problem is with upscaling the actions in the policy agent.
here have a look on the model I have
class Buffers:
def __init__(self, buffer_capacity=100000, batch_size=64):
# Number of "experiences" to store at max
self.buffer_capacity = buffer_capacity
num_states = 8
num_actions = 4
# Num of tuples to train on.
self.batch_size = batch_size
# Its tells us num of times record() was called.
self.buffer_counter = 0
# Instead of list of tuples as the exp.replay concept go
# We use different np.arrays for each tuple element
self.state_buffer = np.zeros((self.buffer_capacity, num_states))
self.action_buffer = np.zeros((self.buffer_capacity, num_actions))
self.reward_buffer = np.zeros((self.buffer_capacity, 1))
self.next_state_buffer = np.zeros((self.buffer_capacity, num_states))
# Takes (s,a,r,s') obervation tuple as input
def record(self, obs_tuple):
# Set index to zero if buffer_capacity is exceeded,
# replacing old records
index = self.buffer_counter % self.buffer_capacity
self.state_buffer[index] = obs_tuple[0]
self.action_buffer[index] = obs_tuple[1]
self.reward_buffer[index] = obs_tuple[2]
self.next_state_buffer[index] = obs_tuple[3]
self.buffer_counter += 1
import random
import numpy as np
from collections import deque
import tensorflow as tf
from keras.models import Sequential
from keras.callbacks import History
from keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from keras import backend as K
from tensorflow.keras import layers
import keras.backend as K
import import_ipynb
from Noise import OUActionNoise
import tensorflow as tf
keras = tf.keras
#tf.compat.v1.disable_eager_execution()
class DQLearningAgent:
def __init__(self, seed ,discount_factor =0.95):
self.tau = 0.05
self.gamma = discount_factor
self.critic_lr = 0.002
self.actor_lr = 0.001
self.std_dev = [0.7,0.7,0.2,0.2]
self.buffer = Buffers(50000, 64)
self.M = 16
self.upper_bound = [40,40,self.M-1 ,self.M-1 ]
self.lower_bound = [5,5,0,0]
self.action_scale = (np.array(self.upper_bound) - np.array(self.lower_bound)) / 2.0
self.action_bias = (np.array(self.upper_bound) + np.array(self.lower_bound)) / 2.0
self._state_size = 8 # unchange
self._action_size = 4
self.seed = seed
# random.seed(self.seed)
# np.random.seed(self.seed)
self.actor_model = self.get_actor()
self.critic_model = self.get_critic()
self.target_actor = self.get_actor()
self.target_critic = self.get_critic()
# Making the weights equal initially
self.target_actor.set_weights(self.actor_model.get_weights())
self.target_critic.set_weights(self.critic_model.get_weights())
self.critic_optimizer = tf.keras.optimizers.Adam(self.critic_lr)
self.actor_optimizer = tf.keras.optimizers.Adam(self.actor_lr)
self.ou_noise = OUActionNoise(mean=np.zeros(self._action_size ), std_deviation=np.array(self.std_dev))
def get_actor(self):
# Initialize weights between -3e-3 and 3-e3
last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)
inputs = layers.Input(shape=(self._state_size,))
out = layers.Dense(28, activation=keras.layers.LeakyReLU(alpha=0.01))(inputs)
# out = layers.Dense(28,activation=keras.layers.LeakyReLU(alpha=0.01))(out)
# out = layers.Dense(28, activation=keras.layers.LeakyReLU(alpha=0.01))(out)
out = layers.Dense(28, activation=keras.layers.LeakyReLU(alpha=0.01))(out)
outputs = layers.Dense(self._action_size, activation="tanh", kernel_initializer=last_init)(out)
def antirectifier(x):
outputs = self.action_scale*x + self.action_bias
return outputs
outputs = layers.Lambda(antirectifier )(outputs)
model = tf.keras.Model(inputs, outputs)
return model
def get_critic(self):
# State as input
state_input = layers.Input(shape=(self._state_size))
# state_out = layers.Dense(28, activation="relu")(state_input)
# Action as input
action_input = layers.Input(shape=(self._action_size))
# action_out = layers.Dense(16, activation="relu")(action_input)
# Both are passed through seperate layer before concatenating
concat = layers.Concatenate()([state_input, action_input])
out = layers.Dense(28, activation=keras.layers.LeakyReLU(alpha=0.01))(concat)
# out = layers.Dense(28, activation=keras.layers.LeakyReLU(alpha=0.01))(out)
# out = layers.Dense(28, activation=keras.layers.LeakyReLU(alpha=0.01))(out)
out = layers.Dense(28, activation=keras.layers.LeakyReLU(alpha=0.01))(out)
outputs = layers.Dense(1)(out)
# Outputs single value for give state-action
model = tf.keras.Model([state_input, action_input], outputs)
return model
def learn(self):
# Get sampling range
record_range = min(self.buffer.buffer_counter, self.buffer.buffer_capacity)
# Randomly sample indices
batch_indices = np.random.choice(record_range, self.buffer.batch_size)
# print(self.buffer.action_buffer[batch_indices].shape)
# Convert to tensors
state_batch = tf.convert_to_tensor(self.buffer.state_buffer[batch_indices])
action_batch = tf.convert_to_tensor(self.buffer.action_buffer[batch_indices])
reward_batch = tf.convert_to_tensor(self.buffer.reward_buffer[batch_indices])
reward_batch = tf.cast(reward_batch, dtype=tf.float32)
next_state_batch = tf.convert_to_tensor(self.buffer.next_state_buffer[batch_indices])
return self.update(state_batch, action_batch, reward_batch, next_state_batch)
def update(self, state_batch, action_batch, reward_batch, next_state_batch):
with tf.GradientTape() as tape:
target_actions_new = self.target_actor(next_state_batch)
y = reward_batch + self.gamma * self.target_critic([next_state_batch,target_actions_new])
q = self.critic_model([state_batch,action_batch])
critic_loss = tf.math.reduce_mean(tf.math.square(y - q))
critic_grad = tape.gradient(critic_loss, self.critic_model.trainable_variables)
self.critic_optimizer.apply_gradients( zip(critic_grad, self.critic_model.trainable_variables))
with tf.GradientTape() as tape:
actions = self.actor_model(state_batch)
critic_value = self.critic_model([state_batch , actions])
actor_loss = -tf.math.reduce_mean(critic_value)
actor_grad = tape.gradient(actor_loss , self.actor_model.trainable_variables)
self.actor_optimizer.apply_gradients( zip(actor_grad, self.actor_model.trainable_variables))
self.update_target(self.target_actor.variables , self.actor_model.variables)
self.update_target(self.target_critic.variables , self.critic_model.variables)
return actor_loss,critic_loss
def update_target(self,target_weights, weights):
for (a, b) in zip(target_weights, weights):
a.assign(b * self.tau + a * (1 - self.tau))
def policy(self,state):
sampled_actions = tf.squeeze(self.actor_model(state))
noise = self.ou_noise()
# Adding noise to action
sampled_actions = sampled_actions.numpy() + noise
# We make sure action is within bounds
legal_action = np.clip(sampled_actions, self.lower_bound, self.upper_bound)
return [np.squeeze(legal_action)]
The following training curve is generated using the same Tensorflow + Keras script written in Python:
RED line uses five features.
GREEN line uses seven features.
BLUE line uses nine features.
Can anyone tell me the probable cause of the oscillation of the GREEN line so that I can troubleshoot my script?
Source code:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
#os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0" # Use both gpus for training.
import sys, random
import time
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
import numpy as np
from lxml import etree, objectify
# <editor-fold desc="GPU">
# resolve GPU related issues.
try:
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_instance in physical_devices:
tf.config.experimental.set_memory_growth(gpu_instance, True)
except Exception as e:
pass
# END of try
# </editor-fold>
# <editor-fold desc="Lxml helper">
class LxmlHelper:
#classmethod
def objectify_xml(cls, input_path_dir):
file_dom = etree.parse(input_path_dir) # parse xml and convert it into DOM
file_xml_bin = etree.tostring(file_dom, pretty_print=False, encoding="ascii") # encode DOM into ASCII object
file_xml_text = file_xml_bin.decode() # convert binary ASCII object into ASCII text
objectified_xml = objectify.fromstring(file_xml_text) # convert text into a Doxygen object
return objectified_xml
# </editor-fold>
# <editor-fold desc="def encode(letter)">
def encode(letter: str):
if letter == 'H':
return [1.0, 0.0, 0.0]
elif letter == 'E':
return [0.0, 1.0, 0.0]
elif letter == 'C':
return [0.0, 0.0, 1.0]
elif letter == '-':
return [0.0, 0.0, 0.0]
# END of function
def encode_string_1(pattern_str: str):
# Iterate over the string
one_hot_binary_str = []
for ch in pattern_str:
try:
one_hot_binary_str = one_hot_binary_str + encode(ch)
except Exception as e:
print(pattern_str, one_hot_binary_str, ch)
# END of for loop
return one_hot_binary_str
# END of function
def encode_string_2(pattern_str: str):
# Iterate over the string
one_hot_binary_str = []
for ch in pattern_str:
temp_encoded_vect = [encode(ch)]
one_hot_binary_str = one_hot_binary_str + temp_encoded_vect
# END of for loop
return one_hot_binary_str
# END of function
# </editor-fold>
# <editor-fold desc="def load_data()">
def load_data_k(fname: str, class_index: int, feature_start_index: int, **selection):
"""Loads data for training and validation
:param fname: (``string``) - name of the file with the data
:param selection: (``kwargs``) - see below
:return: four tensorflow tensors: training input, training output, validation input and validation output
:Keyword Arguments:
* *top_n_lines* (``number``) --
take top N lines of the input and disregard the rest
* *random_n_lines* (``number``) --
take random N lines of the input and disregard the rest
* *validation_part* (``float``) --
separate N_lines * given_fraction of the input lines from the training set and use
them for validation. When the given_fraction = 1.0, then the same input set of
N_lines is used both for training and validation (this is the default)
"""
i = 0
file = open(fname)
if "top_n_lines" in selection:
lines = [next(file) for _ in range(int(selection["top_n_lines"]))]
elif "random_n_lines" in selection:
tmp_lines = file.readlines()
lines = random.sample(tmp_lines, int(selection["random_n_lines"]))
else:
lines = file.readlines()
data_x, data_y, data_z = [], [], []
for l in lines:
row = l.strip().split() # return a list of words from the line.
x = [float(ix) for ix in row[feature_start_index:]] # convert 3rd to 20th word into a vector of float numbers.
y = encode(row[class_index]) # convert the 3rd word into binary.
z = encode_string_1(row[class_index+1])
data_x.append(x) # append the vector into 'data_x'
data_y.append(y) # append the vector into 'data_y'
data_z.append(z) # append the vector into 'data_z'
# END for l in lines
num_rows = len(data_x)
given_fraction = selection.get("validation_part", 1.0)
if given_fraction > 0.9999:
valid_x, valid_y, valid_z = data_x, data_y, data_z
else:
n = int(num_rows * given_fraction)
data_x, data_y, data_z = data_x[n:], data_y[n:], data_z[n:]
valid_x, valid_y, valid_z = data_x[:n], data_y[:n], data_z[:n]
# END of if-else block
tx = tf.convert_to_tensor(data_x, np.float32)
ty = tf.convert_to_tensor(data_y, np.float32)
tz = tf.convert_to_tensor(data_z, np.float32)
vx = tf.convert_to_tensor(valid_x, np.float32)
vy = tf.convert_to_tensor(valid_y, np.float32)
vz = tf.convert_to_tensor(valid_z, np.float32)
return tx, ty, tz, vx, vy, vz
# END of the function
# </editor-fold>
# <editor-fold desc="def create_model()">
def create_model(n_hidden_1, n_hidden_2, num_classes, num_features):
# create the model
model = Sequential()
model.add(tf.keras.layers.InputLayer(input_shape=(num_features,)))
model.add(tf.keras.layers.Dense(n_hidden_1, activation='sigmoid'))
model.add(tf.keras.layers.Dense(n_hidden_2, activation='sigmoid'))
###model.add(tf.keras.layers.Dense(n_hidden_3, activation='sigmoid'))
model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))
# instantiate the optimizer
opt = keras.optimizers.SGD(learning_rate=LEARNING_RATE)
# compile the model
model.compile(
optimizer=opt,
loss="categorical_crossentropy",
metrics="categorical_accuracy"
)
# return model
return model
# </editor-fold>
if __name__ == "__main__":
# <editor-fold desc="(input/output parameters)">
my_project_routine = LxmlHelper.objectify_xml("my_project_evaluate.xml")
# input data
INPUT_DATA_FILE = str(my_project_routine.input.input_data_file)
INPUT_PATH = str(my_project_routine.input.input_path)
CLASS_INDEX = int(my_project_routine.input.class_index)
FEATURE_INDEX = int(my_project_routine.input.feature_index)
# output data
OUTPUT_PATH = str(my_project_routine.output.output_path)
MODEL_FILE = str(my_project_routine.output.model_file)
TRAINING_PROGRESS_FILE = str(my_project_routine.output.training_progress_file)
# Learning parameters
LEARNING_RATE = float(my_project_routine.training_params.learning_rate)
EPOCH_SIZE = int(my_project_routine.training_params.epoch_size)
BATCH_SIZE = int(my_project_routine.training_params.batch_size)
INPUT_LINES_COUNT = int(my_project_routine.input.input_lines_count)
VALIDATION_PART = float(my_project_routine.training_params.validation_part)
SAVE_PERIOD = str(my_project_routine.output.save_period)
# NN parameters
HIDDEN_LAYER_1_NEURON_COUNT = int(my_project_routine.hidden_layers.one)
HIDDEN_LAYER_2_NEURON_COUNT = int(my_project_routine.hidden_layers.two)
###HIDDEN_LAYER_3_NEURON_COUNT = int(my_project_routine.hidden_layers.three)
CLASS_COUNT = int(my_project_routine.class_count)
FEATURES_COUNT = int(my_project_routine.features_count)
input_file_path_str = os.path.join(INPUT_PATH, INPUT_DATA_FILE)
training_progress_file_path_str = os.path.join(OUTPUT_PATH, TRAINING_PROGRESS_FILE)
model_file_path = os.path.join(OUTPUT_PATH, MODEL_FILE)
# command-line arg processing
input_file_name_str = None
if len(sys.argv) > 1:
input_file_name_str = sys.argv[1]
else:
input_file_name_str = input_file_path_str
# END of if-else
# </editor-fold>
# <editor-fold desc="(load data from file)">
# load training data from the disk
train_x, train_y, _, validate_x, validate_y, _ = \
load_data_k(
fname=input_file_name_str,
class_index=CLASS_INDEX,
feature_start_index=FEATURE_INDEX,
random_n_lines=INPUT_LINES_COUNT,
validation_part=VALIDATION_PART
)
print("training data size : ", len(train_x))
print("validation data size : ", len(validate_x))
# </editor-fold>
### STEPS_PER_EPOCH = len(train_x) // BATCH_SIZE
### VALIDATION_STEPS = len(validate_x) // BATCH_SIZE
# <editor-fold desc="(model creation)">
# load previously saved NN model
model = None
try:
model = keras.models.load_model(model_file_path)
print("Loading NN model from file.")
model.summary()
except Exception as ex:
print("No NN model found for loading.")
# END of try-except
# </editor-fold>
# <editor-fold desc="(model run)">
# # if there is no model loaded, create a new model
if model is None:
csv_logger = keras.callbacks.CSVLogger(training_progress_file_path_str)
checkpoint = ModelCheckpoint(
model_file_path,
monitor='loss',
verbose=1,
save_best_only=True,
mode='auto',
save_freq='epoch'
)
callbacks_vector = [
csv_logger,
checkpoint
]
# Set mirror strategy
#strategy = tf.distribute.MirroredStrategy(devices=["/device:GPU:0","/device:GPU:1"])
#with strategy.scope():
print("New NN model created.")
# create sequential NN model
model = create_model(
n_hidden_1=HIDDEN_LAYER_1_NEURON_COUNT,
n_hidden_2=HIDDEN_LAYER_2_NEURON_COUNT,
##n_hidden_3=HIDDEN_LAYER_3_NEURON_COUNT,
num_classes=CLASS_COUNT,
num_features=FEATURES_COUNT
)
# Train the model with the new callback
history = model.fit(
train_x, train_y,
validation_data=(validate_x, validate_y),
batch_size=BATCH_SIZE,
epochs=EPOCH_SIZE,
callbacks=[callbacks_vector],
shuffle=True,
verbose=2
)
print(history.history.keys())
# END of ... with
# END of ... if
# </editor-fold>
Plotting Script
import os
from argparse import ArgumentParser
import random
from typing import List
import matplotlib.pyplot as plt
import numpy as np
import math
import sys
import datetime
class Quad:
def __init__(self, x_vector, y_vector, color_char, label_str):
self.__x_vector = x_vector
self.__y_vector = y_vector
self.__color_char = color_char
self.__label_str = label_str
def get_x_vector(self):
return self.__x_vector
def get_y_vector(self):
return self.__y_vector
def get_color_char(self):
return self.__color_char
def get_label_str(self):
return self.__label_str
class HecaPlotClass:
def __init__(self):
self.__x_label_str: str = None
self.__y_label_str: str = None
self.__title_str: str = None
self.__trio_vector: List[Quad] = []
self.__plotter = plt
#property
def x_label_str(self):
return self.__x_label_str
#x_label_str.setter
def x_label_str(self, t):
self.__x_label_str = t
#property
def y_label_str(self):
return self.__y_label_str
#y_label_str.setter
def y_label_str(self, t):
self.__y_label_str = t
#property
def title_str(self):
return self.__title_str
#title_str.setter
def title_str(self, t):
self.__title_str = t
def add_y_axes(self, trio_obj: Quad):
self.__trio_vector.append(trio_obj)
def generate_plot(self):
for obj in self.__trio_vector:
x_vector = obj.get_x_vector()
y_vector = obj.get_y_vector()
label_str = obj.get_label_str()
# print(label_str)
# print(len(x_vector))
# print(len(y_vector))
self.__plotter.plot(
x_vector,
y_vector,
color=obj.get_color_char(),
label=label_str
)
# END of ... for loop
# Naming the x-axis, y_1_vector-axis and the whole graph
self.__plotter.xlabel(self.__x_label_str)
self.__plotter.ylabel(self.__y_label_str)
self.__plotter.title(self.__title_str)
# Adding legend, which helps us recognize the curve according to it's color
self.__plotter.legend()
# To load the display window
#self.__plotter.show()
def save_png(self, output_directory_str):
output_file_str = os.path.join(output_directory_str, self.__title_str + '.png')
self.__plotter.savefig(output_file_str)
def save_pdf(self, output_directory_str):
output_file_str = os.path.join(output_directory_str, self.__title_str + '.pdf')
self.__plotter.savefig(output_file_str)
class MainClass(object):
__colors_vector = ['red', 'green', 'blue', 'cyan', 'magenta', 'yellow', 'orange', 'lightgreen', 'crimson']
__working_dir = r"."
__file_names_vector = ["training_progress-32.txt", "training_progress-64.txt", "training_progress-128.txt"]
__input_files_vector = []
__output_directory = None
__column_no_int = 0
__split_percentage_at_tail_int = 100
__is_pdf_output = False
__is_png_output = False
# <editor-fold desc="def load_data()">
#classmethod
def __load_data(cls, fname: str, percetage_int:int, column_no_int:int):
np_array = np.loadtxt(
fname,
# usecols=range(1,11),
dtype=np.float32,
skiprows=1,
delimiter=","
)
size_vector = np_array.shape
array_len_int = size_vector[0]
rows_count_int = int(percetage_int * array_len_int / 100)
np_array = np_array[-rows_count_int:]
x = np_array[:, 0]
y = np_array[:, column_no_int]
return x, y
# END of the function
# </editor-fold>
# <editor-fold desc="(__parse_args())">
#classmethod
def __parse_args(cls):
# initialize argument parser
my_parser = ArgumentParser()
my_parser.add_argument("-c", help="column no.", type=int)
my_parser.add_argument('-i', nargs='+', help='a list of input files', required=True)
my_parser.add_argument("-o", help="output directory", type=str)
my_parser.add_argument("-n", help="percentage of data to split from tail", type=float)
my_parser.add_argument("--pdf", help="PDF output", action='store_true')
my_parser.add_argument("--png", help="PNG output", action='store_true')
# parse the argument
args = my_parser.parse_args()
cls.__input_files_vector = args.i
cls.__output_directory = args.o
cls.__split_percentage_at_tail_int = args.n
cls.__column_no_int = args.c
cls.__is_pdf_output = args.pdf
cls.__is_png_output = args.png
# </editor-fold>
#classmethod
def main(cls):
cls.__parse_args()
if cls.__input_files_vector is None:
cls.__input_files_vector = cls.__file_names_vector
if cls.__output_directory is None:
cls.__output_directory = cls.__working_dir
if cls.__split_percentage_at_tail_int is None:
cls.__split_percentage_at_tail_int = 100
if cls.__column_no_int is None:
cls.__column_no_int = 1
my_project_plot_obj = HecaPlotClass()
i = 0
for file_path_str in cls.__input_files_vector:
print(file_path_str)
x_vector, y_vector = cls.__load_data(os.path.join(cls.__working_dir, file_path_str), cls.__split_percentage_at_tail_int, cls.__column_no_int)
my_project_plot_obj.x_label_str = "Epoch"
my_project_plot_obj.y_label_str = "Accuracy"
my_project_plot_obj.title_str = "training_plot-{date:%Y-%m-%d_%H:%M:%S}".format(date=datetime.datetime.now())
my_project_plot_obj.x_axis_vector = x_vector
if i == 0:
random_int = 0
else:
random_int = i % (len(cls.__colors_vector)-1)
# END of ... if
print("random_int : ", random_int)
my_project_plot_obj.add_y_axes(Quad(x_vector, y_vector, cls.__colors_vector[random_int], file_path_str))
i = i + 1
# END of ... for loop
my_project_plot_obj.generate_plot()
my_project_plot_obj.save_png(cls.__output_directory)
my_project_plot_obj.save_pdf(cls.__output_directory)
if __name__ == "__main__":
MainClass.main()
The primary reason could be improper (non-random ~ ordered) distribution of data.
If you notice the accuracy beyond epoch 180, there is a orderly switching between the accuracy between ~0.43 (approx.) and ~0.33 (~approx.), and occasionally ~0.23 (approx.). The more important thing to notice is that the accuracy is decreasing (there's no improvement in validation accuracy) as we increase the epochs.
The accuracy can increase in such cases if you (1) reduce batch size, or (2) use a better optimizer like Adam. And check the learning rate.
These changes can help the shift and oscillation, as well.
Additionally, Running average of the accuracy can be plotted to avoid the oscillation. This is again a mitigation scheme rather than a correction scheme. But, what it does is removes the order (partition of the data) and mixes the nearby data.
Lastly, I would also reshuffle the data and normalize after each layer. See if that helps.
Generally, sharp jumps and flat lines in the accuracy usually mean that a group of examples is classified as a given class at a same time. If your dataset contains, say, 50 examples with the same combination of 7 features then they would go into the same class at the same time. This is what probably causes sharp jumps - identical or similar examples clustered together.
So for example, if you have 50 men aged 64, and a decision boundary to classify them as more prone to an illness shifts from >65 to >63, then accuracy changes rapidly as all of them change classification at the same time.
Regarding the oscillation of the curve - due to the fact above, oscillation will be amplified by small changes in learning. Your network learns based on cross entropy, which means that it minimizes the difference between target and your predictions. This means that it operates on the difference between probability and target (say, 0.3 vs class 0) instead of class and target like accuracy (so, 0 vs 0) in the same example. Cross entropy is much more smooth as it is not affected by the issue outlined above.
I am trying to get the gradient with respect to a specific images (sitting in adv_loader). These images are loaded in adv_loader. I tried just taking the code that was calculating the gradient in the backprop step but Yolo3 doesn’t seem to have a grad attribute. Any ideas on how to get that?
import mxnet as mx
import gluoncv as gcv
import numpy as np
import matplotlib.pyplot as plt
from mxnet import gluon
from gluoncv.loss import YOLOV3Loss
import time
ctx = [mx.gpu(i) for i in range(mx.context.num_gpus())] if mx.context.num_gpus()>0 else [mx.cpu()]
#Datasets
train = gcv.data.RecordFileDetection('./data/train.rec',coord_normalized=False)
val = gcv.data.RecordFileDetection('./data/val.rec',coord_normalized=False)
classes = ['Passenger Vehicle','Small Car','Bus']
net = gcv.model_zoo.yolo3_mobilenet0_25_custom(pretrained_base=False,classes=classes,ctx=ctx)
net.collect_params().initialize(force_reinit=True,ctx=ctx)
import multiprocessing as mp
### The following looks nasty, and I apologize for the difficulty but basically what we are doing is
# transforming our data so it is in the correct format for yolo3
batch_size = 32 #This can be changed, but it determines how often we update our model.
num_workers = mp.cpu_count()//2
### We will import the following to reduce what i need to type (I am not sure about you but I like being lazy)
from mxnet import autograd
### 416 is our width/height we want the network to train on
sizes = 328
train_transform = gcv.data.transforms.presets.yolo.YOLO3DefaultTrainTransform(sizes,sizes, net)
# return stacked images, center_targets, scale_targets, gradient weights, objectness_targets, class_targets
# additionally, return padded ground truth bboxes, so there are 7 components returned by dataloader
batchify_fn = gcv.data.batchify.Tuple(*([gcv.data.batchify.Stack() for _ in range(6)] + [gcv.data.batchify.Pad(axis=0, pad_val=-1) for _ in range(1)]))
train_loader = mx.gluon.data.DataLoader(train.transform(train_transform), batch_size, shuffle=True,
batchify_fn=batchify_fn, last_batch='rollover',num_workers=num_workers,prefetch=num_workers + num_workers//2)
val_batchify_fn = gcv.data.batchify.Tuple(gcv.data.batchify.Stack(), gcv.data.batchify.Pad(pad_val=-1))
val_transform = gcv.data.transforms.presets.yolo.YOLO3DefaultValTransform(sizes,sizes)
val_loader = mx.gluon.data.DataLoader(
val.transform(val_transform),
batch_size, False, batchify_fn=val_batchify_fn, last_batch='keep',num_workers=num_workers,prefetch=num_workers + num_workers//2)
adv_loader = mx.gluon.data.DataLoader(train.transform(train_transform), batch_size, shuffle=True,
batchify_fn=batchify_fn, last_batch='rollover',num_workers=num_workers,prefetch=num_workers + num_workers//2) #this is a placeholder for data that will generate advarsaial examples
#How we will validate our model
def validate(net, val_data, ctx, eval_metric):
"""Test on validation dataset."""
eval_metric.reset()
# set nms threshold and topk constraint
net.set_nms(nms_thresh=0.45, nms_topk=455)
mx.nd.waitall()
net.hybridize()
for batch in val_data:
data = mx.gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
label = mx.gluon.utils.split_and_load(batch[1], ctx_list=ctx,batch_axis=0, even_split=False)
det_bboxes = []
det_ids = []
det_scores = []
gt_bboxes = []
gt_ids = []
gt_difficults = []
for x, y in zip(data, label):
# get prediction results
ids, scores, bboxes = net(x)
det_ids.append(ids)
det_scores.append(scores)
# clip to image size
det_bboxes.append(bboxes.clip(0, batch[0].shape[2]))
# split ground truths
gt_ids.append(y.slice_axis(axis=-1, begin=4, end=5))
gt_bboxes.append(y.slice_axis(axis=-1, begin=0, end=4))
gt_difficults.append(y.slice_axis(axis=-1, begin=5, end=6) if y.shape[-1] > 5 else None)
# update metric
eval_metric.update(det_bboxes, det_ids, det_scores, gt_bboxes, gt_ids, gt_difficults)
return eval_metric.get()
eval_metric = gcv.utils.metrics.voc_detection.VOCMApMetric(class_names=classes)
nepochs=11
net.initialize(force_reinit=True)
net.collect_params().reset_ctx(ctx)
#Grab a trainer or optimizer to perform the optimization
trainer = mx.gluon.Trainer(net.collect_params(),'adam',
{'learning_rate':0.001},
kvstore='device')
for i in range(nepochs):
now = time.time()
mx.nd.waitall()
net.hybridize(static_alloc=True,static_shape=True)
for ixl,batch in enumerate(train_loader):
data = mx.gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
# objectness, center_targets, scale_targets, weights, class_targets
fixed_targets = [mx.gluon.utils.split_and_load(batch[it], ctx_list=ctx, batch_axis=0) for it in range(1, 6)]
gt_boxes = mx.gluon.utils.split_and_load(batch[6], ctx_list=ctx, batch_axis=0)
sum_losses = []
with autograd.record():
for ix, x in enumerate(data):
obj_loss, center_loss, scale_loss, cls_loss = net(x, gt_boxes[ix], *[ft[ix] for ft in fixed_targets])
sum_losses.append(obj_loss + center_loss + scale_loss + cls_loss)
autograd.backward(sum_losses)
trainer.step(batch_size)
for ixl,batch in enumerate(adv_loader):
data = mx.gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
# objectness, center_targets, scale_targets, weights, class_targets
fixed_targets = [mx.gluon.utils.split_and_load(batch[it], ctx_list=ctx, batch_axis=0) for it in range(1, 6)]
gt_boxes = mx.gluon.utils.split_and_load(batch[6], ctx_list=ctx, batch_axis=0)
sum_losses = []
with autograd.record():
for ix, x in enumerate(data):
obj_loss, center_loss, scale_loss, cls_loss = net(x, gt_boxes[ix], *[ft[ix] for ft in fixed_targets])
sum_losses.append(obj_loss + center_loss + scale_loss + cls_loss)
autograd.backward(sum_losses,retain_graph=True)
print(net.grad.asnumpy())
I am getting told here that AttributeError: 'YOLOV3' object has no attribute 'grad'. I am using this documentation https://mxnet.apache.org/versions/1.6/api/python/docs/tutorials/packages/autograd/index.html
Any idea how to get an image and pass it through to the gradient of the yolo loss function?
I am using the following script predictor.py in order to get predictions from a Keras model hosted in GCP AI Platform.
import os
import pickle
import tensorflow as tf
import numpy as np
import logging
class MyPredictor(object):
def __init__(self, model, bow_model):
self._model = model
self._bow_model = bow_model
def predict(self, instances, **kwargs):
vectors = self.embedding([instances])
vectors = vectors.tolist()
output = self._model.predict(vectors)
return output
def embedding(self, statement):
vector = self._bow_model.transform(statement).toarray()
#vector = vector.to_list()
return vector
#classmethod
def from_path(cls, model_dir):
model_path = os.path.join(model_dir, 'model.h5')
model = tf.keras.models.load_model(model_path, compile = False)
preprocessor_path = os.path.join(model_dir, 'bow.pkl')
with open(preprocessor_path, 'rb') as f:
bow_model = pickle.load(f)
return cls(model, bow_model)
However i get
Prediction failed: Error when checking input: expected dense_input to have shape (2898,) but got array with shape (1,)
The problem seems to be due to the dimensions of my input data when trying to do the actual predictions, in line output = self._model.predict([vectors]). The model is expecting a vector of shape (2898, )
I am finding this quite odd... since when I print the shape and dimensions of the vector I get the following
This is the shape
(1, 2898)
This is the dim number
2
This is the vector
[[0 0 0 ... 0 0 0]]
So the dimensions and the shape is fine and it should really be working....
Furthermore, I did a test to get the predictions of the model stored locally and it works fine. This is the test file:
import os
import pickle
import tensorflow as tf
import numpy as np
class MyPredictor(object):
def __init__(self, model, bow_model):
self._model = model
self._bow_model = bow_model
def predict(self, instances, **kwargs):
print("These are the instances ", instances)
vector = self.embedding([instances])
output = self._model.predict(vector)
return output
def embedding(self, statement):
vector = self._bow_model.transform(statement).toarray()
#vector = vector.to_list()
return vector
model_path = 'model.h5'
model = tf.keras.models.load_model(model_path, compile = False)
preprocessor_path = 'bow.pkl'
with open(preprocessor_path, 'rb') as f:
bow_model = pickle.load(f)
instances = 'test'
predictor = MyPredictor(model, bow_model)
outputs = predictor.predict(instances)
print(outputs)
Solved it!
It was as silly as adding a set of parenthesis to this line output = self._model.predict([vectors])
After that I got another error regarding the output of the prediction not being json serializable. This I solved simply by adding .tolist() to the return return output.to_list()
import os
import pickle
import tensorflow as tf
import numpy as np
import logging
class MyPredictor(object):
def __init__(self, model, bow_model):
self._model = model
self._bow_model = bow_model
def predict(self, instances, **kwargs):
vectors = self.embedding([instances])
vectors = vectors.tolist()
output = self._model.predict([vectors])
return output.to_list()
def embedding(self, statement):
vector = self._bow_model.transform(statement).toarray()
#vector = vector.to_list()
return vector
#classmethod
def from_path(cls, model_dir):
model_path = os.path.join(model_dir, 'model.h5')
model = tf.keras.models.load_model(model_path, compile = False)
preprocessor_path = os.path.join(model_dir, 'bow.pkl')
with open(preprocessor_path, 'rb') as f:
bow_model = pickle.load(f)
return cls(model, bow_model)
When i calculate inception score, i got NaN most of the time.
Trying to investigate why it happen i found that running the network twice on the same images can lead for some of the images to totally different results (difference greater than 0.9 while the maximum difference can be 1), the images which got high difference changed from run to run.
My GPU is 2080ti, i use Ubuntu with tensorflow=1.13.1.
i try to change drivers, tensorflow version, run form docker, the same problem happen all the time.
I have another server at the university which has the same GPU (2080ti), and when i try to run there the problem disappear.
Thanks for the help.
my script
# Code derived from tensorflow/tensorflow/models/image/imagenet/classify_image.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os.path
import tarfile
import numpy as np
from six.moves import urllib
import tensorflow as tf
import sys
import warnings
from scipy import linalg
MODEL_DIR = '/tmp/imagenet'
DATA_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz'
softmax = None
pool3 = None
# Call this function with list of images. Each of elements should be a
# numpy array with values ranging from 0 to 255.
def get_features(images):
assert ((images.shape[3]) == 3)
assert (np.max(images) > 10)
assert (np.min(images) >= 0.0)
images = images.astype(np.float32)
bs = 100
sess = tf.get_default_session()
preds = []
for inp in np.array_split(images, round(images.shape[0] / bs)):
sys.stdout.write(".")
sys.stdout.flush()
pred = sess.run(softmax, {'InputTensor:0': inp})
preds.append(pred)
preds = np.concatenate(preds, 0)
return preds
# This function is called automatically.
def _init_inception():
global softmax
global pool3
if not os.path.exists(MODEL_DIR):
os.makedirs(MODEL_DIR)
filename = DATA_URL.split('/')[-1]
filepath = os.path.join(MODEL_DIR, filename)
if not os.path.exists(filepath):
def _progress(count, block_size, total_size):
sys.stdout.write('\r>> Downloading %s %.1f%%' % (
filename, float(count * block_size) / float(total_size) * 100.0))
sys.stdout.flush()
filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
print()
statinfo = os.stat(filepath)
print('Succesfully downloaded', filename, statinfo.st_size, 'bytes.')
tarfile.open(filepath, 'r:gz').extractall(MODEL_DIR)
with tf.gfile.GFile(os.path.join(
MODEL_DIR, 'classify_image_graph_def.pb'), 'rb') as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
# Import model with a modification in the input tensor to accept arbitrary
# batch size.
input_tensor = tf.placeholder(tf.float32, shape=[None, None, None, 3],
name='InputTensor')
_ = tf.import_graph_def(graph_def, name='inception_v3',
input_map={'ExpandDims:0': input_tensor})
# Works with an arbitrary minibatch size.
pool3 = tf.get_default_graph().get_tensor_by_name('inception_v3/pool_3:0')
ops = pool3.graph.get_operations()
for op_idx, op in enumerate(ops):
if 'inception_v3' in op.name:
for o in op.outputs:
shape = o.get_shape()
shape = [s.value for s in shape]
new_shape = []
for j, s in enumerate(shape):
if s == 1 and j == 0:
new_shape.append(None)
else:
new_shape.append(s)
o.set_shape(tf.TensorShape(new_shape))
w = tf.get_default_graph().get_operation_by_name("inception_v3/softmax/logits/MatMul").inputs[1]
logits = tf.matmul(tf.squeeze(pool3, [1, 2]), w)
softmax = tf.nn.softmax(logits)
_init_inception()
if __name__ =='__main__':
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
with tf.Session() as sess:
preds1 = get_features(x_train)
preds2 = get_features(x_train)
print(abs(preds1-preds2).max())