Why does an ANN validation accuracy oscillate? - python

The following training curve is generated using the same Tensorflow + Keras script written in Python:
RED line uses five features.
GREEN line uses seven features.
BLUE line uses nine features.
Can anyone tell me the probable cause of the oscillation of the GREEN line so that I can troubleshoot my script?
Source code:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
#os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0" # Use both gpus for training.
import sys, random
import time
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
import numpy as np
from lxml import etree, objectify
# <editor-fold desc="GPU">
# resolve GPU related issues.
try:
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_instance in physical_devices:
tf.config.experimental.set_memory_growth(gpu_instance, True)
except Exception as e:
pass
# END of try
# </editor-fold>
# <editor-fold desc="Lxml helper">
class LxmlHelper:
#classmethod
def objectify_xml(cls, input_path_dir):
file_dom = etree.parse(input_path_dir) # parse xml and convert it into DOM
file_xml_bin = etree.tostring(file_dom, pretty_print=False, encoding="ascii") # encode DOM into ASCII object
file_xml_text = file_xml_bin.decode() # convert binary ASCII object into ASCII text
objectified_xml = objectify.fromstring(file_xml_text) # convert text into a Doxygen object
return objectified_xml
# </editor-fold>
# <editor-fold desc="def encode(letter)">
def encode(letter: str):
if letter == 'H':
return [1.0, 0.0, 0.0]
elif letter == 'E':
return [0.0, 1.0, 0.0]
elif letter == 'C':
return [0.0, 0.0, 1.0]
elif letter == '-':
return [0.0, 0.0, 0.0]
# END of function
def encode_string_1(pattern_str: str):
# Iterate over the string
one_hot_binary_str = []
for ch in pattern_str:
try:
one_hot_binary_str = one_hot_binary_str + encode(ch)
except Exception as e:
print(pattern_str, one_hot_binary_str, ch)
# END of for loop
return one_hot_binary_str
# END of function
def encode_string_2(pattern_str: str):
# Iterate over the string
one_hot_binary_str = []
for ch in pattern_str:
temp_encoded_vect = [encode(ch)]
one_hot_binary_str = one_hot_binary_str + temp_encoded_vect
# END of for loop
return one_hot_binary_str
# END of function
# </editor-fold>
# <editor-fold desc="def load_data()">
def load_data_k(fname: str, class_index: int, feature_start_index: int, **selection):
"""Loads data for training and validation
:param fname: (``string``) - name of the file with the data
:param selection: (``kwargs``) - see below
:return: four tensorflow tensors: training input, training output, validation input and validation output
:Keyword Arguments:
* *top_n_lines* (``number``) --
take top N lines of the input and disregard the rest
* *random_n_lines* (``number``) --
take random N lines of the input and disregard the rest
* *validation_part* (``float``) --
separate N_lines * given_fraction of the input lines from the training set and use
them for validation. When the given_fraction = 1.0, then the same input set of
N_lines is used both for training and validation (this is the default)
"""
i = 0
file = open(fname)
if "top_n_lines" in selection:
lines = [next(file) for _ in range(int(selection["top_n_lines"]))]
elif "random_n_lines" in selection:
tmp_lines = file.readlines()
lines = random.sample(tmp_lines, int(selection["random_n_lines"]))
else:
lines = file.readlines()
data_x, data_y, data_z = [], [], []
for l in lines:
row = l.strip().split() # return a list of words from the line.
x = [float(ix) for ix in row[feature_start_index:]] # convert 3rd to 20th word into a vector of float numbers.
y = encode(row[class_index]) # convert the 3rd word into binary.
z = encode_string_1(row[class_index+1])
data_x.append(x) # append the vector into 'data_x'
data_y.append(y) # append the vector into 'data_y'
data_z.append(z) # append the vector into 'data_z'
# END for l in lines
num_rows = len(data_x)
given_fraction = selection.get("validation_part", 1.0)
if given_fraction > 0.9999:
valid_x, valid_y, valid_z = data_x, data_y, data_z
else:
n = int(num_rows * given_fraction)
data_x, data_y, data_z = data_x[n:], data_y[n:], data_z[n:]
valid_x, valid_y, valid_z = data_x[:n], data_y[:n], data_z[:n]
# END of if-else block
tx = tf.convert_to_tensor(data_x, np.float32)
ty = tf.convert_to_tensor(data_y, np.float32)
tz = tf.convert_to_tensor(data_z, np.float32)
vx = tf.convert_to_tensor(valid_x, np.float32)
vy = tf.convert_to_tensor(valid_y, np.float32)
vz = tf.convert_to_tensor(valid_z, np.float32)
return tx, ty, tz, vx, vy, vz
# END of the function
# </editor-fold>
# <editor-fold desc="def create_model()">
def create_model(n_hidden_1, n_hidden_2, num_classes, num_features):
# create the model
model = Sequential()
model.add(tf.keras.layers.InputLayer(input_shape=(num_features,)))
model.add(tf.keras.layers.Dense(n_hidden_1, activation='sigmoid'))
model.add(tf.keras.layers.Dense(n_hidden_2, activation='sigmoid'))
###model.add(tf.keras.layers.Dense(n_hidden_3, activation='sigmoid'))
model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))
# instantiate the optimizer
opt = keras.optimizers.SGD(learning_rate=LEARNING_RATE)
# compile the model
model.compile(
optimizer=opt,
loss="categorical_crossentropy",
metrics="categorical_accuracy"
)
# return model
return model
# </editor-fold>
if __name__ == "__main__":
# <editor-fold desc="(input/output parameters)">
my_project_routine = LxmlHelper.objectify_xml("my_project_evaluate.xml")
# input data
INPUT_DATA_FILE = str(my_project_routine.input.input_data_file)
INPUT_PATH = str(my_project_routine.input.input_path)
CLASS_INDEX = int(my_project_routine.input.class_index)
FEATURE_INDEX = int(my_project_routine.input.feature_index)
# output data
OUTPUT_PATH = str(my_project_routine.output.output_path)
MODEL_FILE = str(my_project_routine.output.model_file)
TRAINING_PROGRESS_FILE = str(my_project_routine.output.training_progress_file)
# Learning parameters
LEARNING_RATE = float(my_project_routine.training_params.learning_rate)
EPOCH_SIZE = int(my_project_routine.training_params.epoch_size)
BATCH_SIZE = int(my_project_routine.training_params.batch_size)
INPUT_LINES_COUNT = int(my_project_routine.input.input_lines_count)
VALIDATION_PART = float(my_project_routine.training_params.validation_part)
SAVE_PERIOD = str(my_project_routine.output.save_period)
# NN parameters
HIDDEN_LAYER_1_NEURON_COUNT = int(my_project_routine.hidden_layers.one)
HIDDEN_LAYER_2_NEURON_COUNT = int(my_project_routine.hidden_layers.two)
###HIDDEN_LAYER_3_NEURON_COUNT = int(my_project_routine.hidden_layers.three)
CLASS_COUNT = int(my_project_routine.class_count)
FEATURES_COUNT = int(my_project_routine.features_count)
input_file_path_str = os.path.join(INPUT_PATH, INPUT_DATA_FILE)
training_progress_file_path_str = os.path.join(OUTPUT_PATH, TRAINING_PROGRESS_FILE)
model_file_path = os.path.join(OUTPUT_PATH, MODEL_FILE)
# command-line arg processing
input_file_name_str = None
if len(sys.argv) > 1:
input_file_name_str = sys.argv[1]
else:
input_file_name_str = input_file_path_str
# END of if-else
# </editor-fold>
# <editor-fold desc="(load data from file)">
# load training data from the disk
train_x, train_y, _, validate_x, validate_y, _ = \
load_data_k(
fname=input_file_name_str,
class_index=CLASS_INDEX,
feature_start_index=FEATURE_INDEX,
random_n_lines=INPUT_LINES_COUNT,
validation_part=VALIDATION_PART
)
print("training data size : ", len(train_x))
print("validation data size : ", len(validate_x))
# </editor-fold>
### STEPS_PER_EPOCH = len(train_x) // BATCH_SIZE
### VALIDATION_STEPS = len(validate_x) // BATCH_SIZE
# <editor-fold desc="(model creation)">
# load previously saved NN model
model = None
try:
model = keras.models.load_model(model_file_path)
print("Loading NN model from file.")
model.summary()
except Exception as ex:
print("No NN model found for loading.")
# END of try-except
# </editor-fold>
# <editor-fold desc="(model run)">
# # if there is no model loaded, create a new model
if model is None:
csv_logger = keras.callbacks.CSVLogger(training_progress_file_path_str)
checkpoint = ModelCheckpoint(
model_file_path,
monitor='loss',
verbose=1,
save_best_only=True,
mode='auto',
save_freq='epoch'
)
callbacks_vector = [
csv_logger,
checkpoint
]
# Set mirror strategy
#strategy = tf.distribute.MirroredStrategy(devices=["/device:GPU:0","/device:GPU:1"])
#with strategy.scope():
print("New NN model created.")
# create sequential NN model
model = create_model(
n_hidden_1=HIDDEN_LAYER_1_NEURON_COUNT,
n_hidden_2=HIDDEN_LAYER_2_NEURON_COUNT,
##n_hidden_3=HIDDEN_LAYER_3_NEURON_COUNT,
num_classes=CLASS_COUNT,
num_features=FEATURES_COUNT
)
# Train the model with the new callback
history = model.fit(
train_x, train_y,
validation_data=(validate_x, validate_y),
batch_size=BATCH_SIZE,
epochs=EPOCH_SIZE,
callbacks=[callbacks_vector],
shuffle=True,
verbose=2
)
print(history.history.keys())
# END of ... with
# END of ... if
# </editor-fold>
Plotting Script
import os
from argparse import ArgumentParser
import random
from typing import List
import matplotlib.pyplot as plt
import numpy as np
import math
import sys
import datetime
class Quad:
def __init__(self, x_vector, y_vector, color_char, label_str):
self.__x_vector = x_vector
self.__y_vector = y_vector
self.__color_char = color_char
self.__label_str = label_str
def get_x_vector(self):
return self.__x_vector
def get_y_vector(self):
return self.__y_vector
def get_color_char(self):
return self.__color_char
def get_label_str(self):
return self.__label_str
class HecaPlotClass:
def __init__(self):
self.__x_label_str: str = None
self.__y_label_str: str = None
self.__title_str: str = None
self.__trio_vector: List[Quad] = []
self.__plotter = plt
#property
def x_label_str(self):
return self.__x_label_str
#x_label_str.setter
def x_label_str(self, t):
self.__x_label_str = t
#property
def y_label_str(self):
return self.__y_label_str
#y_label_str.setter
def y_label_str(self, t):
self.__y_label_str = t
#property
def title_str(self):
return self.__title_str
#title_str.setter
def title_str(self, t):
self.__title_str = t
def add_y_axes(self, trio_obj: Quad):
self.__trio_vector.append(trio_obj)
def generate_plot(self):
for obj in self.__trio_vector:
x_vector = obj.get_x_vector()
y_vector = obj.get_y_vector()
label_str = obj.get_label_str()
# print(label_str)
# print(len(x_vector))
# print(len(y_vector))
self.__plotter.plot(
x_vector,
y_vector,
color=obj.get_color_char(),
label=label_str
)
# END of ... for loop
# Naming the x-axis, y_1_vector-axis and the whole graph
self.__plotter.xlabel(self.__x_label_str)
self.__plotter.ylabel(self.__y_label_str)
self.__plotter.title(self.__title_str)
# Adding legend, which helps us recognize the curve according to it's color
self.__plotter.legend()
# To load the display window
#self.__plotter.show()
def save_png(self, output_directory_str):
output_file_str = os.path.join(output_directory_str, self.__title_str + '.png')
self.__plotter.savefig(output_file_str)
def save_pdf(self, output_directory_str):
output_file_str = os.path.join(output_directory_str, self.__title_str + '.pdf')
self.__plotter.savefig(output_file_str)
class MainClass(object):
__colors_vector = ['red', 'green', 'blue', 'cyan', 'magenta', 'yellow', 'orange', 'lightgreen', 'crimson']
__working_dir = r"."
__file_names_vector = ["training_progress-32.txt", "training_progress-64.txt", "training_progress-128.txt"]
__input_files_vector = []
__output_directory = None
__column_no_int = 0
__split_percentage_at_tail_int = 100
__is_pdf_output = False
__is_png_output = False
# <editor-fold desc="def load_data()">
#classmethod
def __load_data(cls, fname: str, percetage_int:int, column_no_int:int):
np_array = np.loadtxt(
fname,
# usecols=range(1,11),
dtype=np.float32,
skiprows=1,
delimiter=","
)
size_vector = np_array.shape
array_len_int = size_vector[0]
rows_count_int = int(percetage_int * array_len_int / 100)
np_array = np_array[-rows_count_int:]
x = np_array[:, 0]
y = np_array[:, column_no_int]
return x, y
# END of the function
# </editor-fold>
# <editor-fold desc="(__parse_args())">
#classmethod
def __parse_args(cls):
# initialize argument parser
my_parser = ArgumentParser()
my_parser.add_argument("-c", help="column no.", type=int)
my_parser.add_argument('-i', nargs='+', help='a list of input files', required=True)
my_parser.add_argument("-o", help="output directory", type=str)
my_parser.add_argument("-n", help="percentage of data to split from tail", type=float)
my_parser.add_argument("--pdf", help="PDF output", action='store_true')
my_parser.add_argument("--png", help="PNG output", action='store_true')
# parse the argument
args = my_parser.parse_args()
cls.__input_files_vector = args.i
cls.__output_directory = args.o
cls.__split_percentage_at_tail_int = args.n
cls.__column_no_int = args.c
cls.__is_pdf_output = args.pdf
cls.__is_png_output = args.png
# </editor-fold>
#classmethod
def main(cls):
cls.__parse_args()
if cls.__input_files_vector is None:
cls.__input_files_vector = cls.__file_names_vector
if cls.__output_directory is None:
cls.__output_directory = cls.__working_dir
if cls.__split_percentage_at_tail_int is None:
cls.__split_percentage_at_tail_int = 100
if cls.__column_no_int is None:
cls.__column_no_int = 1
my_project_plot_obj = HecaPlotClass()
i = 0
for file_path_str in cls.__input_files_vector:
print(file_path_str)
x_vector, y_vector = cls.__load_data(os.path.join(cls.__working_dir, file_path_str), cls.__split_percentage_at_tail_int, cls.__column_no_int)
my_project_plot_obj.x_label_str = "Epoch"
my_project_plot_obj.y_label_str = "Accuracy"
my_project_plot_obj.title_str = "training_plot-{date:%Y-%m-%d_%H:%M:%S}".format(date=datetime.datetime.now())
my_project_plot_obj.x_axis_vector = x_vector
if i == 0:
random_int = 0
else:
random_int = i % (len(cls.__colors_vector)-1)
# END of ... if
print("random_int : ", random_int)
my_project_plot_obj.add_y_axes(Quad(x_vector, y_vector, cls.__colors_vector[random_int], file_path_str))
i = i + 1
# END of ... for loop
my_project_plot_obj.generate_plot()
my_project_plot_obj.save_png(cls.__output_directory)
my_project_plot_obj.save_pdf(cls.__output_directory)
if __name__ == "__main__":
MainClass.main()

The primary reason could be improper (non-random ~ ordered) distribution of data.
If you notice the accuracy beyond epoch 180, there is a orderly switching between the accuracy between ~0.43 (approx.) and ~0.33 (~approx.), and occasionally ~0.23 (approx.). The more important thing to notice is that the accuracy is decreasing (there's no improvement in validation accuracy) as we increase the epochs.
The accuracy can increase in such cases if you (1) reduce batch size, or (2) use a better optimizer like Adam. And check the learning rate.
These changes can help the shift and oscillation, as well.
Additionally, Running average of the accuracy can be plotted to avoid the oscillation. This is again a mitigation scheme rather than a correction scheme. But, what it does is removes the order (partition of the data) and mixes the nearby data.
Lastly, I would also reshuffle the data and normalize after each layer. See if that helps.

Generally, sharp jumps and flat lines in the accuracy usually mean that a group of examples is classified as a given class at a same time. If your dataset contains, say, 50 examples with the same combination of 7 features then they would go into the same class at the same time. This is what probably causes sharp jumps - identical or similar examples clustered together.
So for example, if you have 50 men aged 64, and a decision boundary to classify them as more prone to an illness shifts from >65 to >63, then accuracy changes rapidly as all of them change classification at the same time.
Regarding the oscillation of the curve - due to the fact above, oscillation will be amplified by small changes in learning. Your network learns based on cross entropy, which means that it minimizes the difference between target and your predictions. This means that it operates on the difference between probability and target (say, 0.3 vs class 0) instead of class and target like accuracy (so, 0 vs 0) in the same example. Cross entropy is much more smooth as it is not affected by the issue outlined above.

Related

DDPG always choosing the boundaries actions

Iam trying to implement DDPG algorithm that take a state of 8 values and output action of size=4.
The actions are lower bounded by [5,5,0,0] and upper bounded by [40,40,15,15].
When I train my DDPG it always choose one of the boundaries for example [5,40,0,15] or [40,40,0,0].
I implemented SAC algorithm after that and it works, knowing that I tried my DDPG agent on a gym game and it works. Maybe the problem is with upscaling the actions in the policy agent.
here have a look on the model I have
class Buffers:
def __init__(self, buffer_capacity=100000, batch_size=64):
# Number of "experiences" to store at max
self.buffer_capacity = buffer_capacity
num_states = 8
num_actions = 4
# Num of tuples to train on.
self.batch_size = batch_size
# Its tells us num of times record() was called.
self.buffer_counter = 0
# Instead of list of tuples as the exp.replay concept go
# We use different np.arrays for each tuple element
self.state_buffer = np.zeros((self.buffer_capacity, num_states))
self.action_buffer = np.zeros((self.buffer_capacity, num_actions))
self.reward_buffer = np.zeros((self.buffer_capacity, 1))
self.next_state_buffer = np.zeros((self.buffer_capacity, num_states))
# Takes (s,a,r,s') obervation tuple as input
def record(self, obs_tuple):
# Set index to zero if buffer_capacity is exceeded,
# replacing old records
index = self.buffer_counter % self.buffer_capacity
self.state_buffer[index] = obs_tuple[0]
self.action_buffer[index] = obs_tuple[1]
self.reward_buffer[index] = obs_tuple[2]
self.next_state_buffer[index] = obs_tuple[3]
self.buffer_counter += 1
import random
import numpy as np
from collections import deque
import tensorflow as tf
from keras.models import Sequential
from keras.callbacks import History
from keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from keras import backend as K
from tensorflow.keras import layers
import keras.backend as K
import import_ipynb
from Noise import OUActionNoise
import tensorflow as tf
keras = tf.keras
#tf.compat.v1.disable_eager_execution()
class DQLearningAgent:
def __init__(self, seed ,discount_factor =0.95):
self.tau = 0.05
self.gamma = discount_factor
self.critic_lr = 0.002
self.actor_lr = 0.001
self.std_dev = [0.7,0.7,0.2,0.2]
self.buffer = Buffers(50000, 64)
self.M = 16
self.upper_bound = [40,40,self.M-1 ,self.M-1 ]
self.lower_bound = [5,5,0,0]
self.action_scale = (np.array(self.upper_bound) - np.array(self.lower_bound)) / 2.0
self.action_bias = (np.array(self.upper_bound) + np.array(self.lower_bound)) / 2.0
self._state_size = 8 # unchange
self._action_size = 4
self.seed = seed
# random.seed(self.seed)
# np.random.seed(self.seed)
self.actor_model = self.get_actor()
self.critic_model = self.get_critic()
self.target_actor = self.get_actor()
self.target_critic = self.get_critic()
# Making the weights equal initially
self.target_actor.set_weights(self.actor_model.get_weights())
self.target_critic.set_weights(self.critic_model.get_weights())
self.critic_optimizer = tf.keras.optimizers.Adam(self.critic_lr)
self.actor_optimizer = tf.keras.optimizers.Adam(self.actor_lr)
self.ou_noise = OUActionNoise(mean=np.zeros(self._action_size ), std_deviation=np.array(self.std_dev))
def get_actor(self):
# Initialize weights between -3e-3 and 3-e3
last_init = tf.random_uniform_initializer(minval=-0.003, maxval=0.003)
inputs = layers.Input(shape=(self._state_size,))
out = layers.Dense(28, activation=keras.layers.LeakyReLU(alpha=0.01))(inputs)
# out = layers.Dense(28,activation=keras.layers.LeakyReLU(alpha=0.01))(out)
# out = layers.Dense(28, activation=keras.layers.LeakyReLU(alpha=0.01))(out)
out = layers.Dense(28, activation=keras.layers.LeakyReLU(alpha=0.01))(out)
outputs = layers.Dense(self._action_size, activation="tanh", kernel_initializer=last_init)(out)
def antirectifier(x):
outputs = self.action_scale*x + self.action_bias
return outputs
outputs = layers.Lambda(antirectifier )(outputs)
model = tf.keras.Model(inputs, outputs)
return model
def get_critic(self):
# State as input
state_input = layers.Input(shape=(self._state_size))
# state_out = layers.Dense(28, activation="relu")(state_input)
# Action as input
action_input = layers.Input(shape=(self._action_size))
# action_out = layers.Dense(16, activation="relu")(action_input)
# Both are passed through seperate layer before concatenating
concat = layers.Concatenate()([state_input, action_input])
out = layers.Dense(28, activation=keras.layers.LeakyReLU(alpha=0.01))(concat)
# out = layers.Dense(28, activation=keras.layers.LeakyReLU(alpha=0.01))(out)
# out = layers.Dense(28, activation=keras.layers.LeakyReLU(alpha=0.01))(out)
out = layers.Dense(28, activation=keras.layers.LeakyReLU(alpha=0.01))(out)
outputs = layers.Dense(1)(out)
# Outputs single value for give state-action
model = tf.keras.Model([state_input, action_input], outputs)
return model
def learn(self):
# Get sampling range
record_range = min(self.buffer.buffer_counter, self.buffer.buffer_capacity)
# Randomly sample indices
batch_indices = np.random.choice(record_range, self.buffer.batch_size)
# print(self.buffer.action_buffer[batch_indices].shape)
# Convert to tensors
state_batch = tf.convert_to_tensor(self.buffer.state_buffer[batch_indices])
action_batch = tf.convert_to_tensor(self.buffer.action_buffer[batch_indices])
reward_batch = tf.convert_to_tensor(self.buffer.reward_buffer[batch_indices])
reward_batch = tf.cast(reward_batch, dtype=tf.float32)
next_state_batch = tf.convert_to_tensor(self.buffer.next_state_buffer[batch_indices])
return self.update(state_batch, action_batch, reward_batch, next_state_batch)
def update(self, state_batch, action_batch, reward_batch, next_state_batch):
with tf.GradientTape() as tape:
target_actions_new = self.target_actor(next_state_batch)
y = reward_batch + self.gamma * self.target_critic([next_state_batch,target_actions_new])
q = self.critic_model([state_batch,action_batch])
critic_loss = tf.math.reduce_mean(tf.math.square(y - q))
critic_grad = tape.gradient(critic_loss, self.critic_model.trainable_variables)
self.critic_optimizer.apply_gradients( zip(critic_grad, self.critic_model.trainable_variables))
with tf.GradientTape() as tape:
actions = self.actor_model(state_batch)
critic_value = self.critic_model([state_batch , actions])
actor_loss = -tf.math.reduce_mean(critic_value)
actor_grad = tape.gradient(actor_loss , self.actor_model.trainable_variables)
self.actor_optimizer.apply_gradients( zip(actor_grad, self.actor_model.trainable_variables))
self.update_target(self.target_actor.variables , self.actor_model.variables)
self.update_target(self.target_critic.variables , self.critic_model.variables)
return actor_loss,critic_loss
def update_target(self,target_weights, weights):
for (a, b) in zip(target_weights, weights):
a.assign(b * self.tau + a * (1 - self.tau))
def policy(self,state):
sampled_actions = tf.squeeze(self.actor_model(state))
noise = self.ou_noise()
# Adding noise to action
sampled_actions = sampled_actions.numpy() + noise
# We make sure action is within bounds
legal_action = np.clip(sampled_actions, self.lower_bound, self.upper_bound)
return [np.squeeze(legal_action)]

RuntimeError: Found dtype Double but expected Float - Pytorch RL

I am trying to get an actor critic variant of the pendulum running, but I seem to be running into a particular problem.
RuntimeError: Found dtype Double but expected Float
I saw this had come up multiple times before so I have been through those and have attempted to change the data types of my loss (kept in comments) but it is still not working. Could anyone point out how to resolve this so that I can learn from it?
Full code below
import gym, os
import numpy as np
from itertools import count
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.distributions import Normal
from collections import namedtuple
SavedAction = namedtuple('SavedAction', ['log_prob', 'value'])
LOG_SIG_MAX = 2
LOG_SIG_MIN = -20
class ActorCritic(nn.Module):
"""
Implementing both heads of the actor critic model
"""
def __init__(self, state_space, action_space):
super(ActorCritic, self).__init__()
self.state_space = state_space
self.action_space = action_space
# HL 1
self.linear1 = nn.Linear(self.state_space, 128)
# HL 2
self.linear2 = nn.Linear(128, 256)
# Outputs
self.critic_head = nn.Linear(256, 1)
self.action_mean = nn.Linear(256, self.action_space)
self.action_std = nn.Linear(256, self.action_space)
# Saving
self.saved_actions = []
self.rewards = []
# Optimizer
self.optimizer = optim.Adam(self.parameters(), lr = 1e-3)
self.eps = np.finfo(np.float32).eps.item()
def forward(self, state):
"""
Forward pass for both actor and critic
"""
# State to Layer 1
l1_output = F.relu(self.linear1(state))
# Layer 1 to Layer 2
l2_output = F.relu(self.linear2(l1_output))
# Layer 2 to Action
mean = self.action_mean(l2_output)
std = self.action_std(l2_output)
std = torch.clamp(std, min=LOG_SIG_MIN, max = LOG_SIG_MAX)
std = std.exp()
# Layer 2 to Value
value_est = self.critic_head(l2_output)
return value_est, mean, std
def select_action(self,state):
state = torch.from_numpy(state).float().unsqueeze(0)
value_est, mean, std = self.forward(state)
value_est = value_est.reshape(-1)
# Make prob Normal dist
dist = Normal(mean, std)
action = dist.sample()
action = torch.tanh(action)
ln_prob = dist.log_prob(action)
ln_prob = ln_prob.sum()
self.saved_actions.append(SavedAction(ln_prob, value_est))
action = action.numpy()
return action[0]
def compute_returns(self, gamma): # This is the error causing code
"""
Calculate losses and do backprop
"""
R = 0
saved_actions = self.saved_actions
policy_losses = []
value_losses = []
returns = []
for r in self.rewards[::-1]:
# Discount value
R = r + gamma*R
returns.insert(0,R)
returns = torch.tensor(returns)
returns = (returns - returns.mean())/(returns.std()+self.eps)
for (log_prob, value), R in zip(saved_actions, returns):
advantage = R - value.item()
advantage = advantage.type(torch.FloatTensor)
policy_losses.append(-log_prob*advantage)
value_losses.append(F.mse_loss(value, torch.tensor([R])))
self.optimizer.zero_grad()
loss = torch.stack(policy_losses).sum() + torch.stack(value_losses).sum()
loss = loss.type(torch.FloatTensor)
loss.backward()
self.optimizer.step()
del self.rewards[:]
del self.saved_actions[:]
env = gym.make("Pendulum-v0")
state_space = env.observation_space.shape[0]
action_space = env.action_space.shape[0]
# Train Expert AC
model = ActorCritic(state_space, action_space)
train = True
if train == True:
# Main loop
window = 50
reward_history = []
for ep in count():
state = env.reset()
ep_reward = 0
for t in range(1,1000):
if ep%50 == 0:
env.render()
action = model.select_action(state)
state, reward, done, _ = env.step(action)
model.rewards.append(reward)
ep_reward += reward
if done:
break
print(reward)
model.compute_returns(0.99) # Error begins here
reward_history.append(ep_reward)
# Result information
if ep % 50 == 0:
mean = np.mean(reward_history[-window:])
print(f"Episode: {ep} Last Reward: {ep_reward} Rolling Mean: {mean}")
if np.mean(reward_history[-100:])>199:
print(f"Environment solved at episode {ep}, average run length > 200")
break
Complete error log, some elements redacted for privacy. Originally the loop actor critic and main loop were in separate files. Comments added to appropriate error causing lines.
Traceback (most recent call last):
File "pendulum.py", line 59, in <module>
model.compute_returns(0.99)
File "/home/x/Software/git/x/x/solvers/actorcritic_cont.py", line 121, in compute_returns
loss.backward()
File "/home/x/.local/lib/python3.8/site-packages/torch/_tensor.py", line 255, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File "/home/x/.local/lib/python3.8/site-packages/torch/autograd/__init__.py", line 147, in backward
Variable._execution_engine.run_backward(
RuntimeError: Found dtype Double but expected Float
Answering here in case anyone has similar issues in the future.
The output of the reward in OpenAI Gym Pendulum-v0 is a double, so when you compute the return over the episode you need to change that to a float tensor.
I did this just by:
returns = torch.tensor(returns)
returns = (returns - returns.mean())/(returns.std()+self.eps)
returns = returns.type(torch.FloatTensor)

Pass image to gradient function for yolo3 from GlounCV model Zoo

I am trying to get the gradient with respect to a specific images (sitting in adv_loader). These images are loaded in adv_loader. I tried just taking the code that was calculating the gradient in the backprop step but Yolo3 doesn’t seem to have a grad attribute. Any ideas on how to get that?
import mxnet as mx
import gluoncv as gcv
import numpy as np
import matplotlib.pyplot as plt
from mxnet import gluon
from gluoncv.loss import YOLOV3Loss
import time
ctx = [mx.gpu(i) for i in range(mx.context.num_gpus())] if mx.context.num_gpus()>0 else [mx.cpu()]
#Datasets
train = gcv.data.RecordFileDetection('./data/train.rec',coord_normalized=False)
val = gcv.data.RecordFileDetection('./data/val.rec',coord_normalized=False)
classes = ['Passenger Vehicle','Small Car','Bus']
net = gcv.model_zoo.yolo3_mobilenet0_25_custom(pretrained_base=False,classes=classes,ctx=ctx)
net.collect_params().initialize(force_reinit=True,ctx=ctx)
import multiprocessing as mp
### The following looks nasty, and I apologize for the difficulty but basically what we are doing is
# transforming our data so it is in the correct format for yolo3
batch_size = 32 #This can be changed, but it determines how often we update our model.
num_workers = mp.cpu_count()//2
### We will import the following to reduce what i need to type (I am not sure about you but I like being lazy)
from mxnet import autograd
### 416 is our width/height we want the network to train on
sizes = 328
train_transform = gcv.data.transforms.presets.yolo.YOLO3DefaultTrainTransform(sizes,sizes, net)
# return stacked images, center_targets, scale_targets, gradient weights, objectness_targets, class_targets
# additionally, return padded ground truth bboxes, so there are 7 components returned by dataloader
batchify_fn = gcv.data.batchify.Tuple(*([gcv.data.batchify.Stack() for _ in range(6)] + [gcv.data.batchify.Pad(axis=0, pad_val=-1) for _ in range(1)]))
train_loader = mx.gluon.data.DataLoader(train.transform(train_transform), batch_size, shuffle=True,
batchify_fn=batchify_fn, last_batch='rollover',num_workers=num_workers,prefetch=num_workers + num_workers//2)
val_batchify_fn = gcv.data.batchify.Tuple(gcv.data.batchify.Stack(), gcv.data.batchify.Pad(pad_val=-1))
val_transform = gcv.data.transforms.presets.yolo.YOLO3DefaultValTransform(sizes,sizes)
val_loader = mx.gluon.data.DataLoader(
val.transform(val_transform),
batch_size, False, batchify_fn=val_batchify_fn, last_batch='keep',num_workers=num_workers,prefetch=num_workers + num_workers//2)
adv_loader = mx.gluon.data.DataLoader(train.transform(train_transform), batch_size, shuffle=True,
batchify_fn=batchify_fn, last_batch='rollover',num_workers=num_workers,prefetch=num_workers + num_workers//2) #this is a placeholder for data that will generate advarsaial examples
#How we will validate our model
def validate(net, val_data, ctx, eval_metric):
"""Test on validation dataset."""
eval_metric.reset()
# set nms threshold and topk constraint
net.set_nms(nms_thresh=0.45, nms_topk=455)
mx.nd.waitall()
net.hybridize()
for batch in val_data:
data = mx.gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
label = mx.gluon.utils.split_and_load(batch[1], ctx_list=ctx,batch_axis=0, even_split=False)
det_bboxes = []
det_ids = []
det_scores = []
gt_bboxes = []
gt_ids = []
gt_difficults = []
for x, y in zip(data, label):
# get prediction results
ids, scores, bboxes = net(x)
det_ids.append(ids)
det_scores.append(scores)
# clip to image size
det_bboxes.append(bboxes.clip(0, batch[0].shape[2]))
# split ground truths
gt_ids.append(y.slice_axis(axis=-1, begin=4, end=5))
gt_bboxes.append(y.slice_axis(axis=-1, begin=0, end=4))
gt_difficults.append(y.slice_axis(axis=-1, begin=5, end=6) if y.shape[-1] > 5 else None)
# update metric
eval_metric.update(det_bboxes, det_ids, det_scores, gt_bboxes, gt_ids, gt_difficults)
return eval_metric.get()
eval_metric = gcv.utils.metrics.voc_detection.VOCMApMetric(class_names=classes)
nepochs=11
net.initialize(force_reinit=True)
net.collect_params().reset_ctx(ctx)
#Grab a trainer or optimizer to perform the optimization
trainer = mx.gluon.Trainer(net.collect_params(),'adam',
{'learning_rate':0.001},
kvstore='device')
for i in range(nepochs):
now = time.time()
mx.nd.waitall()
net.hybridize(static_alloc=True,static_shape=True)
for ixl,batch in enumerate(train_loader):
data = mx.gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
# objectness, center_targets, scale_targets, weights, class_targets
fixed_targets = [mx.gluon.utils.split_and_load(batch[it], ctx_list=ctx, batch_axis=0) for it in range(1, 6)]
gt_boxes = mx.gluon.utils.split_and_load(batch[6], ctx_list=ctx, batch_axis=0)
sum_losses = []
with autograd.record():
for ix, x in enumerate(data):
obj_loss, center_loss, scale_loss, cls_loss = net(x, gt_boxes[ix], *[ft[ix] for ft in fixed_targets])
sum_losses.append(obj_loss + center_loss + scale_loss + cls_loss)
autograd.backward(sum_losses)
trainer.step(batch_size)
for ixl,batch in enumerate(adv_loader):
data = mx.gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
# objectness, center_targets, scale_targets, weights, class_targets
fixed_targets = [mx.gluon.utils.split_and_load(batch[it], ctx_list=ctx, batch_axis=0) for it in range(1, 6)]
gt_boxes = mx.gluon.utils.split_and_load(batch[6], ctx_list=ctx, batch_axis=0)
sum_losses = []
with autograd.record():
for ix, x in enumerate(data):
obj_loss, center_loss, scale_loss, cls_loss = net(x, gt_boxes[ix], *[ft[ix] for ft in fixed_targets])
sum_losses.append(obj_loss + center_loss + scale_loss + cls_loss)
autograd.backward(sum_losses,retain_graph=True)
print(net.grad.asnumpy())
I am getting told here that AttributeError: 'YOLOV3' object has no attribute 'grad'. I am using this documentation https://mxnet.apache.org/versions/1.6/api/python/docs/tutorials/packages/autograd/index.html
Any idea how to get an image and pass it through to the gradient of the yolo loss function?

Fine tune GPT-2 Text Prediction for Conversational AI

I am experimenting with the gpt-2 model's conditional text generation to tweak it for a good chatbot. I am using nsheppard's code for retraining it on my custom dataset.
I trained my model on a custom dataset of conversations that I pulled from my facebook data. I changed the sample length to 20 as they are dialogues during interactive conditional generation.
The dataset looks something like this:
How are you
Hi Great and you
Am also good
So you re a graphic designer
Yeah
How can you contribute to making the game In d graphics aspect
Can you show me some of your work if u don t mind
Am planning to learn making it a motion type
U can go through my photos
K
Can you make animations for it
Flash animations to be specific
No please only stable ones
Ok
But, after the training when i try to chat with it, it is instead completing my sentences instead of replying to them.
User >>> bye
======================================== SAMPLE 1 ========================================
and
hi
are there any positions in khrzh being appointed right now
I understand that the interactive_conditional_samples.py was built to complete the sentence based on the prompt, but I thought changing the dataset would work and sure it doesn't work.
train.py
#!/usr/bin/env python3
# Usage:
# PYTHONPATH=src ./train --dataset <file|directory|glob>
import argparse
import json
import os
import numpy as np
import tensorflow as tf
import time
import tqdm
from tensorflow.core.protobuf import rewriter_config_pb2
import model, sample, encoder
from load_dataset import load_dataset, Sampler
from accumulate import AccumulatingOptimizer
import memory_saving_gradients
CHECKPOINT_DIR = 'checkpoint'
SAMPLE_DIR = 'samples'
parser = argparse.ArgumentParser(
description='Fine-tune GPT-2 on your custom dataset.',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--dataset', metavar='PATH', type=str, required=True, help='Input file, directory, or glob pattern (utf-8 text, or preencoded .npz files).')
parser.add_argument('--model_name', metavar='MODEL', type=str, default='117M', help='Pretrained model name')
parser.add_argument('--combine', metavar='CHARS', type=int, default=50000, help='Concatenate input files with <|endoftext|> separator into chunks of this minimum size')
parser.add_argument('--batch_size', metavar='SIZE', type=int, default=1, help='Batch size')
parser.add_argument('--learning_rate', metavar='LR', type=float, default=0.00002, help='Learning rate for Adam')
parser.add_argument('--accumulate_gradients', metavar='N', type=int, default=1, help='Accumulate gradients across N minibatches.')
parser.add_argument('--memory_saving_gradients', default=False, action='store_true', help='Use gradient checkpointing to reduce vram usage.')
parser.add_argument('--only_train_transformer_layers', default=False, action='store_true', help='Restrict training to the transformer blocks.')
parser.add_argument('--optimizer', type=str, default='adam', help='Optimizer. <adam|sgd>.')
parser.add_argument('--noise', type=float, default=0.0, help='Add noise to input training data to regularize against typos.')
parser.add_argument('--top_k', type=int, default=40, help='K for top-k sampling.')
parser.add_argument('--top_p', type=float, default=0.0, help='P for top-p sampling. Overrides top_k if set > 0.')
parser.add_argument('--restore_from', type=str, default='latest', help='Either "latest", "fresh", or a path to a checkpoint file')
parser.add_argument('--run_name', type=str, default='run1', help='Run id. Name of subdirectory in checkpoint/ and samples/')
parser.add_argument('--sample_every', metavar='N', type=int, default=100, help='Generate samples every N steps')
parser.add_argument('--sample_length', metavar='TOKENS', type=int, default=1023, help='Sample this many tokens')
parser.add_argument('--sample_num', metavar='N', type=int, default=1, help='Generate this many samples')
parser.add_argument('--save_every', metavar='N', type=int, default=1000, help='Write a checkpoint every N steps')
parser.add_argument('--val_dataset', metavar='PATH', type=str, default=None, help='Dataset for validation loss, defaults to --dataset.')
parser.add_argument('--val_batch_size', metavar='SIZE', type=int, default=2, help='Batch size for validation.')
parser.add_argument('--val_batch_count', metavar='N', type=int, default=40, help='Number of batches for validation.')
parser.add_argument('--val_every', metavar='STEPS', type=int, default=0, help='Calculate validation loss every STEPS steps.')
def maketree(path):
try:
os.makedirs(path)
except:
pass
def randomize(context, hparams, p):
if p > 0:
mask = tf.random.uniform(shape=tf.shape(context)) < p
noise = tf.random.uniform(shape=tf.shape(context), minval=0, maxval=hparams.n_vocab, dtype=tf.int32)
return tf.where(mask, noise, context)
else:
return context
def main():
args = parser.parse_args()
enc = encoder.get_encoder(args.model_name)
hparams = model.default_hparams()
with open(os.path.join('models', args.model_name, 'hparams.json')) as f:
hparams.override_from_dict(json.load(f))
if args.sample_length > hparams.n_ctx:
raise ValueError(
"Can't get samples longer than window size: %s" % hparams.n_ctx)
if args.model_name == '345M':
args.memory_saving_gradients = True
if args.optimizer == 'adam':
args.only_train_transformer_layers = True
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.graph_options.rewrite_options.layout_optimizer = rewriter_config_pb2.RewriterConfig.OFF
with tf.Session(config=config) as sess:
context = tf.placeholder(tf.int32, [args.batch_size, None])
context_in = randomize(context, hparams, args.noise)
output = model.model(hparams=hparams, X=context_in)
loss = tf.reduce_mean(
tf.nn.sparse_softmax_cross_entropy_with_logits(
labels=context[:, 1:], logits=output['logits'][:, :-1]))
if args.val_every > 0:
val_context = tf.placeholder(tf.int32, [args.val_batch_size, None])
val_output = model.model(hparams=hparams, X=val_context)
val_loss = tf.reduce_mean(
tf.nn.sparse_softmax_cross_entropy_with_logits(
labels=val_context[:, 1:], logits=val_output['logits'][:, :-1]))
val_loss_summary = tf.summary.scalar('val_loss', val_loss)
tf_sample = sample.sample_sequence(
hparams=hparams,
length=args.sample_length,
context=context,
batch_size=args.batch_size,
temperature=1.0,
top_k=args.top_k,
top_p=args.top_p)
all_vars = [v for v in tf.trainable_variables() if 'model' in v.name]
train_vars = [v for v in all_vars if '/h' in v.name] if args.only_train_transformer_layers else all_vars
if args.optimizer == 'adam':
opt = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
elif args.optimizer == 'sgd':
opt = tf.train.GradientDescentOptimizer(learning_rate=args.learning_rate)
else:
exit('Bad optimizer:', args.optimizer)
if args.accumulate_gradients > 1:
if args.memory_saving_gradients:
exit("Memory saving gradients are not implemented for gradient accumulation yet.")
opt = AccumulatingOptimizer(
opt=opt,
var_list=train_vars)
opt_reset = opt.reset()
opt_compute = opt.compute_gradients(loss)
opt_apply = opt.apply_gradients()
summary_loss = tf.summary.scalar('loss', opt_apply)
else:
if args.memory_saving_gradients:
opt_grads = memory_saving_gradients.gradients(loss, train_vars)
else:
opt_grads = tf.gradients(loss, train_vars)
opt_grads = list(zip(opt_grads, train_vars))
opt_apply = opt.apply_gradients(opt_grads)
summary_loss = tf.summary.scalar('loss', loss)
summary_lr = tf.summary.scalar('learning_rate', args.learning_rate)
summaries = tf.summary.merge([summary_lr, summary_loss])
summary_log = tf.summary.FileWriter(
os.path.join(CHECKPOINT_DIR, args.run_name))
saver = tf.train.Saver(
var_list=all_vars,
max_to_keep=5,
keep_checkpoint_every_n_hours=2)
sess.run(tf.global_variables_initializer())
if args.restore_from == 'latest':
ckpt = tf.train.latest_checkpoint(
os.path.join(CHECKPOINT_DIR, args.run_name))
if ckpt is None:
# Get fresh GPT weights if new run.
ckpt = tf.train.latest_checkpoint(
os.path.join('models', args.model_name))
elif args.restore_from == 'fresh':
ckpt = tf.train.latest_checkpoint(
os.path.join('models', args.model_name))
else:
ckpt = tf.train.latest_checkpoint(args.restore_from)
print('Loading checkpoint', ckpt)
saver.restore(sess, ckpt)
print('Loading dataset...')
chunks = load_dataset(enc, args.dataset, args.combine)
data_sampler = Sampler(chunks)
if args.val_every > 0:
val_chunks = load_dataset(enc, args.val_dataset, args.combine) if args.val_dataset else chunks
print('dataset has', data_sampler.total_size, 'tokens')
print('Training...')
if args.val_every > 0:
# Sample from validation set once with fixed seed to make
# it deterministic during training as well as across runs.
val_data_sampler = Sampler(val_chunks, seed=1)
val_batches = [[val_data_sampler.sample(1024) for _ in range(args.val_batch_size)]
for _ in range(args.val_batch_count)]
counter = 1
counter_path = os.path.join(CHECKPOINT_DIR, args.run_name, 'counter')
if os.path.exists(counter_path):
# Load the step number if we're resuming a run
# Add 1 so we don't immediately try to save again
with open(counter_path, 'r') as fp:
counter = int(fp.read()) + 1
def save():
maketree(os.path.join(CHECKPOINT_DIR, args.run_name))
print(
'Saving',
os.path.join(CHECKPOINT_DIR, args.run_name,
'model-{}').format(counter))
saver.save(
sess,
os.path.join(CHECKPOINT_DIR, args.run_name, 'model'),
global_step=counter)
with open(counter_path, 'w') as fp:
fp.write(str(counter) + '\n')
def generate_samples():
print('Generating samples...')
context_tokens = data_sampler.sample(1)
all_text = []
index = 0
while index < args.sample_num:
out = sess.run(
tf_sample,
feed_dict={context: args.batch_size * [context_tokens]})
for i in range(min(args.sample_num - index, args.batch_size)):
text = enc.decode(out[i])
text = '======== SAMPLE {} ========\n{}\n'.format(
index + 1, text)
all_text.append(text)
index += 1
print(text)
maketree(os.path.join(SAMPLE_DIR, args.run_name))
with open(
os.path.join(SAMPLE_DIR, args.run_name,
'samples-{}').format(counter), 'w') as fp:
fp.write('\n'.join(all_text))
def validation():
print('Calculating validation loss...')
losses = []
for batch in tqdm.tqdm(val_batches):
losses.append(sess.run(val_loss, feed_dict={val_context: batch}))
v_val_loss = np.mean(losses)
v_summary = sess.run(val_loss_summary, feed_dict={val_loss: v_val_loss})
summary_log.add_summary(v_summary, counter)
summary_log.flush()
print(
'[{counter} | {time:2.2f}] validation loss = {loss:2.2f}'
.format(
counter=counter,
time=time.time() - start_time,
loss=v_val_loss))
def sample_batch():
return [data_sampler.sample(1024) for _ in range(args.batch_size)]
avg_loss = (0.0, 0.0)
start_time = time.time()
try:
while True:
if counter % args.save_every == 0:
save()
if counter % args.sample_every == 0:
generate_samples()
if args.val_every > 0 and (counter % args.val_every == 0 or counter == 1):
validation()
if args.accumulate_gradients > 1:
sess.run(opt_reset)
for _ in range(args.accumulate_gradients):
sess.run(
opt_compute, feed_dict={context: sample_batch()})
(v_loss, v_summary) = sess.run((opt_apply, summaries))
else:
(_, v_loss, v_summary) = sess.run(
(opt_apply, loss, summaries),
feed_dict={context: sample_batch()})
summary_log.add_summary(v_summary, counter)
avg_loss = (avg_loss[0] * 0.99 + v_loss,
avg_loss[1] * 0.99 + 1.0)
print(
'[{counter} | {time:2.2f}] loss={loss:2.2f} avg={avg:2.2f}'
.format(
counter=counter,
time=time.time() - start_time,
loss=v_loss,
avg=avg_loss[0] / avg_loss[1]))
counter += 1
except KeyboardInterrupt:
print('interrupted')
save()
if __name__ == '__main__':
main()
sample.py
import tensorflow as tf
import model
def top_k_logits(logits, k):
if k == 0:
# no truncation
return logits
def _top_k():
values, _ = tf.nn.top_k(logits, k=k)
min_values = values[:, -1, tf.newaxis]
return tf.where(
logits < min_values,
tf.ones_like(logits, dtype=logits.dtype) * -1e10,
logits,
)
return tf.cond(
tf.equal(k, 0),
lambda: logits,
lambda: _top_k(),
)
def top_p_logits(logits, p):
with tf.variable_scope('top_p_logits'):
logits_sort = tf.sort(logits, direction='DESCENDING')
probs_sort = tf.nn.softmax(logits_sort)
probs_sums = tf.cumsum(probs_sort, axis=1, exclusive=True)
logits_masked = tf.where(probs_sums < p, logits_sort, tf.ones_like(logits_sort)*1000) # [batchsize, vocab]
min_logits = tf.reduce_min(logits_masked, axis=1, keepdims=True) # [batchsize, 1]
return tf.where(
logits < min_logits,
tf.ones_like(logits, dtype=logits.dtype) * -1e10,
logits,
)
def sample_sequence(*, hparams, length, start_token=None, batch_size=None, context=None, temperature=1, top_k=0, top_p=0.0):
if start_token is None:
assert context is not None, 'Specify exactly one of start_token and context!'
else:
assert context is None, 'Specify exactly one of start_token and context!'
context = tf.fill([batch_size, 1], start_token)
def step(hparams, tokens, past=None):
lm_output = model.model(hparams=hparams, X=tokens, past=past, reuse=tf.AUTO_REUSE)
logits = lm_output['logits'][:, :, :hparams.n_vocab]
presents = lm_output['present']
presents.set_shape(model.past_shape(hparams=hparams, batch_size=batch_size))
return {
'logits': logits,
'presents': presents,
}
with tf.name_scope('sample_sequence'):
# Don't feed the last context token -- leave that to the loop below
# TODO: Would be slightly faster if we called step on the entire context,
# rather than leaving the last token transformer calculation to the while loop.
context_output = step(hparams, context[:, :-1])
def body(past, prev, output):
next_outputs = step(hparams, prev[:, tf.newaxis], past=past)
logits = next_outputs['logits'][:, -1, :] / tf.to_float(temperature)
if top_p > 0.0:
logits = top_p_logits(logits, p=top_p)
else:
logits = top_k_logits(logits, k=top_k)
samples = tf.multinomial(logits, num_samples=1, output_dtype=tf.int32)
return [
tf.concat([past, next_outputs['presents']], axis=-2),
tf.squeeze(samples, axis=[1]),
tf.concat([output, samples], axis=1),
]
def cond(*args):
return True
_, _, tokens = tf.while_loop(
cond=cond, body=body,
maximum_iterations=length,
loop_vars=[
context_output['presents'],
context[:, -1],
context,
],
shape_invariants=[
tf.TensorShape(model.past_shape(hparams=hparams, batch_size=batch_size)),
tf.TensorShape([batch_size]),
tf.TensorShape([batch_size, None]),
],
back_prop=False,
)
return tokens
interactive_conditional_samples.py
#!/usr/bin/env python3
import fire
import json
import os
import numpy as np
import tensorflow as tf
import model, sample, encoder
def interact_model(
model_name='chatbot',
seed=None,
nsamples=1,
batch_size=1,
length=20,
temperature=1,
top_k=0,
top_p=0.0
):
"""
Interactively run the model
:model_name=chatbot : String, which model to use
:seed=None : Integer seed for random number generators, fix seed to reproduce
results
:nsamples=1 : Number of samples to return total
:batch_size=1 : Number of batches (only affects speed/memory). Must divide nsamples.
:length=None : Number of tokens in generated text, if None (default), is
determined by model hyperparameters
:temperature=1 : Float value controlling randomness in boltzmann
distribution. Lower temperature results in less random completions. As the
temperature approaches zero, the model will become deterministic and
repetitive. Higher temperature results in more random completions.
:top_k=0 : Integer value controlling diversity. 1 means only 1 word is
considered for each step (token), resulting in deterministic completions,
while 40 means 40 words are considered at each step. 0 (default) is a
special setting meaning no restrictions. 40 generally is a good value.
:top_p=0.0 : Float value controlling diversity. Implements nucleus sampling,
overriding top_k if set to a value > 0. A good setting is 0.9.
"""
if batch_size is None:
batch_size = 1
assert nsamples % batch_size == 0
enc = encoder.get_encoder(model_name)
hparams = model.default_hparams()
with open(os.path.join('models', model_name, 'hparams.json')) as f:
hparams.override_from_dict(json.load(f))
if length is None:
length = hparams.n_ctx // 2
elif length > hparams.n_ctx:
raise ValueError("Can't get samples longer than window size: %s" % hparams.n_ctx)
with tf.Session(graph=tf.Graph()) as sess:
context = tf.placeholder(tf.int32, [batch_size, None])
np.random.seed(seed)
tf.set_random_seed(seed)
output = sample.sample_sequence(
hparams=hparams, length=length,
context=context,
batch_size=batch_size,
temperature=temperature, top_k=top_k, top_p=top_p
)
saver = tf.train.Saver()
ckpt = tf.train.latest_checkpoint(os.path.join('models', model_name))
saver.restore(sess, ckpt)
while True:
raw_text = input("User >>> ")
while not raw_text:
print('Prompt should not be empty!')
raw_text = input("User >>> ")
context_tokens = enc.encode(raw_text)
generated = 0
for _ in range(nsamples // batch_size):
out = sess.run(output, feed_dict={
context: [context_tokens for _ in range(batch_size)]
})[:, len(context_tokens):]
for i in range(batch_size):
generated += 1
text = enc.decode(out[i])
print("=" * 40 + " SAMPLE " + str(generated) + " " + "=" * 40)
print(text)
print("=" * 80)
if __name__ == '__main__':
fire.Fire(interact_model)
How can I tweak the code to get it working like a chatbot? I am guessing it has something to do with the context part in sample.py, though i am unsure how is this going to work.
I know this is an old question now, but I have successfully tuned many Q&A style datasets on GPT-2 and have a suggestion that will work for future people who find this question.
GPT-2 reads unstructured text data, but it is very good at inferring and obeying structure in that data. Your issue is basically that you are not terminating your input lines with an identifier that GPT-2 understands, so it continues the sentence.
A simple way to fix this would be to annotate your dataset. Really anything with stop/start tokens will work, but you should also annotate the speaker identities. I would just do something like this:
A: How are you <EOL>
B: Hi Great and you <EOL>
A: Am also good <EOL>
B: So you re a graphic designer <EOL>
B: Another line from B <EOL>
The other benefit of this approach is that GPT-2 will learn multi-line input/output, and the different identities of the two conversants.
Problem is, all model sees is looking at the series of text you gave it, and trying to predict next most likely /token to be exact. It's not an encoder-decoder architecture. What you require is fine-tuning this architecture for a chatbot architecture.The only implementation I found regarding that one is here. But's it's done in pytorch so i am afraid it won't be what you are wanting.
https://medium.com/huggingface/how-to-build-a-state-of-the-art-conversational-ai-with-transfer-learning-2d818ac26313

How to convert this Keras code to Chainer code? (LSTM Autoencoder)

Here, I have LSTM Autoencoder written in Keras. I want to convert the code to Chainer.
import numpy as np
from keras.layers import Input, GRU
from keras.models import Model
input_feat = Input(shape=(30, 2000))
l = GRU( 100, return_sequences=True, activation="tanh", recurrent_activation="hard_sigmoid")(input_feat)
l = GRU(2000, return_sequences=True, activation="tanh", recurrent_activation="hard_sigmoid")(l)
model = Model(input_feat, l)
model.compile(optimizer="RMSprop", loss="mean_squared_error")
feat = np.load("feat.npy")
model.fit(feat, feat[:, ::-1, :], epochs=200, batch_size=250)
feat is numpy whose dimension is (269, 30, 2000). I could run above code and the result was reasonable. I had written below Chainer code.
import numpy as np
from chainer import Chain, Variable, optimizers
import chainer.functions as F
import chainer.links as L
class GRUAutoEncoder(Chain):
def __init__(self):
super().__init__()
with self.init_scope():
self.encode = L.GRU(2000, 100)
self.decode = L.GRU(100, 2000)
def __call__(self, h, mode):
if mode == "encode":
h = F.tanh(self.encode(h))
return h
if mode == "decode":
h = F.tanh(self.decode(h))
return h
def reset(self):
self.encode.reset_state()
self.decode.reset_state()
def main():
feat = np.load("feat.npy") #(269, 30, 2000)
gru_autoencoder = GRUAutoEncoder()
optimizer = optimizers.RMSprop(lr=0.01).setup(gru_autoencoder)
N = len(feat)
batch_size = 250
for epoch in range(200):
index = np.random.randint(0, N-batch_size+1)
input_splices = feat[index:index+batch_size] #(250, 30, 2000)
#Encoding
input_vector = np.zeros((30, batch_size, 2000), dtype="float32")
h = []
for i in range(frame_rate):
input_vector[i] = input_splices[:, i, :] #(250, 1, 2000)
tmp = Variable(input_vector[i])
h.append(gru_autoencoder(tmp, "encode")) #(250, 100)
#Decoding
output_vector = []
for i in range(frame_rate):
tmp = h[i]
output_vector.append(gru_autoencoder(tmp, "decode"))
x = input_vector[0]
t = output_vector[0]
for i in range(len(output_vector)):
x = F.concat((x,input_vector[i]), axis=1)
t = F.concat((t,output_vector[i]), axis=1)
loss = F.mean_squared_error(x, t)
gru_autoencoder.cleargrads()
loss.backward()
optimizer.update()
gru_autoencoder.reset()
if __name__ == "__main__":
main()
But the result of above code was not reasonable. I think the Chainer code has something wrong but I cannot find where it is.
In Keras code,
model.fit(feat, feat[:, ::-1, :])
So, I tried to reverse output_vector in Chainer code,
output_vector.reverse()
but the result was not still reasonable.
.. note: This answer is a translation of [Japanese SO].(https://ja.stackoverflow.com/questions/52162/keras%E3%81%AE%E3%82%B3%E3%83%BC%E3%83%89%E3%82%92chainer%E3%81%AB%E6%9B%B8%E3%81%8D%E6%8F%9B%E3%81%88%E3%81%9F%E3%81%84lstm-autoencoder%E3%81%AE%E5%AE%9F%E8%A3%85/52213#52213)
You should avoid using L.GRU and should use L.NStepGRU, because for L.GRU you have to write "recurrence-aware" code. In other words, you have to apply L.GRU multiple times to one timeseries, therefore "batch" must be treated with great care. L.NStepGRU (with n_layers=1) wraps the batch-processing, so it would be user-friendly.
An instance of L.StepGRU takes two input arguments: one is initial state, and the other is a list of timeserieses, which composes a batch. Conventionally, the initial state is None.
Therefore, the whole answer for your question is as follows.
### dataset.py
from chainer.dataset import DatasetMixin
import numpy as np
class MyDataset(DatasetMixin):
N_SAMPLES = 269
N_TIMESERIES = 30
N_DIMS = 2000
def __init__(self):
super().__init__()
self.data = np.random.randn(self.N_SAMPLES, self.N_TIMESERIES, self.N_DIMS) \
.astype(np.float32)
def __len__(self):
return self.N_SAMPLES
def get_example(self, i):
return self.data[i, :, :]
### model.py
import chainer
from chainer import links as L
from chainer import functions as F
from chainer.link import Chain
class MyModel(Chain):
N_IN_CHANNEL = 2000
N_HIDDEN_CHANNEL = 100
N_OUT_CHANNEL = 2000
def __init__(self):
super().__init__()
self.encoder = L.NStepGRU(n_layers=1, in_size=self.N_IN_CHANNEL, out_size=self.N_HIDDEN_CHANNEL, dropout=0)
self.decoder = L.NStepGRU(n_layers=1, in_size=self.N_HIDDEN_CHANNEL, out_size=self.N_OUT_CHANNEL, dropout=0)
def to_gpu(self, device=None):
self.encoder.to_gpu(device)
self.decoder.to_gpu(device)
def to_cpu(self):
self.encoder.to_cpu()
self.decoder.to_cpu()
#staticmethod
def flip_list(source_list):
return [F.flip(source, axis=1) for source in source_list]
def __call__(self, source_list):
"""
.. note:
This implementation makes use of "auto-encoding"
by avoiding redundant copy in GPU device.
In the typical implementation, this function should receive
both of ``source_list`` and ``target_list``.
"""
target_list = self.flip_list(source_list)
_, h_list = self.encoder(hx=None, xs=source_list)
_, predicted_list = self.decoder(hx=None, xs=h_list)
diff_list = [F.mean_squared_error(target, predicted).reshape((1,)) for target, predicted in zip(target_list, predicted_list)]
loss = F.sum(F.concat(diff_list, axis=0))
chainer.report({'loss': loss}, self)
return loss
### converter.py (referring examples/seq2seq/seq2seq.py)
from chainer.dataset import to_device
def convert(batch, device):
"""
.. note:
batch must be list(batch_size) of array
"""
if device is None:
return batch
else:
return [to_device(device, x) for x in batch]
### train.py
from chainer.iterators import SerialIterator
from chainer.optimizers import RMSprop
from chainer.training.updaters import StandardUpdater
from chainer.training.trainer import Trainer
dataset = MyDataset()
BATCH_SIZE = 32
iterator = SerialIterator(dataset, BATCH_SIZE)
model = MyModel()
optimizer = RMSprop()
optimizer.setup(model)
updater = StandardUpdater(iterator, optimizer, convert, device=0)
trainer = Trainer(updater, (100, 'iteration'))
from chainer.training.extensions import snapshot_object
trainer.extend(snapshot_object(model, "model_iter_{.updater.iteration}"), trigger=(10, 'iteration'))
from chainer.training.extensions import LogReport, PrintReport, ProgressBar
trainer.extend(LogReport(['epoch', 'iteration', 'main/loss'], (1, 'iteration')))
trainer.extend(PrintReport(['epoch', 'iteration', 'main/loss']), trigger=(1, 'iteration'))
trainer.extend(ProgressBar(update_interval=1))
trainer.run()

Categories

Resources