PyTorch Optimizer Doesn’t update Weights

PyTorch Optimizer Doesn’t update Weights - python

i'm newbie in PyTorch. Can someone help? I am trying teach Neural Network to play in tetris, but can't understand why weights doesn't cange.
Neural Network:
class CNN(Module):
# define model elements
def __init__(self, n_channels):
super(CNN, self).__init__()
self.number_of_actions = 5
self.gamma = 0.01
self.final_epsilon = 0.0001
self.initial_epsilon = 0.1
self.number_of_iterations = 2000000
self.replay_memory_size = 10000
self.minibatch_size = 32
# input to first hidden layer
#self.hidden1 = Linear(n_channels, 50)
self.hidden1 = Conv2d(1, 1, (22, 10))
kaiming_uniform_(self.hidden1.weight, nonlinearity='relu')
#self.act1 = ReLU()
self.pool1 = MaxPool2d(2, 2)
# second hidden layer
self.hidden2 = Linear(1, 30)
kaiming_uniform_(self.hidden2.weight, nonlinearity='relu')
self.act2 = ReLU()
# fully connected layer
self.hidden3 = Linear(30, 10)
kaiming_uniform_(self.hidden3.weight, nonlinearity='relu')
self.act3 = ReLU()
# output layer
self.hidden4 = Linear(10, 1)
xavier_uniform_(self.hidden4.weight)
self.act4 = Softmax(dim=1)
# forward propagate input
def forward(self, X):
# input to first hidden layer
X = self.hidden1(X)
X = self.pool1(X)
# second hidden layer
X = self.hidden2(X)
X = self.act2(X)
# third hidden layer
X = self.hidden3(X)
X = self.act3(X)
# output layer
X = self.hidden4(X)
X = self.act4(X)
return X
Learning loop:
reward = torch.tensor([[0.]], requires_grad=True)
inputs = torch.tensor([[self.BoardMatrix]])
outputs = model(inputs)
# compute the model output
state_action_values = model(inputs)
replay_memory.append(self.BoardMatrix)
if len(replay_memory) > model.replay_memory_size:
replay_memory.pop(0)
if self.isWaitingAfterLine:
self.isWaitingAfterLine = False
self.newPiece()
else:
# learning
if (state_action_values[0][0][0][0] > 0.6):
self.action("left")
action_batch = 0.8
elif (state_action_values[0][0][0][0] > 0.2):
self.action("right")
action_batch = 0.6
elif (state_action_values[0][0][0][0] > -0.2):
self.action("down")
action_batch = 0.4
elif (state_action_values[0][0][0][0] > -0.6):
self.action("up")
action_batch = 0.2
elif (state_action_values[0][0][0][0] > -1):
self.action("space")
action_batch = 0
self.oneLineDown()
minibatch = random.sample(replay_memory, min(len(replay_memory), model.minibatch_size))
criterion: MSELoss = nn.MSELoss()
reward_batch = reward
y_batch = torch.cat(tuple(reward_batch if minibatch[i][4]
else reward_batch + model.gamma * torch.max(state_action_values[i])
for i in range(len(minibatch))))
y_batch = y_batch.detach()
q_value = torch.sum(outputs * action_batch, dim=1)
optimizer.zero_grad()
loss = criterion(q_value, y_batch[0])
# credit assignment
loss.backward()
optimizer.step()
print(model.hidden1.weight)
else:
super(Board, self).timerEvent(event)
NN, optimizer initialization:
model = CNN(boardSize)
criterion = CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-6)
I working by this tutorial "https://www.toptal.com/deep-learning/pytorch-reinforcement-learning-tutorial", but try change it for tetris.
P.S. I also find out y_batch never change but i don't know why

Related

How to visualize mean edit distance in Tensorboard using Keras callback?

So far, I have been experimenting with Tensorflow and Keras. I took a code from image_ocr.py which allowed me to train printed text ocr. I want to see the training progress as it goes and have successfuly visualized the accuracy and loss of the training model. However, from what I have heard OCR RNN does not take accuracy as a validation but using mean edit distance instead to validate the accuracy of the words. In this case, I have been trying to get a variable called mean_ed and mean_norm_ed to be visualized in Tensorboard from class VizCallback. I have tried the method from this link but it still does not work. Can anyone help me with visualizing the mean edit distance variables? Here are the code snippets from my code:
class VizCallback(keras.callbacks.Callback):
def __init__(self, run_name, test_func, text_img_gen, num_display_words=6):
self.test_func = test_func
self.output_dir = os.path.join(
OUTPUT_DIR, run_name)
self.text_img_gen = text_img_gen
self.num_display_words = num_display_words
if not os.path.exists(self.output_dir):
os.makedirs(self.output_dir)
def on_train_begin(self, logs={}):
self.med = []
self.nmed = []
def show_edit_distance(self, num, logs={}):
num_left = num
mean_norm_ed = 0.0
mean_ed = 0.0
while num_left > 0:
word_batch = next(self.text_img_gen)[0]
num_proc = min(word_batch['the_input'].shape[0], num_left)
decoded_res = decode_batch(self.test_func, word_batch['the_input'][0:num_proc])
for j in range(num_proc):
edit_dist = editdistance.eval(decoded_res[j], word_batch['source_str'][j])
mean_ed += float(edit_dist)
mean_norm_ed += float(edit_dist) / len(word_batch['source_str'][j])
num_left -= num_proc
mean_norm_ed = mean_norm_ed / num
mean_ed = mean_ed / num
#Create scalar summaries for both mean edit distance and normalized mean edit distance
tf_med_ph = tf.placeholder(tf.float32,shape=None,name='med_summary')
tf_nmed_ph = tf.placeholder(tf.float32,shape=None,name='nmed_summary')
tf_med = tf.summary.scalar('med', tf_med_ph)
tf_nmed = tf.summary.scalar('nmed', tf_nmed_ph)
performance_summaries = tf.summary.merge([tf_med,tf_nmed])
#Create a session for displaying the summary
config = tf.ConfigProto(allow_soft_placement=True)
session = tf.InteractiveSession(config=config)
summ_writer = tf.summary.FileWriter(os.path.join('summaries','first'), session.graph)
# Execute the summaries defined above
summ = session.run(performance_summaries, feed_dict={tf_med_ph:mean_ed, tf_nmed_ph:mean_norm_ed})
# Write the obtained summaries to the file, so it can be displayed in the TensorBoard
summ_writer.add_summary(summ, epoch)
session.close()
print('\nOut of %d samples: Mean edit distance: %.3f Mean normalized edit distance: %0.3f'
% (num, mean_ed, mean_norm_ed))
def on_epoch_end(self, epoch, logs={}):
self.model.save_weights(os.path.join(self.output_dir, 'weights%02d.h5' % (epoch)))
self.show_edit_distance(256)
word_batch = next(self.text_img_gen)[0]
res = decode_batch(self.test_func, word_batch['the_input'][0:self.num_display_words])
if word_batch['the_input'][0].shape[0] < 256:
cols = 2
else:
cols = 1
for i in range(self.num_display_words):
plt.subplot(self.num_display_words // cols, cols, i + 1)
if K.image_data_format() == 'channels_first':
the_input = word_batch['the_input'][i, 0, :, :]
else:
the_input = word_batch['the_input'][i, :, :, 0]
plt.imshow(the_input.T, cmap='Greys_r')
plt.xlabel('Truth = \'%s\'\nDecoded = \'%s\'' % (word_batch['source_str'][i], res[i]))
fig = plt.gcf()
fig.set_size_inches(10, 13)
plt.savefig(os.path.join(self.output_dir, 'e%02d.png' % (epoch)))
plt.close()
def train(run_name, start_epoch, stop_epoch, img_w):
# Input Parameters
img_h = 64
words_per_epoch = 16000
val_split = 0.2
val_words = int(words_per_epoch * (val_split))
# Network parameters
conv_filters = 16
kernel_size = (3, 3)
pool_size = 2
time_dense_size = 32
rnn_size = 512
minibatch_size = 32
if K.image_data_format() == 'channels_first':
input_shape = (1, img_w, img_h)
else:
input_shape = (img_w, img_h, 1)
fdir = os.path.dirname(get_file('wordlists.tgz',
origin='http://test.com/wordlist.tgz', untar=True))
img_gen = TextImageGenerator(monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'),
bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'),
minibatch_size=minibatch_size,
img_w=img_w,
img_h=img_h,
downsample_factor=(pool_size ** 2),
val_split=words_per_epoch - val_words
)
act = 'relu'
input_data = Input(name='the_input', shape=input_shape, dtype='float32')
inner = Conv2D(conv_filters, kernel_size, padding='same',
activation=act, kernel_initializer='he_normal',
name='conv1')(input_data)
inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
inner = Conv2D(conv_filters, kernel_size, padding='same',
activation=act, kernel_initializer='he_normal',
name='conv2')(inner)
inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)
conv_to_rnn_dims = (img_w // (pool_size ** 2), (img_h // (pool_size ** 2)) * conv_filters)
inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)
# cuts down input size going into RNN:
inner = Dense(time_dense_size, activation=act, name='dense1')(inner)
# Two layers of bidirectional GRUs
# GRU seems to work as well, if not better than LSTM:
gru_1 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru1')(inner)
gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru1_b')(inner)
gru1_merged = add([gru_1, gru_1b])
gru_2 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged)
gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru2_b')(gru1_merged)
# transforms RNN output to character activations:
inner = Dense(img_gen.get_output_size(), kernel_initializer='he_normal',
name='dense2')(concatenate([gru_2, gru_2b]))
y_pred = Activation('softmax', name='softmax')(inner)
Model(inputs=input_data, outputs=y_pred).summary()
labels = Input(name='the_labels', shape=[img_gen.absolute_max_string_len], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')
# Keras doesn't currently support loss funcs with extra parameters
# so CTC loss is implemented in a lambda layer
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])
# clipnorm seems to speeds up convergence
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out)
#Make tensorboard instance
init_op = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init_op)
tbname="tensorboard-of-{}".format(int(time.time()))
tensorboard = keras.callbacks.TensorBoard(
log_dir="logs/{}".format(tbname),
histogram_freq=0,
write_images=True)
# the loss calc occurs elsewhere, so use a dummy lambda func for the loss
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd,
metrics=['accuracy'])
if start_epoch > 0:
weight_file = os.path.join(OUTPUT_DIR, os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1)))
model.load_weights(weight_file)
# captures output of softmax so we can decode the output during visualization
test_func = K.function([input_data], [y_pred])
viz_cb = VizCallback(run_name, test_func, img_gen.next_val())
model.fit_generator(generator=img_gen.next_train(),
steps_per_epoch=(words_per_epoch - val_words) // minibatch_size,
epochs=stop_epoch,
validation_data=img_gen.next_val(),
validation_steps=val_words // minibatch_size,
callbacks=[tensorboard,viz_cb, img_gen],
initial_epoch=start_epoch)
Any help would be much appriciated. Thank you!
P.S. I am using Tensorflow 1.9.0 and Python 3.6.8
UPDATE
now it is just a matter of passing the variable performance_summaries from the VizCallbak class towards the metrics in the train function. Any help here?

You could modify show_edit_distance to add the summaries every time this function is being called:
def show_edit_distance(self, num, epoch):
...
summary = tf.Summary()
summary.value.add(tag='mean_ed', simple_value=mean_ed)
summ_writer.add_summary(summary, epoch)
summary = tf.Summary()
summary.value.add(tag='mean_norm_ed', simple_value=mean_norm_ed)
summ_writer.add_summary(summary, epoch)
...
Note that you will need an extra argument epoch:
def on_epoch_end(self, epoch, logs={}):
...
self.show_edit_distance(256, epoch)
...
The Tensorboard callback should automatically pick up these summaries, as they're being added to the GraphKeys.SUMMARIES collection.
NOTE: Unfortunately, I couldn't test the solution. Please let me know if there is something I am missing.

Applying Normalization to Inputs in Tensorflow

I have created a custom class to be an ML model, and it is working fine, but I would like to normalize the inputs as they have a wide range of values (e.g. 0, 20000, 500, 10, 8). Currently, as a way of normalizing the inputs, I'm applying lambda x: np.log(x + 1) to each input (the +1 is so it doesn't error out when 0 is passed in). Would a normalization layer be better than my current approach? If so, how would I go about implementing it? My code for the model is below:
class FollowModel:
def __init__(self, input_shape, output_shape, hidden_layers, input_labels, learning_rate=0.001):
tf.reset_default_graph()
assert len(input_labels) == input_shape[1], 'Incorrect number of input labels!'
# Placeholders for input and output data
self.input_labels = input_labels
self.input_shape = input_shape
self.output_shape = output_shape
self.X = tf.placeholder(shape=input_shape, dtype=tf.float64, name='X')
self.y = tf.placeholder(shape=output_shape, dtype=tf.float64, name='y')
self.hidden_layers = hidden_layers
self.learning_rate = learning_rate
# Variables for two group of weights between the three layers of the network
self.W1 = tf.Variable(np.random.rand(input_shape[1], hidden_layers), dtype=tf.float64)
self.W2 = tf.Variable(np.random.rand(hidden_layers, output_shape[1]), dtype=tf.float64)
# Create the neural net graph
self.A1 = tf.sigmoid(tf.matmul(self.X, self.W1))
self.y_est = tf.sigmoid(tf.matmul(self.A1, self.W2))
# Define a loss function
self.deltas = tf.square(self.y_est - self.y) # want this to be 0
self.loss = tf.reduce_sum(self.deltas)
# Define a train operation to minimize the loss
self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.loss)
#initialize
self.model_init = tf.global_variables_initializer()
self.trained = False
def train(self, Xtrain, ytrain, Xtest, ytest, training_steps, batch_size, print_progress=True):
#intiialize session
self.trained = True
self.training_steps = training_steps
self.batch_size = batch_size
self.sess = tf.Session()
self.sess.run(self.model_init)
self.losses = []
self.accs = []
self.testing_accuracies = []
for i in range(training_steps*batch_size):
self.sess.run(self.optimizer, feed_dict={self.X: Xtrain, self.y: ytrain})
local_loss = self.sess.run(self.loss, feed_dict={self.X: Xtrain.values, self.y: ytrain.values})
self.losses.append(local_loss)
self.weights1 = self.sess.run(self.W1)
self.weights2 = self.sess.run(self.W2)
y_est_np = self.sess.run(self.y_est, feed_dict={self.X: Xtrain.values, self.y: ytrain.values})
correct = [estimate.argmax(axis=0) == target.argmax(axis=0)
for estimate, target in zip(y_est_np, ytrain.values)]
acc = 100 * sum(correct) / len(correct)
self.accs.append(acc)
if i % batch_size == 0:
batch_num = i / batch_size
if batch_num % 5 == 0:
self.testing_accuracies.append(self.test_accuracy(Xtest, ytest, False, True))
temp_table = pd.concat([Xtrain, ytrain], axis=1).sample(frac=1)
column_names = list(temp_table.columns.values)
X_columns, y_columns = column_names[0:len(column_names) - 2], column_names[len(column_names) - 2:]
Xtrain = temp_table[X_columns]
ytrain = temp_table[y_columns]
if print_progress: print('Step: %d, Accuracy: %.2f, Loss: %.2f' % (int(i/batch_size), acc, local_loss))
if print_progress: print("Training complete!\nloss: {}, hidden nodes: {}, steps: {}, epoch size: {}, total steps: {}".format(int(self.losses[-1]*100)/100, self.hidden_layers, training_steps, batch_size, training_steps*batch_size))
self.follow_accuracy = acc
return acc
def test_accuracy(self, Xtest, ytest, print_progress=True, return_accuracy=False):
if self.trained:
X = tf.placeholder(shape=Xtest.shape, dtype=tf.float64, name='X')
y = tf.placeholder(shape=ytest.shape, dtype=tf.float64, name='y')
W1 = tf.Variable(self.weights1)
W2 = tf.Variable(self.weights2)
A1 = tf.sigmoid(tf.matmul(X, W1))
y_est = tf.sigmoid(tf.matmul(A1, W2))
# Calculate the predicted outputs
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
y_est_np = sess.run(y_est, feed_dict={X: Xtest, y: ytest})
correctly_followed = 0
incorrectly_followed = 0
missed_follows = 0
correctly_skipped = 0
for estimate, actual in zip(y_est_np, ytest.values):
est = estimate.argmax(axis=0)
# print(estimate)
actual = actual.argmax(axis=0)
if est == 1 and actual == 0: incorrectly_followed += 1
elif est == 1 and actual == 1: correctly_followed += 1
elif est == 0 and actual == 1: missed_follows += 1
else: correctly_skipped += 1
# correct = [estimate.argmax(axis=0) == target.argmax(axis=0) for estimate, target in zip(y_est_np, ytest.values)]
total_followed = incorrectly_followed + correctly_followed
total_correct = correctly_followed + correctly_skipped
total_incorrect = incorrectly_followed + missed_follows
try: total_accuracy = int(total_correct * 10000 / (total_correct + total_incorrect)) / 100
except: total_accuracy = 0
total_skipped = correctly_skipped + missed_follows
try: follow_accuracy = int(correctly_followed * 10000 / total_followed) / 100
except: follow_accuracy = 0
try: skip_accuracy = int(correctly_skipped * 10000 / total_skipped) / 100
except: skip_accuracy = 0
if print_progress: print('Correctly followed {} / {} ({}%), correctly skipped {} / {} ({}%)'.format(
correctly_followed, total_followed, follow_accuracy, correctly_skipped, total_skipped, skip_accuracy))
self.follow_accuracy = follow_accuracy
if return_accuracy:
return total_accuracy
else:
print('The model is not trained!')
def make_prediction_on_normal_data(self, input_list):
assert len(input_list) == len(self.input_labels), 'Incorrect number of inputs (had {} should have {})'.format(len(input_list), len(self.input_labels))
# from ProcessData import normalize_list
# normalize_list(input_list)
input_array = np.array([input_list])
X = tf.placeholder(shape=(1, len(input_list)), dtype=tf.float64, name='X')
y = tf.placeholder(shape=(1, 2), dtype=tf.float64, name='y')
W1 = tf.Variable(self.weights1)
W2 = tf.Variable(self.weights2)
A1 = tf.sigmoid(tf.matmul(X, W1))
y_est = tf.sigmoid(tf.matmul(A1, W2))
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
y_est_np = sess.run(y_est, feed_dict={X: input_array, y: self.create_blank_outputs()})
predicted_value = y_est_np[0].argmax(axis=0)
return predicted_value
def make_prediction_on_abnormal_data(self, input_list):
from ProcessData import normalize_list
normalize_list(input_list)
return self.make_prediction_on_normal_data(input_list)
def create_blank_outputs(self):
blank_outputs = np.zeros(shape=(1,2), dtype=np.int)
for i in range(len(blank_outputs[0])):
blank_outputs[0][i] = float(blank_outputs[0][i])
return blank_outputs

I don't see see why you want to create a layer that does that. The common practice of preprocessing your inputs is as you are currently doing.
Using the log operator is quite common for skewed data, but there are other preprocessing solutions such as sklearn's MinMaxScaler and StandardScaler
https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html
https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html
Those are just examples of two other ways to scale your data.
There is such a thing called BatchNorm but it is not recommended as the first layer of the network as distribution of the data is fixed and doesn’t vary during training.

TensorFlow tf.train.Saver() not working on tf.contrib.layers.fully_connected()

So I wrote this generalised TensorFlow code and want to save and restore models. But apparently the error is that there is no variables to save. I did everything as given in this official example. Ignore the __init__ method except the last line, since it only takes relevant parameters to train the model with, also there is no Syntax Errors. The error it produces is given below the code.
class Neural_Network(object):
def __init__(self, numberOfLayers, nodes, activations, learningRate,
optimiser = 'GradientDescent', regularizer = None,
dropout = 0.5, initializer = tf.contrib.layers.xavier_initializer()):
self.numberOfLayers = numberOfLayers
self.nodes = nodes
self.activations = activations
self.learningRate = learningRate
self.regularizer = regularizer
self.dropout = dropout
self.initializer = initializer
if(optimiser == 'GradientDescent'):
self.optimiser = tf.train.GradientDescentOptimizer(self.learningRate)
elif(optimiser == 'AdamOptimiser'):
self.optimiser = tf.train.AdamOptimizer(self.learningRate)
self.saver = tf.train.Saver()
def create_Neural_Net(self, numberOfFeatures):
self.numberOfFeatures = numberOfFeatures
self.X = tf.placeholder(dtype = tf.float32, shape = (None, self.numberOfFeatures), name = 'Input_Dataset')
#self.output = None
for i in range(0, self.numberOfLayers):
if(i == 0):
layer = tf.contrib.layers.fully_connected(self.X, self.nodes[i],
activation_fn = self.activations[i],
weights_initializer = self.initializer,
biases_initializer = self.initializer)
elif(i == self.numberOfLayers-1):
self.output = tf.contrib.layers.fully_connected(layer, self.nodes[i],
activation_fn = self.activations[i],
weights_initializer = self.initializer,
biases_initializer = self.initializer)
else:
layer = tf.contrib.layers.fully_connected(layer, self.nodes[i],
activation_fn = self.activations[i],
weights_initializer = self.initializer,
biases_initializer = self.initializer)
def train_Neural_Net(self, dataset, labels, epochs):
entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits = self.output, labels = labels, name = 'cross_entropy')
loss = tf.reduce_mean(entropy, name = 'loss')
hypothesis = tf.nn.softmax(self.output)
correct_preds = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
train_op = self.optimiser.minimize(loss)
self.loss=[]
self.accuracy = []
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(0, epochs):
_, l, acc = sess.run([train_op, loss, accuracy], feed_dict = {self.X:dataset})
print('Loss in epoch ' + str(i) + ' is: ' + str(l))
self.loss.append(l)
self.accuracy.append(acc)
self.saver.save(sess, './try.ckpt')
return self.loss, self.accuracy
And ran this code as:
nn = Neural_Network(2, [20,3], [tf.nn.relu, tf.nn.relu], 0.001, optimiser = 'AdamOptimiser')
nn.create_Neural_Net(4)
nn.train_Neural_Net(dataset, labels, 1000)
The error it gives is:
ValueError: No variables to save
So what is wrong in this code? And how can I fix it?

No learning occuring in reinforcement learning agent

I am trying to implement the deep deterministic gradient method in tensorflow and keras, however, I seem to be stuck. There seems to be no learning occurring, the actions taken by the model do not seem to change at all and the gradient applied to the actor-network is very small as well (on the order of magnitude of e^-5). I have used another implementation as a reference and this one runs very well with the exact same hyperparameters and network architectures (except that it is implemented with tflearn and includes batch normalization layers), making me believe that there is a mistake somewhere in my code. Maybe someone here can spot it.
Thank you for your time!
Edit: I think the reason for the bad performance is that the critic-network's gradient with respect to the action is vanishing. I can't, however, figure out why. Maybe my use of the concatenate layer is wrong?
class AIInterface(object):
def __init__(self, sim):
self.sim = sim
self.pedal_pos = 0
self.steering_pos = 0
self.sess = tf.Session()
self.learning_rate = 10e-4
self.BATCH_SIZE = 64
self.epsilon = .75 #amount of random exploration
self.epsilon_decay = .997
self.gamma = .99 #reward discount factor
self.tau = .00125 #target update factor
self.rewards = deque(maxlen=100000)
self.memory = deque(maxlen=100000)
# Actor stuff
self.actor_model, self.actor_var_in = self.initialize_actor()
self.target_actor, _ = self.initialize_actor()
self.actor_critic_grad = tf.placeholder(tf.float32, [None, 1])
self.actor_model_weights = self.actor_model.trainable_weights
with tf.name_scope("actor_gradients"):
self.actor_grads = tf.gradients(self.actor_model.output, self.actor_model_weights, -self.actor_critic_grad)
self.normalized_actor_grads = list(map(lambda x: tf.div(x, self.BATCH_SIZE), self.actor_grads))
grads = zip(self.normalized_actor_grads, self.actor_model_weights)
self.optimize = tf.train.AdamOptimizer(self.learning_rate).apply_gradients(grads)
# Critic stuff
self.critic_model, self.critic_var_in, self.critic_action_in = self.initialize_critic()
self.target_critic, _, _ = self.initialize_critic()
with tf.name_scope("CriticGrads"):
self.critic_grads = tf.gradients(self.critic_model.output, self.critic_action_in)
self.sess.run(tf.global_variables_initializer())
self.target_actor.set_weights(self.actor_model.get_weights())
self.target_critic.set_weights(self.critic_model.get_weights())
self.global_step = 0
def initialize_actor(self):
state_variable_input = Input(shape=(3, ))
init = TruncatedNormal(mean=0.0, stddev=0.02)
dense = Dense(128, activation="relu", kernel_initializer=init)(state_variable_input)
dense2 = Dense(128, activation="relu", kernel_initializer=init)(dense)
output = Dense(1, activation="tanh", kernel_initializer=RandomUniform(-3e-3, 3e-3))(dense2)
model = Model(inputs=state_variable_input,
outputs=output)
model.compile(optimizer="adam", loss="mse")
return model, state_variable_input
def initialize_critic(self):
state_variable_input = Input(shape=(3, ))
action_input = Input(shape=(1, ))
init = TruncatedNormal(mean=0.0, stddev=0.02)
dense_state = Dense(128, activation="relu", kernel_initializer=init)(state_variable_input)
merge = Concatenate()([dense_state, action_input])
dense2 = Dense(128, activation="relu", kernel_initializer=init)(merge)
output = Dense(1, activation="linear", kernel_initializer=RandomUniform(-3e-3, 3e-3))(dense2)
model = Model(inputs=[state_variable_input, action_input],
outputs=output)
model.compile(optimizer="adam", loss="mse")
return model, state_variable_input, action_input
def train(self):
if len(self.memory) < self.BATCH_SIZE:
return
samples = random.sample(self.memory, self.BATCH_SIZE)
samples = [np.concatenate(x) for x in zip(*samples)]
self.train_critic(samples)
self.train_actor(samples)
self.global_step += 1
def train_critic(self, samples):
cur_state_var, action, reward, new_state_var = samples
predicted_action = self.target_actor.predict([new_state_var])
future_reward = self.target_critic.predict([new_state_var, predicted_action])
Q = reward + self.gamma*future_reward
self.critic_model.train_on_batch([cur_state_var, action], Q)
def train_actor(self, samples):
cur_state_var, action, reward, new_state_var = samples
predicted_action = self.actor_model.predict([cur_state_var])
grads = self.sess.run([self.critic_grads], feed_dict={
self.critic_var_in: cur_state_var,
self.critic_action_in: predicted_action})
self.sess.run(self.optimize, feed_dict={
self.actor_var_in: cur_state_var,
self.actor_critic_grad: grads[0]
})
def update_actor_target(self):
actor_model_weights = self.actor_model.get_weights()
actor_target_weights = self.target_actor.get_weights()
for i in range(len(actor_target_weights)):
actor_target_weights[i] = self.tau * actor_model_weights[i] + (1-self.tau)*actor_target_weights[i]
self.target_actor.set_weights(actor_target_weights)
def update_critic_target(self):
critic_model_weights = self.critic_model.get_weights()
critic_target_weights = self.target_critic.get_weights()
for i in range(len(critic_target_weights)):
critic_target_weights[i] = self.tau * critic_model_weights[i] + (1-self.tau)*critic_target_weights[i]
self.target_critic.set_weights(critic_target_weights)
def update_model(self):
self.update_actor_target()
self.update_critic_target()
def act(self, cur_state_var, noise=None, env=None):
if env:
if np.random.random() < self.epsilon:
return env.action_space.sample()
else:
sh = cur_state_var.shape
action = self.actor_model.predict([cur_state_var], batch_size=1)[0]
return action
elif not noise:
if np.random.random() < self.epsilon:
return self.sample_action_space()
return self.actor_model.predict([cur_state_var], batch_size=1)[0]
else:
no = noise()
pred = self.actor_model.predict([cur_state_var], batch_size=1)[0]
return pred + no
def sample_action_space(self):
return np.array([random.uniform(-0.5, 0.5), random.uniform(-1.0, 1.0)]).reshape(2, )
def remember(self, cur_state_var, action, reward, new_state_var):
self.memory.append([cur_state_var, action, reward, new_state_var])

Is this a correct reimplementation of Pytorch Seq2Seq model?

I made a code that sort of change the tutorial script of seq2seq provided by Pytorch. Here’s the model:
class Seq2Seq(nn.Module):
def __init__(self, encoder, batch_size, vocab_size, input_size, output_size, hidden_dim, embedding_dim, n_layers=2, dropout_p=0.5):
super(Seq2Seq, self).__init__()
self.hidden_dim = hidden_dim
self.batch_size = batch_size
self.input_length = input_size
self.output_length = output_size
self.vocab_size = vocab_size
self.encoder = encoder
self.dropout = nn.Dropout(dropout_p)
self.selu = nn.SELU()
self.decoder_embeddings = nn.Embedding(vocab_size, hidden_dim)
self.decoder_gru = nn.GRU(hidden_dim, hidden_dim)
self.out = nn.Linear(hidden_dim, vocab_size)
self.softmax = nn.LogSoftmax()
def decode(self, SOS_token, encoder_hidden, target_output, teacher_forcing_ratio=0.8):
decoder_output_full = autograd.Variable(torch.zeros(self.output_length, self.batch_size, self.vocab_size))
decoder_output_full = decoder_output_full.cuda() if use_cuda else decoder_output_full
target = target_output.permute(1,0)
use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
for idx in range(self.output_length):
if idx == 0:
decoder_input = SOS_token
decoder_hidden = encoder_hidden.unsqueeze(0)
output = self.decoder_embeddings(decoder_input).view(1, self.batch_size, -1)
output = self.dropout(output)
output = self.selu(output)
if use_teacher_forcing:
decoder_output, decoder_hidden = self.decoder_gru(output, decoder_hidden)
temp = 1
out = self.out(decoder_output[0])
out = out + sample_gumbel(out.shape)
decoder_output = F.softmax(out / temp, dim=1)
# decoder_output = (self.decoder_embeddings.weight * decoder_output.unsqueeze(1)).sum(0).view(1, 1, -1)
decoder_output_full[idx, :, :] = decoder_output
decoder_input = target[idx-1] # Teacher forcing
else:
decoder_output, decoder_hidden = self.decoder_gru(output, decoder_hidden)
temp = 1
out = self.out(decoder_output[0])
out = out + sample_gumbel(out.shape)
decoder_output = F.softmax(out / temp, dim=1)
# decoder_output = (self.decoder_embeddings.weight * decoder_output.unsqueeze(1)).sum(0).view(1, 1, -1)
topv, topi = decoder_output.data.topk(1)
# print topi
ni = topi
# decoder_input_v = autograd.Variable(torch.LongTensor([[ni]]))
decoder_input = autograd.Variable(ni)
# decoder_input = decoder_input.cuda() if use_cuda else decoder_input
# print decoder_input
decoder_output_full[idx, :, :] = decoder_output
decoder_output_full = decoder_output_full.permute(1,0,2)
# gen_output = self.softmax(self.out(decoder_output_full))
return decoder_output_full
def forward(self, input, target_output, teacher_forcing_ratio=0.8):
encoder_feat, _ = self.encoder(input)
SOS_token = np.zeros((self.batch_size,1), dtype=np.int32)
SOS_token = torch.LongTensor(SOS_token.tolist())
SOS_token = autograd.Variable(SOS_token)
if use_cuda:
SOS_token = SOS_token.cuda(gpu)
gen_output = self.decode(SOS_token, encoder_feat, target_output, teacher_forcing_ratio)
return gen_output
def initHidden(self):
result = autograd.Variable(torch.zeros(1, self.batch_size, self.hidden_dim))
if use_cuda:
return result.cuda()
else:
return result
The way I calculate the NLL loss is by creating one whole sequence of output first and compare it with the target output. Here’s the loss function:
class batchNLLLoss(nn.Module):
def __init__(self):
super(batchNLLLoss, self).__init__()
def forward(self, synt, target, claim_length=20):
loss_fn = nn.NLLLoss()
loss = 0
for i in range(synt.shape[0]):
for j in range(claim_length):
loss += loss_fn(synt[i][j].unsqueeze(0), target[i][j])
return loss
The current problem is the loss value is really small and seems like the network learns nothing (the output is the same word repeated again and again). Any thought about this? Thanks in advance!

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

PyTorch Optimizer Doesn’t update Weights - python

Related

How to visualize mean edit distance in Tensorboard using Keras callback?

Applying Normalization to Inputs in Tensorflow

TensorFlow tf.train.Saver() not working on tf.contrib.layers.fully_connected()

No learning occuring in reinforcement learning agent

Is this a correct reimplementation of Pytorch Seq2Seq model?

Categories

Resources