tflearn loss is always 0.0 while training reinforcement learning agent - python

I tried to train a reinforcement learning agent with gym and tflearn using this code:
from tflearn import *
import gym
import numpy as np
env = gym.make('CartPole-v0')
x = []
y = []
max_reward = 0
for i in range(1000):
    env.reset()
    while True:
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        if done:
            break
        if reward >= max_reward:
            x.append(observation)
            y.append(np.array([action]))
x = np.asarray(x)
y = np.asarray(y)
net = input_data((None,4))
net = fully_connected(net,8,'softmax')
net = fully_connected(net,16,'softmax')
net = fully_connected(net,32,'softmax')
net = fully_connected(net,64,'softmax')
net = fully_connected(net,128,'softmax')
net = fully_connected(net,64,'softmax')
net = fully_connected(net,32,'softmax')
net = fully_connected(net,16,'softmax')
net = fully_connected(net,8,'softmax')
net = fully_connected(net,4,'softmax')
net = fully_connected(net,2,'softmax')
net = fully_connected(net,1)
net = regression(net,optimizer='adam',learning_rate=0.01,loss='categorical_crossentropy',batch_size=1)
model = DNN(net)
model.fit(x,y,10)
model.save('saved/model.tflearn')
The problem is that while the model is training, the loss is always 0.0.
Can someone help me with this issue?

I'm not sure what your objective is, but categorical_crossentropy is a loss function for multi-class classification, while the output of your network is just one unit, fully_connected(net,1), with a linear activation. That is why you are getting a loss of 0.
Try mean_square or even binary_crossentropy and you will see different loss values.
I would also use a sigmoid activation on the last layer, and ReLUs on the rest.
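As a minimal sketch of those suggestions (assuming you keep the CartPole observations as input and the sampled action as a 0/1 target; the layer sizes here are arbitrary):

net = input_data((None, 4))
net = fully_connected(net, 32, activation='relu')
net = fully_connected(net, 16, activation='relu')
net = fully_connected(net, 1, activation='sigmoid')
net = regression(net, optimizer='adam', learning_rate=0.01,
                 loss='binary_crossentropy', batch_size=32)
model = DNN(net)
model.fit(x, y, n_epoch=10)

Whether this setup actually learns CartPole is a separate question (the data collection loop only records observations from random actions), but at least the loss will no longer be stuck at 0.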

Related

Fully Connected Neural Network not predicting correctly

I am a newbie in ML and DL, but I decided to try something out, and I found that my network is not predicting correctly.
I have a fully connected neural network with just one dense (linear) layer. I used SGD as the optimizer and it predicted 9.9 instead of 10, but when I use Adam it predicts 10. The expected result is 10. I'm confused; can someone explain to me why this is so?
!pip install -Uqq tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm as tqdm
My training data as a sample
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)
My model or network for forward pass and neural net
class SimpleNeuralNetwork(nn.Module):
    def __init__(self, num_input, num_output):
        super(SimpleNeuralNetwork, self).__init__()
        self.fc = nn.Linear(num_input, num_output)

    def forward(self, x):
        x = self.fc(x)
        return x
In features and batch
in_samples, in_features = X.shape
Defining and Initialising my loss function
criterion = nn.MSELoss()
Parameters for the training process
learning_rate = 0.01
ePoch = 1000
Initialising my model
sNN = SimpleNeuralNetwork(in_features, in_features)
Initialising my optimizer
optimiser = optim.SGD(sNN.parameters(), lr=learning_rate)
Training my Network
for i in tqdm(list(range(ePoch))):
    # prediction - forward pass in the model
    y_pred = sNN(X)
    # loss - check how well or how far our model did with the prediction
    loss = criterion(Y, y_pred)
    # gradient - do a backward propagation (backward pass)
    loss.backward()
    # update weight - readjust the weight using our learning rate as a proximity
    optimiser.step()
    # zero gradient - reinitialize our memory to zero so that the neural network will not cram
    optimiser.zero_grad()
    # if i % 10 == 0:
    #     [w, b] = sNN.parameters()
    #     print(f'epoch: {i + 1}, weight: {w[0][0].item()}, bias: {b[0].item()}, pred: {y_pred}')
Actual prediction
predict = sNN(torch.tensor([5], dtype=torch.float32))
print(f'prediction for 5: {predict[0].item()}')
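No answer was posted for this one, but a quick way to see what is going on (a sketch using the same model and data as above) is to inspect the learned weight and bias: for Y = 2X the layer should end up with weight ≈ 2 and bias ≈ 0. One plausible explanation is simply that plain SGD with lr=0.01 converges more slowly than Adam on this problem, so after 1000 epochs it is still slightly off.

# inspect the learned parameters (expected: weight -> 2, bias -> 0 for Y = 2X)
w, b = sNN.parameters()
print(f'weight: {w.item():.4f}, bias: {b.item():.4f}')

# training the SGD version for more epochs (or with a larger learning rate)
# should move its prediction for 5 from ~9.9 towards 10.0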

Why does this RNN in tensorflow not learn?

I am trying to train an RNN without using the RNN API in tensorflow (2) in Python 3.7, so the code is very basic. Something is going really wrong, but I'm not sure what it is.
As a reference, I am using a dataset from this tensorflow tutorial so I know what the error should roughly converge to. My RNN code is the following. What it is trying to do is use the previous 20 timesteps to predict the value of a series at the 21st timestep. I am training in batches of size 256.
While the loss does decrease over time, it plateaus at roughly 10x the value the tutorial approach reaches. Could it be some problem with the backpropagation through time?
state_size = 20  # dimensionality of the network
BATCH_SIZE = 256

# define recurrent weights and biases. W has 1 more dimension than the state
# dimension as it also processes the inputs
W = tf.Variable(np.random.rand(state_size+1, state_size), dtype=tf.float32)
b = tf.Variable(np.zeros((1, state_size)), dtype=tf.float32)

# weights and biases for the output
W2 = tf.Variable(np.random.rand(state_size, 1), dtype=tf.float32)
b2 = tf.Variable(np.zeros((1, 1)), dtype=tf.float32)

init_state = tf.Variable(np.random.normal(size=[BATCH_SIZE, state_size]), dtype='float32')
optimizer = tf.keras.optimizers.Adam(1e-3)
losses = []

for epoch in range(20):
    with tf.GradientTape() as tape:
        loss = 0
        for batch_idx in range(200):
            current_state = init_state
            batchx = x_train_uni[batch_idx*BATCH_SIZE:(batch_idx+1)*BATCH_SIZE].swapaxes(0, 1)
            batchy = y_train_uni[batch_idx*BATCH_SIZE:(batch_idx+1)*BATCH_SIZE]
            # forward pass through the timesteps
            for x in batchx:
                inst = tf.concat([current_state, x], 1)  # concatenate state and inputs for that timepoint
                current_state = tf.tanh(tf.matmul(inst, W) + b)
            # predict using the hidden state after the full forward pass
            pred = tf.matmul(current_state, W2) + b2
            loss += tf.reduce_mean(tf.abs(batchy - pred))
    # get gradients with respect to parameters
    gradients = tape.gradient(loss, [W, b, W2, b2])
    # apply gradients
    optimizer.apply_gradients(zip(gradients, [W, b, W2, b2]))
    losses.append(loss)
    print(loss)
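There is no posted answer here, but one structural issue stands out (an observation, not a confirmed fix): the GradientTape wraps the entire inner loop, so the 200 batch losses are summed and only one parameter update is applied per epoch, i.e. 20 updates in total. A sketch that instead takes one optimizer step per batch:

for epoch in range(20):
    for batch_idx in range(200):
        batchx = x_train_uni[batch_idx*BATCH_SIZE:(batch_idx+1)*BATCH_SIZE].swapaxes(0, 1)
        batchy = y_train_uni[batch_idx*BATCH_SIZE:(batch_idx+1)*BATCH_SIZE]
        with tf.GradientTape() as tape:
            current_state = init_state
            for x in batchx:
                inst = tf.concat([current_state, x], 1)
                current_state = tf.tanh(tf.matmul(inst, W) + b)
            pred = tf.matmul(current_state, W2) + b2
            loss = tf.reduce_mean(tf.abs(batchy - pred))
        # one update per batch: 20 * 200 steps instead of 20
        gradients = tape.gradient(loss, [W, b, W2, b2])
        optimizer.apply_gradients(zip(gradients, [W, b, W2, b2]))
    losses.append(loss)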

Policy Gradient algorithm gets worse over time

I tried to write a Policy Gradient algorithm for the video game Pong.
Here's the code:
import tensorflow as tf
import gym
import numpy as np
import matplotlib.pyplot as plt
from os import getcwd

num_episodes = 1000
learning_rate = 0.01
rewards = []

env_name = 'Pong-v0'
env = gym.make(env_name)

x = tf.placeholder(tf.float32, (None,) + env.observation_space.shape)
y = tf.placeholder(tf.float32, (None, env.action_space.n))

def net(x):
    layer1 = tf.layers.flatten(x)
    layer2 = tf.layers.dense(layer1, 200, activation=tf.nn.softmax)
    layer3 = tf.layers.dense(layer2, env.action_space.n, activation=tf.nn.softmax)
    return layer3

logits = net(x)
loss = tf.losses.sigmoid_cross_entropy(y, logits)
train = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
saver = tf.train.Saver()
init = tf.global_variables_initializer()
sess = tf.Session()

with tf.device('/device:GPU:0'):
    sess.run(init)
    for episode in range(num_episodes):
        print('episode:', episode+1)
        total_reward = 0
        losses = []
        training_data = []
        observation = env.reset()
        while True:
            if max(0.1, (episode+1)/num_episodes) > np.random.uniform():
                probs = sess.run(logits, feed_dict={x: [observation]})[0]
                action = np.argmax(probs)
            else:
                action = env.action_space.sample()
            onehot = np.zeros(env.action_space.n)
            onehot[action] = 1
            training_data.append([observation, onehot])
            observation, reward, done, _ = env.step(action)
            total_reward += reward
            if done:
                break
        if total_reward >= 0:
            learning_rate = 0.01
        else:
            learning_rate = -0.01
        for sample in training_data:
            l, _ = sess.run([loss, train], feed_dict={x: [sample[0]], y: [sample[1]]})
            losses.append(l)
            print('loss:', l)
        print('average loss:', sum(losses)/len(losses))
        saver.save(sess, getcwd()+'/model.ckpt')
        rewards.append(total_reward)

plt.plot(range(episode+1), rewards)
plt.ylabel('total reward')
plt.xlabel('episodes')
plt.savefig(getcwd()+'/reward_plot.png')
But after I trained my network, the plot the script made seemed to suggest that the network got worse towards the end. Also, during the last episode the loss was the same for all training examples (~0.68), and when I try to test the network, the player's paddle just sits there motionless. Is there any way I can improve my code?
I would ask you to familiarize yourself with how to code neural networks using tensorflow, because that is where the problem lies. You use activation=tf.nn.softmax in both dense layers, but softmax should only be used on the terminal layer (since you are trying to find the maximum action probability). You can change it to tf.nn.relu in the second (hidden) layer. There is a bigger problem with the learning_rate:
if total_reward >= 0:
    learning_rate = 0.01
else:
    learning_rate = -0.01
A negative learning rate makes absolutely no sense. You want the learning rate to be positive (you can use a constant 0.01 for now).
Also, another comment: you have not mentioned the observation_space shape, but I am going to assume it is a 2D matrix. Then you could reshape it before feeding it into x, so you would not need to use tf.layers.flatten at all.
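A minimal sketch of those two changes (assuming the rest of the script stays as posted):

def net(x):
    layer1 = tf.layers.flatten(x)
    # hidden layer with relu; softmax only on the output layer
    layer2 = tf.layers.dense(layer1, 200, activation=tf.nn.relu)
    layer3 = tf.layers.dense(layer2, env.action_space.n, activation=tf.nn.softmax)
    return layer3

# keep the learning rate positive and constant; drop the
# "if total_reward >= 0 ... else learning_rate = -0.01" block entirely
learning_rate = 0.01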

Learning parameters of MultivariateNormalDiag in tensorflow

I have been trying to code up a Variational Autoencoder (VAE) in tensorflow. I was able to implement the version with has a Gaussian encoder network and a Bernoulli decoder as in the paper Auto-Encoding Variational Bayes.
However, I would like to work with real-valued data, and I have not been able to get a VAE with a Gaussian decoder to work. I have narrowed this down to a problem with my network: it does not seem to learn the parameters of the diagonal multivariate Gaussian. Here is the code for a very simple test case, where my input data is just drawn from a normal(0,1). All the network needs to learn is the mean and variance of my data. I would expect the mean to converge to 0 and the variance to converge to 1, but it does not:
import tensorflow as tf
import numpy as np

tf.reset_default_graph()

input_dim = 1
hidden_dim = 10
learning_rate = 0.001
num_batches = 1000

# Network
x = tf.placeholder(tf.float32, (None, input_dim))
with tf.variable_scope('Decoder'):
    h1 = tf.layers.dense(x, hidden_dim, activation=tf.nn.softplus, name='h1')
    mu = tf.layers.dense(h1, input_dim, activation=tf.nn.softplus, name='mu')
    diag_stdev = tf.layers.dense(h1, input_dim, activation=tf.nn.softplus, name='diag_stdev')

# Loss: -log(p(x))
with tf.variable_scope('Loss'):
    dist = tf.contrib.distributions.MultivariateNormalDiag(loc=mu, scale_diag=diag_stdev)
    loss = -tf.reduce_mean(tf.log(1e-10 + dist.prob(x)))

# Optimizer
train_step = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

summary_writer = tf.summary.FileWriter('./log_dir', tf.get_default_graph())

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    mu_plot = np.zeros(num_batches,)
    for i in range(num_batches):  # degenerate case, batch_size of 1
        input_ = np.random.multivariate_normal(mean=[0], cov=np.diag([1]), size=(1))
        loss_, mu_, diag_stdev_, _ = sess.run([loss, mu, diag_stdev, train_step], feed_dict={x: input_})
        print("-p(x): {}, mu: {}, diag_stdev: {}".format(loss_, mu_, diag_stdev_))

Grouping plots in Tensorboard

I would like to organize my plots in Tensorboard into groups.
For example, say I'm training several networks at the same time, they each have their own accuracy and loss wrt step, and it would be nice to group accuracy and loss by its network. Or maybe I want to inspect the activation, average weight, average biases of each layer, grouped by their layer, to better understand how they change during training.
How can I do that?
Try something like this:
import tensorflow as tf
# define first network
model_1 = tf.layers.dense(input1 , 100)
...
loss_1 = ...
summaries_1 = tf.summary.merge([tf.summary.scalar("loss_1", loss_1)])
train_op_1 = ...
# define second network
model_2 = tf.layers.dense(input2 , 100)
...
loss_2 = ...
summaries_2 = tf.summary.merge([tf.summary.scalar("loss_2", loss_2)])
train_op_2 = ...
#define file writer
fw = tf.summary.FileWriter(logdir='/tmp/my_logs')
sess = tf.Session()
# train your networks
for i in range(NUM_ITR):
    # train first net
    _, summary_str = sess.run([train_op_1, summaries_1])
    fw.add_summary(summary_str, global_step=i)

    # train second net
    _, summary_str = sess.run([train_op_2, summaries_2])
    fw.add_summary(summary_str, global_step=i)
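TensorBoard groups scalar plots by the tag prefix before the first "/", so if you want each network's plots collapsed into their own group, you can wrap the summary ops in a tf.name_scope (or prefix the tags yourself). A sketch, where acc_1 stands in for whatever accuracy tensor you have:

with tf.name_scope('network_1'):
    summaries_1 = tf.summary.merge([
        tf.summary.scalar('loss', loss_1),       # appears as network_1/loss
        tf.summary.scalar('accuracy', acc_1),    # appears as network_1/accuracy
    ])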
