I am trying to implement the deep deterministic policy gradient (DDPG) method in TensorFlow and Keras, but I seem to be stuck. No learning occurs: the actions taken by the model do not change at all, and the gradient applied to the actor network is very small as well (on the order of 1e-5). I used another implementation as a reference, and it runs very well with the exact same hyperparameters and network architectures (except that it is implemented with tflearn and includes batch normalization layers), which makes me believe there is a mistake somewhere in my code. Maybe someone here can spot it.
Thank you for your time!
Edit: I think the reason for the bad performance is that the critic network's gradient with respect to the action is vanishing. I can't figure out why, though. Maybe my use of the Concatenate layer is wrong?
# Imports assumed by this snippet (TF 1.x with standalone Keras):
import random
from collections import deque

import numpy as np
import tensorflow as tf
from keras.models import Model
from keras.layers import Input, Dense, Concatenate
from keras.initializers import TruncatedNormal, RandomUniform

class AIInterface(object):
def __init__(self, sim):
self.sim = sim
self.pedal_pos = 0
self.steering_pos = 0
self.sess = tf.Session()
self.learning_rate = 10e-4
self.BATCH_SIZE = 64
self.epsilon = .75 #amount of random exploration
self.epsilon_decay = .997
self.gamma = .99 #reward discount factor
self.tau = .00125 #target update factor
self.rewards = deque(maxlen=100000)
self.memory = deque(maxlen=100000)
# Actor stuff
self.actor_model, self.actor_var_in = self.initialize_actor()
self.target_actor, _ = self.initialize_actor()
self.actor_critic_grad = tf.placeholder(tf.float32, [None, 1])
self.actor_model_weights = self.actor_model.trainable_weights
with tf.name_scope("actor_gradients"):
self.actor_grads = tf.gradients(self.actor_model.output, self.actor_model_weights, -self.actor_critic_grad)
self.normalized_actor_grads = list(map(lambda x: tf.div(x, self.BATCH_SIZE), self.actor_grads))
grads = zip(self.normalized_actor_grads, self.actor_model_weights)
self.optimize = tf.train.AdamOptimizer(self.learning_rate).apply_gradients(grads)
# Critic stuff
self.critic_model, self.critic_var_in, self.critic_action_in = self.initialize_critic()
self.target_critic, _, _ = self.initialize_critic()
with tf.name_scope("CriticGrads"):
self.critic_grads = tf.gradients(self.critic_model.output, self.critic_action_in)
self.sess.run(tf.global_variables_initializer())
self.target_actor.set_weights(self.actor_model.get_weights())
self.target_critic.set_weights(self.critic_model.get_weights())
self.global_step = 0
def initialize_actor(self):
state_variable_input = Input(shape=(3, ))
init = TruncatedNormal(mean=0.0, stddev=0.02)
dense = Dense(128, activation="relu", kernel_initializer=init)(state_variable_input)
dense2 = Dense(128, activation="relu", kernel_initializer=init)(dense)
output = Dense(1, activation="tanh", kernel_initializer=RandomUniform(-3e-3, 3e-3))(dense2)
model = Model(inputs=state_variable_input,
outputs=output)
model.compile(optimizer="adam", loss="mse")
return model, state_variable_input
def initialize_critic(self):
state_variable_input = Input(shape=(3, ))
action_input = Input(shape=(1, ))
init = TruncatedNormal(mean=0.0, stddev=0.02)
dense_state = Dense(128, activation="relu", kernel_initializer=init)(state_variable_input)
merge = Concatenate()([dense_state, action_input])
dense2 = Dense(128, activation="relu", kernel_initializer=init)(merge)
output = Dense(1, activation="linear", kernel_initializer=RandomUniform(-3e-3, 3e-3))(dense2)
model = Model(inputs=[state_variable_input, action_input],
outputs=output)
model.compile(optimizer="adam", loss="mse")
return model, state_variable_input, action_input
def train(self):
if len(self.memory) < self.BATCH_SIZE:
return
samples = random.sample(self.memory, self.BATCH_SIZE)
samples = [np.concatenate(x) for x in zip(*samples)]
self.train_critic(samples)
self.train_actor(samples)
self.global_step += 1
def train_critic(self, samples):
cur_state_var, action, reward, new_state_var = samples
predicted_action = self.target_actor.predict([new_state_var])
future_reward = self.target_critic.predict([new_state_var, predicted_action])
Q = reward + self.gamma*future_reward
self.critic_model.train_on_batch([cur_state_var, action], Q)
def train_actor(self, samples):
cur_state_var, action, reward, new_state_var = samples
predicted_action = self.actor_model.predict([cur_state_var])
grads = self.sess.run([self.critic_grads], feed_dict={
self.critic_var_in: cur_state_var,
self.critic_action_in: predicted_action})
self.sess.run(self.optimize, feed_dict={
self.actor_var_in: cur_state_var,
self.actor_critic_grad: grads[0]
})
def update_actor_target(self):
actor_model_weights = self.actor_model.get_weights()
actor_target_weights = self.target_actor.get_weights()
for i in range(len(actor_target_weights)):
actor_target_weights[i] = self.tau * actor_model_weights[i] + (1-self.tau)*actor_target_weights[i]
self.target_actor.set_weights(actor_target_weights)
def update_critic_target(self):
critic_model_weights = self.critic_model.get_weights()
critic_target_weights = self.target_critic.get_weights()
for i in range(len(critic_target_weights)):
critic_target_weights[i] = self.tau * critic_model_weights[i] + (1-self.tau)*critic_target_weights[i]
self.target_critic.set_weights(critic_target_weights)
def update_model(self):
self.update_actor_target()
self.update_critic_target()
def act(self, cur_state_var, noise=None, env=None):
if env:
if np.random.random() < self.epsilon:
return env.action_space.sample()
else:
sh = cur_state_var.shape
action = self.actor_model.predict([cur_state_var], batch_size=1)[0]
return action
elif not noise:
if np.random.random() < self.epsilon:
return self.sample_action_space()
return self.actor_model.predict([cur_state_var], batch_size=1)[0]
else:
no = noise()
pred = self.actor_model.predict([cur_state_var], batch_size=1)[0]
return pred + no
def sample_action_space(self):
return np.array([random.uniform(-0.5, 0.5), random.uniform(-1.0, 1.0)]).reshape(2, )
def remember(self, cur_state_var, action, reward, new_state_var):
self.memory.append([cur_state_var, action, reward, new_state_var])
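For what it's worth, here is a minimal diagnostic sketch (a hypothetical helper, assuming an AIInterface instance named ai with a filled replay memory) that logs the mean magnitude of dQ/da, which should show directly whether the critic's action gradient is in fact vanishing:

import random
import numpy as np

def mean_action_grad(ai, batch_size=64):
    # Sample states from the replay memory, predict actions with the current
    # actor, and evaluate |dQ/da| through the existing critic_grads op.
    samples = random.sample(ai.memory, batch_size)
    states = np.vstack([s[0] for s in samples])
    actions = ai.actor_model.predict(states)
    grads = ai.sess.run(ai.critic_grads, feed_dict={
        ai.critic_var_in: states,
        ai.critic_action_in: actions})[0]
    return np.abs(grads).mean()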
I'm a newbie in PyTorch. Can someone help? I am trying to teach a neural network to play Tetris, but I can't understand why the weights don't change.
Neural Network:
# Imports assumed by this snippet:
import random
import torch
from torch import nn, optim
from torch.nn import Module, Conv2d, Linear, MaxPool2d, ReLU, Softmax, MSELoss, CrossEntropyLoss
from torch.nn.init import kaiming_uniform_, xavier_uniform_

class CNN(Module):
# define model elements
def __init__(self, n_channels):
super(CNN, self).__init__()
self.number_of_actions = 5
self.gamma = 0.01
self.final_epsilon = 0.0001
self.initial_epsilon = 0.1
self.number_of_iterations = 2000000
self.replay_memory_size = 10000
self.minibatch_size = 32
# input to first hidden layer
#self.hidden1 = Linear(n_channels, 50)
self.hidden1 = Conv2d(1, 1, (22, 10))
kaiming_uniform_(self.hidden1.weight, nonlinearity='relu')
#self.act1 = ReLU()
self.pool1 = MaxPool2d(2, 2)
# second hidden layer
self.hidden2 = Linear(1, 30)
kaiming_uniform_(self.hidden2.weight, nonlinearity='relu')
self.act2 = ReLU()
# fully connected layer
self.hidden3 = Linear(30, 10)
kaiming_uniform_(self.hidden3.weight, nonlinearity='relu')
self.act3 = ReLU()
# output layer
self.hidden4 = Linear(10, 1)
xavier_uniform_(self.hidden4.weight)
self.act4 = Softmax(dim=1)
# forward propagate input
def forward(self, X):
# input to first hidden layer
X = self.hidden1(X)
X = self.pool1(X)
# second hidden layer
X = self.hidden2(X)
X = self.act2(X)
# third hidden layer
X = self.hidden3(X)
X = self.act3(X)
# output layer
X = self.hidden4(X)
X = self.act4(X)
return X
Learning loop:
reward = torch.tensor([[0.]], requires_grad=True)
inputs = torch.tensor([[self.BoardMatrix]])
outputs = model(inputs)
# compute the model output
state_action_values = model(inputs)
replay_memory.append(self.BoardMatrix)
if len(replay_memory) > model.replay_memory_size:
replay_memory.pop(0)
if self.isWaitingAfterLine:
self.isWaitingAfterLine = False
self.newPiece()
else:
# learning
if (state_action_values[0][0][0][0] > 0.6):
self.action("left")
action_batch = 0.8
elif (state_action_values[0][0][0][0] > 0.2):
self.action("right")
action_batch = 0.6
elif (state_action_values[0][0][0][0] > -0.2):
self.action("down")
action_batch = 0.4
elif (state_action_values[0][0][0][0] > -0.6):
self.action("up")
action_batch = 0.2
elif (state_action_values[0][0][0][0] > -1):
self.action("space")
action_batch = 0
self.oneLineDown()
minibatch = random.sample(replay_memory, min(len(replay_memory), model.minibatch_size))
criterion: MSELoss = nn.MSELoss()
reward_batch = reward
y_batch = torch.cat(tuple(reward_batch if minibatch[i][4]
else reward_batch + model.gamma * torch.max(state_action_values[i])
for i in range(len(minibatch))))
y_batch = y_batch.detach()
q_value = torch.sum(outputs * action_batch, dim=1)
optimizer.zero_grad()
loss = criterion(q_value, y_batch[0])
# credit assignment
loss.backward()
optimizer.step()
print(model.hidden1.weight)
else:
super(Board, self).timerEvent(event)
NN, optimizer initialization:
model = CNN(boardSize)
criterion = CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-6)
I am working from this tutorial: https://www.toptal.com/deep-learning/pytorch-reinforcement-learning-tutorial, but I am trying to adapt it for Tetris.
P.S. I also found out that y_batch never changes, but I don't know why.
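As an aside, here is a minimal sketch (assuming the model defined above) of how to check whether any gradient actually reaches the weights; it is meant to be called right after loss.backward() in the learning loop:

def report_grad_norms(model):
    # Print the gradient norm of every parameter right after loss.backward();
    # all-None or all-zero norms would explain weights that never change.
    for name, param in model.named_parameters():
        norm = None if param.grad is None else param.grad.norm().item()
        print(name, norm)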
I was using a DQN for something and it wasn't working. I simplified the problem so that there are two actions, 0 and 1, and each action corresponds to a single reward, 0 or -1. Still, my Q-agent is consistently confused, giving the two actions wild values in the thousands. Please, what am I doing wrong?
import numpy as np
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
class ReplayBuffer():
def __init__(self, input_dims):
self.mem_size = memory_size
self.mem_cntr = 0
self.state_memory = np.zeros((self.mem_size, *input_dims),
dtype = np.float32)
self.new_state_memory = np.zeros((self.mem_size, *input_dims),
dtype = np.float32)
self.action_memory = np.zeros(self.mem_size, dtype = np.int32)
self.reward_memory = np.zeros(self.mem_size, dtype = np.float32)
self.terminal_memory = np.zeros(self.mem_size, dtype = np.int32)
def store_transition(self, state, action, reward, state_, done):
index = self.mem_cntr % self.mem_size
self.state_memory[index] = state
self.new_state_memory[index] = state_
self.reward_memory[index] = reward
self.action_memory[index] = action
self.terminal_memory[index] = 1 - int(done)
self.mem_cntr += 1
def sample_buffer(self):
max_mem = min(self.mem_cntr, self.mem_size)
batch = np.random.choice(max_mem, batch_size, replace = False)
states = self.state_memory[batch]
states_ = self.new_state_memory[batch]
rewards = self.reward_memory[batch]
actions = self.action_memory[batch]
terminal = self.terminal_memory[batch]
return states, actions, rewards, states_, terminal
def build_dqn(n_actions, input_dims):
model = keras.Sequential([
keras.layers.InputLayer(input_shape = input_dims),
keras.layers.Dense(fc1_dims, activation = "relu"),
keras.layers.Dense(fc2_dims, activation = "relu"),
keras.layers.Dense(fc3_dims, activation = "relu"),
# keras.layers.Dense(fc4_dims, activation = "relu"),
keras.layers.Dense(n_actions, activation = None)])
model.compile(optimizer = Adam(lr = learning_rate), loss = "mean_squared_error")
return model
class Agent():
def __init__(self, n_actions, input_dims):
self.action_space = [i for i in range(n_actions)]
self.gamma = gamma
self.epsilon = epsilon_start
self.eps_dec = epsilon_dec
self.eps_min = epsilon_end
self.batch_size = batch_size
self.model_file = model_name
self.memory = ReplayBuffer(input_dims)
self.q_eval = build_dqn(n_actions, input_dims)
def store_transition(self, state, action, reward, new_state, done):
self.memory.store_transition(state, action, reward, new_state, done)
def choose_action(self, observation):
if(np.random.random() < self.epsilon):
action = np.random.choice(self.action_space)
else:
state = np.array([observation])
actions = self.q_eval.predict(state)
action = np.argmax(actions[0])
print(actions)
print(action)
return action
def learn(self):
if (self.memory.mem_cntr < self.batch_size):
return
states, actions, rewards, states_, dones = \
self.memory.sample_buffer()
q_eval = self.q_eval.predict(states)
q_next = self.q_eval.predict(states_)
q_target = np.copy(q_eval)
batch_index = np.arange(self.batch_size, dtype = np.int32)
q_target[batch_index, actions] = rewards + \
self.gamma + np.max(q_next, axis = 1)*dones
self.q_eval.train_on_batch(states, q_target)
self.epsilon = self.epsilon - self.eps_dec if self.epsilon > self.eps_min else self.eps_min
def save_model(self):
self.q_eval.save(self.model_file)
def load_model(self):
self.q_eval = load_model(self.model_file)
It's a super-standard DQN agent, most of it copied from a tutorial. I can't fathom where it could be going wrong.
Try changing your last layer from
keras.layers.Dense(n_actions, activation = None)])
to
keras.layers.Dense(n_actions, activation = 'sigmoid')])
You say that
I simplified the problem so that there are 2 actions: 0 and 1. Each action corresponds to a single reward: 0 or -1.
Therefore, you should use the sigmoid activation function in the last layer, because you are trying to solve a binary classification problem.
You can read more about it here - https://keras.io/api/layers/activations/
I have tried different hyperparameters and different numbers of layers and nodes, but my model is not learning anything even after 2000 iterations. I have also tried the MountainCarContinuous-v0 environment, but that didn't work either.
I have tried different architectures and models from GitHub, but my model is still not learning anything.
import numpy as np
import tensorflow as tf
import random
import gym
import pylab
import sys
from keras.initializers import RandomUniform
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.layers import Dense, Input, Add, Concatenate, Flatten, GaussianNoise, Lambda
from keras import backend as K
from collections import deque
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
K.set_session(sess)
EPISODES = 100000
class Actor(object):
def __init__(self, sess, state_size, action_size, TAU, lr, action_bound=1, load=False):
self.sess = sess
self.TAU = TAU
self.lr = lr
self.load = load
self.action_bound = action_bound
self.model, self.weights, self.state = self.build_network(state_size, action_size)
self.target_model, self.target_weights, self.target_state = self.build_network(state_size, action_size)
self.q_grads = tf.placeholder(tf.float32, [None, action_size])
self.grads = tf.gradients(self.model.output, self.weights, -self.q_grads)
self.optimize = tf.train.AdamOptimizer(lr).apply_gradients(zip(self.grads, self.weights))
self.sess.run(tf.global_variables_initializer())
if self.load:
self.model.load_weights("./DDPG_Actor.h5")
self.target_model.load_weights("./DDPG_Actor_target.h5")
def train(self, state, grads):
self.sess.run(self.optimize, feed_dict={self.state : state, self.q_grads : grads})
def update(self):
W, target_W = self.model.get_weights(), self.target_model.get_weights()
for i in range(len(W)):
target_W[i] = self.TAU*W[i] + (1 - self.TAU)*target_W[i]
self.target_model.set_weights(target_W)
def save(self):
self.model.save_weights("./DDPG_Actor.h5")
self.target_model.save_weights("./DDPG_Actor_target.h5")
def build_network(self, state_size, action_size):
input = Input(shape=[state_size])
X = Dense(400, activation='relu', kernel_initializer='glorot_normal')(input)
X = Dense(300, activation='relu', kernel_initializer='glorot_normal')(X)
output = Dense(action_size, activation='tanh', kernel_initializer='glorot_normal')(X)
output = Lambda(lambda i : i*self.action_bound)(output)
model = Model(input=input, output=output)
return model, model.trainable_weights, input
class Critic(object):
def __init__(self, sess, state_size, action_size, TAU, lr, load=False):
self.sess = sess
self.TAU = TAU
self.lr = lr
self.load = load
self.optimizer = tf.train.AdamOptimizer(lr)
self.model, self.state, self.action = self.build_network(state_size, action_size)
self.target_model, self.target_state, self.target_action = self.build_network(state_size, action_size)
self.q_grads = tf.gradients(self.model.output, self.action)
self.sess.run(tf.global_variables_initializer())
if self.load:
self.model.load_weights("./DDPG_Critic.h5")
self.target_model.load_weights("./DDPG_Critic_target.h5")
def gradients(self, state, action):
return self.sess.run(self.q_grads, feed_dict={self.state : state, self.action : action})[0]
def save(self):
self.model.save_weights("./DDPG_Critic.h5")
self.target_model.save_weights("./DDPG_Critic_target.h5")
def update(self):
W, target_W = self.model.get_weights(), self.target_model.get_weights()
for i in range(len(W)):
target_W[i] = self.TAU*W[i] + (1 - self.TAU)*target_W[i]
self.target_model.set_weights(target_W)
def build_network(self, state_size, action_size):
S = Input(shape=[state_size])
A = Input(shape=[action_size])
X1 = Dense(400, activation='relu', kernel_initializer='glorot_normal')(S)
X2 = Dense(400, activation='relu', kernel_initializer='glorot_normal')(A)
X = Add()([X1,X2])
X = Dense(300, activation='relu', kernel_initializer='glorot_normal')(X)
output = Dense(action_size, activation='linear', kernel_initializer='glorot_normal')(X)
model = Model(inputs=[S, A], outputs=output)
model.compile(loss='mse', optimizer=Adam(lr=self.lr))
return model, S, A
class DDPG(object):
def __init__(self, sess, state_size, action_size, action_bound=1, memory_size=5000, batch_size=64, actor_lr=0.0001, critic_lr=0.001, gamma=0.99, TAU=0.001):
self.sess = sess
self.state_size = state_size
self.action_size = action_size
self.memory = deque(maxlen=memory_size)
self.batch_size = batch_size
self.actor_lr = actor_lr
self.critic_lr = critic_lr
self.gamma = gamma
self.TAU = TAU
self.train_start = 1000
self.epsilon = 1
self.epsilon_min = 0.001
self.mu = 0.0
self.x = 0
self.theta = 0.01
self.sigma = 0.1
self.epsilon_decay = (self.epsilon - self.epsilon_min) / 100000
self.actor = Actor(sess, state_size, action_size, TAU, actor_lr,action_bound, load=False)
self.critic = Critic(sess, state_size, action_size, TAU, critic_lr, load=False)
def append(self, state, action, reward, next_state, done):
self.memory.append((state, action, reward, next_state, done))
if self.epsilon > self.epsilon_min :
self.epsilon -= self.epsilon_decay
def OU(self):
dx = self.theta*(self.mu - self.x) + self.sigma*np.random.randn(1)
self.x += dx
return self.x
def get_action(self, state):
action = self.actor.model.predict(state)[0]
return action + self.OU()*self.epsilon
def save(self):
self.actor.save()
self.critic.save()
def train(self):
if len(self.memory) < self.train_start:
return
batch_size = min(self.batch_size, len(self.memory))
mini_batch = random.sample(self.memory, batch_size)
states = np.asarray([e[0] for e in mini_batch])
states = np.reshape(states, [batch_size, self.state_size])
actions = np.asarray([e[1] for e in mini_batch])
rewards = np.asarray([e[2] for e in mini_batch])
next_states = np.asarray([e[3] for e in mini_batch])
next_states = np.reshape(next_states, [batch_size, self.state_size])
done = np.asarray([e[4] for e in mini_batch])
target = np.zeros_like(actions)
target_q_values = self.critic.target_model.predict([next_states, self.actor.target_model.predict(next_states)])
for i in range(len(mini_batch)):
if done[i]:
target[i] = rewards[i]
else :
target[i] = rewards[i] + self.gamma*target_q_values[i]
loss = self.critic.model.train_on_batch([states, actions], target)
action_for_grad = self.actor.model.predict(states)
q_grads = self.critic.gradients(states,action_for_grad)
self.actor.train(states,q_grads)
self.actor.update()
self.critic.update()
env = gym.make('Pendulum-v0')
state_size = env.observation_space.shape[0]
action_size = 1
action_bound = env.action_space.high
agent = DDPG(sess, state_size, action_size, action_bound)
scores, episodes = [], []
for e in range(EPISODES):
done = False
score = 0
state = env.reset()
state = np.reshape(state, [1,state_size])
step = 0
while not done:
action = agent.get_action(state)
#print(action)
next_state, reward, done, _ = env.step([action])
next_state = np.reshape(next_state,[1,state_size])
score += reward[0]
agent.append(state, action, reward, next_state, done)
state = next_state
step += 1
if step % 20 == 0:
agent.train()
if done:
scores.append(score)
episodes.append(e)
pylab.plot(episodes, scores, 'b')
pylab.savefig("./DDPG_Pendulum.png")
print("episode:", e, " score:", score, " epsilon:", agent.epsilon)
#if np.mean(scores[-min(10, len(scores)) :]) > -120 :
#sys.exit()
if e % 50 == 0:
agent.save()
I always get around -1450 reward per episode.
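For reference, here is a hypothetical greedy-evaluation loop (assuming the agent and env defined above) that measures the return without OU exploration noise, to separate the effect of the noise from that of the learned policy:

import numpy as np

def evaluate(agent, env, episodes=5):
    returns = []
    for _ in range(episodes):
        state = np.reshape(env.reset(), [1, agent.state_size])
        done, total = False, 0.0
        while not done:
            # Greedy action straight from the actor, with no OU noise added.
            action = agent.actor.model.predict(state)[0]
            next_state, reward, done, _ = env.step([action])
            state = np.reshape(next_state, [1, agent.state_size])
            total += reward[0]
        returns.append(total)
    return float(np.mean(returns))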
How do I add training data (text, mel spectrogram, label (.wav)) in TensorFlow 2.0, as in the tutorials on the TensorFlow website?
How do I train this model with GradientTape in TF 2.0? I don't know what the next step is. I have built the model with tf.keras.layers and tf.sequence_mask. I am replicating the Deep Voice 3 model to do TTS.
class Model(object):
def __init__(self):
with tf.GradientTape() as tape:
self.char2idx, self.idx2char = load_vocab()
self.x, self.y1, self.y2, self.z, self.num_batch = get_batch()
self.prev_max_attentions_li = tf.ones(shape=(hp.dec_layers, hp.batch_size), dtype=tf.int32)
self.decoder_input = tf.concat((tf.zeros_like(self.y1[:, :1, -hp.n_mels:]), self.y1[:, :-1, -hp.n_mels:]), 1)
self.keys, self.vals = Encoder(self.x)
self.mel_logits, self.done_output, self.decoder_output, self.alignments_li, self.max_attentions_li = Decoder(self.decoder_input, self.keys, self.vals, self.prev_max_attentions_li)
self.mel_output = tf.nn.sigmoid(self.mel_logits)
self.converter_input = tf.reshape(self.decoder_output, (-1, hp.Ty, hp.char_embed//hp.r))
self.converter_input = tf.keras.layers.Dense(hp.cchannel, activation = 'relu')(self.converter_input)
self.mag_logits = Converter(self.converter_input)
self.mag_output = tf.nn.sigmoid(self.mag_logits)
self.global_step = tf.Variable(0, name='global_step', trainable=False)
self.loss_mels = tf.reduce_mean(input_tensor=tf.abs(self.mel_output - self.y1))
self.loss_dones = tf.reduce_mean(input_tensor=tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.done_output, labels=self.y2))
self.loss_mags = tf.reduce_mean(input_tensor=tf.abs(self.mag_output - self.z))
self.loss = self.loss_mels + self.loss_dones + self.loss_mags
# Training Scheme
self.optimizer = tf.keras.optimizers.Adam(lr=hp.lr)
## gradient clipping
self.gvs = self.optimizer.compute_gradients(self.loss)
self.clipped = []
for grad, var in self.gvs:
grad = tf.clip_by_value(grad, -1. * hp.max_grad_val, hp.max_grad_val)
grad = tf.clip_by_norm(grad, hp.max_grad_norm)
self.clipped.append((grad, var))
self.train_op = self.optimizer.apply_gradients(self.clipped, global_step=self.global_step)
# Summary
tf.summary.scalar('Train_Loss/LOSS', self.loss)
tf.summary.scalar('Train_Loss/mels', self.loss_mels)
tf.summary.scalar('Train_Loss/dones', self.loss_dones)
tf.summary.scalar('Train_Loss/mags', self.loss_mags)
self.merged = tf.summary.merge_all()
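For context, a generic TF 2.x GradientTape training step looks roughly like the sketch below (made-up names my_model, inputs, and targets; this is not the Deep Voice 3 pipeline above). A training loop would then call train_step once per batch, for example from a tf.data.Dataset.

import tensorflow as tf

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

@tf.function
def train_step(my_model, inputs, targets, max_grad_norm=1.0):
    with tf.GradientTape() as tape:
        predictions = my_model(inputs, training=True)
        # L1 loss, analogous to the mel/mag losses above.
        loss = tf.reduce_mean(tf.abs(predictions - targets))
    grads = tape.gradient(loss, my_model.trainable_variables)
    grads, _ = tf.clip_by_global_norm(grads, max_grad_norm)
    optimizer.apply_gradients(zip(grads, my_model.trainable_variables))
    return loss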
So I wrote this generalised TensorFlow code and want to save and restore models, but apparently the error is that there are no variables to save. I did everything as given in this official example. Ignore the __init__ method except the last line, since it only takes the parameters the model is trained with; there are also no syntax errors. The error it produces is given below the code.
# Import assumed by this snippet (TF 1.x, with tf.contrib available):
import tensorflow as tf

class Neural_Network(object):
def __init__(self, numberOfLayers, nodes, activations, learningRate,
optimiser = 'GradientDescent', regularizer = None,
dropout = 0.5, initializer = tf.contrib.layers.xavier_initializer()):
self.numberOfLayers = numberOfLayers
self.nodes = nodes
self.activations = activations
self.learningRate = learningRate
self.regularizer = regularizer
self.dropout = dropout
self.initializer = initializer
if(optimiser == 'GradientDescent'):
self.optimiser = tf.train.GradientDescentOptimizer(self.learningRate)
elif(optimiser == 'AdamOptimiser'):
self.optimiser = tf.train.AdamOptimizer(self.learningRate)
self.saver = tf.train.Saver()
def create_Neural_Net(self, numberOfFeatures):
self.numberOfFeatures = numberOfFeatures
self.X = tf.placeholder(dtype = tf.float32, shape = (None, self.numberOfFeatures), name = 'Input_Dataset')
#self.output = None
for i in range(0, self.numberOfLayers):
if(i == 0):
layer = tf.contrib.layers.fully_connected(self.X, self.nodes[i],
activation_fn = self.activations[i],
weights_initializer = self.initializer,
biases_initializer = self.initializer)
elif(i == self.numberOfLayers-1):
self.output = tf.contrib.layers.fully_connected(layer, self.nodes[i],
activation_fn = self.activations[i],
weights_initializer = self.initializer,
biases_initializer = self.initializer)
else:
layer = tf.contrib.layers.fully_connected(layer, self.nodes[i],
activation_fn = self.activations[i],
weights_initializer = self.initializer,
biases_initializer = self.initializer)
def train_Neural_Net(self, dataset, labels, epochs):
entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits = self.output, labels = labels, name = 'cross_entropy')
loss = tf.reduce_mean(entropy, name = 'loss')
hypothesis = tf.nn.softmax(self.output)
correct_preds = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
train_op = self.optimiser.minimize(loss)
self.loss=[]
self.accuracy = []
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(0, epochs):
_, l, acc = sess.run([train_op, loss, accuracy], feed_dict = {self.X:dataset})
print('Loss in epoch ' + str(i) + ' is: ' + str(l))
self.loss.append(l)
self.accuracy.append(acc)
self.saver.save(sess, './try.ckpt')
return self.loss, self.accuracy
I ran this code as:
nn = Neural_Network(2, [20,3], [tf.nn.relu, tf.nn.relu], 0.001, optimiser = 'AdamOptimiser')
nn.create_Neural_Net(4)
nn.train_Neural_Net(dataset, labels, 1000)
The error it gives is:
ValueError: No variables to save
So what is wrong with this code, and how can I fix it?
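For reference, the basic tf.train.Saver save-and-restore pattern from the TF 1.x docs looks roughly like this standalone sketch (not tied to the class above):

import tensorflow as tf

# tf.train.Saver() only picks up variables that already exist in the graph,
# so it has to be constructed after the variables are defined.
w = tf.Variable(tf.zeros([4, 3]), name='w')
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.save(sess, './try.ckpt')

with tf.Session() as sess:
    saver.restore(sess, './try.ckpt')
    print(sess.run(w))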