I am developing a policy gradient neural network with PyTorch (version 1.10.1) and I am getting the following runtime error:
RuntimeError: one of the variables needed for gradient computation has been modified by an in-place operation: [torch.FloatTensor [1, 15]] is at version 1; expected version 0 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
I have read some similar discussions. People suggest avoiding in-place operations such as a += 1, downgrading PyTorch, or using clone() instead of modifying the tensor in place. I have tried all of these, but I still get the error.
The error does not appear every time update() is called; sometimes it works fine. Why does this happen?
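For reference, here is a minimal standalone sketch (not my actual code) of the kind of in-place edit that produces this error:

import torch

w = torch.randn(1, 15, requires_grad=True)
x = torch.ones(1, 15)
loss = (w * x).sum()   # autograd saves x, since it is needed for d(loss)/dw
x += 1                 # the in-place edit bumps x's version counter to 1
loss.backward()        # RuntimeError: ... is at version 1; expected version 0

Writing x = x + 1 instead creates a new tensor and leaves the saved one untouched, so backward() succeeds.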
My related code is below. There are some odd variable names such as pre_R; I introduced them to avoid in-place operations such as a += 1.
NN class:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical

class NN(nn.Module):
    """
    Feel free to change the architecture for different tasks!
    """
    def __init__(self, env):
        super(NN, self).__init__()
        # 15 in this case
        self.state_size = 15
        # 31 (1 and -1 for each task-server and void) (m*n*2 + 1)
        self.action_size = 31
        self.linear1 = nn.Linear(self.state_size, 128)
        self.linear2 = nn.Linear(128, 256)
        self.linear3 = nn.Linear(256, self.action_size)

    def forward(self, state):
        output1 = F.relu(self.linear1(state))
        output2 = F.relu(self.linear2(output1.clone()))
        output3 = self.linear3(output2.clone())
        # Note the conversion to a PyTorch distribution.
        distribution = Categorical(F.softmax(output3.clone(), dim=-1))
        return distribution
Reinforcement learning code:
class Agent():
    def __init__(self, env, lr, gamma):
        self.env = env
        self.NN = NN(env)
        self.lr = lr
        self.optim_NN = optim.Adam(self.NN.parameters(), lr=self.lr)
        self.gamma = gamma

    def update(self, log_probs, returns):
        with torch.autograd.set_detect_anomaly(True):
            print("updating")
            baselines = self.compute_baselines(returns.clone())
            loss = self.compute_loss(log_probs.clone(), returns, baselines)
            self.optim_NN.zero_grad()
            loss.backward()
            self.optim_NN.step()

    def compute_returns(self, rewards):
        R = 0
        returns = []
        for r in rewards[::-1]:
            pre_R = R
            R = r + self.gamma * pre_R
            returns.insert(0, R)
        returns = torch.tensor(returns)
        return returns

    def compute_baselines(self, returns):
        baselines = []
        baselines.append(returns[0])
        for v in returns:
            t = len(baselines)
            b = (baselines[t - 1] * t + v) / (t + 1)
            baselines.append(b)
        return baselines

    def compute_loss(self, log_probs, returns, baselines):
        with torch.autograd.set_detect_anomaly(True):
            loss = 0
            for i in range(0, len(returns)):
                l = log_probs[i].clone()
                r = returns[i].clone()
                b = baselines[i].clone()
                pre_loss = loss
                loss = pre_loss + (-l * (r - b))
                # losses.append(loss)
                # losses.append(-log_probs[i].clone()*(returns[i].clone()-baselines[i].clone()))
            policy_loss = loss
            return policy_loss
Related
I am trying to get to grips with PyTorch, and I wanted to try to reproduce this code in PyTorch:
https://github.com/andy-psai/MountainCar_ActorCritic/blob/master/RL%20Blog%20FINAL%20MEDIUM%20code%2002_12_19.ipynb
The problem is that this error is being returned:
RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.
A similar question said to use zero_grad() again after the optimizer step, but this hasn't resolved the issue.
I've included the entire code below, so hopefully it should be reproducible.
Any advice would be much appreciated.
import gym
import os
import os.path as osp
import time
import math
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.distributions import Normal
env = gym.envs.make("MountainCarContinuous-v0")
# Value function
class Value(nn.Module):
    def __init__(self, dim_states):
        super(Value, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(dim_states, 400),
            nn.ReLU(),
            nn.Linear(400, 400),
            nn.ReLU(),
            nn.Linear(400, 1)
        )
        self.optimizer = optim.Adam(self.parameters(), lr=1e-3)
        self.criterion = nn.MSELoss()

    def forward(self, state):
        return self.net(torch.from_numpy(state).float())

    def compute_return(self, output, target):
        self.optimizer.zero_grad()
        loss = self.criterion(output, target)
        loss.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()
# Policy network
class Policy(nn.Module):
    def __init__(self, dim_states, env):
        super(Policy, self).__init__()
        self.hidden1 = nn.Linear(dim_states, 40)
        self.hidden2 = nn.Linear(40, 40)
        self.mu = nn.Linear(40, 1)
        self.sigma = nn.Linear(40, 1)
        self.env = env
        self.optimizer = optim.Adam(self.parameters(), lr=2e-5)

    def forward(self, state):
        state = torch.from_numpy(state).float()
        x = F.relu(self.hidden1(state))
        x = F.relu(self.hidden2(x))
        mu = self.mu(x)
        sigma = F.softmax(self.sigma(x), dim=-1)
        action_dist = Normal(mu, sigma)
        action_var = action_dist.rsample()
        action_var = torch.clip(action_var,
                                self.env.action_space.low[0],
                                self.env.action_space.high[0])
        return action_var, action_dist

    def compute_return(self, action, dist, td_error):
        self.optimizer.zero_grad()
        loss_actor = -dist.log_prob(action) * td_error
        loss_actor.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()
# Normalise the state space
import sklearn
import sklearn.preprocessing
state_space_samples = np.array(
    [env.observation_space.sample() for x in range(10000)])
scaler = sklearn.preprocessing.StandardScaler()
scaler.fit(state_space_samples)

# Normaliser
def scale_state(state):
    scaled = scaler.transform([state])
    return scaled
##################################
# Parameters
lr_actor = 0.00002
lr_critic = 0.001
actor = Policy(2, env)
critic = Value(2)
# Training loop params
gamma = 0.99
num_episodes = 300
episode_history = []
for episode in range(num_episodes):
    # Receive initial state from E
    state = env.reset()
    reward_total = 0
    steps = 0
    done = False

    while not done:
        action, dist = actor(state)
        # print(np.squeeze(action))
        next_state, reward, done, _ = env.step(
            np.array([action.item()]))
        if episode % 50 == 0:
            env.render()
        steps += 1
        reward_total += reward

        # TD Target
        target = reward + gamma * np.squeeze(critic(next_state), axis=0)
        td_error = target - np.squeeze(critic(state), axis=0)

        # Update actor
        actor.compute_return(action, dist, td_error)

        # Update critic
        critic.compute_return(np.squeeze(critic(state), axis=0), target)

    episode_history.append(reward_total)
    print(f"Episode: {episode}, N Steps: {steps}, Cumulative reward {reward_total}")

    if np.mean(episode_history[-100:]) > 90 and len(episode_history) > 101:
        print("Solved")
        print(f"Mean cumulative reward over 100 episodes {np.mean(episode_history[-100:])}")
The problem lies in this snippet. When you create the target variable, there is a forward pass through the critic, which builds a computation graph, and target becomes part of that graph (you can check this by printing target, which will show grad_fn=<AddBackward0>). Then, when you call critic.compute_return(critic_out, target), a new computation graph is generated, and passing in target (which is still part of the previous graph) causes the RuntimeError.
The solution is to call detach() on critic(next_state); target will then no longer be part of the computation graph (again, check by printing target).
target = reward + gamma * np.squeeze(critic(next_state).detach(), axis=0)
td_error = target - np.squeeze(critic(state), axis=0)
# Update actor
actor.compute_return(action, dist, td_error)
# Update critic
critic_out = np.squeeze(critic(state), axis=0)
print(critic_out)
critic.compute_return(critic_out, target)
I'm trying to train a model, and it doesn't work because weights aren't updating when I call the following:
self.optimizer = Adam(self.PPO.parameters(), lr=0.1, eps=epsilon)
total_loss = Variable(policy_loss + 0.5*value_loss - entropy_loss.mean() * 0.01, requires_grad=True)
self.optimizer.zero_grad()
(total_loss * 10).backward()
self.optimizer.step()
When I print the weights, they're all the same (the loss isn't zero, and the learning rate is set to 0.1), and when I compare them (even with clone() called on each param) the comparison always returns True. The total loss has a grad_fn attribute too... The optimizer is created in the constructor of my agent class.
My code is based on this repository:
https://github.com/andreiliphd/tennis-ppo/blob/master/agent.py
This is my agent constructor:
def __init__(self, PPO, learning_rate, epsilon, discount_rate, entropy_coefficient, ppo_clip, gradient_clip,
             rollout_length, tau):
    self.PPO = PPO
    self.learning_rate = learning_rate
    self.epsilon = epsilon
    self.discount_rate = discount_rate
    self.entropy_coefficient = entropy_coefficient
    self.ppo_clip = 0.2
    self.gradient_clip = 5
    self.rollout_length = rollout_length
    self.tau = tau
    self.optimizer = Adam(self.PPO.actor.parameters(), lr=0.1, eps=epsilon)
    self.device = torch.device('cpu')
This is my PPO class, which creates two networks, each with a forward function and some hidden layers:
class PPO(nn.Module):
    def __init__(self, state_shape, action_num, mlp_layers, device=torch.device('cpu')):
        super(PPO, self).__init__()
        self.state_shape = state_shape
        self.action_num = action_num
        self.mlp_layers = mlp_layers
        self.device = torch.device('cpu')
        layer_dims = [np.prod(self.state_shape)] + self.mlp_layers
        self.actor = PPO_Network(state_shape, action_num, layer_dims, True)
        self.actor = self.actor.to(device)
        self.critic = PPO_Network(state_shape, 1, layer_dims, False)
        self.critic = self.critic.to(device)
        self.to(device)
Any indication of why this is happening and what I am overlooking is very welcome. :)
I can give more info or code if needed.
I fixed this :)
It was very silly: I was converting one of the values returned by my networks to numpy and then converting it back to a tensor, which detaches it from the computation graph. It took me a while to realize because of some messy code.
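For anyone hitting the same thing, here is a minimal sketch (hypothetical names, not my original code) of why a numpy round-trip breaks training:

import torch

net = torch.nn.Linear(4, 1)
x = torch.randn(1, 4)

value = net(x)                            # connected to net's parameters
value_np = value.detach().numpy()         # plain array, no autograd history
loss = torch.tensor(value_np, requires_grad=True).pow(2).mean()

loss.backward()
print(net.weight.grad)                    # None: optimizer.step() has nothing to update

Once everything stays a tensor from the forward pass to the loss, the gradients reach the network again and the weights update.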
I've been learning RL this summer, and this week I tried to make a PPO implementation in PyTorch with the help of some GitHub repositories with similar algorithms.
The code runs OpenAI's Lunar Lander, but I have several errors that I have not been able to fix, the biggest one being that the algorithm quickly converges to doing the same action regardless of the state. The other major problem is that even though I call backward() only once, I get an error asking me to set retain_graph to True.
Because of that, I see no improvement in the rewards obtained over 1000 steps; I don't know if the algorithm needs more steps to show an improvement.
I'm really sorry if this kind of problem has no place in this forum, I just didn't know where else to post it.
Also, I'm sorry for the messy code; it's my first time writing this kind of algorithm, and I'm fairly new to PyTorch and machine learning in general.
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.distributions import Categorical
import gym
class actorCritic(nn.Module):
    def __init__(self):
        super(actorCritic, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(8, 16),
            nn.Linear(16, 32),
            nn.Linear(32, 64),
            nn.ReLU(inplace=True)
        )
        self.pi = nn.Linear(64, 4)
        self.value = nn.Linear(64, 1)

    def forward(self, x):
        x = self.fc(x)
        pi_1 = self.pi(x)
        pi_out = F.softmax(pi_1, dim=-1)
        value_out = self.value(x)
        return pi_out, value_out
def GAE(rewards, values, masks):
    gamma = 0.99
    lamb = 0.95
    advan_t = 0
    sizes = rewards.size()
    advantages = torch.zeros(1, sizes[1])
    for t in reversed(range(sizes[1])):
        delta = rewards[0][t] + gamma*values[0][t+1]*masks[0][t] - values[0][t]
        advan_t = delta + gamma*lamb*advan_t*masks[0][t]
        advantages[0][t] = advan_t
    real_values = values[:, :sizes[1]] + advantages
    return advantages, real_values
def plot_rewards(rewards):
    plt.figure(2)
    plt.clf()
    plt.plot(rewards)
    plt.pause(0.001)
    plt.savefig('TruePPO 500 steps.png')

def interact(times, states):
    rewards = torch.zeros(1, times)
    actions = torch.zeros(1, times)
    mask = torch.ones(1, times)
    for steps in range(times):
        action_probs, _ = network(states[steps])
        m = Categorical(action_probs)
        action = int(m.sample())
        obs, reward, done, _ = env.step(action)
        if done:
            obs = env.reset()
            mask[0][steps] = 0
        states[steps+1] = torch.from_numpy(obs).float()
        rewards[0][steps] = reward
        actions[0][steps] = action
    return states, rewards, actions, mask
# Parameters
total_steps = 1000
batch_size = 10

env = gym.make('LunarLander-v2')
network = actorCritic()
old_network = actorCritic()
optimizer = torch.optim.Adam(network.parameters(), lr=0.001)

states = torch.zeros(batch_size+1, 8)
steps = 0
obs_ = env.reset()
obs = torch.from_numpy(obs_).float()
states[0] = obs
reward_means = []

nn_paramD = network.state_dict()
old_network.load_state_dict(nn_paramD)

while steps < total_steps:
    print(steps)
    states, rewards, actions, mask = interact(batch_size, states)

    # calculate values, GAE, normalize advantages, randomize, calculate loss, backprop
    _, values = network(states)
    values = values.view(-1, batch_size+1)
    advantages, v_targ = GAE(rewards, values, mask)
    advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-5)

    optimizer.zero_grad()
    for n in range(rewards.size()[1]):
        probabilities, _ = network(states[n])
        print(probabilities)
        m = Categorical(probabilities)
        action_prob = m.probs[int(actions[0][n])]
        entropia = m.entropy()
        old_probabilities, _ = old_network(states[n])
        m_old = Categorical(old_probabilities)
        old_action_prob = m.probs[actions[0][n].int()]
        old_action_prob.detach()
        ratio = action_prob / old_action_prob
        surr1 = ratio*advantages[0][n]
        surr2 = torch.clamp(ratio, min=(1.-0.2), max=(1.+0.2))
        policy_loss = -torch.min(surr1, surr2)
        value_loss = 0.5*(values[0][n]-v_targ[0][n])**2
        entropy_loss = -entropia
        total_loss = policy_loss + value_loss + 0.01*entropy_loss
        total_loss.backward(retain_graph=True)
    optimizer.step()

    reward_means.append(rewards.numpy().mean())
    old_network.load_state_dict(nn_paramD)
    nn_paramD = network.state_dict()
    steps += 1

plot_rewards(reward_means)
I'm trying to solve the 'BipedalWalker-v2' problem from OpenAI using Python and TensorFlow. To solve it I'm implementing an episodic policy gradient algorithm. Because the 'BipedalWalker-v2' actions are continuous, my policy is approximated by a multivariate Gaussian distribution whose mean is approximated by a fully connected neural network. My network has the following layers: [input: 24, hidden: 5, hidden: 5, output: 4]. My problem is that when I train the agent, the training process gets slower and slower until it almost freezes. My guess is that I'm misusing sess.run and not feeding the batches in an efficient way, but that is just a guess. My questions are: is my guess correct? If it is, how can I improve it? And if it is something else, what is it? I'm not looking for a literal solution, I just want some pointers on how to improve the training.
Thanks in advance.
My computer is an Inspiron 15 7000 Gaming: NVIDIA GeForce GTX 1050, 8 GB RAM, Intel i5 CPU.
My CODE:
Libraries:
import tensorflow as tf
import tensorflow.contrib.slim as slim
import numpy as np
import gym
import matplotlib.pyplot as plt
Agent class:
class agent_episodic_continuous_action():
    def __init__(self, lr, s_size, a_size, batch_size, dist_type):
        self.stuck = False
        self.gamma = 0.99
        self.dist_type = dist_type
        self.is_brain_present = False
        self.s_size = s_size
        self.batch_size = batch_size
        self.state_in = tf.placeholder(shape=[None, s_size], dtype=tf.float32)
        self.a_size = a_size
        self.reward_holder = tf.placeholder(shape=[None], dtype=tf.float32)
        self.cov = tf.eye(a_size)
        self.reduction = 0.01
        if a_size > 1:
            self.action_holder = tf.placeholder(shape=[None, a_size], dtype=tf.float32)
        else:
            self.action_holder = tf.placeholder(shape=[None], dtype=tf.float32)
        self.gradient_holders = []
        self.optimizer = tf.train.AdamOptimizer(learning_rate=lr)

    def save_model(self, path, sess):
        self.saver.save(sess, path)

    def load_model(self, path, sess):
        self.saver.restore(sess, path)

    def create_brain(self, hidd_layer, hidd_layer_act_fn, output_act_fn):
        self.is_brain_present = True
        hidden_output = slim.stack(self.state_in, slim.fully_connected, hidd_layer, activation_fn=hidd_layer_act_fn)
        self.output = slim.fully_connected(hidden_output, self.a_size, activation_fn=output_act_fn, biases_initializer=None)

    def create_pi_dist(self):
        if self.dist_type == "normal":
            # amplify = tf.pow(slim.fully_connected(self.output, 1, activation_fn=None, biases_initializer=None), 2)
            mean = self.output
            # cov = tf.eye(self.a_size, batch_shape=[self.batch_size])*amplify
            normal = tf.contrib.distributions.MultivariateNormalFullCovariance(
                loc=mean,
                covariance_matrix=self.cov*self.reduction)
            self.dist = normal

    def create_loss(self):
        self.loss = -tf.reduce_mean(tf.log(self.dist.prob(self.action_holder))*self.reward_holder)

    def get_gradients_holder(self):
        for idx, var in enumerate(self.tvars):
            placeholder = tf.placeholder(tf.float32, name=str(idx)+'_holder')
            self.gradient_holders.append(placeholder)

    def sample_action(self, sess, state):
        sample_action = sess.run(self.dist.sample(), feed_dict={self.state_in: state})
        return sample_action

    def calculate_loss_gradient(self):
        self.gradients = tf.gradients(self.loss, self.tvars)

    def update_weights(self):
        self.update_batch = self.optimizer.apply_gradients(zip(self.gradients, self.tvars))
        return self.update_batch

    def memorize_data(self, episode, first):
        if first:
            self.episode_history = episode
            self.stuck = False
        else:
            self.episode_history = np.vstack((self.episode_history, episode))

    def shuffle_memories(self):
        np.random.shuffle(self.episode_history)

    def create_graph_connections(self):
        if self.is_brain_present:
            self.create_pi_dist()
            self.create_loss()
            self.tvars = tf.trainable_variables()
            self.calculate_loss_gradient()
            self.saver = tf.train.Saver()
            self.update_weights()
        else:
            print("initialize brain first")
        self.init = tf.global_variables_initializer()

    def memory_batch_generator(self):
        total = self.episode_history.shape[0]
        amount_of_batches = int(total/self.batch_size)
        for i in range(amount_of_batches+1):
            if i < amount_of_batches:
                top = (i+1)*self.batch_size
                bottom = i*self.batch_size
                yield (self.episode_history[bottom:top, 0:self.s_size],
                       self.episode_history[bottom:top, self.s_size:self.s_size+self.a_size],
                       self.episode_history[bottom:top, self.s_size+self.a_size:self.s_size+self.a_size+1],
                       self.episode_history[bottom:top, self.s_size+self.a_size+1:])
            else:
                yield (self.episode_history[top:, 0:self.s_size],
                       self.episode_history[top:, self.s_size:self.s_size+self.a_size],
                       self.episode_history[top:, self.s_size+self.a_size:self.s_size+self.a_size+1],
                       self.episode_history[top:, self.s_size+self.a_size+1:])

    def train_with_current_memories(self, sess):
        self.sess = sess
        for step_sample_batch in self.memory_batch_generator():
            sess.run(self.update_weights(), feed_dict={self.state_in: step_sample_batch[0],
                                                       self.action_holder: step_sample_batch[1],
                                                       self.reward_holder: step_sample_batch[2].reshape([step_sample_batch[2].shape[0]])})

    def get_returns(self):
        self.episode_history[:, self.s_size+self.a_size:self.s_size+self.a_size+1] = self.discount_rewards(
            self.episode_history[:, self.s_size+self.a_size:self.s_size+self.a_size+1])

    def discount_rewards(self, r):
        """ take 1D float array of rewards and compute discounted reward """
        discounted_r = np.zeros_like(r)
        running_add = 0
        for t in reversed(range(0, r.size)):
            running_add = running_add * self.gamma + r[t]
            discounted_r[t] = running_add
        return discounted_r

    def prob_action(self, sess, action, state):
        prob = sess.run(self.dist.prob(action), feed_dict={self.state_in: state})
        return prob

    def check_movement(self):
        ep_back = 5
        jump = 3
        threshold = 3
        if len(self.episode_history) > ep_back*2:
            difference = sum(abs(self.episode_history[-ep_back:-1, :] - self.episode_history[-ep_back-jump:-1-jump, :]).flatten())
            print(difference)
            if difference < threshold:
                self.stuck = True

    def print_last_n_returns(self, n):
        if len(self.episode_history[:, self.s_size+self.a_size:self.s_size+self.a_size+1]) > n:
            n_returns = sum(self.episode_history[-n:, self.s_size+self.a_size:self.s_size+self.a_size+1])/float(n)
            print(n_returns)
            return n_returns
Training loops:
tf.reset_default_graph()
agent_2 = agent_episodic_continuous_action(1e-2, s_size=24, a_size=4, batch_size=30, dist_type="normal")
agent_2.create_brain([5, 5], tf.nn.relu, None)
agent_2.create_graph_connections()
env = gym.make('BipedalWalker-v2')

with tf.Session() as sess:
    sess.run(agent_2.init)
    for i in range(200):
        s = env.reset()
        d = False
        a = agent_2.sample_action(sess, [s])[0]
        print(a)
        if None in a:
            print("None in a! inside for")
            print(s)
        s1, r, d, _ = env.step(a)
        episode = np.hstack((s, a, r, s1))
        agent_2.memorize_data(episode=episode, first=True)
        count = 0
        while not d:
            count = count + 1
            s = s1
            a = agent_2.sample_action(sess, [s])[0]
            s1, r, d, _ = env.step(a)
            episode = np.hstack((s, a, r, s1))
            # env.render()
            agent_2.memorize_data(episode=episode, first=False)
            # print(s1)
            if count % 5 == 0:
                agent_2.check_movement()
                if agent_2.stuck:
                    d = True
        agent_2.get_returns()
        agent_2.print_last_n_returns(20)
        agent_2.shuffle_memories()
        agent_2.train_with_current_memories(sess)
    env.close()
For each batch of 30 samples I execute Agent.update_weights()
def update_weights(self):
    self.update_batch = self.optimizer.apply_gradients(zip(self.gradients, self.tvars))
When I execute:
def train_with_current_memories(self, sess):
    self.sess = sess
    for step_sample_batch in self.memory_batch_generator():
        sess.run(self.update_weights(), feed_dict={self.state_in: step_sample_batch[0],
                                                   self.action_holder: step_sample_batch[1],
                                                   self.reward_holder: step_sample_batch[2].reshape([step_sample_batch[2].shape[0]])})
Or maybe this sluggishness is expected behavior.
The code was slowing down because the graph was getting bigger at each iteration: I was creating new graph elements inside the iteration loop.
During each iteration, the following function was being called:
def update_weights(self):
    self.update_batch = self.optimizer.apply_gradients(zip(self.gradients, self.tvars))
    return self.update_batch
This function adds a new element to the graph every time it is called.
The best way to avoid this kind of "graph leaking" is to add the line
sess.graph.finalize()
as soon as you create your session. That way, if anything tries to add to the graph after that point, TensorFlow will raise an exception.
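Here is a minimal sketch of the pattern (illustrative variable names, TF 1.x style as in the question): build the update op once, finalize the graph, and only call sess.run inside the loop.

import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 4])
w = tf.Variable(tf.zeros([4, 1]))
loss = tf.reduce_mean(tf.square(tf.matmul(x, w) - 1.0))
update_op = tf.train.AdamOptimizer(1e-2).minimize(loss)   # created once, outside the loop
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    sess.graph.finalize()    # any later attempt to grow the graph raises an exception
    for _ in range(100):
        batch = np.random.randn(30, 4).astype(np.float32)
        sess.run(update_op, feed_dict={x: batch})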
I'm trying to use a one-vs-one composition of decision trees for multiclass classification. The problem is that when I pass different object weights to a classifier, the result stays the same.
Do I misunderstand something about weights, or do they just work incorrectly?
Thanks for your replies!
Here is my code:
from math import log, exp

class AdaLearner(object):
    def __init__(self, in_base_type, in_multi_type):
        self.base_type = in_base_type
        self.multi_type = in_multi_type

    def train(self, in_features, in_labels):
        model = AdaBoost(self.base_type, self.multi_type)
        model.learn(in_features, in_labels)
        return model

class AdaBoost(object):
    CLASSIFIERS_NUM = 100

    def __init__(self, in_base_type, in_multi_type):
        self.base_type = in_base_type
        self.multi_type = in_multi_type
        self.classifiers = []
        self.weights = []

    def learn(self, in_features, in_labels):
        labels_number = len(set(in_labels))
        self.weights = self.get_initial_weights(in_labels)
        for iteration in xrange(AdaBoost.CLASSIFIERS_NUM):
            classifier = self.multi_type(self.base_type())
            self.classifiers.append(classifier.train(in_features,
                                                     in_labels,
                                                     weights=self.weights))
            answers = []
            for obj in in_features:
                answers.append(self.classifiers[-1].apply(obj))
            err = self.compute_weighted_error(in_labels, answers)
            print err
            if abs(err - 0.) < 1e-6:
                break
            alpha = 0.5 * log((1 - err)/err)
            self.update_weights(in_labels, answers, alpha)
            self.normalize_weights()

    def apply(self, in_features):
        answers = {}
        for classifier in self.classifiers:
            answer = classifier.apply(in_features)
            if answer in answers:
                answers[answer] += 1
            else:
                answers[answer] = 1
        ranked_answers = sorted(answers.iteritems(),
                                key=lambda (k, v): (v, k),
                                reverse=True)
        return ranked_answers[0][0]

    def compute_weighted_error(self, in_labels, in_answers):
        error = 0.
        w_sum = sum(self.weights)
        for ind in xrange(len(in_labels)):
            error += (in_answers[ind] != in_labels[ind]) * self.weights[ind] / w_sum
        return error

    def update_weights(self, in_labels, in_answers, in_alpha):
        for ind in xrange(len(in_labels)):
            self.weights[ind] *= exp(in_alpha * (in_answers[ind] != in_labels[ind]))

    def normalize_weights(self):
        w_sum = sum(self.weights)
        for ind in xrange(len(self.weights)):
            self.weights[ind] /= w_sum

    def get_initial_weights(self, in_labels):
        weight = 1 / float(len(in_labels))
        result = []
        for i in xrange(len(in_labels)):
            result.append(weight)
        return result
As you can see, it is just a simple AdaBoost (I instantiated it with in_base_type = tree_learner, in_multi_type = one_against_one), and it worked the same way no matter how many base classifiers were engaged; it just acted as a single multiclass decision tree.
Then I made a hack: on each iteration I chose a random sample of objects with respect to their weights and trained the classifier on that subset without any weights, and that worked as it was supposed to.
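For reference, a rough sketch of that weighted-resampling hack (assuming numpy arrays; the helper name is mine):

import numpy as np

def resample_by_weight(features, labels, weights, rng=None):
    """Draw a bootstrap sample of the training set according to the boosting weights."""
    rng = np.random.default_rng() if rng is None else rng
    weights = np.asarray(weights, dtype=float)
    probs = weights / weights.sum()
    idx = rng.choice(len(labels), size=len(labels), replace=True, p=probs)
    return np.asarray(features)[idx], np.asarray(labels)[idx]

The base classifier is then trained on the resampled (features, labels) pair with uniform weights.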
The default tree criterion, namely information gain, does not take the weights into account. If you know of a formula that would do it, I'll implement it.
In the meantime, using neg_z1_loss will handle the weights correctly. By the way, there was a slight bug in that implementation, so you will need to use the most current GitHub master.