This is the voted perceptron algorithm:
#this is a pseudo code
#m is the number of examples
initialize k = 0, w1 := 0, c1 := 0
repeat for T epochs:
for i = 1 to i = m (this is one epoch)
if (x[i],y[i]) is classified correctly then
c[k] = c[k] + 1
otherwise:
w[k+1] = w[k] + y[i]x[i]
c[k+1]=1
k = k+1
The algorithm is reported at http://curtis.ml.cmu.edu/w/courses/index.php/Voted_Perceptron
I want to make a voted perceptron dual form. This is my pseudocode:
#m is the number of examples
Initialize k = 0, a1 := 0, c1 := 0
Repeat for T epochs:
for i = 1 to i = m
if (x[i], y[i]) is classified correctly then
c[k] = c[k] + 1
otherwise
k = k + 1
a[k][i] = a[k][i]+1
c[k] = 1
The output is the following list: (a_1, c1),(a_2,c2),...,(a_k,ck) where each a_i is a vector
Is it correct ? Must i add the bias ?
Here i report my python implementation of this voted dual perceptron:
class Perceptron(object):
def __init__(self, kernel = linear_kernel, epochs = 50):
self.kernel = kernel
self.epochs = epochs
def summation(self, a, y, x, xi, b):
s = 0
for j in range(0, len(x)):
s += (a[j]*y[j]*(self.kernel(x[j], xi))) + b
return s
def maxNorm(self, x):
v = np.linalg.norm(x[0])
for i in range(1, len(x)):
if (np.linalg.norm(x[i]) > v):
v = np.linalg.norm(x[i])
return v
def training(self, x, y):
k = 0
a = np.zeros(len(x), dtype=np.int)
alfa = []
alfa.append(a.copy())
print(alfa)
b = 0
bias = []
bias.append(0)
c = []
c.append(0)
for _ in range(self.epochs):
for i in range(len(y)):
if (y[i] * self.summation(a, y, x, x[i], b))<=0:
a[i] = a[i] + 1
alfa.append(a.copy())
b = b + (y[i]*(self.maxNorm(x)**2))
bias.append(b)
c.append(1)
k = k+1
else:
c[k] += 1
self.alfa = alfa
self.b = bias
self.c = c
print("b: ",len(self.b))
print("c: ",len(self.c))
def predict(self,xTest, yTest, xTrain, yTrain):
print("a: ",self.alfa)
print("\nc:", self.c)
print(yTest)
print(yTrain)
SumFin=0
Err = np.zeros(len(xTest))
nErr = 0
yPredict = []
for i in range(len(xTest)):
for j in range(len(self.c)):
print(self.c[j]*(np.sign(self.summation(self.alfa[i], yTrain, xTrain, xTest[i], self.b[i]))))
SumFin += (self.c[j]*(np.sign(self.summation(self.alfa[i], yTrain, xTrain, xTest[i], self.b[i]))))
yPredict.append(np.sign(SumFin))
for i in range(len(yTest)):
print("i = ",i," | yTest = ",yTest[i]," | yPredict = ",yPredict[i])
if(yTest[i] != yPredict[i]):
nErr += 1
Err[i] += 1
print("Error rate: ", ((100*nErr)/len(xTest)), "%")
self.Err = Err
I don't think this code work because if i predict the training set i get 75% of error rate.
Can anyone help me? Thanks
Training:
Voted Perceptron Training Algorithm
def voted_perceptron(x, target=y, nb_epoch=1):
k = v = c = 0
V = C = []
for epoch in nb_epoch:
for i in range(len(x)):
y_pred = sign(v*k)
if y_pred == y:
c += 1
v += y[i]*x[i]
c = 1
k += 1
V.append(v)
C.append(c)
return V,C
Prediction:
Voted Perceptron Prediction Algorithm
def vp_predict(V, C, X):
predictions = []
for x in X:
s = 0
for w,c in zip(V,C):
s = s + c*sign(np.dot(w,x))
predictions.append(sign(s))
return predictions
Related
The error is saying this is not the case - your InputLayer object does not have an attribute Input but I don't know how to fix it thank you for your helps
from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
import random
class MultiLayerPerceptron(BaseEstimator, ClassifierMixin):
def __init__(self, params=None):
if (params == None):
self.inputLayer = 14 # Input Layer
self.hiddenLayer = 100 # Hidden Layer
self.outputLayer = 2 # Outpuy Layer
self.learningRate = 0.005 # Learning rate
self.max_epochs = 600 # Epochs
self.iasHiddenValue = -1 # Bias HiddenLayer
self.BiasOutputValue = -1 # Bias OutputLayer
self.activation = self.ativacao['sigmoid'] # Activation function
self.deriv = self.derivada['sigmoid']
else:
self.inputLayer = params['InputLayer']
self.hiddenLayer = params['HiddenLayer']
self.OutputLayer = params['OutputLayer']
self.learningRate = params['LearningRate']
self.max_epochs = params['Epocas']
self.BiasHiddenValue = params['BiasHiddenValue']
self.BiasOutputValue = params['BiasOutputValue']
self.activation = self.ativacao[params['ActivationFunction']]
self.deriv = self.derivada[params['ActivationFunction']]
'Starting Bias and Weights'
self.WEIGHT_hidden = self.starting_weights(self.hiddenLayer, self.inputLayer)
self.WEIGHT_output = self.starting_weights(self.OutputLayer, self.hiddenLayer)
self.BIAS_hidden = np.array([self.BiasHiddenValue for i in range(self.hiddenLayer)])
self.BIAS_output = np.array([self.BiasOutputValue for i in range(self.OutputLayer)])
self.classes_number = 2
pass
def starting_weights(self, x, y):
return [[2 * random.random() - 1 for i in range(x)] for j in range(y)]
ativacao = {
'sigmoid': (lambda x: 1/(1 + np.exp(-x))),
'tanh': (lambda x: np.tanh(x)),
'Relu': (lambda x: x*(x > 0)),
}
derivada = {
'sigmoid': (lambda x: x*(1-x)),
'tanh': (lambda x: 1-x**2),
'Relu': (lambda x: 1 * (x>0))
}
def Backpropagation_Algorithm(self, x):
DELTA_output = []
'Stage 1 - Error: OutputLayer'
ERROR_output = self.output - self.OUTPUT_L2
DELTA_output = ((-1)*(ERROR_output) * self.deriv(self.OUTPUT_L2))
arrayStore = []
'Stage 2 - Update weights OutputLayer and HiddenLayer'
for i in range(self.hiddenLayer):
for j in range(self.OutputLayer):
self.WEIGHT_output[i][j] -= (self.learningRate * (DELTA_output[j] * self.OUTPUT_L1[i]))
self.BIAS_output[j] -= (self.learningRate * DELTA_output[j])
'Stage 3 - Error: HiddenLayer'
delta_hidden = np.matmul(self.WEIGHT_output, DELTA_output)* self.deriv(self.OUTPUT_L1)
'Stage 4 - Update weights HiddenLayer and InputLayer(x)'
for i in range(self.OutputLayer):
for j in range(self.hiddenLayer):
self.WEIGHT_hidden[i][j] -= (self.learningRate * (delta_hidden[j] * x[i]))
self.BIAS_hidden[j] -= (self.learningRate * delta_hidden[j])
def show_err_graphic(self,v_erro,v_epoca):
plt.figure(figsize=(4,14))
plt.plot(v_epoca, v_erro, "m-",color="b", marker=11)
plt.xlabel("Number of Epochs")
plt.ylabel("Squared error (MSE) ");
plt.title("Error Minimization")
plt.show()
def predict(self, X, y):
'Returns the predictions for every element of X'
my_predictions = []
'Forward Propagation'
forward = np.matmul(X,self.WEIGHT_hidden) + self.BIAS_hidden
forward = np.matmul(forward, self.WEIGHT_output) + self.BIAS_output
for i in forward:
my_predictions.append(max(enumerate(i), key=lambda x:x[1])[0])
array_score = []
for i in range(len(my_predictions)):
if my_predictions[i] == 0:
array_score.append([i, 'No', my_predictions[i], y[i]])
elif my_predictions[i] == 1:
array_score.append([i, 'Yes', my_predictions[i], y[i]])
dataframe = pd.DataFrame(array_score, columns=['_id', 'class', 'output', 'hoped_output'])
return my_predictions, dataframe
def fit(self, X, y):
count_epoch = 1
total_error = 0
n = len(X);
epoch_array = []
error_array = []
W0 = []
W1 = []
while(count_epoch <= self.max_epochs):
for idx,inputs in enumerate(X):
self.output = np.zeros(self.classes_number)
'Stage 1 - (Forward Propagation)'
self.OUTPUT_L1 = self.activation((np.dot(self.InputLayer, self.WEIGHT_hidden) + self.BIAS_hidden.T))
self.OUTPUT_L2 = self.activation((np.dot(self.OUTPUT_L1, self.WEIGHT_output) + self.BIAS_output.T))
'Stage 2 - One-Hot-Encoding'
if(y[idx] == 0):
self.output = np.array([1,0,0]) #Class1 {1,0,0}
elif(y[idx] == 1):
self.output = np.array([0,1,0]) #Class2 {0,1,0}
square_error = 0
for i in range(self.OutputLayer):
erro = (self.output[i] - self.OUTPUT_L2[i])**2
square_error = (square_error + (0.05 * erro))
total_error = total_error + square_error
'Backpropagation : Update Weights'
self.Backpropagation_Algorithm(inputs)
total_error = (total_error / n)
if((count_epoch % 50 == 0)or(count_epoch == 1)):
print("Epoch ", count_epoch, "- Total Error: ",total_error)
error_array.append(total_error)
epoch_array.append(count_epoch)
W0.append(self.WEIGHT_hidden)
W1.append(self.WEIGHT_output)
count_epoch += 1
self.show_err_graphic(error_array,epoch_array)
plt.plot(W0[0])
plt.title('Weight Hidden update during training')
plt.legend(['neuron1', 'neuron2', 'neuron3', 'neuron4', 'neuron5'])
plt.ylabel('Value Weight')
plt.show()
plt.plot(W1[0])
plt.title('Weight Output update during training')
plt.legend(['neuron1', 'neuron2', 'neuron3'])
plt.ylabel('Value Weight')
plt.show()
return self
dictionary = {'InputLayer':14, 'HiddenLayer':100, 'OutputLayer':2,
'Epocas':700, 'LearningRate':0.005,'BiasHiddenValue':-1,
'BiasOutputValue':-1, 'ActivationFunction':'sigmoid'}
Perceptron = MultiLayerPerceptron(dictionary)
Perceptron.fit(train_X,train_y)
AttributeError: 'MultiLayerPerceptron' object has no attribute 'InputLayer'
I want after splitting the data to work on MLP Classifier but I found this error "AttributeError: 'MultiLayerPerceptron' object has no attribute 'InputLayer'
I want to implement a multi-layer perceptron.
I found some code on GitHub that classifies MNIST quite well (96%). However, for some reason, it does not cope with the XOR task.
I want to understand why.
Here is the code:
perceptron.py
import random
import numpy as np
class Perceptron:
def __init__(self, *, layer_sizes, activation_functions, cost_function_deriv):
self.layer_sizes = layer_sizes
if len(self.layer_sizes) - 1 != len(activation_functions):
raise ValueError("...")
self.activation_functions = activation_functions
self.cost_function_deriv = cost_function_deriv
self.biases = [np.random.randn(y, 1) for y in layer_sizes[1:]]
self.weights = [np.random.randn(y, x) for x, y in zip(layer_sizes[:-1], layer_sizes[1:])]
def train(self, training_data, test_data, epochs, mini_batch_size, lr):
test_data_len = len(test_data)
for epoch in range(epochs):
random.shuffle(training_data)
mini_batches = [training_data[x: x + mini_batch_size]
for x in range(0, len(training_data), mini_batch_size)]
for mini_batch in mini_batches:
mb_len = len(mini_batch)
gradient_weights = [np.zeros(w.shape) for w in self.weights]
gradient_biases = [np.zeros(b.shape) for b in self.biases]
for x, y in mini_batch:
delta_gradient_biases, delta_gradient_weights = self.backpropagation(np.array(x), y)
gradient_weights = [grad + delta for grad, delta in zip(gradient_weights, delta_gradient_weights)]
gradient_biases = [grad + delta for grad, delta in zip(gradient_biases, delta_gradient_biases)]
self.weights = [w - (lr / mb_len) * grad for w, grad in zip(self.weights, gradient_weights)]
self.biases = [b - (lr / mb_len) * grad for b, grad in zip(self.biases, gradient_biases)]
correct_answers = self.how_many_correct_answers(test_data)
print(f"Epoch number {epoch}: {correct_answers}/{test_data_len} correct answers")
def backpropagation(self, x, y):
gradient_b = [np.zeros(b.shape) for b in self.biases]
gradient_w = [np.zeros(w.shape) for w in self.weights]
activations = [x]
prev_activation = x
for i, (b, w) in enumerate(zip(self.biases, self.weights)):
current_activation = self.activation_functions[i](np.dot(w, prev_activation) + b)
activations.append(current_activation)
prev_activation = current_activation
delta = self.cost_function_deriv(activations[-1], y) * self.activation_functions[-1].deriv(activations[-1])
gradient_b[-1] = delta
gradient_w[-1] = np.dot(delta, activations[-2].T)
for i in range(2, len(self.layer_sizes)):
z = activations[-i]
act_der = self.activation_functions[-i + 1].deriv(z)
delta = np.dot(self.weights[-i + 1].T, delta) * act_der
gradient_b[-i] = delta
gradient_w[-i] = np.dot(delta, activations[-i - 1].T)
# Normal indexing variant:
# for i in range(len(self.layers) - 1, 0, -1):
# z = activations[i]
# act_der = self.activation_functions[i].deriv(z)
# delta = np.dot(self.weights[i].T, delta) * act_der
# gradient_b[i - 1] = delta
# gradient_w[i - 1] = np.dot(delta, activations[i - 1].T)
return gradient_b, gradient_w
def feedforward(self, a):
for i, (b, w) in enumerate(zip(self.biases, self.weights)):
a = self.activation_functions[i](np.dot(w, a) + b)
return a
def how_many_correct_answers(self, test_data):
k = 0
for x, y in test_data:
y_predict = np.argmax(self.feedforward(x))
print(y_predict, y)
k += int(y_predict == y)
return k
main.py
from copy import deepcopy
import numpy as np
from perceptron import Perceptron
class Sigmoid:
out_min_max = [0, 1]
def __call__(self, x):
return 1. / (1. + np.exp(-x))
def deriv(self, y):
# t = self(x)
# return t * (1. - t)
return y * (1. - y)
def cost_function_derivative(y_predict, y_true_label):
label_vector = np.zeros(y_predict.shape)
label_vector[y_true_label] = 1.0
return y_predict - label_vector
def main():
training_data = np.asarray([[[[0], [0]], 0],
[[[0], [1]], 1],
[[[1], [0]], 1],
[[[1], [1]], 0]])
layer_sizes = [2, 8, 2]
model = Perceptron(layer_sizes=layer_sizes,
activation_functions=[Sigmoid(), Sigmoid()],
cost_function_deriv=cost_function_derivative)
model.train(deepcopy(training_data),
deepcopy(training_data),
epochs=10000,
mini_batch_size=4,
lr=0.01)
if __name__ == '__main__':
main()
The final output in format 'y_predict y_true' (after each epoch):
0 0
0 1
0 1
0 0
If remove random.shuffle(training_data) then:
1 0
0 1
1 1
0 0
But not 0 1 1 0
I figured it out. It requires the following.
mini_batch_size=1
# random.shuffle(training_data) -- comment
epochs=10000
And it's better to do this:
lr=0.1
The result in most cases is obtained after ~1000 epochs:
0 0
1 1
1 1
0 0
Hi I am trying to write a trust-region algorithm using the dogleg method with python for a class I have. I have a Newton's Method algorithm and Broyden's Method algorthm that agree with each other but I can't seem to get this Dogleg method to work.
Here is the function I am trying to find the solution to:
def test_function(x):
x1 = float(x[0])
x2 = float(x[1])
r = np.array([[x2**2 - 1],
[np.sin(x1) - x2]])
return r
and here is the jacobian I wrote
def Test_Jacobian(x, size):
e = create_ID_vec(size)
#print(e[0])
epsilon = 10e-8
J = np.zeros([size,size])
#print (J)
for i in range(0, size):
for j in range(0, size):
J[i][j] = ((test_function(x[i]*e[j] + epsilon*e[j])[i] - test_function(x[i]*e[j])[i])/epsilon)
return J
and here is my Trust-Region algorithm:
def Trust_Region(x):
trust_radius = 1
max_trust = 300
eta = rand.uniform(0,.25)
r = test_function(x) # change to correspond with the function you want
J = Test_Jacobian(r, r.size) # change to correspond with function
i = 0
iteration_table = [i]
function_table = [vector_norm(r, r.size)]
while vector_norm(r, r.size) > 10e-10:
print(x, 'at iteration', i, "norm of r is", vector_norm(r, r.size))
p = dogleg(x, r, J, trust_radius)
rho = ratio(x, J, p)
if rho < 0.25:
print('first')
trust_radius = 0.25*vector_norm(p,p.size)
elif rho > 0.75 and vector_norm(p,p.size) == trust_radius:
print('second')
trust_radius = min(2*trust_radius, max_trust)
else:
print('third')
trust_radius = trust_radius
if rho > eta:
print('x changed')
x = x + p
#r = test_function(x)
#J = Test_Jacobian(r, r.size)
else:
print('x did not change')
x = x
r = test_function(x) # change to correspond with the function you want
J = Test_Jacobian(r, r.size) # change to correspond with function
i = i + 1
#print(r)
#print(J)
#print(vector_norm(p,p.size))
print(rho)
#print(trust_radius)
iteration_table.append(i)
function_table.append(vector_norm(r,r.size))
print ('The solution to the non-linear equation is: ', x)
print ('This solution was obtained in ', i, 'iteratations')
plt.figure(figsize=(10,10))
plt.plot(iteration_table, np.log10(function_table))
plt.xlabel('iteration number')
plt.ylabel('function value')
plt.title('Semi-Log Plot for Convergence')
return x, iteration_table, function_table
def dogleg(x, r, J, trust_radius):
tau_k = min(1, vector_norm(J.transpose().dot(r), r.size)**3/(trust_radius*r.transpose().dot(J).dot(J.transpose().dot(J)).dot(J.transpose()).dot(r)))
p_c = -tau_k*(trust_radius/vector_norm(J.transpose().dot(r), r.size))*J.transpose().dot(r)
if vector_norm(p_c, p_c.size) == trust_radius:
print('using p_c')
p_k = p_c
else:
p_j = -np.linalg.inv(J.transpose().dot(J)).dot(J.transpose().dot(r))
print ('using p_j')
tau = tau_finder(x, p_c, p_j, trust_radius, r.size)
p_k = p_c + tau*(p_j-p_c)
return p_k
def ratio(x, J, p):
r = test_function(x)
r_p = test_function(x + p)
print (vector_norm(r, r.size)**2)
print (vector_norm(r_p, r_p.size)**2)
print (vector_norm(r + J.dot(p), r.size)**2)
rho_k =(vector_norm(r, r.size)**2 - vector_norm(r_p, r_p.size)**2)/(vector_norm(r, r.size)**2 - vector_norm(r + J.dot(p), r.size)**2)
return rho_k
def tau_finder(x, p_c, p_j, trust_radius, size):
a = 0
b = 0
c = 0
for i in range(0, size):
a = a + (p_j[i] - p_c[i])**2
b = b + 2*(p_j[i] - p_c[i])*(p_c[i] - x[i])
c = (p_c[i] - x[i])**2
c = c - trust_radius**2
tau_p = (-b + np.sqrt(b**2 - 4*a*c))/(2*a)
tau_m = (-b - np.sqrt(b**2 - 4*a*c))/(2*a)
#print(tau_p)
#print(tau_m)
if tau_p <= 1 and tau_p >=0:
return tau_p
elif tau_m <= 1 and tau_m >=0:
return tau_m
else:
print('error')
return 'error'
def model_function(p):
r = test_function(x)
J = Test_Jacobian(r, r.size)
return 0.5*vector_norm(r + J.dot(p), r.size)**2
The answer should be about [[1.57076525], [1. ]]
but here is the output after about 28-30 iterations:
ZeroDivisionError Traceback (most recent call last)
<ipython-input-359-a414711a1671> in <module>
1 x = create_point(2,1)
----> 2 Trust_Region(x)
<ipython-input-358-7cb77bd44d7b> in Trust_Region(x)
11 print(x, 'at iteration', i, "norm of r is", vector_norm(r, r.size))
12 p = dogleg(x, r, J, trust_radius)
---> 13 rho = ratio(x, J, p)
14
15 if rho < 0.25:
<ipython-input-358-7cb77bd44d7b> in ratio(x, J, p)
71 print (vector_norm(r_p, r_p.size)**2)
72 print (vector_norm(r + J.dot(p), r.size)**2)
---> 73 rho_k =(vector_norm(r, r.size)**2 - vector_norm(r_p, r_p.size)**2)/(vector_norm(r, r.size)**2 - vector_norm(r + J.dot(p), r.size)**2)
74 return rho_k
75
ZeroDivisionError: float division by zero
I could use a second set of eyes on my neural network.
This is the mnist number recognition project.
I'm not sure where the issue is.
I previously implemented the ai with tensor flow successfully.
I'm not looking to use an api as a solution.
I would appreciate any help anyone can give.
Here's the project on github, it's only an init file and then the neural_network.
https://github.com/nealchawn/ai_trial_2
class NeuralNetwork(object):
def __init__(self, sizes):
self.activations = []
self.outputs = []
self.weights = []
self.biases = []
self.sizes = sizes
self.set_random_weights()
self.set_random_biases()
def set_random_weights(self):
for layer_index, layer_size in enumerate(self.sizes[1:], start=1):
layer_weights = []
for size in range(layer_size):
for size in range(self.sizes[layer_index-1]):
layer_weights.append(random.uniform(-5.0, 5.0))
self.weights.append(layer_weights)
def set_random_biases(self):
total_biases = 0
# add extra zero bias to help future indexing
#self.biases.append(0)
for index, size in enumerate(self.sizes[0:-1], start=1):
total_biases += 1
for x in range(total_biases):
self.biases.append(random.uniform(-5.0, 5.0))
def train_network(self, training_data, training_labels):
if len(training_data) != len(training_labels):
print("Error data and labels must be the same length")
data = list(zip(training_data, training_labels))
self.sgd(data)
def sgd(self, data, mini_batch_size = 1000):
# first we'll create batches of training data
n = len(data)
data_batches = [
data[k:k + mini_batch_size]
for k in range(0, n, mini_batch_size)
]
print(len(data_batches))
i = 0
for mini_batch in data_batches:
print("Batch: " + str(i))
i += 1
self.update_mini_batch(mini_batch)
self.network_outputs()
print("Finished All training data!")
def update_mini_batch(self, mini_data_batch):
weight_gradients = []
bias_gradients = []
i = 0
for training_input in mini_data_batch:
training_object, training_label = training_input
self.feedforward(training_object)
weights_gradient, bias_gradient = self.backpropogation(training_label)
weight_gradients.append(weights_gradient)
bias_gradients.append(bias_gradient)
# average gradients
weights_gradient = np.average(weight_gradients,axis=0)
biases_gradient = np.average(bias_gradients, axis=0)
# may need to convert to list
weights_gradient_list = []
for weight_gradient in weights_gradient:
weights_gradient_list.append(weight_gradient.tolist())
#weights_gradient = weights_gradient.tolist()
biases_gradient = biases_gradient.tolist()
for x in range(len(self.biases)):
self.biases[x] -= 0.1*biases_gradient[x]
weight_gradient_index = 0
for layer_index, layer_weights in enumerate(self.weights, start=0):
for weight_index, weight in enumerate(layer_weights):
self.weights[layer_index][weight_index] = weight - 0.1*weights_gradient_list[layer_index][weight_index]
weight_gradient_index += 1
def feedforward(self, training_object):
# set inputs
self.outputs = []
self.activations = []
temp_activations = []
for index in range(self.sizes[0]):
temp_activations.append(training_object[index])
self.activations.append(temp_activations)
for layer_index, layer_size in enumerate(self.sizes[1:], start=0):
layer_weights = self.weights[layer_index]
layer_inputs = self.activations[layer_index]
weight_index = 0
layer_outputs = []
layer_activations = []
for node_index in range(layer_size):
node_weights = []
# get node weights
#print(f"layer size: {layer_size}, previous_layer_size: {self.sizes[layer_index]}, layer weights: {len(layer_weights)}")
for x in range(self.sizes[layer_index]):
node_weights.append(layer_weights[weight_index])
weight_index += 1
output = 0
for indx in range(len(node_weights)):
output += layer_inputs[indx]*node_weights[indx]
output = output + self.biases[layer_index]
layer_outputs.append(output)
layer_activations.append(self.sigmoid(output))
self.outputs.append(layer_outputs)
self.activations.append(layer_activations)
def backpropogation(self, training_label):
costs = []
output_layer_activations = self.activations[-1]
output_layer_outputs = self.outputs[-1]
correct_labels = self.translate_label_to_array(training_label)
costs.append(self.compute_cost_derivative(correct_labels, output_layer_activations))
for cost_index, cost in enumerate(costs[0]):
costs[0][cost_index] = cost*self.sigmoid_prime(output_layer_outputs[cost_index])
# calculate costs for layers
for layer_index, layer_size in enumerate(self.sizes[::-1][1:-1], start=1):
layer_costs = []
layer_weights = self.weights[-layer_index]
layer_outputs = self.outputs[-(layer_index+1)]
previous_layer_costs = costs[layer_index-1]
next_layer_size = self.sizes[::-1][1:][layer_index]
layer_weights_formatted = []
for x in range(layer_size):
layer_weights_formatted.append([])
for weight_index, weight in enumerate(layer_weights, start=0):
#print(f"weight index:{weight_index % next_layer_size} layer_index: {weight_index}")
layer_weights_formatted[weight_index%layer_size].append(layer_weights[weight_index])
#print(f"next_layer_size:{layer_size} costs: {len(previous_layer_costs)}, layer_weights_formatted: {layer_weights_formatted}")
for x in range(layer_size):
node_cost = 0
for y, cost in enumerate(previous_layer_costs,start=0):
node_cost += layer_weights_formatted[x][y]*cost
layer_costs.append(node_cost)
# layer_costs same order as next layer's activations
for cost_index, cost in enumerate(layer_costs):
layer_costs[cost_index] = cost * self.sigmoid_prime(layer_outputs[cost_index])
costs.append(layer_costs)
# calculate weight errors
weight_errors = []
bias_errors = []
for layer_index, layer_costs in enumerate(costs[::-1]):
layer_activations = self.activations[layer_index]
layer_weight_errors = []
for cost_index, cost in enumerate(layer_costs,start=0):
for activation in layer_activations:
layer_weight_errors.append(activation * cost)
weight_errors.append(np.array(layer_weight_errors))
bias_errors.append(sum(layer_costs))
return weight_errors, bias_errors
# conversion tool
def translate_label_to_array(self, y):
translated_label = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
translated_label[y] = 1
return np.array(translated_label)
# output tools
def network_outputs(self):
print("Output layer: ")
for x in range(self.sizes[-1]):
print("node " + str(x) + ": " + str(self.activations[-1][x]))
def total_activations(self):
print(len(self.activations))
def compute_cost_derivative(self, y, output_activations):
"""Return the vector of partial derivatives \partial C_x /
\partial a for the output activations."""
return (output_activations - y)
def sigmoid(self, z):
""""The sigmoid function."""
return (1.0 / (1.0 + np.exp(-z)))
def sigmoid_prime(self, z):
return (self.sigmoid(z) * (1 - self.sigmoid(z)))
I think I've split my training data in 5 kold, is there a way for me to label/identify each of the 5 splits so I can then send each into my algorithm to calculate their own accuracies?
from sklearn.model_selection import KFold
kf = KFold(n_splits=5)
splits=kf.get_n_splits(X_train)
print(splits)
Separately, I have also tried splitting my data to then run in my logistic regression but this outputs nan % accuracy:
X_train1 = X[0:84]
Y_train1 = Y[0:84]
X_train2 = X[85:170]
Y_train2 = Y[85:170]
X_train3 = X[171:255]
Y_train3 = Y[171:255]
X_train4 = X[256:340]
Y_train4 = Y[256:340]
X_train5 = X[341:426]
Y_train5 = Y[341:426]
def Sigmoid(z):
return 1/(1 + np.exp(-z))
def Hypothesis(theta, x):
return Sigmoid(x # theta)
def Cost_Function(X,Y,theta,m):
hi = Hypothesis(theta, x)
_y = Y.reshape(-1, 1)
J = 1/float(m) * np.sum(-_y * np.log(hi) - (1-_y) * np.log(1-hi))
return J
def Cost_Function_Regularisation(X,Y,theta,m,alpha):
hi = Hypothesis(theta,X)
_y = Y.reshape(-1, 1)
J = alpha/float(m) * X.T # (hi - _y)
return J
def Cost_Function_Regularisation(X,Y,theta,m,alpha):
hi = Hypothesis(theta,X)
_y = Y.reshape(-1, 1)
J = alpha/float(m) * X.T # (hi - _y)
return J
def Gradient_Descent(X,Y,theta,m,alpha):
new_theta = theta - Cost_Function_Regularisation(X,Y,theta,m,alpha)
return new_theta
def Accuracy(theta):
correct = 0
length = len(X_test)
prediction = (Hypothesis(theta, X_test) > 0.5)
_y = Y_test.reshape(-1, 1)
correct = prediction == _y
my_accuracy = (np.sum(correct) / length)*100
print ('LR Accuracy CV: ', my_accuracy, "%")
def Logistic_Regression(X,Y,alpha,theta,num_iters):
m = len(Y)
for x in range(num_iters):
new_theta = Gradient_Descent(X,Y,theta,m,alpha)
theta = new_theta
if x % 100 == 0:
print #('theta: ', theta)
print #('cost: ', Cost_Function(X,Y,theta,m))
Accuracy(theta)
ep = .012
initial_theta = np.random.rand(X_train.shape[1],1) * 2 * ep - ep
alpha = 0.5
iterations = 10000
Logistic_Regression(X_train1,Y_train1,alpha,initial_theta,iterations)
Logistic_Regression(X_train2,Y_train2,alpha,initial_theta,iterations)
Logistic_Regression(X_train3,Y_train3,alpha,initial_theta,iterations)
Logistic_Regression(X_train4,Y_train4,alpha,initial_theta,iterations)
Logistic_Regression(X_train5,Y_train5,alpha,initial_theta,iterations
get_n_splits returns the "number of splits" you configured for skf.
Look at the documentation here for an example : http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedKFold.html