I try to do the basic ML. So here is my class of binary classificator perceptron.
class perceptron():
def __init__(self, x, y, threshold=0.5, learning_rate=0.1, max_epochs=10):
self.threshold = threshold
self.learning_rate = learning_rate
self.x = x
self.y = y
self.max_epochs = max_epochs
def initialize(self):
self.weights = np.random.rand(len(self.x[0]))
def train(self):
epoch = 0
while True:
error_count = 0
epoch += 1
for (x,y) in zip(self.x, self.y):
error_count += self.train_observation(x, y, error_count)
print('Epoch: {0} Error count: {1}'.format(epoch, error_count))
if error_count == 0:
print('Training successful')
break
if epoch >= self.max_epochs:
print('Reached max epochs')
break
def train_observation(self, x, y, error_count):
result = np.dot(x, self.weights) > self.threshold
error = y - result
if error != 0:
error_count += 1
for index, value in enumerate(x):
self.weights[index] += self.learning_rate * error * value
return error_count
def predict(self, x):
return int(np.dot(x, self.weights) > self.threshold)
I want to classify, if a sum of list values >=0 (means 1) or not(means 0)
so I do 50 arrays len 10, each has random int value [-3, 3]:
def sum01(x):
if sum(x) >= 0:
return 1
else:
return 0
x = np.random.randint(low=-3, high=3, size=(50,10))
y = [sum01(z) for z in a]
Then I initialize and train:
p = perceptron(x, y)
p.initialize()
p.train()
Then I check and a lot of predictions are not correct, what am I doing wrong?
predics = [(p.predict(i), sumab(i)) for i in np.random.randint(low=-3, high=3, size=(10, 10))]
print(predics)
Rerunning your code with small bug fixes, I see the loss reducing to 0 and correct outputs -
p = perceptron(x, y)
p.initialize()
p.train()
Epoch: 1 Error count: 196608
Epoch: 2 Error count: 38654836736
Epoch: 3 Error count: 268437504
Epoch: 4 Error count: 0
Training successful
predics = [(p.predict(i), sum01(i)) for i in np.random.randint(low=-3, high=3, size=(10, 10))]
print(predics)
[(1, 1), (0, 0), (0, 0), (0, 0), (1, 1), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0)]
SOLUTION
There are a few quick changes needed in your code -
While defining x and y:
x = np.random.randint(low=-3, high=3, size=(50,10))
y = [sum01(z) for z in x] #CHANGE THIS TO x INSTEAD OF a
While getting predictions:
#CHANGE sumab TO sum01
predics = [(p.predict(i), sum01(i)) for i in np.random.randint(low=-3, high=3, size=(10, 10))]
It should work then. Your complete code becomes -
class perceptron():
def __init__(self, x, y, threshold=0.5, learning_rate=0.1, max_epochs=10):
self.threshold = threshold
self.learning_rate = learning_rate
self.x = x
self.y = y
self.max_epochs = max_epochs
def initialize(self):
self.weights = np.random.rand(len(self.x[0]))
def train(self):
epoch = 0
while True:
error_count = 0
epoch += 1
for (x,y) in zip(self.x, self.y):
error_count += self.train_observation(x, y, error_count)
print('Epoch: {0} Error count: {1}'.format(epoch, error_count))
if error_count == 0:
print('Training successful')
break
if epoch >= self.max_epochs:
print('Reached max epochs')
break
def train_observation(self, x, y, error_count):
result = np.dot(x, self.weights) > self.threshold
error = y - result
if error != 0:
error_count += 1
for index, value in enumerate(x):
self.weights[index] += self.learning_rate * error * value
return error_count
def predict(self, x):
return int(np.dot(x, self.weights) > self.threshold)
def sum01(x):
if sum(x) >= 0:
return 1
else:
return 0
x = np.random.randint(low=-3, high=3, size=(50,10))
y = [sum01(z) for z in x]
p = perceptron(x, y)
p.initialize()
p.train()
predics = [(p.predict(i), sum01(i)) for i in np.random.randint(low=-3, high=3, size=(10, 10))]
print(predics)
Related
The error is saying this is not the case - your InputLayer object does not have an attribute Input but I don't know how to fix it thank you for your helps
from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
import random
class MultiLayerPerceptron(BaseEstimator, ClassifierMixin):
def __init__(self, params=None):
if (params == None):
self.inputLayer = 14 # Input Layer
self.hiddenLayer = 100 # Hidden Layer
self.outputLayer = 2 # Outpuy Layer
self.learningRate = 0.005 # Learning rate
self.max_epochs = 600 # Epochs
self.iasHiddenValue = -1 # Bias HiddenLayer
self.BiasOutputValue = -1 # Bias OutputLayer
self.activation = self.ativacao['sigmoid'] # Activation function
self.deriv = self.derivada['sigmoid']
else:
self.inputLayer = params['InputLayer']
self.hiddenLayer = params['HiddenLayer']
self.OutputLayer = params['OutputLayer']
self.learningRate = params['LearningRate']
self.max_epochs = params['Epocas']
self.BiasHiddenValue = params['BiasHiddenValue']
self.BiasOutputValue = params['BiasOutputValue']
self.activation = self.ativacao[params['ActivationFunction']]
self.deriv = self.derivada[params['ActivationFunction']]
'Starting Bias and Weights'
self.WEIGHT_hidden = self.starting_weights(self.hiddenLayer, self.inputLayer)
self.WEIGHT_output = self.starting_weights(self.OutputLayer, self.hiddenLayer)
self.BIAS_hidden = np.array([self.BiasHiddenValue for i in range(self.hiddenLayer)])
self.BIAS_output = np.array([self.BiasOutputValue for i in range(self.OutputLayer)])
self.classes_number = 2
pass
def starting_weights(self, x, y):
return [[2 * random.random() - 1 for i in range(x)] for j in range(y)]
ativacao = {
'sigmoid': (lambda x: 1/(1 + np.exp(-x))),
'tanh': (lambda x: np.tanh(x)),
'Relu': (lambda x: x*(x > 0)),
}
derivada = {
'sigmoid': (lambda x: x*(1-x)),
'tanh': (lambda x: 1-x**2),
'Relu': (lambda x: 1 * (x>0))
}
def Backpropagation_Algorithm(self, x):
DELTA_output = []
'Stage 1 - Error: OutputLayer'
ERROR_output = self.output - self.OUTPUT_L2
DELTA_output = ((-1)*(ERROR_output) * self.deriv(self.OUTPUT_L2))
arrayStore = []
'Stage 2 - Update weights OutputLayer and HiddenLayer'
for i in range(self.hiddenLayer):
for j in range(self.OutputLayer):
self.WEIGHT_output[i][j] -= (self.learningRate * (DELTA_output[j] * self.OUTPUT_L1[i]))
self.BIAS_output[j] -= (self.learningRate * DELTA_output[j])
'Stage 3 - Error: HiddenLayer'
delta_hidden = np.matmul(self.WEIGHT_output, DELTA_output)* self.deriv(self.OUTPUT_L1)
'Stage 4 - Update weights HiddenLayer and InputLayer(x)'
for i in range(self.OutputLayer):
for j in range(self.hiddenLayer):
self.WEIGHT_hidden[i][j] -= (self.learningRate * (delta_hidden[j] * x[i]))
self.BIAS_hidden[j] -= (self.learningRate * delta_hidden[j])
def show_err_graphic(self,v_erro,v_epoca):
plt.figure(figsize=(4,14))
plt.plot(v_epoca, v_erro, "m-",color="b", marker=11)
plt.xlabel("Number of Epochs")
plt.ylabel("Squared error (MSE) ");
plt.title("Error Minimization")
plt.show()
def predict(self, X, y):
'Returns the predictions for every element of X'
my_predictions = []
'Forward Propagation'
forward = np.matmul(X,self.WEIGHT_hidden) + self.BIAS_hidden
forward = np.matmul(forward, self.WEIGHT_output) + self.BIAS_output
for i in forward:
my_predictions.append(max(enumerate(i), key=lambda x:x[1])[0])
array_score = []
for i in range(len(my_predictions)):
if my_predictions[i] == 0:
array_score.append([i, 'No', my_predictions[i], y[i]])
elif my_predictions[i] == 1:
array_score.append([i, 'Yes', my_predictions[i], y[i]])
dataframe = pd.DataFrame(array_score, columns=['_id', 'class', 'output', 'hoped_output'])
return my_predictions, dataframe
def fit(self, X, y):
count_epoch = 1
total_error = 0
n = len(X);
epoch_array = []
error_array = []
W0 = []
W1 = []
while(count_epoch <= self.max_epochs):
for idx,inputs in enumerate(X):
self.output = np.zeros(self.classes_number)
'Stage 1 - (Forward Propagation)'
self.OUTPUT_L1 = self.activation((np.dot(self.InputLayer, self.WEIGHT_hidden) + self.BIAS_hidden.T))
self.OUTPUT_L2 = self.activation((np.dot(self.OUTPUT_L1, self.WEIGHT_output) + self.BIAS_output.T))
'Stage 2 - One-Hot-Encoding'
if(y[idx] == 0):
self.output = np.array([1,0,0]) #Class1 {1,0,0}
elif(y[idx] == 1):
self.output = np.array([0,1,0]) #Class2 {0,1,0}
square_error = 0
for i in range(self.OutputLayer):
erro = (self.output[i] - self.OUTPUT_L2[i])**2
square_error = (square_error + (0.05 * erro))
total_error = total_error + square_error
'Backpropagation : Update Weights'
self.Backpropagation_Algorithm(inputs)
total_error = (total_error / n)
if((count_epoch % 50 == 0)or(count_epoch == 1)):
print("Epoch ", count_epoch, "- Total Error: ",total_error)
error_array.append(total_error)
epoch_array.append(count_epoch)
W0.append(self.WEIGHT_hidden)
W1.append(self.WEIGHT_output)
count_epoch += 1
self.show_err_graphic(error_array,epoch_array)
plt.plot(W0[0])
plt.title('Weight Hidden update during training')
plt.legend(['neuron1', 'neuron2', 'neuron3', 'neuron4', 'neuron5'])
plt.ylabel('Value Weight')
plt.show()
plt.plot(W1[0])
plt.title('Weight Output update during training')
plt.legend(['neuron1', 'neuron2', 'neuron3'])
plt.ylabel('Value Weight')
plt.show()
return self
dictionary = {'InputLayer':14, 'HiddenLayer':100, 'OutputLayer':2,
'Epocas':700, 'LearningRate':0.005,'BiasHiddenValue':-1,
'BiasOutputValue':-1, 'ActivationFunction':'sigmoid'}
Perceptron = MultiLayerPerceptron(dictionary)
Perceptron.fit(train_X,train_y)
AttributeError: 'MultiLayerPerceptron' object has no attribute 'InputLayer'
I want after splitting the data to work on MLP Classifier but I found this error "AttributeError: 'MultiLayerPerceptron' object has no attribute 'InputLayer'
I just started studying NN. In the class, the teacher gave us a code to experiment with, in google colab. I tried changing the number of nodes in each hidden layer and the number of hidden layers, and print out test accuracy and train accuracy. I've tried many configurations but the accuracies did not change. Like, it will stay exactly at 0.7857142857142857 (this is the actual number) unless I reshuffle the samples.
The teacher said that accuracy can't be changed that easily. But I don't believe her. I think there is something wrong with the code because there are too many similar digits.
Here are the codes I think are necessary to post.
Model
class Model():
def __init__(self):
self.layers = []
self.L = 0
self.W = {}
self.b = {}
self.A = {}
self.Z = {}
self.dA = {}
self.dZ = {}
self.dW = {}
self.db = {}
self.cost = 0.
self.m = 0
self.lam = 0
self.cost_history = []
self.acc_history = []
self.alpha_history = []
self.alpha = 0.
self.iterations = 0
def add_layers(self, list_of_layers):
self.layers = list_of_layers
self.L = len(self.layers) - 1 # Number of layers excluding the input feature layer
def init_params(self):
for i in range(1, self.L + 1):
self.W[str(i)] = np.random.randn(self.layers[i], self.layers[i - 1]) * np.sqrt(2. / self.layers[i - 1])
self.b[str(i)] = np.zeros((self.layers[i], 1))
def forward_prop(self, X):
self.A['0'] = X
for i in range(1, self.L + 1):
self.Z[str(i)] = np.dot(self.W[str(i)], self.A[str(i - 1)]) + self.b[str(i)]
if i == self.L:
# Output layer, Sigmoid activation
self.A[str(i)] = sigmoid(self.Z[str(i)])
else:
# Hidden layer, Relu activataion
self.A[str(i)] = relu(self.Z[str(i)])
def compute_cost(self, Y):
self.cost = -1 * np.sum(np.multiply(Y, np.log(self.A[str(self.L)])) +
np.multiply(1 - Y, np.log(1 - self.A[str(self.L)]))) / self.m
if self.lam != 0:
reg = (self.lam / (2 * self.m))
for i in range(1, self.L + 1):
reg += np.sum(np.dot(self.W[str(i)], self.W[str(i)].T))
self.cost += reg
self.cost_history.append(self.cost)
def backward_prop(self, Y):
# We need dA[str(L)] to start the backward prop computation
self.dA[str(self.L)] = -1 * (np.divide(Y, self.A[str(self.L)]) - np.divide(1 - Y, 1 - self.A[str(self.L)]))
self.dZ[str(self.L)] = np.multiply(self.dA[str(self.L)], sigmoid_derivative(self.Z[str(self.L)]))
self.dW[str(self.L)] = np.dot(self.dZ[str(self.L)], self.A[str(self.L - 1)].T) / self.m + (self.lam/self.m) * self.W[str(self.L)]
self.db[str(self.L)] = np.sum(self.dZ[str(self.L)], axis = 1, keepdims = True) / self.m
self.dA[str(self.L - 1)] = np.dot(self.W[str(self.L)].T, self.dZ[str(self.L)])
for i in reversed(range(1, self.L)):
self.dZ[str(i)] = np.multiply(self.dA[str(i)], relu_derivative(self.Z[str(i)]))
self.dW[str(i)] = np.dot(self.dZ[str(i)], self.A[str(i - 1)].T) / self.m + (self.lam/self.m) * self.W[str(i)]
self.db[str(i)] = np.sum(self.dZ[str(i)], axis = 1, keepdims = True) / self.m
self.dA[str(i - 1)] = np.dot(self.W[str(i)].T, self.dZ[str(i)])
def update_params(self):
for i in range(1, self.L + 1):
self.W[str(i)] = self.W[str(i)] - self.alpha * self.dW[str(i)]
self.b[str(i)] = self.b[str(i)] - self.alpha * self.db[str(i)]
def train(self, X, Y, iterations = 10,
alpha = 0.001, decay = True, decay_iter = 5, decay_rate = 0.9, stop_decay_counter = 100,
verbose = True, lam = 0):
self.m = Y.shape[1]
self.alpha = alpha
self.iterations = iterations
self.lam = lam
# initialize parameters
self.init_params()
for i in range(iterations):
# forward prop
self.forward_prop(X)
# compute cost
self.compute_cost(Y)
# backward prop
self.backward_prop(Y)
# update params
self.update_params()
# evaluate
self.acc_history.append(self.evaluate(X, Y, in_training = True))
# save alpha
self.alpha_history.append(self.alpha)
# learning rate decay
if decay and stop_decay_counter > 0 and i % decay_iter == 0:
self.alpha = decay_rate * self.alpha
stop_decay_counter -= 1
# display cost per iteration
if verbose:
print('Cost after {} iterations: {}'.format(i, self.cost))
def predict(self, X, in_training = False):
if in_training == False:
self.forward_prop(X)
preds = self.A[str(self.L)] >= 0.5
preds = np.squeeze(preds)
return preds
def evaluate(self, X, Y, in_training = False):
examples = X.shape[1]
pred = self.predict(X, in_training = in_training)
pred = pred.reshape(1, examples)
diff = np.sum(abs(pred - Y))
acc = (examples - np.sum(diff)) / examples
return acc
Dataset
import pandas as pd
data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/haberman/haberman.data', sep = ',', header = None)
data.head()
X_train = data.iloc[:,:-1]
Y_train = data.iloc[:, -1]
X_train = np.array(X_train)
Y_train = np.array(Y_train)
Y_train = Y_train.reshape(Y_train.shape[0], 1)
mean = np.mean(X_train, axis = 0)
variance = np.var(X_train, axis = 0)
X_train = np.divide((X_train - mean), variance)
Y_train = Y_train - 1
# Changing label 1 to 0 and label 2 to 1
Split & Shuffle data
# Split the data into test and train sets
from sklearn.utils import shuffle
X_train, Y_train = shuffle(X_train, Y_train)
X_test = X_train[250:,:]
Y_test = Y_train[250:,:]
X_train_ = X_train[:250,:]
Y_train_ = Y_train[:250,:]
X_train_ = X_train_.reshape(3, 250)
Y_train_ = Y_train_.reshape(1, 250)
X_test = X_test.reshape(3, 56)
Y_test = Y_test.reshape(1, 56)
Creating a Model
m = Model()
m.add_layers([3, 16, 16, 1])
m.train(X_train_, Y_train_, iterations = 5000, alpha = 0.9
, decay_iter = 10, decay_rate = 0.98, stop_decay_counter = 100
, verbose = False, lam = 2)
Evaluate
print('Test set acc = ', m.evaluate(X_test, Y_test))
print('Train set acc = ', m.evaluate(X_train_, Y_train_))
What I did in the experiment.
Shuffle, train several models (different in number of nodes and hidden layers), and evaluate
# Model examples
m.add_layers([3, 16, 16, 1, 50, 3, 25, 7, 99, 1])
m.add_layers([3, 1, 55, 19, 2, 2, 1, 1, 2, 75, 80, 3, 12, 1])
Reshuffle, evaluate
Result: Every model has the exact same train and test accuracy unless the data is reshuffled.
The teacher told me that it's just my thought, and it's not true.
Could you please tell me what is wrong to get this result?
I want to implement a multi-layer perceptron.
I found some code on GitHub that classifies MNIST quite well (96%). However, for some reason, it does not cope with the XOR task.
I want to understand why.
Here is the code:
perceptron.py
import random
import numpy as np
class Perceptron:
def __init__(self, *, layer_sizes, activation_functions, cost_function_deriv):
self.layer_sizes = layer_sizes
if len(self.layer_sizes) - 1 != len(activation_functions):
raise ValueError("...")
self.activation_functions = activation_functions
self.cost_function_deriv = cost_function_deriv
self.biases = [np.random.randn(y, 1) for y in layer_sizes[1:]]
self.weights = [np.random.randn(y, x) for x, y in zip(layer_sizes[:-1], layer_sizes[1:])]
def train(self, training_data, test_data, epochs, mini_batch_size, lr):
test_data_len = len(test_data)
for epoch in range(epochs):
random.shuffle(training_data)
mini_batches = [training_data[x: x + mini_batch_size]
for x in range(0, len(training_data), mini_batch_size)]
for mini_batch in mini_batches:
mb_len = len(mini_batch)
gradient_weights = [np.zeros(w.shape) for w in self.weights]
gradient_biases = [np.zeros(b.shape) for b in self.biases]
for x, y in mini_batch:
delta_gradient_biases, delta_gradient_weights = self.backpropagation(np.array(x), y)
gradient_weights = [grad + delta for grad, delta in zip(gradient_weights, delta_gradient_weights)]
gradient_biases = [grad + delta for grad, delta in zip(gradient_biases, delta_gradient_biases)]
self.weights = [w - (lr / mb_len) * grad for w, grad in zip(self.weights, gradient_weights)]
self.biases = [b - (lr / mb_len) * grad for b, grad in zip(self.biases, gradient_biases)]
correct_answers = self.how_many_correct_answers(test_data)
print(f"Epoch number {epoch}: {correct_answers}/{test_data_len} correct answers")
def backpropagation(self, x, y):
gradient_b = [np.zeros(b.shape) for b in self.biases]
gradient_w = [np.zeros(w.shape) for w in self.weights]
activations = [x]
prev_activation = x
for i, (b, w) in enumerate(zip(self.biases, self.weights)):
current_activation = self.activation_functions[i](np.dot(w, prev_activation) + b)
activations.append(current_activation)
prev_activation = current_activation
delta = self.cost_function_deriv(activations[-1], y) * self.activation_functions[-1].deriv(activations[-1])
gradient_b[-1] = delta
gradient_w[-1] = np.dot(delta, activations[-2].T)
for i in range(2, len(self.layer_sizes)):
z = activations[-i]
act_der = self.activation_functions[-i + 1].deriv(z)
delta = np.dot(self.weights[-i + 1].T, delta) * act_der
gradient_b[-i] = delta
gradient_w[-i] = np.dot(delta, activations[-i - 1].T)
# Normal indexing variant:
# for i in range(len(self.layers) - 1, 0, -1):
# z = activations[i]
# act_der = self.activation_functions[i].deriv(z)
# delta = np.dot(self.weights[i].T, delta) * act_der
# gradient_b[i - 1] = delta
# gradient_w[i - 1] = np.dot(delta, activations[i - 1].T)
return gradient_b, gradient_w
def feedforward(self, a):
for i, (b, w) in enumerate(zip(self.biases, self.weights)):
a = self.activation_functions[i](np.dot(w, a) + b)
return a
def how_many_correct_answers(self, test_data):
k = 0
for x, y in test_data:
y_predict = np.argmax(self.feedforward(x))
print(y_predict, y)
k += int(y_predict == y)
return k
main.py
from copy import deepcopy
import numpy as np
from perceptron import Perceptron
class Sigmoid:
out_min_max = [0, 1]
def __call__(self, x):
return 1. / (1. + np.exp(-x))
def deriv(self, y):
# t = self(x)
# return t * (1. - t)
return y * (1. - y)
def cost_function_derivative(y_predict, y_true_label):
label_vector = np.zeros(y_predict.shape)
label_vector[y_true_label] = 1.0
return y_predict - label_vector
def main():
training_data = np.asarray([[[[0], [0]], 0],
[[[0], [1]], 1],
[[[1], [0]], 1],
[[[1], [1]], 0]])
layer_sizes = [2, 8, 2]
model = Perceptron(layer_sizes=layer_sizes,
activation_functions=[Sigmoid(), Sigmoid()],
cost_function_deriv=cost_function_derivative)
model.train(deepcopy(training_data),
deepcopy(training_data),
epochs=10000,
mini_batch_size=4,
lr=0.01)
if __name__ == '__main__':
main()
The final output in format 'y_predict y_true' (after each epoch):
0 0
0 1
0 1
0 0
If remove random.shuffle(training_data) then:
1 0
0 1
1 1
0 0
But not 0 1 1 0
I figured it out. It requires the following.
mini_batch_size=1
# random.shuffle(training_data) -- comment
epochs=10000
And it's better to do this:
lr=0.1
The result in most cases is obtained after ~1000 epochs:
0 0
1 1
1 1
0 0
I have tried constructing a neural network for the XOR-function. The network has 1 hidden layer with 2 nodes in addition to a bias node. The Sigmoid function is used as activation function. I have tested the network for multiple learning rates. The result is always the same: the network give the value 0.5 for all inputs. There are some other threads where people report the same problem, but as far as I can see, other mistakes were done in those cases.
The following code shows my network and the results.
import numpy as np
import matplotlib.pyplot as plt
class NN:
""" XOR function test. 1 hidden layer with 2 hidden nodes in addition to bias node."""
def __init__(self, nodeNumbers, learningRate, targetMatrix, inputMatrix, errorTolerance, \
maxIterations):
self.nodeNumbers, self.learningRate, self.targetMatrix, \
self.inputMatrix, self.errorTolerance, self.maxIterations = \
nodeNumbers, learningRate, targetMatrix, inputMatrix, errorTolerance, \
maxIterations
self.numberOfInputs = np.shape(self.inputMatrix)[1]
self.weightMatrices = []
for nodeNumber in range(len(nodeNumbers[1:])):
self.weightMatrices.append(np.random.random_sample((nodeNumbers[nodeNumber+1], \
nodeNumbers[nodeNumber]+1)).T - .5)
def activationFunction(self, x):
return 1./(1+np.exp(-x))
def derivative(self, weightedInputs):
return self.activationFunction(weightedInputs)*(1 - self.activationFunction(weightedInputs))
def run(self):
self.iterationNumber = 0
numberOfAdjustmentsDuringIteration = 1
while (self.iterationNumber < self.maxIterations and numberOfAdjustmentsDuringIteration != 0):
self.iterationNumber += 1
numberOfAdjustmentsDuringIteration = 0
for inputNumber in range(self.numberOfInputs):
self.inputs = self.inputMatrix[:,inputNumber]
self.targets = self.targetMatrix[inputNumber]
self.forward()
self.calculateError()
if abs(self.error2) > self.errorTolerance:
numberOfAdjustmentsDuringIteration +=1
self.backward()
print('Iterations: ', self.iterationNumber, '|Error|: ', self.error2)
def forward(self):
self.u1 = self.weightMatrices[0].T # self.inputMatrix.T[0,:]
z1 = self.activationFunction(self.u1)
self.z1 = np.concatenate([[-1], z1])
self.u2 = self.weightMatrices[1].T # self.z1
self.z2 = self.activationFunction(self.u2)
def calculateError(self):
self.error2 = (self.targets - self.z2)**2
def backward(self, inputs=False, targets=False):
self.delta2 = (self.z2 - self.targets)*self.derivative(self.u2)
delta11 = self.derivative(self.u1[0])*self.delta2* self.weightMatrices[1][0]
delta12 = self.derivative(self.u1[1])*self.delta2* self.weightMatrices[1][1]
self.delta1 = np.concatenate([delta11, delta12])
self.weightMatrices[1][0,0] -= self.learningRate*self.delta2*self.z1[0]
self.weightMatrices[1][1,0] -= self.learningRate*self.delta2*self.z1[1]
self.weightMatrices[1][2,0] -= self.learningRate*self.delta2*self.z1[2]
self.weightMatrices[0][0,0] -= self.learningRate*self.delta1[0]*self.inputs[0]
self.weightMatrices[0][1,0] -= self.learningRate*self.delta1[0]*self.inputs[1]
self.weightMatrices[0][0,1] -= self.learningRate*self.delta1[1]*self.inputs[0]
self.weightMatrices[0][1,1] -= self.learningRate*self.delta1[1]*self.inputs[1]
def predict(self, newInput):
self.inputs = newInput
self.forward()
print('Input: ', newInput, 'Predicted output: ', self.z2)
nodeNumbers = [2,2,1]
activationFunction = activationFunction
derivative = differentiateActivationFunction
learningRate = 0.3
targetMatrix = np.array(((0), (1), (1), (0))).T
inputMatrix = np.array(((-1,0, 0), (-1, 0, 1), (-1,1, 0), (-1,1,1))).T
errorTolerance = 1e-3
maxIterations= 500
nn=NN(nodeNumbers, learningRate, targetMatrix, inputMatrix, errorTolerance, maxIterations)
nn.run()
The results from the above
Iterations: 500 |Error|: [0.26341771]
Making predictions
inputs = np.array(((-1,0, 0), (-1, 0, 1), (-1,1, 0), (-1,1,1))).T
for inp in inputs:
nn.predict(inp)
The results
Input: [-1 0 0] Predicted output: [0.49987204]
Input: [-1 0 1] Predicted output: [0.49987204]
Input: [-1 1 0] Predicted output: [0.49987204]
Input: [-1 1 1] Predicted output: [0.49987204]
Does anybody spot any errors?
I think I've split my training data in 5 kold, is there a way for me to label/identify each of the 5 splits so I can then send each into my algorithm to calculate their own accuracies?
from sklearn.model_selection import KFold
kf = KFold(n_splits=5)
splits=kf.get_n_splits(X_train)
print(splits)
Separately, I have also tried splitting my data to then run in my logistic regression but this outputs nan % accuracy:
X_train1 = X[0:84]
Y_train1 = Y[0:84]
X_train2 = X[85:170]
Y_train2 = Y[85:170]
X_train3 = X[171:255]
Y_train3 = Y[171:255]
X_train4 = X[256:340]
Y_train4 = Y[256:340]
X_train5 = X[341:426]
Y_train5 = Y[341:426]
def Sigmoid(z):
return 1/(1 + np.exp(-z))
def Hypothesis(theta, x):
return Sigmoid(x # theta)
def Cost_Function(X,Y,theta,m):
hi = Hypothesis(theta, x)
_y = Y.reshape(-1, 1)
J = 1/float(m) * np.sum(-_y * np.log(hi) - (1-_y) * np.log(1-hi))
return J
def Cost_Function_Regularisation(X,Y,theta,m,alpha):
hi = Hypothesis(theta,X)
_y = Y.reshape(-1, 1)
J = alpha/float(m) * X.T # (hi - _y)
return J
def Cost_Function_Regularisation(X,Y,theta,m,alpha):
hi = Hypothesis(theta,X)
_y = Y.reshape(-1, 1)
J = alpha/float(m) * X.T # (hi - _y)
return J
def Gradient_Descent(X,Y,theta,m,alpha):
new_theta = theta - Cost_Function_Regularisation(X,Y,theta,m,alpha)
return new_theta
def Accuracy(theta):
correct = 0
length = len(X_test)
prediction = (Hypothesis(theta, X_test) > 0.5)
_y = Y_test.reshape(-1, 1)
correct = prediction == _y
my_accuracy = (np.sum(correct) / length)*100
print ('LR Accuracy CV: ', my_accuracy, "%")
def Logistic_Regression(X,Y,alpha,theta,num_iters):
m = len(Y)
for x in range(num_iters):
new_theta = Gradient_Descent(X,Y,theta,m,alpha)
theta = new_theta
if x % 100 == 0:
print #('theta: ', theta)
print #('cost: ', Cost_Function(X,Y,theta,m))
Accuracy(theta)
ep = .012
initial_theta = np.random.rand(X_train.shape[1],1) * 2 * ep - ep
alpha = 0.5
iterations = 10000
Logistic_Regression(X_train1,Y_train1,alpha,initial_theta,iterations)
Logistic_Regression(X_train2,Y_train2,alpha,initial_theta,iterations)
Logistic_Regression(X_train3,Y_train3,alpha,initial_theta,iterations)
Logistic_Regression(X_train4,Y_train4,alpha,initial_theta,iterations)
Logistic_Regression(X_train5,Y_train5,alpha,initial_theta,iterations
get_n_splits returns the "number of splits" you configured for skf.
Look at the documentation here for an example : http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedKFold.html