I have a neural network (NN) which works perfectly when applied to a single data set. However if I want to run the NN on, for example, one set of data and then create a new instance of the NN to run on different set of data (or even the same set again) then the new instance will produce completely incorrect predictions.
For example, training on an XOR pattern:
test=[[0,0],[0,1],[1,0],[1,1]]
data = [[[0,0], [0]],[[0,1], [0]],[[1,0], [0]],[[1,1], [1]]]
n = NN(2, 3, 1) # Create a neural network with 2 input, 3 hidden and 1 output nodes
n.train(data,500,0.5,0) # Train it for 500 iterations with learning rate 0.5 and momentum 0
prediction = np.zeros((len(test)))
for row in range(len(test)):
prediction[row] = n.runNetwork(test[row])[0]
print prediction
#
# Now do the same thing again but with a new instance and new version of the data.
#
test2=[[0,0],[0,1],[1,0],[1,1]]
data2 = [[[0,0], [0]],[[0,1], [0]],[[1,0], [0]],[[1,1], [1]]]
p = NN(2, 3, 1)
p.train(data2,500,0.5,0)
prediction2 = np.zeros((len(test2)))
for row in range(len(test2)):
prediction2[row] = p.runNetwork(test2[row])[0]
print prediction2
Will output:
[-0.01 -0. -0.06 0.97]
[ 0. 0. 1. 1.]
Notice that the first prediction is quite good where as the second is completely wrong, and I can't see anything wrong with the class:
import math
import random
import itertools
import numpy as np
random.seed(0)
def rand(a, b):
return (b-a)*random.random() + a
def sigmoid(x):
return math.tanh(x)
def dsigmoid(y):
return 1.0 - y**2
class NN:
def __init__(self, ni, nh, no):
# number of input, hidden, and output nodes
self.ni = ni + 1 # +1 for bias node
self.nh = nh + 1
self.no = no
# activations for nodes
self.ai = [1.0]*self.ni
self.ah = [1.0]*self.nh
self.ao = [1.0]*self.no
# create weights (rows=number of features, columns=number of processing nodes)
self.wi = np.zeros((self.ni, self.nh))
self.wo = np.zeros((self.nh, self.no))
# set them to random vaules
for i in range(self.ni):
for j in range(self.nh):
self.wi[i][j] = rand(-5, 5)
for j in range(self.nh):
for k in range(self.no):
self.wo[j][k] = rand(-5, 5)
# last change in weights for momentum
self.ci = np.zeros((self.ni, self.nh))
self.co = np.zeros((self.nh, self.no))
def runNetwork(self, inputs):
if len(inputs) != self.ni-1:
raise ValueError('wrong number of inputs')
# input activations
for i in range(self.ni-1):
#self.ai[i] = sigmoid(inputs[i])
self.ai[i] = inputs[i]
# hidden activations
for j in range(self.nh-1):
sum = 0.0
for i in range(self.ni):
sum = sum + self.ai[i] * self.wi[i][j]
self.ah[j] = sigmoid(sum)
# output activations
for k in range(self.no):
sum = 0.0
for j in range(self.nh):
sum = sum + self.ah[j] * self.wo[j][k]
self.ao[k] = sigmoid(sum)
ao_simplified = [round(a,2) for a in self.ao[:]]
return ao_simplified
def backPropagate(self, targets, N, M):
if len(targets) != self.no:
raise ValueError('wrong number of target values')
# calculate error terms for output
output_deltas = [0.0] * self.no
for k in range(self.no):
error = targets[k]-self.ao[k]
output_deltas[k] = dsigmoid(self.ao[k]) * error
# calculate error terms for hidden
hidden_deltas = [0.0] * self.nh
for j in range(self.nh):
error = 0.0
for k in range(self.no):
error = error + output_deltas[k]*self.wo[j][k]
hidden_deltas[j] = dsigmoid(self.ah[j]) * error
# update output weights
for j in range(self.nh):
for k in range(self.no):
change = output_deltas[k]*self.ah[j]
self.wo[j][k] = self.wo[j][k] + N*change + M*self.co[j][k]
self.co[j][k] = change
#print N*change, M*self.co[j][k]
# update input weights
for i in range(self.ni):
for j in range(self.nh):
change = hidden_deltas[j]*self.ai[i]
self.wi[i][j] = self.wi[i][j] + N*change + M*self.ci[i][j]
self.ci[i][j] = change
# calculate error
error = 0.0
for k in range(len(targets)):
error = error + 0.5*(targets[k]-self.ao[k])**2
return error
def train(self, patterns, iterations=1000, N=0.5, M=0.1):
# N: learning rate
# M: momentum factor
for i in range(iterations):
error = 0.0
for p in patterns:
inputs = p[0]
targets = p[1]
self.runNetwork(inputs)
error = error + self.backPropagate(targets, N, M)
if i % 100 == 0: # Prints error every 100 iterations
print('error %-.5f' % error)
Any help would be greatly appreciated!
Your error -- if there is one -- doesn't have anything to do with the class. As #Daniel Roseman suggested, the natural guess would be that it was a class/instance variable issue, or maybe a mutable default argument, or multiplication of a list, or something, the most common causes of mysterious behaviour.
Here, though, you're getting different results only because you're using different random numbers each time. If you random.seed(0) before you call NN(2,3,1), you get exactly the same results:
error 2.68110
error 0.44049
error 0.39256
error 0.26315
error 0.00584
[ 0.01 0.01 0.07 0.97]
error 2.68110
error 0.44049
error 0.39256
error 0.26315
error 0.00584
[ 0.01 0.01 0.07 0.97]
I can't judge whether your algorithm is right. Incidentally, I think your rand function is reinventing random.uniform.
Related
I was trying to make a neural network with 40 middle neurons.I want to initial Neuron class 40 times by loop and append each iteration to a list and then pass it to final node.but then I've got that when I pass a list like [neuron1,neuron2, ...] it works without any problem but when I pass a list that I've appended it in loop it throws RecursionError: maximum recursion depth exceeded. here is my network initial code:
W1 = Weight('w1', random_weight())
W2 = Weight('w2', random_weight())
neurons = [None] * 2
A = Neuron('A', [i0, i1], [ W1, W2])
neurons[0] = A
B = Neuron('B', [i0, i1], [W1, W2])
neurons[1] = B
out = Neuron('out', [A,B], [W1, W2])
this one works good. but below code has problem !
W1 = Weight('w1', random_weight())
W2 = Weight('w2', random_weight())
neurons = [None] * 2
A = Neuron('A', [i0, i1], [ W1, W2])
neurons[0] = A
B = Neuron('B', [i0, i1], [W1, W2])
neurons[1] = B
out = Neuron('out', neurons, [W1, W2])
here is my Neuron class implementation.
class Neuron(DifferentiableElement):
def __init__(self, name, inputs, input_weights, use_cache=True):
assert len(inputs)==len(input_weights)
for i in range(len(inputs)):
assert isinstance(inputs[i],(Neuron,Input))
assert isinstance(input_weights[i],Weight)
DifferentiableElement.__init__(self)
self.my_name = name
self.my_inputs = inputs # list of Neuron or Input instances
self.my_weights = input_weights # list of Weight instances
self.use_cache = use_cache
self.clear_cache()
self.my_descendant_weights = None
self.my_direct_weights = None
def output(self):
if self.use_cache:
if self.my_output is None:
self.my_output = self.compute_output()
return self.my_output
return self.compute_output()
def compute_output(self):
output = 0
inputs = self.get_inputs()
weights = self.get_weights()
for i in range(len(inputs)):
output += inputs[i].output() * weights[i].get_value()
output = 1 / (1 + math.exp(-1 * output))
return output
You call the output function every time you execute the compute_output function and so on. That's leading to a RecursionError: maximum recursion depth exceeded, becuase there is only a limited time a function can call another function at one direct call.
The exact spot in compute_output is:
def compute_output(self):
…
for i in range(len(inputs)):
output += inputs[i].output() * weights[i].get_value()
you call inputs[i].output() at this point
PS:
Difference between +/ += and append() is mentioned Here
Wrote a neural network in python, as a just for fun thing, want to get this working rather than use pre-existing packages which work easier/better.
I'm at this point only adjusting the bias of the output node with backpropagation. The adjustment looks something like:
bias -= (true value - output value) * (output node delta) * (learning rate)
this is done in the last line of the backprop function.
When run over a sample of data 20 times, the absolute error decreases then increases, and continues to increase indefinitely but at a decreasing rate. The error (true value - output value) is initially very negative, and increases with each successive iteration.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#getting sample data
df = pd.read_csv('City_of_Seattle_Staff_Demographics.csv')
df = df.sample(frac=0.1)
df = pd.get_dummies(df)
df = (df - df.min()) / (df.max() - df.min())
df.reset_index(inplace=True)
inputdata = np.array(df.drop(columns=['Hourly Rate', 'index'])) #index by inputs 2d array
outputdata = np.array(df[['Hourly Rate']]) #1 by index 2d array
#initialising variables
inn = len(inputdata[0]) #number of input nodes
hnn = 16 #number of hidden nodes
onn = len(outputdata[0]) #number of output nodes
inodes = np.empty((1, inn)) #value of input nodes
hi = np.empty((1, hnn)) #value of hidden nodes before logistic function is applied
oi = np.empty((1, onn)) #value of output nodes before logistic function is applied
ho = np.empty((1, hnn)) #value of hidden nodes after logistic function is applied
oo = np.empty((1, onn)) #value of output nodes after logistic function is applied
hdelta = np.empty((1, hnn)) #deltas of each node, given by delta(ho)
odelta = np.empty((1, onn)) #deltas of each node, given by delta(oo)
hbias = np.random.rand(1, hnn) #node biases
obias = np.random.rand(1, onn) #node biases
syn1 = np.random.rand(inn, hnn) #synapse layers
syn2 = np.random.rand(hnn, onn) #synapse layers
lrate = 0.01
error = 0.0
def sigmoid (x):
return 1/(1+np.exp(-x))
def delta (x):
return x*(1-x)
def forwardprop (index):
global inodes, hi, oi, ho, oo, hbias, obias, syn1, syn2
inodes = np.array([inputdata[index]])
hi = np.matmul(inodes, syn1) + hbias
ho = sigmoid(hi)
oi = np.matmul(ho, syn2) + obias
oo = sigmoid(oi)
def backprop (index):
#backprop is only trying to adjust the output node bias
global inodes, hi, oi, ho, oo, hbias, obias, syn1, syn2
oo = np.array([outputdata[index]]) - oo
odelta = delta(oo)
hdelta = delta(ho)
obias -= oo * odelta * lrate
def errorcalc ():
global onn, oo, error
for x in range(onn):
error += oo[0][x]
def fullprop (index):
forwardprop(index)
backprop(index)
errorcalc()
def fulliter (): #iterate over whole sample
global error
error = 0
for x in range(len(inputdata)):
fullprop(x)
print('error: ', error)
for x in range(20):
fulliter()
I'm expecting error to decrease in absolute value like:
-724, -267, -84, -21, 12, -10, 9, -7, ...
instead it is going something like this:
-724, -267, -84, -21, 33, 75, 114, 162, 227, 278, 316 ... 376, 378, 379, 380
I'm trying to create a network, that would help predict stock prices the following day. My input data are: open, high, low and close stock values, volume, index values, a few technical indicators and exchange rate; the output is closing price from the next day. I'm using data uploaded from Excel file.
I wrote a program, that I will paste below, but it doesn't seem to be working correctly. Network always returns 1, 0 or other constant value (between 0 - 1).
I took the following steps so far:
tried to normalise the data like so: X_norm = X/(10 ** d) where d is the smallest number for which this conditon is met: abs(X_norm) < 1. I did that for the whole set in Excel before dividing it into training and test.
shuffled the data before dividing it into training/test, so that learning examples are not from consecutive days
running the network on a smaller data set and on example data set (I generated random numbers and did a simple math using them for an output and tried running network with that)
changing amount of hidden neurons
chaninging number of iterations (up to a 1000, which was a lot for my computer considering the data set, so I didn't try any more because it would take too much time)
changing learning rate.
No matter what steps I took the outcome was always the same. I think my problem could be that I don't have a bias, but perhaps I also have other mistakes in my code that are contributing to this error.
My program:
import numpy as np
import pandas as pd
df = pd.read_excel(r"path", sheet_name="DATA", index_col=0, header=0)
df = df.to_numpy()
np.random.shuffle(df)
X_data = df[:, 0:15]
X_data = X_data.reshape(1000, 1, 15)
print(f"X_data: {X_data}")
Y_data = df[:, 15]
Y_data = Y_data.reshape(1000, 1, 1)
print(f"Y_data: {Y_data}")
X = X_data[0:801]
x_test = X_data[801:]
y = Y_data[0:801]
y_test = Y_data[801:]
print(f"X_train: {X}")
print(f"x_test: {x_test}")
print(f"Y_train: {y}")
print(f"y_test: {y_test}")
rate = 0.2
class NeuralNetwork:
def __init__(self):
self.input_neurons = 15
self.hidden1_neurons = 10
self.hidden2_neurons = 5
self.output_neuron = 1
self.input_to_hidden1_w = (np.random.random((self.input_neurons, self.hidden1_neurons))) # 14x30
self.hidden1_to_hidden2_w = (np.random.random((self.hidden1_neurons, self.hidden2_neurons))) # 30x20
self.hidden2_to_output_w = (np.random.random((self.hidden2_neurons, self.output_neuron))) # 20x1
def activation(self, x):
sigmoid = 1/(1+np.exp(-x))
return sigmoid
def activation_d(self, x):
derivative = x * (1 - x)
return derivative
def feed_forward(self, X):
self.z1 = np.dot(X, self.input_to_hidden1_w)
self.z1_a = self.activation(self.z1)
self.z2 = np.dot(self.z1_a, self.hidden1_to_hidden2_w)
self.z2_a = self.activation(self.z2)
self.z3 = np.dot(self.z2_a, self.hidden2_to_output_w)
output = self.activation(self.z3)
return output
def backward(self, X, y, rate, output):
error = y - output
z3_error_delta = error * self.activation_d(output)
z2_error = np.dot(z3_error_delta, np.transpose(self.hidden2_to_output_w))
z2_error_delta = z2_error * self.activation_d(self.z2)
z1_error = np.dot(z2_error_delta, np.transpose(self.hidden1_to_hidden2_w))
z1_error_delta = z1_error * self.activation_d(self.z1)
self.input_to_hidden1_w += rate * np.dot(np.transpose(X), z1_error_delta)
self.hidden1_to_hidden2_w += rate * np.dot(np.transpose(self.z1), z2_error_delta)
self.hidden2_to_output_w += rate * np.dot(np.transpose(self.z2), z3_error_delta)
def train(self, X, y):
output = self.feed_forward(X)
self.backward(X, y, rate, output)
def save_weights(self):
np.savetxt("w1.txt", self.input_to_hidden1_w, fmt="%s")
np.savetxt("w2.txt", self.hidden1_to_hidden2_w, fmt="%s")
np.savetxt("w3.txt", self.hidden2_to_output_w, fmt="%s")
def check(self, x_test, y_test):
self.feed_forward(x_test)
np.mean(np.square((y_test - self.feed_forward(x_test))))
Net = NeuralNetwork()
for l in range(100):
for i, pattern in enumerate(X):
for j, outcome in enumerate(y):
print(f"#: {l}")
print(f'''
# {str(l)}
# {str(X[i])}
# {str(y[j])}''')
print(f"Predicted output: {Net.feed_forward(X[i])}")
Net.train(X[i], y[j])
print(f"Error training: {(np.mean(np.square(y - Net.feed_forward(X))))}")
Net.save_weights()
for i, pattern in enumerate(x_test):
for j, outcome in enumerate(y_test):
Net.check(x_test[i], y_test[j])
print(f"Error test: {(np.mean(np.square(y_test - Net.feed_forward(x_test))))}")
I'm working around some neural network code. I've wrote my own neuron class, that could be find here. Now, I'm writing the Brain Class, that should sumarize most of code used in a NN. In this class, self.Real_Outputs collects all the outputs and put it into a list to be used after.
I'm boiling my brain to find out why when I add an element in self.Real_Outputs the whole list receive this value. I find here in this topic a discussion similar to mine, but, in my case i've already used 'self' statement. Could you guys help me on that?
class Brain:
def __init__(self, training_set, desired_outputs, bias, learning_tax):
self.Training_Set = training_set
self.Desired_Outputs = desired_outputs
self.Bias = bias
self.Learning_Tax = learning_tax
self.Hidden_Layer = []
self.Hidden_Layer_Outputs = []
self.Hidden_Layer_Errors = []
self.Output_Layer = []
self.Output_Layer_Outputs = []
self.Output_Layer_Errors = []
self.Real_Outputs = [0 for x in self.Desired_Outputs]
def set_hidden_layers(self, number_of_layers, number_of_neurons, activation_function):
self.Hidden_Layer = [[Neuron.Neuron(len(self.Training_Set[0]), activation_function, 1, self.Bias)
for x in range(number_of_neurons)]
for y in range(number_of_layers)]
self.Hidden_Layer_Outputs = [[0 for x in range(number_of_neurons)]
for y in range(number_of_layers)]
self.Hidden_Layer_Errors = [[0 for x in range(number_of_neurons)]
for y in range(number_of_layers)]
def set_output_layer(self, number_of_neurons, activation_function):
self.Output_Layer = [Neuron.Neuron(len(self.Hidden_Layer[0]), activation_function, 0, self.Bias)
for x in range(number_of_neurons)]
self.Output_Layer_Outputs = [0 for x in range(number_of_neurons)]
self.Output_Layer_Errors = [0 for x in range(number_of_neurons)]
def start_converging(self):
j=0
while j < 10:
# Here we're coming inside the training set. If was the n-th time
# you pass here, it's the n-th iteration over the Training Set.
# 'a' represents the Training Set index
for a in range(len(self.Training_Set)):
# Here we're running over the hidden layers
# 'b' represent the layer index
for b in range(len(self.Hidden_Layer)):
# Here we're running over the neurons in the layers
# 'c' represents the neuron index
for c in range(len(self.Hidden_Layer[b])):
if b == 0:
self.Hidden_Layer[b][c].initialize_inputs(self.Training_Set[a])
self.Hidden_Layer[b][c].get_sum()
self.Hidden_Layer_Outputs[b][c] = self.Hidden_Layer[b][c].get_output()
else:
self.Hidden_Layer[b][c].initialize_inputs(self.Hidden_Layer_Outputs[b-1])
self.Hidden_Layer[b][c].get_sum()
self.Hidden_Layer_Outputs[b][c] = self.Hidden_Layer[b][c].get_output()
# Here we're running over the output layer
# 'd' represents the neuron index
for d in range(len(self.Output_Layer)):
self.Output_Layer[d].initialize_inputs(self.Hidden_Layer_Outputs[-1])
self.Output_Layer[d].get_sum()
self.Output_Layer_Outputs[d] = self.Output_Layer[d].get_output()
self.Output_Layer_Errors[d] = self.Output_Layer[d].get_error(0, self.Desired_Outputs[a])
self.Output_Layer[d].update_weights(0, self.Learning_Tax)
self.Real_Outputs[a] = self.Output_Layer_Outputs
# We're updating the hidden layers now. Notice that we should pass backwards, from
# last to first, so, we're using [-(e+1)] indexes.
# '[-(e+1)]' represents the layers index.
for e in range(len(self.Hidden_Layer)):
for f in range(len(self.Hidden_Layer[-(e+1)])):
if e == 0:
self.Hidden_Layer_Errors[-(e + 1)][-(f + 1)] = self.Hidden_Layer[-(e + 1)][-(f + 1)].get_error(0, self.Output_Layer_Errors)
self.Hidden_Layer[-(e + 1)][-(f + 1)].update_weights(0, self.Learning_Tax)
else:
self.Hidden_Layer[-(e + 1)][-(f + 1)].get_error(0, self.Hidden_Layer_Errors[- (e + 1)])
self.Hidden_Layer[-(e + 1)][-(f + 1)].update_weights(0, self.Learning_Tax)
j += 1
print (self.Desired_Outputs)
print (self.Real_Outputs)
I'm new to the world of neural network, which is very interesting. I wrote the basic algorithm of backpropagation on multi-layer NN to solve small problems.
I use the activation function sigmoid (like most of you I think) (x->1/(1+exp(-x))).
I tried my program on several problems :
The first one is the XOR problem. I took a 3 layers network of size [2,2,1] with one bias neuron in the two first layers (so actually the size is more [3,3,1]).
It tried it with 1000 sets of data (i.e. a couple (0/1, 0/1) and its XOR as output), and the algorithm seemed to converge at an error of 0.5 :( I found it weird so i raised the number to 10000, and as it didn't change anything, to 100000 (in despair :p) and it WORKS ! The error fell down to less that 0.02 in average. Does anybody have an idea why it needs so much data to work?
The second one is the sum problem between two numbers (like 4+8 = ?). I took randomly a [2, 5, 5, 1] network with one bias neuron in the three first layers (so actually the size is more [3, 6, 6, 1]). I put a training data set of 100000 couples of numbers below 100 and their sum. This time, the error does not converge at all, while the output of the network always return the number 1. Have you already seen such situations ? Is it a bug code ? (code that i checked many times but perhaps).
import random
import math
class Network:
def initdata(self):
#weights initialization
self.weights.append([])
self.threshold.append([])
for l in range(1,len(self.layers)):
n = self.layers[l]
thresholdl = []
weightsl = []
for i in range(n):
thresholdl.append(-random.random())
weightsli = []
for j in range(self.layers[l-1]):
weightsli.append(random.random()*2-1)
#adding bias neurons
weightsli.append(thresholdl[-1])
weightsl.append(weightsli)
self.weights.append(weightsl)
self.threshold.append(thresholdl)
def __init__(self, layers):
self.layers = layers
self.weights = []
self.threshold = []
self.initdata()
def activation_function(self, x):
return 1/(1+math.exp(-x))
def outputlayer(self, input, l):
if l==0:
return [input]
output = []
prevoutput = self.outputlayer(input, l-1)
for i in range(self.layers[l]):
f = 0
for k in range(len(prevoutput[-1])):
f += self.weights[l][i][k]*prevoutput[-1][k]
f += self.weights[l][i][-1] #bias weight !
output.append(self.activation_function(f))
return prevoutput+[output]
def layersoutput(self, input):
return self.outputlayer(input, len(self.layers)-1)
def finaloutput(self, input):
return self.layersoutput(input)[-1]
def train(self, data, nu):
for (input, finaloutput) in data:
output = self.layersoutput(input)
err = self.errorvector(finaloutput, output[-1])
self.changeweights(err, output, nu)
def changeweights(self, err, output, nu):
deltas = []
for i in range(len(self.layers)):
deltas.append([])
tempweights = self.weights.copy()
def changeweightslayer(layer):
if layer != len(self.layers)-1:
changeweightslayer(layer+1)
for i in range(self.layers[layer]):
delta = 0
if layer != len(self.layers)-1:
delta = output[layer][i]*(1-output[layer][i])*sum([deltas[layer+1][l]*self.weights[layer+1][l][i] for l in range(self.layers[layer+1])])
else:
delta = output[layer][i]*(1-output[layer][i])*err[i]
deltas[layer].append(delta)
for k in range(len(self.weights[layer][i])-1):
tempweights[layer][i][k] += nu*output[layer-1][k]*delta
tempweights[layer][i][-1] += nu*delta
changeweightslayer(1)
self.weights = tempweights
def quadraticerror(self, a, b):
return sum([(a[i]-b[i])**2 for i in range(len(a))])
def errorvector(self, a, b):
return [a[i]-b[i] for i in range(len(a))]
network = Network([2, 5, 5, 1])
print(network.weights)
data = []
for i in range(1000000):
bit1 = random.randrange(100)
bit2 = random.randrange(100)
data.append(([float(bit1), float(bit2)], [float(bit1+bit2)]))
network.train(data, 0.1)
print(network.weights)