Neural network XOR always gives 0.5 - python

I have tried constructing a neural network for the XOR function. The network has 1 hidden layer with 2 nodes, plus a bias node. The sigmoid function is used as the activation function. I have tested the network with multiple learning rates, but the result is always the same: the network gives the value 0.5 for all inputs. There are some other threads where people report the same problem, but as far as I can tell, those cases involved different mistakes.
The following code shows my network and the results.
import numpy as np
import matplotlib.pyplot as plt

class NN:
    """XOR function test. 1 hidden layer with 2 hidden nodes in addition to a bias node."""

    def __init__(self, nodeNumbers, learningRate, targetMatrix, inputMatrix, errorTolerance,
                 maxIterations):
        self.nodeNumbers, self.learningRate, self.targetMatrix, \
            self.inputMatrix, self.errorTolerance, self.maxIterations = \
            nodeNumbers, learningRate, targetMatrix, inputMatrix, errorTolerance, \
            maxIterations

        self.numberOfInputs = np.shape(self.inputMatrix)[1]

        self.weightMatrices = []
        for nodeNumber in range(len(nodeNumbers[1:])):
            self.weightMatrices.append(np.random.random_sample(
                (nodeNumbers[nodeNumber + 1], nodeNumbers[nodeNumber] + 1)).T - .5)

    def activationFunction(self, x):
        return 1./(1 + np.exp(-x))

    def derivative(self, weightedInputs):
        return self.activationFunction(weightedInputs)*(1 - self.activationFunction(weightedInputs))

    def run(self):
        self.iterationNumber = 0
        numberOfAdjustmentsDuringIteration = 1

        while (self.iterationNumber < self.maxIterations
               and numberOfAdjustmentsDuringIteration != 0):
            self.iterationNumber += 1
            numberOfAdjustmentsDuringIteration = 0

            for inputNumber in range(self.numberOfInputs):
                self.inputs = self.inputMatrix[:, inputNumber]
                self.targets = self.targetMatrix[inputNumber]
                self.forward()
                self.calculateError()

                if abs(self.error2) > self.errorTolerance:
                    numberOfAdjustmentsDuringIteration += 1
                    self.backward()

        print('Iterations: ', self.iterationNumber, '|Error|: ', self.error2)

    def forward(self):
        self.u1 = self.weightMatrices[0].T @ self.inputMatrix.T[0, :]
        z1 = self.activationFunction(self.u1)
        self.z1 = np.concatenate([[-1], z1])
        self.u2 = self.weightMatrices[1].T @ self.z1
        self.z2 = self.activationFunction(self.u2)

    def calculateError(self):
        self.error2 = (self.targets - self.z2)**2

    def backward(self, inputs=False, targets=False):
        self.delta2 = (self.z2 - self.targets)*self.derivative(self.u2)

        delta11 = self.derivative(self.u1[0])*self.delta2*self.weightMatrices[1][0]
        delta12 = self.derivative(self.u1[1])*self.delta2*self.weightMatrices[1][1]
        self.delta1 = np.concatenate([delta11, delta12])

        self.weightMatrices[1][0, 0] -= self.learningRate*self.delta2*self.z1[0]
        self.weightMatrices[1][1, 0] -= self.learningRate*self.delta2*self.z1[1]
        self.weightMatrices[1][2, 0] -= self.learningRate*self.delta2*self.z1[2]

        self.weightMatrices[0][0, 0] -= self.learningRate*self.delta1[0]*self.inputs[0]
        self.weightMatrices[0][1, 0] -= self.learningRate*self.delta1[0]*self.inputs[1]
        self.weightMatrices[0][0, 1] -= self.learningRate*self.delta1[1]*self.inputs[0]
        self.weightMatrices[0][1, 1] -= self.learningRate*self.delta1[1]*self.inputs[1]

    def predict(self, newInput):
        self.inputs = newInput
        self.forward()
        print('Input: ', newInput, 'Predicted output: ', self.z2)

nodeNumbers = [2, 2, 1]
learningRate = 0.3
targetMatrix = np.array(((0), (1), (1), (0))).T
inputMatrix = np.array(((-1, 0, 0), (-1, 0, 1), (-1, 1, 0), (-1, 1, 1))).T
errorTolerance = 1e-3
maxIterations = 500

nn = NN(nodeNumbers, learningRate, targetMatrix, inputMatrix, errorTolerance, maxIterations)
nn.run()
The results from the above:
Iterations: 500 |Error|: [0.26341771]
Making predictions:
inputs = np.array(((-1,0, 0), (-1, 0, 1), (-1,1, 0), (-1,1,1))).T

for inp in inputs:
    nn.predict(inp)

The results:
Input: [-1 0 0] Predicted output: [0.49987204]
Input: [-1 0 1] Predicted output: [0.49987204]
Input: [-1 1 0] Predicted output: [0.49987204]
Input: [-1 1 1] Predicted output: [0.49987204]
Does anybody spot any errors?
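One observation worth checking (an editor's note, not a confirmed fix): forward() builds u1 from self.inputMatrix.T[0, :], i.e. always the first training sample, so the self.inputs set by run() and predict() is never used. That would explain why every input produces the same prediction. A minimal sketch of a forward() that uses the current sample instead:

def forward(self):
    # use the sample selected in run()/predict(), not a fixed column
    self.u1 = self.weightMatrices[0].T @ self.inputs
    z1 = self.activationFunction(self.u1)
    self.z1 = np.concatenate([[-1], z1])  # re-attach the bias node
    self.u2 = self.weightMatrices[1].T @ self.z1
    self.z2 = self.activationFunction(self.u2)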

Related

Deep SARSA: agent does not work after training

I have a few questions.
I am currently working on Ubuntu 18.04 LTS, with Python 3.5.6 and Keras 2.0.3.
My problem is that the agent is trained with the Deep SARSA algorithm, but if I switch it to another environment after training, it behaves like an untrained model.
I am asking about this below and attach the code.
deep_sarsa_agent.py
import copy
import pylab
import random
import numpy as np
from environment1 import Env1
from keras.layers import Dense
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import Dropout
from keras import backend as K
import time

EPISODES = 610


# this is DeepSARSA Agent for the GridWorld
# Utilize Neural Network as q function approximator
class DeepSARSAgent:
    def __init__(self):
        self.load_model = False
        # actions which agent can do
        self.action_space = [0, 1, 2, 3, 4]
        # get size of state and action
        self.action_size = len(self.action_space)
        self.state_size = 39
        self.discount_factor = 0.99
        self.learning_rate = 0.001

        self.epsilon = 1.  # exploration
        self.epsilon_decay = .9999
        self.epsilon_min = 0.01
        self.model = self.build_model()

        if self.load_model:
            self.epsilon = 0.99
            self.model.load_weights('./save_model/deep_sarsa.h5')

    # approximate Q function using Neural Network
    # state is input and Q Value of each action is output of network
    def build_model(self):
        model = Sequential()
        model.add(Dense(40, input_dim=self.state_size, activation='relu'))
        model.add(Dense(40, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.summary()
        model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
        return model

    # get action from model using epsilon-greedy policy
    def get_action(self, state):
        if np.random.rand() <= self.epsilon:
            # The agent acts randomly
            return random.randrange(self.action_size)
        else:
            # Predict the reward value based on the given state
            state = np.float32(state)
            q_values = self.model.predict(state)
            return np.argmax(q_values[0])

    def train_model(self, state, action, reward, next_state, next_action, done):
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

        state = np.float32(state)
        next_state = np.float32(next_state)
        target = self.model.predict(state)[0]
        # like Q Learning, get maximum Q value at s'
        # But from target model
        if done:
            target[action] = reward
        else:
            target[action] = (reward + self.discount_factor *
                              self.model.predict(next_state)[0][next_action])

        target = np.reshape(target, [1, 5])
        # make minibatch which includes target q value and predicted q value
        # and do the model fit!
        self.model.fit(state, target, epochs=1, verbose=0)


if __name__ == "__main__":
    env = Env1()
    agent = DeepSARSAgent()
    global_step = 0
    local_step = 0
    scores, episodes, local_steps = [], [], []
    x = 0

    for e in range(EPISODES):
        done = False
        score = 0
        state = env.reset()
        state = np.reshape(state, [1, 39])

        while not done:
            # fresh env
            global_step += 1
            local_step += 1

            # get action for the current state and go one step in environment
            action = agent.get_action(state)
            next_state, reward, done = env.step(action)
            next_state = np.reshape(next_state, [1, 39])
            next_action = agent.get_action(next_state)
            agent.train_model(state, action, reward, next_state, next_action,
                              done)
            state = next_state
            # every time step we do training
            score += reward
            state = copy.deepcopy(next_state)

            if done:
                scores.append(score)
                episodes.append(e)
                local_steps.append(local_step)
                pylab.plot(episodes, scores, 'b', label='scores')
                pylab.plot(episodes, local_steps, 'r', label='local_step')
                pylab.savefig("./save_graph/env case 10/1.png")
                print("episode:", e, " score:", score, "global_step",
                      global_step, " epsilon:", agent.epsilon)
                local_step = 0
            if local_step >= 50 and e >= 200:
                done = True
                local_step = 0

        if e % 100 == 0:
            agent.model.save_weights("./save_model/deep_sarsa.h5")
environment1.py
import time
import numpy as np
import tkinter as tk
from PIL import ImageTk, Image

PhotoImage = ImageTk.PhotoImage
UNIT = 50    # pixels
HEIGHT = 10  # grid height
WIDTH = 10   # grid width

np.random.seed(1)


class Env1(tk.Tk):
    def __init__(self):
        super(Env1, self).__init__()
        self.action_space = ['u', 'd', 'l', 'r']
        self.action_size = len(self.action_space)
        self.title('DeepSARSA')
        self.geometry('{0}x{1}'.format(HEIGHT * UNIT, HEIGHT * UNIT))
        self.shapes = self.load_images()
        self.canvas = self._build_canvas()
        self.counter = 0
        self.local_step = 0
        self.rewards = []
        self.goal = []
        # obstacles
        self.set_reward([3, 0], -1)
        self.set_reward([0, 1], -1)
        self.set_reward([6, 2], -1)
        self.set_reward([4, 3], -1)
        self.set_reward([5, 5], -1)
        self.set_reward([0, 4], -1)
        self.set_reward([1, 9], -1)
        self.set_reward([8, 1], -1)
        self.set_reward([2, 0], -1)
        # goal
        self.set_reward([9, 9], 1)

    def _build_canvas(self):
        canvas = tk.Canvas(self, bg='white',
                           height=HEIGHT * UNIT,
                           width=WIDTH * UNIT)
        # create grids
        for c in range(0, WIDTH * UNIT, UNIT):  # vertical lines, 0~500 by 50
            x0, y0, x1, y1 = c, 0, c, HEIGHT * UNIT
            canvas.create_line(x0, y0, x1, y1)
        for r in range(0, HEIGHT * UNIT, UNIT):  # horizontal lines, 0~500 by 50
            x0, y0, x1, y1 = 0, r, HEIGHT * UNIT, r
            canvas.create_line(x0, y0, x1, y1)

        self.rewards = []
        self.goal = []
        # add image to canvas
        x, y = UNIT/2, UNIT/2
        self.rectangle = canvas.create_image(x, y, image=self.shapes[0])

        # pack all
        canvas.pack()
        return canvas

    def load_images(self):
        rectangle = PhotoImage(
            Image.open("./img/rectangle.png").resize((30, 30)))
        triangle = PhotoImage(
            Image.open("./img/triangle.png").resize((30, 30)))
        circle = PhotoImage(
            Image.open("./img/circle.png").resize((30, 30)))
        return rectangle, triangle, circle

    def reset_reward(self):
        for reward in self.rewards:
            self.canvas.delete(reward['figure'])

        self.rewards.clear()
        self.goal.clear()
        self.set_reward([3, 0], -1)
        self.set_reward([0, 1], -1)
        self.set_reward([6, 2], -1)
        self.set_reward([4, 3], -1)
        self.set_reward([5, 5], -1)
        self.set_reward([0, 4], -1)
        self.set_reward([1, 9], -1)
        self.set_reward([8, 1], -1)
        self.set_reward([2, 0], -1)
        # goal
        self.set_reward([9, 9], 1)

    def set_reward(self, state, reward):
        state = [int(state[0]), int(state[1])]
        x = int(state[0])
        y = int(state[1])
        temp = {}
        if reward > 0:
            temp['reward'] = reward
            temp['figure'] = self.canvas.create_image((UNIT * x) + UNIT / 2,
                                                      (UNIT * y) + UNIT / 2,
                                                      image=self.shapes[2])
            self.goal.append(temp['figure'])
        elif reward < 0:
            temp['direction'] = -1
            temp['reward'] = reward
            temp['figure'] = self.canvas.create_image((UNIT * x) + UNIT / 2,
                                                      (UNIT * y) + UNIT / 2,
                                                      image=self.shapes[1])

        temp['coords'] = self.canvas.coords(temp['figure'])
        temp['state'] = state
        self.rewards.append(temp)

    # new methods
    def check_if_reward(self, state):
        check_list = dict()
        check_list['if_goal'] = False
        rewards = 0

        for reward in self.rewards:
            if reward['state'] == state:
                rewards += reward['reward']
                if reward['reward'] == 1:
                    check_list['if_goal'] = True

        check_list['rewards'] = rewards
        return check_list

    def coords_to_state(self, coords):
        x = int((coords[0] - UNIT / 2) / UNIT)
        y = int((coords[1] - UNIT / 2) / UNIT)
        return [x, y]

    def reset(self):
        self.update()
        time.sleep(0.5)
        x, y = self.canvas.coords(self.rectangle)
        self.canvas.move(self.rectangle, UNIT / 2 - x, UNIT / 2 - y)
        # return observation
        self.reset_reward()
        return self.get_state()

    def step(self, action):
        self.counter += 1
        self.render()
        self.local_step += 1

        #if self.counter % 2 == 1:
        #    self.rewards = self.move_rewards()

        next_coords = self.move(self.rectangle, action)
        check = self.check_if_reward(self.coords_to_state(next_coords))
        done = check['if_goal']
        reward = check['rewards']

        self.canvas.tag_raise(self.rectangle)
        s_ = self.get_state()
        return s_, reward, done

    def get_state(self):
        location = self.coords_to_state(self.canvas.coords(self.rectangle))
        agent_x = location[0]
        agent_y = location[1]

        states = list()
        #locations.append(agent_x)
        #locations.append(agent_y)

        for reward in self.rewards:
            reward_location = reward['state']
            states.append(reward_location[0] - agent_x)
            states.append(reward_location[1] - agent_y)
            if reward['reward'] < 0:
                states.append(-1)
                states.append(reward['direction'])
            else:
                states.append(1)

        return states

    def move_rewards(self):
        new_rewards = []
        for temp in self.rewards:
            if temp['reward'] == 1:
                new_rewards.append(temp)
                continue
            temp['coords'] = self.move_const(temp)
            temp['state'] = self.coords_to_state(temp['coords'])
            new_rewards.append(temp)
        return new_rewards

    def move_const(self, target):
        s = self.canvas.coords(target['figure'])
        base_action = np.array([0, 0])

        if s[0] == (WIDTH - 1) * UNIT + UNIT / 2:
            target['direction'] = 1
        elif s[0] == UNIT / 2:
            target['direction'] = -1

        if target['direction'] == -1:
            base_action[0] += UNIT
        elif target['direction'] == 1:
            base_action[0] -= UNIT

        if (target['figure'] is not self.rectangle
                and s == [(WIDTH - 1) * UNIT, (HEIGHT - 1) * UNIT]):
            base_action = np.array([0, 0])

        self.canvas.move(target['figure'], base_action[0], base_action[1])
        s_ = self.canvas.coords(target['figure'])
        return s_

    def move(self, target, action):
        s = self.canvas.coords(target)
        base_action = np.array([0, 0])

        if action == 0:  # up
            if s[1] > UNIT:
                base_action[1] -= UNIT
        elif action == 1:  # down
            if s[1] < (HEIGHT - 1) * UNIT:
                base_action[1] += UNIT
        elif action == 2:  # right
            if s[0] < (WIDTH - 1) * UNIT:
                base_action[0] += UNIT
        elif action == 3:  # left
            if s[0] > UNIT:
                base_action[0] -= UNIT

        self.canvas.move(target, base_action[0], base_action[1])
        s_ = self.canvas.coords(target)
        return s_

    def render(self):
        time.sleep(0.07)
        self.update()
My thinking was that the agent had become biased toward a single environment, so I trained it on nine additional environments, but the same result occurred.
In addition, learning seems to stop making progress if training runs past a certain point.
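For readers unfamiliar with the method: train_model() above implements the on-policy SARSA target, bootstrapping on the action the agent actually takes next rather than the greedy maximum used by Q-learning. A minimal sketch of the scalar target written into target[action] (sarsa_target is a hypothetical helper, not part of the original code):

def sarsa_target(reward, q_next, next_action, discount_factor, done):
    # q_next is the Q-vector predicted for the next state; SARSA uses
    # q_next[next_action], where Q-learning would use q_next.max()
    if done:
        return reward
    return reward + discount_factor * q_next[next_action]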

Neural Network: Different number of nodes and hidden layers, but exact same test & train accuracy

I just started studying neural networks. In class, the teacher gave us code to experiment with in Google Colab. I tried changing the number of nodes in each hidden layer and the number of hidden layers, and printed out the test accuracy and train accuracy. I've tried many configurations, but the accuracies do not change: they stay at exactly 0.7857142857142857 (this is the actual number) unless I reshuffle the samples.
The teacher said that accuracy can't be changed that easily, but I don't believe her. I think there is something wrong with the code, because two accuracies agreeing to that many digits is suspicious.
Here are the parts of the code I think are necessary to post.
Model
import numpy as np

# Note: sigmoid, relu, sigmoid_derivative and relu_derivative are helper
# functions defined elsewhere in the notebook.

class Model():
    def __init__(self):
        self.layers = []
        self.L = 0
        self.W = {}
        self.b = {}
        self.A = {}
        self.Z = {}
        self.dA = {}
        self.dZ = {}
        self.dW = {}
        self.db = {}
        self.cost = 0.
        self.m = 0
        self.lam = 0
        self.cost_history = []
        self.acc_history = []
        self.alpha_history = []
        self.alpha = 0.
        self.iterations = 0

    def add_layers(self, list_of_layers):
        self.layers = list_of_layers
        self.L = len(self.layers) - 1  # Number of layers excluding the input feature layer

    def init_params(self):
        for i in range(1, self.L + 1):
            self.W[str(i)] = np.random.randn(self.layers[i], self.layers[i - 1]) * np.sqrt(2. / self.layers[i - 1])
            self.b[str(i)] = np.zeros((self.layers[i], 1))

    def forward_prop(self, X):
        self.A['0'] = X

        for i in range(1, self.L + 1):
            self.Z[str(i)] = np.dot(self.W[str(i)], self.A[str(i - 1)]) + self.b[str(i)]
            if i == self.L:
                # Output layer, Sigmoid activation
                self.A[str(i)] = sigmoid(self.Z[str(i)])
            else:
                # Hidden layer, ReLU activation
                self.A[str(i)] = relu(self.Z[str(i)])

    def compute_cost(self, Y):
        self.cost = -1 * np.sum(np.multiply(Y, np.log(self.A[str(self.L)])) +
                                np.multiply(1 - Y, np.log(1 - self.A[str(self.L)]))) / self.m

        if self.lam != 0:
            reg = (self.lam / (2 * self.m))
            for i in range(1, self.L + 1):
                reg += np.sum(np.dot(self.W[str(i)], self.W[str(i)].T))
            self.cost += reg

        self.cost_history.append(self.cost)

    def backward_prop(self, Y):
        # We need dA[str(L)] to start the backward prop computation
        self.dA[str(self.L)] = -1 * (np.divide(Y, self.A[str(self.L)]) - np.divide(1 - Y, 1 - self.A[str(self.L)]))
        self.dZ[str(self.L)] = np.multiply(self.dA[str(self.L)], sigmoid_derivative(self.Z[str(self.L)]))
        self.dW[str(self.L)] = np.dot(self.dZ[str(self.L)], self.A[str(self.L - 1)].T) / self.m + (self.lam/self.m) * self.W[str(self.L)]
        self.db[str(self.L)] = np.sum(self.dZ[str(self.L)], axis=1, keepdims=True) / self.m
        self.dA[str(self.L - 1)] = np.dot(self.W[str(self.L)].T, self.dZ[str(self.L)])

        for i in reversed(range(1, self.L)):
            self.dZ[str(i)] = np.multiply(self.dA[str(i)], relu_derivative(self.Z[str(i)]))
            self.dW[str(i)] = np.dot(self.dZ[str(i)], self.A[str(i - 1)].T) / self.m + (self.lam/self.m) * self.W[str(i)]
            self.db[str(i)] = np.sum(self.dZ[str(i)], axis=1, keepdims=True) / self.m
            self.dA[str(i - 1)] = np.dot(self.W[str(i)].T, self.dZ[str(i)])

    def update_params(self):
        for i in range(1, self.L + 1):
            self.W[str(i)] = self.W[str(i)] - self.alpha * self.dW[str(i)]
            self.b[str(i)] = self.b[str(i)] - self.alpha * self.db[str(i)]

    def train(self, X, Y, iterations=10,
              alpha=0.001, decay=True, decay_iter=5, decay_rate=0.9, stop_decay_counter=100,
              verbose=True, lam=0):
        self.m = Y.shape[1]
        self.alpha = alpha
        self.iterations = iterations
        self.lam = lam

        # initialize parameters
        self.init_params()

        for i in range(iterations):
            # forward prop
            self.forward_prop(X)
            # compute cost
            self.compute_cost(Y)
            # backward prop
            self.backward_prop(Y)
            # update params
            self.update_params()
            # evaluate
            self.acc_history.append(self.evaluate(X, Y, in_training=True))
            # save alpha
            self.alpha_history.append(self.alpha)
            # learning rate decay
            if decay and stop_decay_counter > 0 and i % decay_iter == 0:
                self.alpha = decay_rate * self.alpha
                stop_decay_counter -= 1
            # display cost per iteration
            if verbose:
                print('Cost after {} iterations: {}'.format(i, self.cost))

    def predict(self, X, in_training=False):
        if in_training == False:
            self.forward_prop(X)
        preds = self.A[str(self.L)] >= 0.5
        preds = np.squeeze(preds)
        return preds

    def evaluate(self, X, Y, in_training=False):
        examples = X.shape[1]
        pred = self.predict(X, in_training=in_training)
        pred = pred.reshape(1, examples)
        diff = np.sum(abs(pred - Y))
        acc = (examples - np.sum(diff)) / examples
        return acc
Dataset
import pandas as pd
data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/haberman/haberman.data', sep = ',', header = None)
data.head()
X_train = data.iloc[:,:-1]
Y_train = data.iloc[:, -1]
X_train = np.array(X_train)
Y_train = np.array(Y_train)
Y_train = Y_train.reshape(Y_train.shape[0], 1)
mean = np.mean(X_train, axis = 0)
variance = np.var(X_train, axis = 0)
X_train = np.divide((X_train - mean), variance)
Y_train = Y_train - 1
# Changing label 1 to 0 and label 2 to 1
Split & Shuffle data
# Split the data into test and train sets
from sklearn.utils import shuffle
X_train, Y_train = shuffle(X_train, Y_train)
X_test = X_train[250:,:]
Y_test = Y_train[250:,:]
X_train_ = X_train[:250,:]
Y_train_ = Y_train[:250,:]
X_train_ = X_train_.reshape(3, 250)
Y_train_ = Y_train_.reshape(1, 250)
X_test = X_test.reshape(3, 56)
Y_test = Y_test.reshape(1, 56)
Creating a Model
m = Model()
m.add_layers([3, 16, 16, 1])
m.train(X_train_, Y_train_, iterations = 5000, alpha = 0.9,
        decay_iter = 10, decay_rate = 0.98, stop_decay_counter = 100,
        verbose = False, lam = 2)
Evaluate
print('Test set acc = ', m.evaluate(X_test, Y_test))
print('Train set acc = ', m.evaluate(X_train_, Y_train_))
What I did in the experiment:
Shuffle, train several models (differing in the number of nodes and hidden layers), and evaluate.
# Model examples
m.add_layers([3, 16, 16, 1, 50, 3, 25, 7, 99, 1])
m.add_layers([3, 1, 55, 19, 2, 2, 1, 1, 2, 75, 80, 3, 12, 1])
Reshuffle, evaluate
Result: Every model has the exact same train and test accuracy unless the data is reshuffled.
The teacher told me that this is just my imagination and that it isn't true.
Could you please tell me what in the code produces this result?
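One quick sanity check (an editor's suggestion, hypothetical rather than a confirmed diagnosis): when accuracy is pinned to one value regardless of architecture, compare it with the majority-class share of the split, since a model that predicts a single class for every example reproduces exactly that number. A sketch, reusing X_test, Y_test and the model m from above:

# Accuracy of always predicting the more common class in Y_test (shape (1, m)).
baseline = max(np.mean(Y_test), 1 - np.mean(Y_test))
print('Majority-class baseline:', baseline)
print('Model test accuracy    :', m.evaluate(X_test, Y_test))
# If the two numbers match to every digit, the network is likely
# predicting a single class for all inputs.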

Artificial Neural Network does not train

I have written a Dense class for a fully connected (FC) layer in a CNN, but to test whether it works as a plain FC ANN, I tried training a dataset on it; the loss never falls. I cannot seem to find the issue.
Here's the code:
import numpy as np

class Dense:
    # Constructor
    def __init__(self, size, in_size, activation='relu'):
        # Assign vars
        self.size = size
        self.activation = activation

        # Initialize Weights and Biases
        weights_dims = (size, in_size)
        self.weights = np.random.standard_normal(weights_dims) * 0.01
        self.biases = np.zeros([size, 1])

        # Initialize Accumulators
        self.sigma_acc = self.biases * 0
        self.delta_acc = self.weights * 0

    # ReLU Activation Function
    def relu(self, arr):
        return arr * (arr > 0)

    # Softmax Activation Function
    def softmax(self, arr):
        arr -= arr.max()
        exp = np.exp(arr)
        return exp / np.sum(exp)

    # Activation Manager Function
    def activate(self, arr):
        if self.activation == 'relu': return self.relu(arr)
        if self.activation == 'softmax': return self.softmax(arr)

    # Forward Propagation
    def step(self, vec):
        # Assign Input
        self._in = vec
        # Dot
        z = np.dot(self.weights, vec) + self.biases
        a = self.activate(z)
        # Return
        self.out = a
        return self.out

    # Back Propagation
    def back(self, grad):
        # Calculate sigma
        sigma = grad if self.activation == 'softmax' else grad * (self.out > 0)
        # Calculate delta
        delta = np.dot(sigma, self._in.T)
        # Accumulate
        self.sigma_acc += sigma
        self.delta_acc += delta
        # Return global gradient
        global_grad = np.dot(self.weights.T, sigma)
        return global_grad

    # Train
    def update(self, alpha, batch_size):
        dw = self.delta_acc / batch_size
        self.delta_acc *= 0
        db = self.sigma_acc / batch_size
        self.sigma_acc *= 0
        self.weights -= alpha * dw
        self.biases -= alpha * db
To connect them as a model, I just add instances of this Dense class into a list and loop through them forwards and backwards using the step() and back() functions respectively.
Kindly inform me if you see any issue! Thanks.
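For reference, the wiring described above would look roughly like this (a sketch under stated assumptions: layers is the list of Dense instances, x is one input column vector, and y is a one-hot label column vector, so out - y is the softmax + cross-entropy gradient):

def train_step(layers, x, y, alpha, batch_size):
    # forward: each layer's output feeds the next
    out = x
    for layer in layers:
        out = layer.step(out)
    # backward: start from the softmax + cross-entropy gradient
    grad = out - y
    for layer in reversed(layers):
        grad = layer.back(grad)
    # apply the accumulated gradients (normally after batch_size samples)
    for layer in layers:
        layer.update(alpha, batch_size)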
This is how I created my network; maybe it could help you:
import numpy as np

X = np.array(([0, 0, 0], [0, 0, 1], [0, 1, 0],
              [0, 1, 1], [1, 0, 0], [1, 0, 1],
              [1, 1, 0], [1, 1, 1]), dtype=float)
y = np.array(([1], [0], [0], [0], [0], [0], [0], [1]), dtype=float)
xPredicted = np.array(([0, 0, 1]), dtype=float)

X = X/np.amax(X, axis=0)
xPredicted = xPredicted/np.amax(X, axis=0)

lossFile = open("Enter file", "w")

class Neural_Network(object):
    def __init__(self, inputLayerSize, outputLayerSize, hiddenLayerSize):
        self.inputLayerSize = inputLayerSize
        self.outputLayerSize = outputLayerSize
        self.hiddenLayerSize = hiddenLayerSize

        self.W1 = \
            np.random.randn(self.inputLayerSize, self.hiddenLayerSize)
        self.W2 = \
            np.random.randn(self.hiddenLayerSize, self.outputLayerSize)

    def feedForward(self, X):
        self.z = np.dot(X, self.W1)
        self.z2 = self.activationSigmoid(self.z)
        self.z3 = np.dot(self.z2, self.W2)
        o = self.activationSigmoid(self.z3)
        return o

    def backwardPropagate(self, X, y, o):
        self.o_error = y - o
        self.o_delta = self.o_error*self.activationSigmoidPrime(o)
        self.z2_error = self.o_delta.dot(self.W2.T)
        self.z2_delta = self.z2_error*self.activationSigmoidPrime(self.z2)
        self.W1 += X.T.dot(self.z2_delta)
        self.W2 += self.z2.T.dot(self.o_delta)

    def trainNetwork(self, X, y):
        o = self.feedForward(X)
        self.backwardPropagate(X, y, o)

    def activationSigmoid(self, s):
        return 1/(1+np.exp(-s))

    def activationSigmoidPrime(self, s):
        return s * (1 - s)

    def saveSumSquaredLossList(self, i, error):
        lossFile.write(str(i) + "," + str(error.tolist()) + "\n")

    def saveWeights(self):
        np.savetxt("Enter file", self.W1, fmt="%s")
        np.savetxt("Enter file",
                   self.W2, fmt="%s")

    def predictOutput(self):
        print("Predicted XOR output data based on trained weights: ")
        print("Expected (X1-X3); \n" + str(X))
        print("Output (Y1): \n" + str(self.feedForward(xPredicted)))

myNeuralNetwork = Neural_Network(3, 1, 4)
trainingEpochs = 1000

for i in range(trainingEpochs):
    print("Epoch # " + str(i) + "\n")
    print("Network Input : \n" + str(X))
    print("Expected Output of XOR Gate Neural Network: \n" + str(y))
    print("Actual Output from XOR Gate Neural Network: \n" +
          str(myNeuralNetwork.feedForward(X)))
    Loss = np.mean(np.square(y - myNeuralNetwork.feedForward(X)))
    myNeuralNetwork.saveSumSquaredLossList(i, Loss)
    print("Sum Squared Loss: \n" + str(Loss))
    print("\n")
    myNeuralNetwork.trainNetwork(X, y)

myNeuralNetwork.saveWeights()
myNeuralNetwork.predictOutput()

My neural network algorithm is not working (MNIST digits)

I could use a second set of eyes on my neural network.
This is the MNIST digit-recognition project.
I'm not sure where the issue is.
I previously implemented this successfully with TensorFlow, but I'm not looking to use an API as the solution.
I would appreciate any help anyone can give.
Here's the project on GitHub; it's only an init file and then the neural network:
https://github.com/nealchawn/ai_trial_2
import random
import numpy as np

class NeuralNetwork(object):
    def __init__(self, sizes):
        self.activations = []
        self.outputs = []
        self.weights = []
        self.biases = []
        self.sizes = sizes
        self.set_random_weights()
        self.set_random_biases()

    def set_random_weights(self):
        for layer_index, layer_size in enumerate(self.sizes[1:], start=1):
            layer_weights = []
            for size in range(layer_size):
                for size in range(self.sizes[layer_index-1]):
                    layer_weights.append(random.uniform(-5.0, 5.0))
            self.weights.append(layer_weights)

    def set_random_biases(self):
        total_biases = 0
        # add extra zero bias to help future indexing
        #self.biases.append(0)
        for index, size in enumerate(self.sizes[0:-1], start=1):
            total_biases += 1
        for x in range(total_biases):
            self.biases.append(random.uniform(-5.0, 5.0))

    def train_network(self, training_data, training_labels):
        if len(training_data) != len(training_labels):
            print("Error: data and labels must be the same length")
        data = list(zip(training_data, training_labels))
        self.sgd(data)

    def sgd(self, data, mini_batch_size=1000):
        # first we'll create batches of training data
        n = len(data)
        data_batches = [
            data[k:k + mini_batch_size]
            for k in range(0, n, mini_batch_size)
        ]
        print(len(data_batches))
        i = 0
        for mini_batch in data_batches:
            print("Batch: " + str(i))
            i += 1
            self.update_mini_batch(mini_batch)
            self.network_outputs()
        print("Finished All training data!")

    def update_mini_batch(self, mini_data_batch):
        weight_gradients = []
        bias_gradients = []
        i = 0
        for training_input in mini_data_batch:
            training_object, training_label = training_input
            self.feedforward(training_object)
            weights_gradient, bias_gradient = self.backpropogation(training_label)
            weight_gradients.append(weights_gradient)
            bias_gradients.append(bias_gradient)
        # average gradients
        weights_gradient = np.average(weight_gradients, axis=0)
        biases_gradient = np.average(bias_gradients, axis=0)
        # may need to convert to list
        weights_gradient_list = []
        for weight_gradient in weights_gradient:
            weights_gradient_list.append(weight_gradient.tolist())
        #weights_gradient = weights_gradient.tolist()
        biases_gradient = biases_gradient.tolist()
        for x in range(len(self.biases)):
            self.biases[x] -= 0.1*biases_gradient[x]
        weight_gradient_index = 0
        for layer_index, layer_weights in enumerate(self.weights, start=0):
            for weight_index, weight in enumerate(layer_weights):
                self.weights[layer_index][weight_index] = weight - 0.1*weights_gradient_list[layer_index][weight_index]
                weight_gradient_index += 1

    def feedforward(self, training_object):
        # set inputs
        self.outputs = []
        self.activations = []
        temp_activations = []
        for index in range(self.sizes[0]):
            temp_activations.append(training_object[index])
        self.activations.append(temp_activations)

        for layer_index, layer_size in enumerate(self.sizes[1:], start=0):
            layer_weights = self.weights[layer_index]
            layer_inputs = self.activations[layer_index]
            weight_index = 0
            layer_outputs = []
            layer_activations = []
            for node_index in range(layer_size):
                node_weights = []
                # get node weights
                #print(f"layer size: {layer_size}, previous_layer_size: {self.sizes[layer_index]}, layer weights: {len(layer_weights)}")
                for x in range(self.sizes[layer_index]):
                    node_weights.append(layer_weights[weight_index])
                    weight_index += 1
                output = 0
                for indx in range(len(node_weights)):
                    output += layer_inputs[indx]*node_weights[indx]
                output = output + self.biases[layer_index]
                layer_outputs.append(output)
                layer_activations.append(self.sigmoid(output))
            self.outputs.append(layer_outputs)
            self.activations.append(layer_activations)

    def backpropogation(self, training_label):
        costs = []
        output_layer_activations = self.activations[-1]
        output_layer_outputs = self.outputs[-1]
        correct_labels = self.translate_label_to_array(training_label)
        costs.append(self.compute_cost_derivative(correct_labels, output_layer_activations))
        for cost_index, cost in enumerate(costs[0]):
            costs[0][cost_index] = cost*self.sigmoid_prime(output_layer_outputs[cost_index])
        # calculate costs for hidden layers
        for layer_index, layer_size in enumerate(self.sizes[::-1][1:-1], start=1):
            layer_costs = []
            layer_weights = self.weights[-layer_index]
            layer_outputs = self.outputs[-(layer_index+1)]
            previous_layer_costs = costs[layer_index-1]
            next_layer_size = self.sizes[::-1][1:][layer_index]
            layer_weights_formatted = []
            for x in range(layer_size):
                layer_weights_formatted.append([])
            for weight_index, weight in enumerate(layer_weights, start=0):
                #print(f"weight index:{weight_index % next_layer_size} layer_index: {weight_index}")
                layer_weights_formatted[weight_index % layer_size].append(layer_weights[weight_index])
            #print(f"next_layer_size:{layer_size} costs: {len(previous_layer_costs)}, layer_weights_formatted: {layer_weights_formatted}")
            for x in range(layer_size):
                node_cost = 0
                for y, cost in enumerate(previous_layer_costs, start=0):
                    node_cost += layer_weights_formatted[x][y]*cost
                layer_costs.append(node_cost)
            # layer_costs same order as next layer's activations
            for cost_index, cost in enumerate(layer_costs):
                layer_costs[cost_index] = cost * self.sigmoid_prime(layer_outputs[cost_index])
            costs.append(layer_costs)
        # calculate weight errors
        weight_errors = []
        bias_errors = []
        for layer_index, layer_costs in enumerate(costs[::-1]):
            layer_activations = self.activations[layer_index]
            layer_weight_errors = []
            for cost_index, cost in enumerate(layer_costs, start=0):
                for activation in layer_activations:
                    layer_weight_errors.append(activation * cost)
            weight_errors.append(np.array(layer_weight_errors))
            bias_errors.append(sum(layer_costs))
        return weight_errors, bias_errors

    # conversion tool
    def translate_label_to_array(self, y):
        translated_label = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        translated_label[y] = 1
        return np.array(translated_label)

    # output tools
    def network_outputs(self):
        print("Output layer: ")
        for x in range(self.sizes[-1]):
            print("node " + str(x) + ": " + str(self.activations[-1][x]))

    def total_activations(self):
        print(len(self.activations))

    def compute_cost_derivative(self, y, output_activations):
        """Return the vector of partial derivatives \partial C_x /
        \partial a for the output activations."""
        return (output_activations - y)

    def sigmoid(self, z):
        """The sigmoid function."""
        return (1.0 / (1.0 + np.exp(-z)))

    def sigmoid_prime(self, z):
        return (self.sigmoid(z) * (1 - self.sigmoid(z)))
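A general way to localize bugs in hand-rolled backpropagation like this (an editor's note; loss and numeric_gradient below are hypothetical helpers, not part of the original post) is a finite-difference gradient check: nudge one weight, re-run the forward pass, and compare the numerical slope of the loss with the analytic gradient that backpropogation() produces for that weight.

def loss(net, x, label):
    # half squared error, consistent with compute_cost_derivative's (a - y)
    net.feedforward(x)
    target = net.translate_label_to_array(label)
    return 0.5 * np.sum((np.array(net.activations[-1]) - target) ** 2)

def numeric_gradient(net, x, label, layer, index, eps=1e-5):
    # central difference d(loss)/d(weight) for a single weight
    net.weights[layer][index] += eps
    loss_plus = loss(net, x, label)
    net.weights[layer][index] -= 2 * eps
    loss_minus = loss(net, x, label)
    net.weights[layer][index] += eps  # restore the original weight
    return (loss_plus - loss_minus) / (2 * eps)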

XOR classification using multilayer perceptrons is outputting 1 for all inputs

I'm using a neural network with 1 hidden layer (2 neurons) and 1 output neuron for solving the XOR problem.
Here's the code I'm using. It contains the main run file xor.py, which creates a model defined in model.py. Each neuron is defined by the class Neuron in neuron.py.
xor.py
from model import Model
import numpy as np

inputs = [[0,0], [0,1], [1,0], [1,1]]
outputs = [0, 1, 1, 0]

m = Model()
m.train(inputs, outputs)

for i in inputs:
    p = m.predict(i)
    print str(i) + ' => ' + str(p)
model.py
from neuron import HiddenNeuron, OutputNeuron
import numpy as np

class Model(object):
    def __init__(self):
        self.hidden = [HiddenNeuron(2) for i in range(2)]
        self.output = OutputNeuron(2)

    def predict(self, input):
        temp = []
        for x in range(2):
            self.hidden[x].forward(input)
            temp.append(self.hidden[x].out)
        self.output.forward(temp)
        return self.output.out

    def train(self, inputs, targets):
        it = 0
        i = 0
        size = len(inputs)
        while it < 4:
            if i == size:
                i = 0
            feature = inputs[i]
            print '\n\nFeature : ' + str(feature) + '\n'
            print 'Output weights : ' + str(self.output.weights)
            print 'Hidden 1 weights : ' + str(self.hidden[0].weights)
            print 'Hidden 2 weights : ' + str(self.hidden[1].weights)
            temp = []
            for x in range(2):
                self.hidden[x].forward(feature)
                temp.append(self.hidden[x].out)
            self.output.forward(temp)
            self.output.backward(targets[i])
            deltas = []
            deltas.append(self.output.error)
            weights = []
            weights.append([self.output.weights[0]])
            weights.append([self.output.weights[1]])
            for x in range(2):
                self.hidden[x].backward(deltas, weights[x])
            for x in range(2):
                self.hidden[x].update(feature)
            self.output.update(temp)
            it += 1
            i += 1
neuron.py
import numpy as np
from random import uniform

class Neuron(object):
    def activation(self, fx):
        return 1/(1 + np.exp(-fx))

    def __init__(self, dim, lrate):
        self.dim = dim
        self.weights = np.empty([dim])
        self.weights = [uniform(0, 1) for x in range(dim)]
        self.bias = uniform(0, 1)
        self.lrate = lrate
        self.out = None
        self.error = None

    def update(self, input):
        j = 0
        for i in input:
            delta = self.lrate * self.error
            self.weights[j] -= (delta*i)
            self.bias += delta
            j += 1

    def forward(self, input):
        j = 0
        sum = self.bias
        for f in input:
            sum += f * self.weights[j]
            j += 1
        self.out = self.activation(sum)

    def backward(self):
        pass

class OutputNeuron(Neuron):
    def __init__(self, dim, lrate=0.2):
        super(OutputNeuron, self).__init__(dim, lrate)

    def backward(self, target):
        self.error = self.out * (1 - self.out) * (self.out - target)

class HiddenNeuron(Neuron):
    def __init__(self, dim, lrate=0.2):
        super(HiddenNeuron, self).__init__(dim, lrate)

    def backward(self, deltas, weights):
        sum = 0
        size = len(deltas)
        for x in range(size):
            sum += deltas[x] * weights[x]
        self.error = self.out * (1 - self.out) * sum
The final output is:
[0, 0] => 0.999999991272
[0, 1] => 0.999999970788
[1, 0] => 0.999999952345
[1, 1] => 0.999715564446
I think the error is in neuron.py, in the function update(). If you change self.bias += delta to self.bias -= delta, it should work; at least it does for me. Otherwise you would modify your biases to ascend towards a maximum on the error surface.
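For clarity, this is update() with only that sign flipped:

def update(self, input):
    j = 0
    for i in input:
        delta = self.lrate * self.error
        self.weights[j] -= (delta*i)
        self.bias -= delta  # descend, rather than ascend, the error surface
        j += 1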
Below you can see the output after 100000 training epochs.
[0, 0] => 0.0174550173543
[0, 1] => 0.983899954593
[1, 0] => 0.983895388655
[1, 1] => 0.0164172288168
