Neural net decision boundaries are perpendicular to true boundaries - python

I have built a small neural net that takes two inputs, with two neurons in the hidden layer and one neuron in the output layer. The results are "mirror images", i.e. the decision boundaries are perpendicular to the true boundaries. Does anyone know how this might be happening, or what mistake I may have made?
[Plots: linear data, nonlinear data]
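For reference (not from the original post, just the standard chain rule written out to match the variable names in the code below), the gradients being implemented are, for the squared-error loss with a sigmoid output:

$E = \tfrac{1}{2}(t - z_{out})^2$, with $z_{out} = \sigma(z)$ and $z = w_5\, h1_{out} + w_6\, h2_{out} + b_3$, so

$\frac{\partial E}{\partial w_5} = (z_{out} - t)\, z_{out}(1 - z_{out})\, h1_{out}$

$\frac{\partial E}{\partial w_1} = (z_{out} - t)\, z_{out}(1 - z_{out})\, w_5\, h1_{out}(1 - h1_{out})\, x$

Here $w_5$, $w_6$, $b_3$ are w[4], w[5], b[2] in the zero-indexed code, and e5, e1 are the corresponding gradients.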
import random, numpy, math
lr = 0.1 #learning rate
dt = '4' #data type 1: linear 2: curve 3: box 4: XORish
epochs = 100000
tda = 50 #training data amount
def step(x): #step function
if x > 0:
x = 1
else:
x = 0
return x
def error(truth, output):
return 0.5 * (truth - output)**2
def sig(x): #sigmoid activation
return 1/(1+numpy.exp(-x))
#weights
w = [random.random(),random.random(),random.random(),random.random(),random.random(),random.random()]
#biases
b = [random.random(),random.random(),random.random()]
def Net(x, y, t) : # t is truth (or target)
h1 = x*w[0]+y*w[1]+b[0] #summation in h1, first neuron in hidden layer
h1out = sig(h1) #sigmoid activation
h2 = x*w[2]+y*w[3]+b[1]
h2out = sig(h2)
z = h1out*w[4]+h2out*w[5]+b[2] #z is output neuron
zout = sig(z)
e = error(t, zout) # e is error
#backpropagation, partial differentiations to find error at each weight and bias
e5 = (zout-t) * (zout * (1 - zout)) * h1out #e5 is error at weight 5 etc
e6 = (zout-t) * (zout * (1 - zout)) * h2out
e1 = (zout-t) * (zout * (1 - zout)) * w[4] * (h1out * (1 - h1out)) * x
e2 = (zout-t) * (zout * (1 - zout)) * w[4] * (h1out * (1 - h1out)) * y
e3 = (zout-t) * (zout * (1 - zout)) * w[5] * (h2out * (1 - h2out)) * x
e4 = (zout-t) * (zout * (1 - zout)) * w[5] * (h2out * (1 - h2out)) * y
be3 = (zout-t) * (zout * (1 - zout))
be1 = (zout-t) * (zout * (1 - zout)) * w[4] * (h1out * (1 - h1out))
be2 = (zout-t) * (zout * (1 - zout)) * w[5] * (h2out * (1 - h2out))
#updating weights and biases
w[0] = w[0] - (e1 * lr)
w[1] = w[1] - (e2 * lr)
w[2] = w[2] - (e3 * lr)
w[3] = w[3] - (e4 * lr)
w[4] = w[4] - (e5 * lr)
w[5] = w[5] - (e6 * lr)
b[2] = b[2] - (be3 * lr)
b[0] = b[0] - (be1 * lr)
b[1] = b[1] - (be2 * lr)
train_data = []
while len(train_data)<tda: #makes training data
x = random.randrange(100)
y = random.randrange(100)
if dt == '1':
if x+y>100:
truth = 1
else:
truth = 0
elif dt == '2':
if x*y>1000:
truth = 1
else:
truth = 0
elif dt == '3':
if x>50 or y>50:
truth = 1
else:
truth = 0
elif dt == '4':
if x+y>60 and x+y<140:
truth = 1
else:
truth = 0
if [x,y,truth] not in train_data:
train_data.append([x,y,truth])
for n in range(epochs): #EPOCHS
for i in train_data:
Net(i[0], i[1], i[2])
if n%1000 == 0 and n != 0:
print (n, 'epochs')
if n%5000 == 0:
#every 5000 epochs the net is fed new test data and results are plotted
test_data = []
while len(test_data)<(50): #makes test data
x = random.randrange(100)
y = random.randrange(100)
if [x, y] not in test_data and [x, y, 0] not in train_data and [x, y, 1] not in train_data:
test_data.append([x, y])
#classifying the test data with the net
for i in test_data:
x = i[0]
y = i[1]
h1 = x*w[0]+y*w[1]+b[0]
h1out = sig(h1)
h2 = x*w[2]+y*w[3]+b[1]
h2out = sig(h2)
z = h1out*w[4]+h2out*w[5]+b[2]
zout = step(z)
i.append(zout)
print (test_data)
print (w, b)
#plotting results
import matplotlib.pyplot as plt
x0 = []
y0 = []
x1 = []
y1 = []
xt0 = []
yt0 = []
xt1 = []
yt1 = []
for i in train_data:
if i[2] == 0:
x0.append(i[0])
y0.append(i[1])
else:
x1.append(i[0])
y1.append(i[1])
for i in test_data:
if i[2] == 0:
xt0.append(i[0])
yt0.append(i[1])
else:
xt1.append(i[0])
yt1.append(i[1])
plt.clf()
plt.scatter(xt0, yt0, 30, color = 'red')
plt.scatter(xt1, yt1, 30, color = 'blue')
plt.scatter(x0, y0, 10, color = 'orange')
plt.scatter(x1, y1, 10, color = 'green')
plt.title(str(tda)+" training data, "+str(epochs)+" epochs \n Red/Orange are small things, Blue/Green are big things \n Orange/Green are training data, Red/Blue are test data")
plt.xlabel("Width")
plt.ylabel("Height")
plt.savefig('plot.png')
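One way to look at the learned boundary directly, rather than inferring it from scattered test points, is to evaluate the trained net on a dense grid and contour its output at 0.5. This is a hypothetical addition (it assumes w, b and sig from the script above are in scope), not part of the original script:
import numpy as np
import matplotlib.pyplot as plt
# Evaluate the trained 2-2-1 net over the whole input square
gx, gy = np.meshgrid(np.linspace(0, 100, 200), np.linspace(0, 100, 200))
h1o = sig(gx * w[0] + gy * w[1] + b[0])
h2o = sig(gx * w[2] + gy * w[3] + b[1])
zo = sig(h1o * w[4] + h2o * w[5] + b[2])
plt.contour(gx, gy, zo, levels=[0.5], colors='black')  # learned decision boundary
plt.savefig('boundary.png')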

Related

Numerical instability in python

I am trying to make several plots for a project of mine using the following code:
import pprint
import scipy
import scipy.linalg # SciPy Linear Algebra Library
import numpy as np
from scipy.linalg import lu , lu_factor, lu_solve
from scipy.integrate import quad
import matplotlib.pyplot as plt
#Solving the equations for the Prandtl case
K = 100
alpha = 0.1
visc = 5
diff = 5
N = 0.01
L = 5000
height = 250
subdivisions = 100
tick = 10
points = np.arange(0,L/2+tick,tick)
def H(y):
return ( height * (1 + np.cos(2 * np.pi * y/L)) )
def Bsfc(y):
return 0.1
final_system = []
b=[]
for q in range(-K,K+1):
equation1 = []
equation2 = []
equation3 = []
Aki = []
Cki = []
Dki = []
for k in range(-K,K+1):
R = 2 * N**2 * np.cos(alpha)**2 / (visc * diff) * (k * np.pi / L)**2
Q = N**2 * np.sin(alpha)**2 / (3 * visc * diff)
S1 = abs(R + np.sqrt(Q**3 + R**2) )**(1/3)
S2 = - abs( np.sqrt(Q**3 + R**2) -R )**(1/3)
phi = np.sqrt(S1**2 + S2**2 - S1*S2)
Lk = np.arccos(- (S1 + S2)/ (2 * phi) )
m1 = - np.sqrt(S1 + S2)
m2 = - np.sqrt(phi) * np.exp(1j * Lk/2)
m3 = m2.conjugate()
def f1r(y):
return (np.exp(m1 * H(y)) * np.cos(2 * (q - k) * np.pi * y / L) ).real
def f1i(y):
return (np.exp(m1 * H(y)) * np.cos(2 * (q - k) * np.pi * y / L) ).imag
gamma1 = 2/L * (quad(f1r,0,L/2,limit=subdivisions)[0] + quad(f1i,0,L/2,limit=subdivisions)[0]*1j)
def f2r(y):
return (np.exp(m2 * H(y)) * np.cos(2 * (q - k) * np.pi * y / L) ).real
def f2i(y):
return (np.exp(m2 * H(y)) * np.cos(2 * (q - k) * np.pi * y / L) ).imag
gamma2 = 2/L * (quad(f2r,0,L/2,limit=subdivisions)[0] + quad(f2i,0,L/2,limit=subdivisions)[0]*1j)
if k == 0:
equation1.append(2 * gamma2.real)
Cki.append(k)
equation1.append(-2 * gamma2.imag)
Dki.append(k)
else:
equation1.append(gamma1)
Aki.append(k)
equation1.append(2 * gamma2.real)
Cki.append(k)
equation1.append(-2 * gamma2.imag)
Dki.append(k)
if q != 0:
if k == 0:
equation2.append(0)
equation2.append(0)
else:
equation2.append(k * gamma1 / (m1**3) )
equation2.append(2 * k * (gamma2 / (m2**3) ).real)
equation2.append(-2 * k * (gamma2 / (m2**3) ).imag)
if k == 0:
equation3.append(2 * (m2**2 * gamma2).real)
equation3.append(-2 * (m2**2 * gamma2).imag)
else:
equation3.append(m1**2 * gamma1)
equation3.append(2 * (m2**2 * gamma2).real)
equation3.append(-2 * (m2**2 * gamma2).imag)
final_system.append(equation1)
def f4r(y):
return (Bsfc(y) * np.cos(2 * q * np.pi * y / L) ).real
def f4i(y):
return (Bsfc(y) * np.cos(2 * q * np.pi * y / L) ).imag
b.append(2/L * (quad(f4r,0,L/2,limit=subdivisions)[0] + quad(f4i,0,L/2,limit=subdivisions)[0]*1j))
if q != 0:
final_system.append(equation2)
b.append(0)
final_system.append(equation3)
b.append(0)
final_system = np.array(final_system)
b=np.array(b)
#LU solver
P, Ls, U = scipy.linalg.lu(final_system)
Bl = np.linalg.inv(P) @ b
Z = np.linalg.solve(Ls,Bl)
X = np.linalg.solve(U,Z)
print (np.allclose(final_system @ X, b))
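# Note (added, not in the original post): the lu_factor/lu_solve pair imported above
# does the same P, L, U bookkeeping in one step and is less error-prone:
#   lu_piv = lu_factor(final_system)
#   X = lu_solve(lu_piv, b)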
#Getting the values for Ak, Ck and Dk
strings = []
for k in range(-K,K+1):
if k != 0:
strings.append('A')
strings.append('R')
strings.append('I')
Ak = []
Rk = []
Ik = []
for k in range(0,len(X)):
if 'A' in strings[k]:
Ak.append(X[k])
if 'R' in strings[k]:
Rk.append(X[k])
if 'I' in strings[k]:
Ik.append(X[k])
Ck=[]
for k in range(0,len(Rk)):
Ck.append(Rk[k] + Ik[k] * 1j)
Ck = np.array(Ck)
Dk = Ck.conjugate()
Ak = np.array(Ak)
#Getting the Buoyancy value
z = np.arange(0,2010,10)
y = np.arange(-L,L+10,10)
Y,Z = np.meshgrid(y,z)
B = np.ones_like(Y)*[0]
for k in range(-K,K+1):
R = 2 * N**2 * np.cos(alpha)**2 / (visc * diff) * (k * np.pi / L)**2
Q = N**2 * np.sin(alpha)**2 / (3 * visc * diff)
S1 = abs(R + np.sqrt(Q**3 + R**2) )**(1/3)
S2 = - abs( np.sqrt(Q**3 + R**2) -R )**(1/3)
phi = np.sqrt(S1**2 + S2**2 - S1*S2)
Lk = np.arccos(- (S1 + S2)/ (2 * phi) )
m1 = - np.sqrt(S1 + S2)
m2 = -np.sqrt(phi) * np.exp(1j * Lk/2)
m3 = m2.conjugate()
if k != 0:
B = B + ( Ak[Aki.index(k)] * np.exp(m1 * Z) * np.exp(2j * (k) * np.pi * Y / L) )
B = B + ( ( Ck[Cki.index(k)] * np.exp(m2 * Z) + Dk[Dki.index(k)] * np.exp(m3 * Z) ) * np.exp(2j * (k) * np.pi * Y / L) )
for k in range(0,B.shape[0]):
for t in range(0,B.shape[1]):
if Z[k][t] < H(Y[k][t]):
B[k][t] = np.nan
if Z[k][t] == H(Y[k][t]):
print (B[k][t], "B value at the ground")
if abs(Z[k][t] - H(Y[k][t])) < 0.1:
if B[k][t] > 0.101:
print (B[k][t],'error -------------------------------------------------')
# print (B[k][t], Z[k][t], H(Y[k][t]), Y[k][t], '-----------------------------------------------------------------------------' )
Bp = Bsfc(Y) * np.exp(-Z * np.sqrt(N * np.sin(alpha) ) / (4*visc*diff)**(1/4) ) * np.cos(np.sqrt(N*np.sin(alpha)) /((4*visc*diff)**(1/4))*Z )
##Plotting the buoyancy
fig = plt.figure(figsize=(10,10)) # create a figure
plt.rcParams.update({'font.size':16})
plt.title('Buoyancy')
plt.contourf(Y,Z,B,np.arange(-0.2,0.201,0.001),cmap='seismic')
#plt.contourf(Y,Z,B,cmap='seismic')
plt.colorbar(label='1/s')
plt.xlabel("Y axis")
plt.ylabel("Height")
plt.xlim([-L,L])
plt.ylim([0,1500])
plt.show()
The following plot shows a run that yielded a good result:
[Plot: Buoyancy]
However, when I increase the "height" parameter, I start getting unstable results, which I suspect occur because of numerical instabilities:
[Plot: Buoyancy, unstable]
Is there a way to increase numerical precision in Python? I have experimented a bit with numpy.double, but without success so far.
Thanks
I guess you'll find your answer here on Stack Overflow.
In the standard library, the decimal module may be what you're looking for. Also, I have found mpmath to be quite helpful...
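For what it's worth, a minimal sketch of both suggestions (the module calls below are illustrative, not a drop-in fix for the script above):
# Standard library: arbitrary-precision decimal arithmetic
from decimal import Decimal, getcontext
getcontext().prec = 50           # 50 significant digits
print(Decimal(1) / Decimal(3))

# Third party: mpmath, which also handles complex numbers and special functions
from mpmath import mp, mpf
mp.dps = 50                      # work with 50 significant decimal digits
print(mpf(1) / mpf(3))
print(mp.exp(-1000))             # representable, instead of underflowing to 0.0 like a float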

My Standard Neural Network Cost is Going Up

I have been trying to create a simple standard neural network from scratch, but I can't seem to get it to work normally. Sometimes the cost skyrockets; other times it doesn't change at all. I'm not sure what the problem is, so any help would be appreciated.
I have all of the information on Github. If any more information is needed kindly reply and I will provide it.
https://github.com/enriquedellxps/Neural-Network
Function for generating batches:
def batcher(data, batch_size):
# get the number of batches
num_batches_norem = data.shape[1] // batch_size
if data.shape[1] % batch_size == 0:
remainder_quantity = 0
else:
remainder_size = data.shape[1] % batch_size
remainder_quantity = 1
num_batches = num_batches_norem + remainder_quantity
changer = 0
for mb in range(num_batches_norem):
current_batch = data[:, changer:changer + batch_size]
changer += batch_size
yield current_batch
for last_mb in range(remainder_quantity):
last_batch = data[:, changer:changer + remainder_size]
yield last_batch
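A quick sanity check of batcher (hypothetical data; the shape[1] indexing implies that examples are stored as columns):
import numpy as np
data = np.arange(20).reshape(2, 10)     # 2 features, 10 examples as columns
batches = list(batcher(data, 4))
print([b.shape for b in batches])       # [(2, 4), (2, 4), (2, 2)] - last batch holds the remainder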
Function for g(z):
def activationer(a, z):
# ACTIVATION FUNCTIONS
# Sigmoid Activation Function
def sigmoid(z):
g = scipy.special.expit(z)
return g
# Tanh (Hyperbolic Tangent Function) Activation Function
def tanh(z):
g = (np.exp(z) - np.exp(-1 * z)) / ((np.exp(z) + np.exp(-1 * z)))
return g
# ReLU (Rectified Linear Unit) Activation Function
def ReLU(z):
g = np.maximum(0, z)
return g
# Leaky ReLU (Leaky Rectified Linear Unit) Activation Function
def Leaky_ReLU(z):
g = np.maximum(0.01 * z, z)
return g
def softmax(z):
z_exp = np.exp(z)
g = z_exp / np.sum(z_exp, axis=0, keepdims=True)
return g
if a == "sigmoid":
res = sigmoid(z)
elif a == "tanh":
res = tanh(z)
elif a == "relu":
res = ReLU(z)
elif a == "leaky relu":
res = Leaky_ReLU(z)
elif a == "softmax":
res = softmax(z)
return res
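A small spot check of activationer (hypothetical inputs; scipy is imported here because the sigmoid branch relies on scipy.special.expit):
import numpy as np
import scipy.special
z = np.array([[1.0], [-2.0], [0.5]])
print(activationer("relu", z).ravel())          # [1.  0.  0.5]
print(float(activationer("softmax", z).sum()))  # ~1.0: each softmax column sums to 1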
NN Class:
class DeepNeuralNetwork:
def __init__(self, n_x, n_h, n_y, nl, activations, alpha):
assert nl == len(activations), f"L: {nl}, Number of Activations: {len(activations)}"
# Assign inputs to the self object
self.n_x = n_x
self.n_h = n_h
self.n_y = n_y
self.nl = nl
self.activations = activations
self.alpha = alpha
# Initialize Parameters
def initialize_parameters(self):
n_x = self.n_x
n_h = self.n_h
n_y = self.n_y
activations = self.activations
parameters = []
for l in range(self.nl):
np.random.seed(8)
if l == 0:
if activations[l] == "relu" or "leaky relu":
parameters.append([np.random.randn(n_h, n_x) * np.sqrt(2 / n_x), np.zeros((n_h, 1))]) # aka W1, b1 | Xavier
else:
parameters.append([np.random.randn(n_h, n_x) * np.sqrt(1 / n_x), np.zeros((n_h, 1))]) # aka W1, b1 | He
elif l == self.nl - 1:
if activations[l] == "relu" or "leaky relu":
parameters.append([np.random.randn(n_y, n_h) * np.sqrt(2 / n_h), np.zeros((n_y, 1))]) # aka WL, bL | Xavier
else:
parameters.append([np.random.randn(n_y, n_h) * np.sqrt(1 / n_h), np.zeros((n_y, 1))]) # aka WL, bL | He
else:
if activations[l] == "relu" or "leaky relu":
parameters.append([np.random.randn(n_h, n_h) * np.sqrt(2 / n_h), np.zeros((n_h, 1))]) # hidden params | Xavier
else:
parameters.append([np.random.randn(n_h, n_h) * np.sqrt(1 / n_h), np.zeros((n_h, 1))]) # hidden params | He
return parameters
# Forward Propagation
def forward_propagation(self, parameters, input_data):
batch_size = input_data.shape[1] # Get the amount of examples in the batch
caches = []
self.caches = caches
current_activation = input_data # Set first activation - A0 - as the input
caches.append(current_activation)
for l in range(self.nl):
W, b = parameters[l][0], parameters[l][1] # Get weights and biases for current layer
Z = W @ current_activation + b # Compute the linear activation
current_activation = activationer(self.activations[l], Z) # Compute the full activation
caches.append(current_activation)
return current_activation
# Compute Cost
def compute_cost(self, yhat, y):
batch_size = y.shape[1] # Get the amount of examples in the batch
cost = (1 / batch_size) * np.sum(-1 * (y * np.log(yhat) + ((1 - y) * (np.log(1 - yhat))))) # Compute the cross-entropy cost
cost = np.squeeze(cost) # Turn [[17]] to 17
return cost
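# (Added note, an assumption rather than something from the post): clipping yhat, e.g.
# yhat = np.clip(yhat, 1e-12, 1 - 1e-12), before the log calls above avoids log(0) -> inf
# when a sigmoid/softmax output saturates.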
# Backward Propagation
def backward_propagation(self, parameters, y):
caches = self.caches
batch_size = y.shape[1]
grads = []
for l in reversed(range(1, self.nl + 1)):
if l == self.nl:
dZ = caches[l] - y
dW = (1 / batch_size) * dZ @ caches[l - 1].T
db = (1 / batch_size) * np.sum(dZ, axis=1, keepdims=True)
grads.append([dW, db])
else:
dA = parameters[l][0].T @ dZ
dZ = dA * np.multiply(caches[l], (1 - caches[l]))
dW = (1 / batch_size) * dZ @ caches[l - 1].T
db = (1 / batch_size) * np.sum(dZ, axis=1, keepdims=True)
grads.append([dW, db])
return grads
# Update Parameters
def update_parameters(self, parameters, gradients):
for l in range(self.nl):
parameters[l][0] = parameters[l][0] - self.alpha * gradients[self.nl - l - 1][0]
parameters[l][1] = parameters[l][1] - self.alpha * gradients[self.nl - l - 1][1]
return parameters
Running it:
dnn = DeepNeuralNetwork(12288, 20, 1, 4, ["relu", "relu", "relu", "sigmoid"], 0.001)
params = dnn.initialize_parameters()
epochs = 100
for e in range(epochs):
for i, j in zip(train_x_batched, train_y_batched):
yhat = dnn.forward_propagation(params, i)
cost = dnn.compute_cost(yhat, j)
grads = dnn.backward_propagation(params, j)
params = dnn.update_parameters(params, grads)
print(cost) # This usually starts going down then skyrockets. Even if I lower the learning rate to 0.00001
Thanks :)

Python neural network does not train

I have a simple neural network with 2 input neurons, 3 hidden neurons and 1 output neuron. The hidden layer has a bias.
I'm not using matrix operations to do the feed-forward and backpropagation. When I run the training function on a simple linear dataset, the error goes up and the prediction results are wrong.
import random
from math import exp,pow,tanh
def random_weight():
return random.random()
def sigmoid(x):
return 1.0 / (1.0 + exp(-x))
def sigmoid_drv(x):
return sigmoid(x)*(1.0-sigmoid(x))
w11_I = random_weight()
w12_I = random_weight()
w21_I = random_weight()
w22_I = random_weight()
w31_I = random_weight()
w32_I = random_weight()
w11_II = random_weight()
w12_II = random_weight()
w13_II = random_weight()
b_I = 1
activation = sigmoid
activation_drv = sigmoid_drv
def predict(x1,x2):
global w11_I,w12_I,w21_I,w22_I,w31_I,w32_I,w11_II,w12_II,w13_II,b_I
a1_I = w11_I*x1 + w12_I*x2 + b_I
z1_I = activation(a1_I)
a2_I = w21_I*x1 + w22_I*x2 + b_I
z2_I = activation(a2_I)
a3_I = w31_I*x1 + w32_I*x2 + b_I
z3_I = activation(a3_I)
a1_II = w11_II*z1_I + w12_II*z2_I + w13_II*z3_I
z1_II = activation(a1_II)
return a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II
def train(x1,x2,y,alpha):
global w11_I,w12_I,w21_I,w22_I,w31_I,w32_I,w11_II,w12_II,w13_II,b_I
a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II = predict(x1,x2)
error = 0.5 * pow(y-z1_II,2)
delta = y-z1_II * activation_drv(a1_II)
w11_II += delta * z1_I * alpha
w12_II += delta * z2_I * alpha
w13_II += delta * z3_I * alpha
w11_I += delta * w11_II * activation_drv(a1_I) * x1 * alpha
w12_I += delta * w11_II * activation_drv(a1_I) * x2 * alpha
w21_I += delta * w12_II * activation_drv(a2_I) * x1 * alpha
w22_I += delta * w12_II * activation_drv(a2_I) * x2 * alpha
w31_I += delta * w13_II * activation_drv(a3_I) * x1 * alpha
w32_I += delta * w13_II * activation_drv(a3_I) * x2 * alpha
b_I += (delta * w11_II * activation_drv(a1_I) + delta * w12_II * activation_drv(a2_I) + delta * w13_II * activation_drv(a3_I)) * alpha
return error
data = [
[0,0,0],
[0,1,1],
[1,0,1],
[1,1,1],
]
for i in range(0,10):
err = 0
dt = data[::]
random.shuffle(dt)
for j in dt:
err += train(j[0],j[1],j[2],0.01)
print(err)
print("-"*30)
for j in data:
_, _, _, _, _, _, _, res = predict(j[0],j[1])
print(j[0],",",j[1],"=",res)
For example, the result of the code is:
0.363894453262
0.366966815948
0.366406041572
0.369982058232
0.36988850637
0.375869833099
0.378106172616
0.380456639936
0.37901554717
0.383723920259
------------------------------
(0, ',', 0, '=', 0.8439871540493414)
(0, ',', 1, '=', 0.861714406183168)
(1, ',', 0, '=', 0.8515477541104413)
(1, ',', 1, '=', 0.8676931366534011)
---------------- UPDATE ----------------
I changed the code to this:
import random
from math import exp,pow
def random_weight():
return random.random()
def sigmoid(x):
return 1.0 / (1.0 + exp(-x))
def sigmoid_drv(x):
return sigmoid(x)*(1.0-sigmoid(x))
w11_I = random_weight()
w12_I = random_weight()
w21_I = random_weight()
w22_I = random_weight()
w31_I = random_weight()
w32_I = random_weight()
w11_II = random_weight()
w12_II = random_weight()
w13_II = random_weight()
b_I = random_weight()
activation = sigmoid
activation_drv = sigmoid_drv
def predict(x1,x2):
global w11_I,w12_I,w21_I,w22_I,w31_I,w32_I,w11_II,w12_II,w13_II,b_I
a1_I = w11_I*x1 + w12_I*x2 + b_I
z1_I = activation(a1_I)
a2_I = w21_I*x1 + w22_I*x2 + b_I
z2_I = activation(a2_I)
a3_I = w31_I*x1 + w32_I*x2 + b_I
z3_I = activation(a3_I)
a1_II = w11_II*z1_I + w12_II*z2_I + w13_II*z3_I
z1_II = activation(a1_II)
return a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II
def train(x1,x2,y,alpha):
global w11_I,w12_I,w21_I,w22_I,w31_I,w32_I,w11_II,w12_II,w13_II,b_I
a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II = predict(x1,x2)
error = 0.5 * pow(z1_II-y,2)
delta = z1_II-y * activation_drv(a1_II)
d_w11_II = delta * z1_I * alpha
d_w12_II = delta * z2_I * alpha
d_w13_II = delta * z3_I * alpha
d_w11_I = delta * w11_II * activation_drv(a1_I) * x1 * alpha
d_w12_I = delta * w11_II * activation_drv(a1_I) * x2 * alpha
d_w21_I = delta * w12_II * activation_drv(a2_I) * x1 * alpha
d_w22_I = delta * w12_II * activation_drv(a2_I) * x2 * alpha
d_w31_I = delta * w13_II * activation_drv(a3_I) * x1 * alpha
d_w32_I = delta * w13_II * activation_drv(a3_I) * x2 * alpha
d_b_I = (delta * w11_II * activation_drv(a1_I) + delta * w12_II * activation_drv(a2_I) + delta * w13_II * activation_drv(a3_I)) * alpha
w11_II -= d_w11_II
w12_II -= d_w12_II
w13_II -= d_w13_II
w11_I -= d_w11_I
w12_I -= d_w12_I
w21_I -= d_w21_I
w22_I -= d_w22_I
w31_I -= d_w31_I
w32_I -= d_w32_I
b_I -= d_b_I
return error
data = [
[0,0,0],
[0,1,0],
[1,0,0],
[1,1,1],
]
for i in range(0,10):
err = 0
dt = data[::]
random.shuffle(dt)
for j in dt:
err += train(j[0],j[1],j[2],0.01)
print(err)
print("-"*30)
for j in data:
_, _, _, _, _, _, _, res = predict(j[0],j[1])
print(j[0],",",j[1],"=",res)
I subtract the weight deltas from the weights now. The error of the network decreases, but the predictions are still wrong.
The result of the above code:
0.7793443881847488
0.7577581315356949
0.7432698222320477
0.7316129719356839
0.7160385688813552
0.6943522088277978
0.6862277294774705
0.6656984495700775
0.6584361784187711
0.6410006126876817
------------------------------
0 , 0 = 0.6049212721996029
0 , 1 = 0.6227402202339664
1 , 0 = 0.6139758543180651
1 , 1 = 0.6293581473456563
One possible error is in the calculation of delta:
delta = z1_II-y * activation_drv(a1_II)
Add parentheses and change it to:
delta = (z1_II-y) * activation_drv(a1_II)
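A tiny illustration of the precedence issue, with made-up numbers:
z1_II, y, d = 0.8, 1.0, 0.2
print(z1_II - y * d)     # 0.6   -> parsed as z1_II - (y * d), not what was intended
print((z1_II - y) * d)   # about -0.04, the intended (z1_II - y) * activation_drv(a1_II)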
I found the problem: the sigmoid function was not a good fit for this network. I changed it to tanh and the prediction results are correct now.
The final code:
import random
from math import exp,pow
class ANN:
def random_weight(self):
return random.random()
def sigmoid(self,x):
return 1.0 / (1.0 + exp(-x))
def sigmoid_drv(self,x):
return self.sigmoid(x)*(1.0-self.sigmoid(x))
def tanh(self, x):
return (exp(x) - exp(-x)) / (exp(x) + exp(-x))
def tanh_drv(self,x):
return 1 - pow(self.tanh(x),2)
def __init__(self):
self.w11_I = self.random_weight()
self.w12_I = self.random_weight()
self.w21_I = self.random_weight()
self.w22_I = self.random_weight()
self.w31_I = self.random_weight()
self.w32_I = self.random_weight()
self.w11_II = self.random_weight()
self.w12_II = self.random_weight()
self.w13_II = self.random_weight()
self.b_I = self.random_weight()
self.activation = self.tanh
self.activation_drv = self.tanh_drv
def predict(self,x1,x2):
a1_I = self.w11_I*x1 + self.w12_I*x2 + self.b_I
z1_I = self.activation(a1_I)
a2_I = self.w21_I*x1 + self.w22_I*x2 + self.b_I
z2_I = self.activation(a2_I)
a3_I = self.w31_I*x1 + self.w32_I*x2 + self.b_I
z3_I = self.activation(a3_I)
a1_II = self.w11_II*z1_I + self.w12_II*z2_I + self.w13_II*z3_I
z1_II = self.activation(a1_II)
return a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II
def train(self,x1,x2,y,alpha):
a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II = self.predict(x1,x2)
error = 0.5 * pow(z1_II-y,2)
delta = (z1_II-y) * self.activation_drv(a1_II)
d_w11_II = delta * z1_I * alpha
d_w12_II = delta * z2_I * alpha
d_w13_II = delta * z3_I * alpha
d_w11_I = delta * self.w11_II * self.activation_drv(a1_I) * x1 * alpha
d_w12_I = delta * self.w11_II * self.activation_drv(a1_I) * x2 * alpha
d_w21_I = delta * self.w12_II * self.activation_drv(a2_I) * x1 * alpha
d_w22_I = delta * self.w12_II * self.activation_drv(a2_I) * x2 * alpha
d_w31_I = delta * self.w13_II * self.activation_drv(a3_I) * x1 * alpha
d_w32_I = delta * self.w13_II * self.activation_drv(a3_I) * x2 * alpha
d_b_I = (delta * self.w11_II * self.activation_drv(a1_I) + delta * self.w12_II * self.activation_drv(a2_I) + delta * self.w13_II * self.activation_drv(a3_I)) * alpha
self.w11_II -= d_w11_II
self.w12_II -= d_w12_II
self.w13_II -= d_w13_II
self.w11_I -= d_w11_I
self.w12_I -= d_w12_I
self.w21_I -= d_w21_I
self.w22_I -= d_w22_I
self.w31_I -= d_w31_I
self.w32_I -= d_w32_I
self.b_I -= d_b_I
return error
model = ANN()
data = [
[0,0,0],
[0,1,0],
[1,0,0],
[1,1,1],
]
for i in range(0,200):
err = 0
dt = data[::]
random.shuffle(dt)
for j in dt:
err += model.train(j[0],j[1],j[2],0.1)
print(err)
print("-"*30)
for j in data:
_, _, _, _, _, _, _, res = model.predict(j[0],j[1])
print(j[0],",",j[1],"=",res)
Result of the code:
...
0.1978539306282795
0.19794670251861882
0.19745074826953185
0.19529942727878868
0.19779970636626873
0.19661596298810918
------------------------------
0 , 0 = -0.24217968147818447
0 , 1 = 0.236033934015224
1 , 0 = 0.24457439328909888
1 , 1 = 0.5919949310028919
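One small robustness note on the final code (an assumption on my part, not something from the thread): the hand-rolled tanh divides two exponentials and overflows for large |x|, whereas the standard-library version does not:
from math import tanh
print(tanh(1000.0))   # 1.0
# math.exp(1000.0) raises OverflowError, so (exp(x) - exp(-x)) / (exp(x) + exp(-x)) fails there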

Understanding timesteps in scipy.integrate.odeint

I am trying to solve a PDE using odeint and the method of lines. My code is definitely wrong, and I'm trying to figure out where it is going wrong.
I am calling the ODE solver with odeint(odefunc, y0, tspan), where tspan = np.linspace(0.0, 0.5, 5) and y0 = 1.0*np.ones(3).
I tried printing t within odefunc and am confused by the output. Despite the fact that I am solving up to t = 0.5, the last t-value printed is 0.015081203121127767. The number of outputs matches tspan, but I cannot see how it could possibly be solving up to t = 0.5 when the last time seen inside the ODE function is 0.015. What am I missing?
My DE is time dependent, which makes it very hard to figure out where things go wrong, because I don't seem to be seeing the times where everything fails.
ETA: this is failing, and when running it without some of the irrelevant stuff I get the warning ODEintWarning: Excess work done on this call (perhaps wrong Dfun type). Run with full_output = 1 to get quantitative information., which I'm assuming is part of the issue, but it doesn't appear to halt the code.
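As background, a separate toy example (not your MWE): the t values printed inside the RHS are the solver's internal trial times, which LSODA chooses adaptively; they need not line up with the requested output times, and they stop advancing if the integrator gives up early, as the "excess work" warning suggests here.
import numpy as np
from scipy.integrate import odeint

seen = []
def rhs(y, t):
    seen.append(t)   # record every internal evaluation time
    return -y

sol = odeint(rhs, 1.0, np.linspace(0.0, 0.5, 5))
print(len(seen), min(seen), max(seen))   # many more RHS evaluations than the 5 requested output times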
MWE
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
import math
import sys
plt.interactive(False)
sigma = 2320
rho = 1000
gravity = 9.81 # [m/s^2]
g = gravity*3600*3600 # [m/hour^2]
S = 0.01
settlingVelocity = 0.02 # [m/s]
ws = settlingVelocity*3600 # [m/hour]
n = 0.04 # [SI]
J = 400 # [Ws/m]
k = 0.02
Cstar = 0.2 * sigma # [kg/m^3]
W = 2 # [m]
D0 = 1.2
Lw = 20
L = 100
tend = 0.5 # in hours
tspan = np.linspace(0.0, tend, 5)
def d(t): # metres
if t < 50: # hours
return 0.5
else:
return 0.05
def Q(t):
return 3600 * (math.sqrt(S)/n)*((W*d(t))**(5/3))/((2*d(t) + W)**(2/3))
def h(t):
return d(t)/2
def beta(t):
return (sigma - rho) * g * h(t)/sigma
def Omega(t):
return rho * g * S * Q(t) # [W/m]
def PsiTime(t):
return rho * g * Q(t) * (D0 - d(t))/(Lw)
N = 10
X = np.linspace(0, L, N)
delX = L/ (N-1)
def odefunc(y, t):
def zetaEh(t):
return k * (PsiTime(t) + Omega(t)) / (J + beta(t))
def zetaEW(t):
return (2*d(t)/(W + 2*d(t))) * k * Omega(t)/(J + beta(t))
def zetaR(t):
return (W/(W + 2*d(t))) * k*Omega(t)/(beta(t))
def zetaEF(t,i):
return (W/(W + 2*d(t))) * k * Omega(t) / (J + beta(t))
C = y[:N]
M = y[N:]
print("time: ", t)
dCdt = np.zeros(X.shape)
dMdt = np.zeros(X.shape)
dCdt[0] = ( # forward difference for dCdx
-Q(t) / (W*d(t)) * (C[1] - C[0]) / delX
+ (zetaEh(t) / (W * d(t))) * ((Cstar - C[0]) / Cstar)
- (ws * C[0] * (beta(t))) / (d(t) * (J + beta(t)))
)
dMdt[0] = 0
# gully channel
for i in range (1, N-1): # central difference
if M[i] + W *C[i] * ws - zetaR(t) * (Cstar - C[i]) / Cstar < 0:
reMass = M[i] + W * C[i] * ws
dCdt[i] = (
-Q(t) / (W*d(t)) * (C[i+1] - C[i - 1]) / (2*delX)
+ 1 / (W * d(t)) * ((zetaEW(t) + zetaEF(t,i)) * (Cstar - C[i]) / Cstar
+ reMass * (1 - (beta(t))/ (J + beta(t))))
- C[i] * ws/d(t)
)
dMdt[i] = -M[i]
else:
dCdt[i] = (
-Q(t) / (W*d(t)) * (C[i+1] - C[i - 1]) / (2*delX)
+ 1 / (W * d(t)) * (zetaEW(t) + zetaR(t)) * (Cstar - C[i]) / Cstar
- C[i] * ws / d(t)
)
dMdt[i] = W * C[i] * ws - zetaR(t) * (Cstar - C[i]) / Cstar
# Final node - backward difference
if M[N-1] + W * C[N-1] * ws - zetaR(t) * (Cstar - C[N-1]) / Cstar < 0:
reMass = M[N-1] + W * C[N-1] * ws
dCdt[N-1] = (
-Q(t) / (W * d(t)) * (C[N-1] - C[N-2]) / delX
+ 1 / (W * d(t)) * ((zetaEW(t) + zetaEF(t, i)) * (Cstar - C[N-1]) / Cstar
+ reMass * (1 - (beta(t)) / (J + beta(t))))
- C[i] * ws / d(t)
)
dMdt[N-1] = -M[N-1]
else:
dCdt[N-1] = (
-Q(t) / (W * d(t)) * (C[N-2] - C[N - 1]) / delX
+ 1 / (W * d(t)) * (zetaEW(t) + zetaR(t)) * (Cstar - C[N-1]) / Cstar
- C[N-1] * ws / d(t)
)
dMdt[N-1] = W * C[N-1] * ws - zetaR(t) * (Cstar - C[N-1]) / Cstar
dydt = np.ravel([dCdt, dMdt])
return dydt
init_C = 0.0 * np.ones(X.shape)
init_M = 0.0 * np.ones(X.shape)
init= np.ravel([init_C, init_M])
sol = odeint(odefunc, init, tspan)
conc = sol[:, :N]
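Following the warning's own suggestion, full_output=1 returns an info dictionary that shows how far the integrator actually got (a sketch using the same call as above):
sol, info = odeint(odefunc, init, tspan, full_output=1)
print(info['tcur'])   # internal time reached for each requested output time
print(info['hu'])     # step sizes successfully used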

Why doesn't my handmade numpy neural network learn?

As an exercise I was building a neural network in numpy from scratch.
For simplicity I wanted to use it to solve the XOR problem. I derived all the equations and put everything together, but it looks like my network doesn't learn. I've spent some time trying to spot the mistake, but without success. Maybe you can notice something I'm missing here?
X = [(0,0), (1,0), (0,1), (1,1)]
Y = [0, 1, 1, 0]
w1 = 2 * np.random.random(size=(2,3)) - 1
w2 = 2 * np.random.random(size=(3,1)) - 1
b1 = 2 * np.random.random(size=(1,3)) - 1
b2 = 2 * np.random.random(size=(1,1)) - 1
def sigmoid(x):
return 1./(1 + np.exp(-x))
def dsigmoid(y):
return y*(1-y)
N = 1000
error = np.zeros((N,1))
for n in range(N):
Dw_1 = np.zeros((2,3))
Dw_2 = np.zeros((3,1))
Db_1 = np.zeros((1,3))
Db_2 = np.zeros((1,1))
for i in range(len(X)): # iterate over all examples
x = np.array(X[i])
y = np.array(Y[i])
# Forward pass, 1st layer
act1 = np.dot(w1.T, x) + b1
lay1 = sigmoid(act1)
# Forward pass, 2nd layer
act2 = np.dot(w2.T, lay1.T) + b2
lay2 = sigmoid(act2)
# Computing error
E = 0.5*(lay2 - y)**2
error[n] += E[0]
# Backprop, 2nd layer
delta_l2 = (y-lay2) * dsigmoid(lay2)
corr_w2 = (delta_l2 * lay1).T
corr_b2 = delta_l2 * 1
# Backprop, 1st layer
delta_l1 = np.dot(w2, delta_l2) * dsigmoid(lay1).T
corr_w1 = np.outer(x, delta_l1)
corr_b1 = (delta_l1 * 1).T
Dw_2 += corr_w2
Dw_1 += corr_w1
Db_2 += corr_b2
Db_1 += corr_b1
if n % 1000 == 0:
print y, lay2,
if n % 1000 == 0:
print
w2 = w2 - eta * Dw_2
b2 = b2 - eta * Db_2
w1 = w1 - eta * Dw_1
b1 = b1 - eta * Db_1
error[n] /= len(X)
There were small mistakes in it; I hope this helps you:
import numpy as np
import matplotlib.pyplot as plt
X = [(0, 0), (1, 0), (0, 1), (1, 1)]
Y = [0, 1, 1, 0]
eta = 0.7
w1 = 2 * np.random.random(size=(2, 3)) - 1
w2 = 2 * np.random.random(size=(3, 1)) - 1
b1 = 2 * np.random.random(size=(1, 3)) - 1
b2 = 2 * np.random.random(size=(1, 1)) - 1
def sigmoid(x):
return 1. / (1 + np.exp(-x))
def dsigmoid(y):
return y * (1 - y)
N = 2000
error = []
for n in range(N):
Dw_1 = np.zeros((2, 3))
Dw_2 = np.zeros((3, 1))
Db_1 = np.zeros((1, 3))
Db_2 = np.zeros((1, 1))
tmp_error = 0
for i in range(len(X)): # iterate over all examples
x = np.array(X[i]).reshape(1, 2)
y = np.array(Y[i])
layer1 = sigmoid(np.dot(x, w1) + b1)
output = sigmoid(np.dot(layer1, w2) + b2)
tmp_error += np.mean(np.abs(output - y))
d_w2 = np.dot(layer1.T, ((output - y) * dsigmoid(output)))
d_b2 = np.dot(1, ((output - y) * dsigmoid(output)))
d_w1 = np.dot(x.T, (np.dot((output - y) * dsigmoid(output), w2.T) * dsigmoid(layer1)))
d_b1 = np.dot(1, (np.dot((output - y) * dsigmoid(output), w2.T) * dsigmoid(layer1)))
Dw_2 += d_w2
Dw_1 += d_w1
Db_1 += d_b1
Db_2 += d_b2
w2 = w2 - eta * Dw_2
w1 = w1 - eta * Dw_1
b1 = b1 - eta * Db_1
b2 = b2 - eta * Db_2
error.append(tmp_error)
error = np.array(error)
print(error.shape)
plt.plot(error)
plt.show()
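A hypothetical check to append after training, running the four XOR inputs through the learned weights:
for x_i, y_i in zip(X, Y):
    layer1 = sigmoid(np.dot(np.array(x_i).reshape(1, 2), w1) + b1)
    output = sigmoid(np.dot(layer1, w2) + b2)
    print(x_i, y_i, round(output.item(), 3))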
