Python neural network does not train

I have a simple neural network with 2 input neurons, 3 hidden neurons and 1 output neuron; the hidden layer has a bias.
I am not using matrix operations to do the feed-forward and backpropagation. When I run the training function on a simple linear dataset, the error increases and the prediction results are wrong.
import random
from math import exp, pow, tanh

def random_weight():
    return random.random()

def sigmoid(x):
    return 1.0 / (1.0 + exp(-x))

def sigmoid_drv(x):
    return sigmoid(x) * (1.0 - sigmoid(x))

w11_I = random_weight()
w12_I = random_weight()
w21_I = random_weight()
w22_I = random_weight()
w31_I = random_weight()
w32_I = random_weight()
w11_II = random_weight()
w12_II = random_weight()
w13_II = random_weight()
b_I = 1

activation = sigmoid
activation_drv = sigmoid_drv

def predict(x1, x2):
    global w11_I, w12_I, w21_I, w22_I, w31_I, w32_I, w11_II, w12_II, w13_II, b_I
    a1_I = w11_I*x1 + w12_I*x2 + b_I
    z1_I = activation(a1_I)
    a2_I = w21_I*x1 + w22_I*x2 + b_I
    z2_I = activation(a2_I)
    a3_I = w31_I*x1 + w32_I*x2 + b_I
    z3_I = activation(a3_I)
    a1_II = w11_II*z1_I + w12_II*z2_I + w13_II*z3_I
    z1_II = activation(a1_II)
    return a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II

def train(x1, x2, y, alpha):
    global w11_I, w12_I, w21_I, w22_I, w31_I, w32_I, w11_II, w12_II, w13_II, b_I
    a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II = predict(x1, x2)
    error = 0.5 * pow(y-z1_II, 2)
    delta = y-z1_II * activation_drv(a1_II)
    w11_II += delta * z1_I * alpha
    w12_II += delta * z2_I * alpha
    w13_II += delta * z3_I * alpha
    w11_I += delta * w11_II * activation_drv(a1_I) * x1 * alpha
    w12_I += delta * w11_II * activation_drv(a1_I) * x2 * alpha
    w21_I += delta * w12_II * activation_drv(a2_I) * x1 * alpha
    w22_I += delta * w12_II * activation_drv(a2_I) * x2 * alpha
    w31_I += delta * w13_II * activation_drv(a3_I) * x1 * alpha
    w32_I += delta * w13_II * activation_drv(a3_I) * x2 * alpha
    b_I += (delta * w11_II * activation_drv(a1_I) + delta * w12_II * activation_drv(a2_I) + delta * w13_II * activation_drv(a3_I)) * alpha
    return error

data = [
    [0, 0, 0],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 1],
]

for i in range(0, 10):
    err = 0
    dt = data[::]
    random.shuffle(dt)
    for j in dt:
        err += train(j[0], j[1], j[2], 0.01)
    print(err)

print("-" * 30)

for j in data:
    _, _, _, _, _, _, _, res = predict(j[0], j[1])
    print(j[0], ",", j[1], "=", res)
For example, the output of the code is:
0.363894453262
0.366966815948
0.366406041572
0.369982058232
0.36988850637
0.375869833099
0.378106172616
0.380456639936
0.37901554717
0.383723920259
------------------------------
(0, ',', 0, '=', 0.8439871540493414)
(0, ',', 1, '=', 0.861714406183168)
(1, ',', 0, '=', 0.8515477541104413)
(1, ',', 1, '=', 0.8676931366534011)
---------------- UPDATE ----------------
I changed the code to this:
import random
from math import exp, pow

def random_weight():
    return random.random()

def sigmoid(x):
    return 1.0 / (1.0 + exp(-x))

def sigmoid_drv(x):
    return sigmoid(x) * (1.0 - sigmoid(x))

w11_I = random_weight()
w12_I = random_weight()
w21_I = random_weight()
w22_I = random_weight()
w31_I = random_weight()
w32_I = random_weight()
w11_II = random_weight()
w12_II = random_weight()
w13_II = random_weight()
b_I = random_weight()

activation = sigmoid
activation_drv = sigmoid_drv

def predict(x1, x2):
    global w11_I, w12_I, w21_I, w22_I, w31_I, w32_I, w11_II, w12_II, w13_II, b_I
    a1_I = w11_I*x1 + w12_I*x2 + b_I
    z1_I = activation(a1_I)
    a2_I = w21_I*x1 + w22_I*x2 + b_I
    z2_I = activation(a2_I)
    a3_I = w31_I*x1 + w32_I*x2 + b_I
    z3_I = activation(a3_I)
    a1_II = w11_II*z1_I + w12_II*z2_I + w13_II*z3_I
    z1_II = activation(a1_II)
    return a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II

def train(x1, x2, y, alpha):
    global w11_I, w12_I, w21_I, w22_I, w31_I, w32_I, w11_II, w12_II, w13_II, b_I
    a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II = predict(x1, x2)
    error = 0.5 * pow(z1_II-y, 2)
    delta = z1_II-y * activation_drv(a1_II)
    d_w11_II = delta * z1_I * alpha
    d_w12_II = delta * z2_I * alpha
    d_w13_II = delta * z3_I * alpha
    d_w11_I = delta * w11_II * activation_drv(a1_I) * x1 * alpha
    d_w12_I = delta * w11_II * activation_drv(a1_I) * x2 * alpha
    d_w21_I = delta * w12_II * activation_drv(a2_I) * x1 * alpha
    d_w22_I = delta * w12_II * activation_drv(a2_I) * x2 * alpha
    d_w31_I = delta * w13_II * activation_drv(a3_I) * x1 * alpha
    d_w32_I = delta * w13_II * activation_drv(a3_I) * x2 * alpha
    d_b_I = (delta * w11_II * activation_drv(a1_I) + delta * w12_II * activation_drv(a2_I) + delta * w13_II * activation_drv(a3_I)) * alpha
    w11_II -= d_w11_II
    w12_II -= d_w12_II
    w13_II -= d_w13_II
    w11_I -= d_w11_I
    w12_I -= d_w12_I
    w21_I -= d_w21_I
    w22_I -= d_w22_I
    w31_I -= d_w31_I
    w32_I -= d_w32_I
    b_I -= d_b_I
    return error

data = [
    [0, 0, 0],
    [0, 1, 0],
    [1, 0, 0],
    [1, 1, 1],
]

for i in range(0, 10):
    err = 0
    dt = data[::]
    random.shuffle(dt)
    for j in dt:
        err += train(j[0], j[1], j[2], 0.01)
    print(err)

print("-" * 30)

for j in data:
    _, _, _, _, _, _, _, res = predict(j[0], j[1])
    print(j[0], ",", j[1], "=", res)
I now subtract the weight deltas from the weights. The network error decreases, but the predictions are still wrong.
The output of the above code:
0.7793443881847488
0.7577581315356949
0.7432698222320477
0.7316129719356839
0.7160385688813552
0.6943522088277978
0.6862277294774705
0.6656984495700775
0.6584361784187711
0.6410006126876817
------------------------------
0 , 0 = 0.6049212721996029
0 , 1 = 0.6227402202339664
1 , 0 = 0.6139758543180651
1 , 1 = 0.6293581473456563

One possible error is in the calculation of delta:
delta = z1_II-y * activation_drv(a1_II)
Add parentheses and change this to:
delta = (z1_II-y) * activation_drv(a1_II)
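Without the parentheses, operator precedence makes Python multiply before subtracting, so the code computes z1_II - (y * activation_drv(a1_II)) instead of the intended expression. A tiny illustration with made-up numbers:

z1_II, y, drv = 0.8, 1.0, 0.15
print(z1_II - y * drv)    # roughly 0.65, parsed as z1_II - (y * drv)
print((z1_II - y) * drv)  # roughly -0.03, the intended (z1_II - y) * drv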

I found the problem:
The sigmoid activation was not working well for this network. I changed it to tanh, and the prediction results are correct now.
The final code:
import random
from math import exp, pow

class ANN:
    def random_weight(self):
        return random.random()

    def sigmoid(self, x):
        return 1.0 / (1.0 + exp(-x))

    def sigmoid_drv(self, x):
        return self.sigmoid(x) * (1.0 - self.sigmoid(x))

    def tanh(self, x):
        return (exp(x) - exp(-x)) / (exp(x) + exp(-x))

    def tanh_drv(self, x):
        return 1 - pow(self.tanh(x), 2)

    def __init__(self):
        self.w11_I = self.random_weight()
        self.w12_I = self.random_weight()
        self.w21_I = self.random_weight()
        self.w22_I = self.random_weight()
        self.w31_I = self.random_weight()
        self.w32_I = self.random_weight()
        self.w11_II = self.random_weight()
        self.w12_II = self.random_weight()
        self.w13_II = self.random_weight()
        self.b_I = self.random_weight()
        self.activation = self.tanh
        self.activation_drv = self.tanh_drv

    def predict(self, x1, x2):
        a1_I = self.w11_I*x1 + self.w12_I*x2 + self.b_I
        z1_I = self.activation(a1_I)
        a2_I = self.w21_I*x1 + self.w22_I*x2 + self.b_I
        z2_I = self.activation(a2_I)
        a3_I = self.w31_I*x1 + self.w32_I*x2 + self.b_I
        z3_I = self.activation(a3_I)
        a1_II = self.w11_II*z1_I + self.w12_II*z2_I + self.w13_II*z3_I
        z1_II = self.activation(a1_II)
        return a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II

    def train(self, x1, x2, y, alpha):
        a1_I, z1_I, a2_I, z2_I, a3_I, z3_I, a1_II, z1_II = self.predict(x1, x2)
        error = 0.5 * pow(z1_II-y, 2)
        delta = (z1_II-y) * self.activation_drv(a1_II)
        d_w11_II = delta * z1_I * alpha
        d_w12_II = delta * z2_I * alpha
        d_w13_II = delta * z3_I * alpha
        d_w11_I = delta * self.w11_II * self.activation_drv(a1_I) * x1 * alpha
        d_w12_I = delta * self.w11_II * self.activation_drv(a1_I) * x2 * alpha
        d_w21_I = delta * self.w12_II * self.activation_drv(a2_I) * x1 * alpha
        d_w22_I = delta * self.w12_II * self.activation_drv(a2_I) * x2 * alpha
        d_w31_I = delta * self.w13_II * self.activation_drv(a3_I) * x1 * alpha
        d_w32_I = delta * self.w13_II * self.activation_drv(a3_I) * x2 * alpha
        d_b_I = (delta * self.w11_II * self.activation_drv(a1_I) + delta * self.w12_II * self.activation_drv(a2_I) + delta * self.w13_II * self.activation_drv(a3_I)) * alpha
        self.w11_II -= d_w11_II
        self.w12_II -= d_w12_II
        self.w13_II -= d_w13_II
        self.w11_I -= d_w11_I
        self.w12_I -= d_w12_I
        self.w21_I -= d_w21_I
        self.w22_I -= d_w22_I
        self.w31_I -= d_w31_I
        self.w32_I -= d_w32_I
        self.b_I -= d_b_I
        return error

model = ANN()
data = [
    [0, 0, 0],
    [0, 1, 0],
    [1, 0, 0],
    [1, 1, 1],
]

for i in range(0, 200):
    err = 0
    dt = data[::]
    random.shuffle(dt)
    for j in dt:
        err += model.train(j[0], j[1], j[2], 0.1)
    print(err)

print("-" * 30)

for j in data:
    _, _, _, _, _, _, _, res = model.predict(j[0], j[1])
    print(j[0], ",", j[1], "=", res)
The output of the code:
...
0.1978539306282795
0.19794670251861882
0.19745074826953185
0.19529942727878868
0.19779970636626873
0.19661596298810918
------------------------------
0 , 0 = -0.24217968147818447
0 , 1 = 0.236033934015224
1 , 0 = 0.24457439328909888
1 , 1 = 0.5919949310028919
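Note that with 0/1 targets the raw tanh outputs above are not exactly 0 or 1; to read them as class labels you would threshold the output, for example at 0.5:

# Threshold the continuous output to get a 0/1 class label.
for x1, x2, y in data:
    _, _, _, _, _, _, _, res = model.predict(x1, x2)
    label = 1 if res > 0.5 else 0
    print(x1, x2, "->", label, "(target:", y, ")")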


Numerical instability in python

I am trying to make several plots for a project of mine using the following code:
import pprint
import scipy
import scipy.linalg  # SciPy Linear Algebra Library
import numpy as np
from scipy.linalg import lu, lu_factor, lu_solve
from scipy.integrate import quad
import matplotlib.pyplot as plt

# Solving the equations for the Prandtl case
K = 100
alpha = 0.1
visc = 5
diff = 5
N = 0.01
L = 5000
height = 250
subdivisions = 100
tick = 10
points = np.arange(0, L/2 + tick, tick)

def H(y):
    return (height * (1 + np.cos(2 * np.pi * y/L)))

def Bsfc(y):
    return 0.1

final_system = []
b = []
for q in range(-K, K+1):
    equation1 = []
    equation2 = []
    equation3 = []
    Aki = []
    Cki = []
    Dki = []
    for k in range(-K, K+1):
        R = 2 * N**2 * np.cos(alpha)**2 / (visc * diff) * (k * np.pi / L)**2
        Q = N**2 * np.sin(alpha)**2 / (3 * visc * diff)
        S1 = abs(R + np.sqrt(Q**3 + R**2))**(1/3)
        S2 = - abs(np.sqrt(Q**3 + R**2) - R)**(1/3)
        phi = np.sqrt(S1**2 + S2**2 - S1*S2)
        Lk = np.arccos(- (S1 + S2) / (2 * phi))
        m1 = - np.sqrt(S1 + S2)
        m2 = - np.sqrt(phi) * np.exp(1j * Lk/2)
        m3 = m2.conjugate()

        def f1r(y):
            return (np.exp(m1 * H(y)) * np.cos(2 * (q - k) * np.pi * y / L)).real
        def f1i(y):
            return (np.exp(m1 * H(y)) * np.cos(2 * (q - k) * np.pi * y / L)).imag
        gamma1 = 2/L * (quad(f1r, 0, L/2, limit=subdivisions)[0] + quad(f1i, 0, L/2, limit=subdivisions)[0]*1j)

        def f2r(y):
            return (np.exp(m2 * H(y)) * np.cos(2 * (q - k) * np.pi * y / L)).real
        def f2i(y):
            return (np.exp(m2 * H(y)) * np.cos(2 * (q - k) * np.pi * y / L)).imag
        gamma2 = 2/L * (quad(f2r, 0, L/2, limit=subdivisions)[0] + quad(f2i, 0, L/2, limit=subdivisions)[0]*1j)

        if k == 0:
            equation1.append(2 * gamma2.real)
            Cki.append(k)
            equation1.append(-2 * gamma2.imag)
            Dki.append(k)
        else:
            equation1.append(gamma1)
            Aki.append(k)
            equation1.append(2 * gamma2.real)
            Cki.append(k)
            equation1.append(-2 * gamma2.imag)
            Dki.append(k)

        if q != 0:
            if k == 0:
                equation2.append(0)
                equation2.append(0)
            else:
                equation2.append(k * gamma1 / (m1**3))
                equation2.append(2 * k * (gamma2 / (m2**3)).real)
                equation2.append(-2 * k * (gamma2 / (m2**3)).imag)

        if k == 0:
            equation3.append(2 * (m2**2 * gamma2).real)
            equation3.append(-2 * (m2**2 * gamma2).imag)
        else:
            equation3.append(m1**2 * gamma1)
            equation3.append(2 * (m2**2 * gamma2).real)
            equation3.append(-2 * (m2**2 * gamma2).imag)

    final_system.append(equation1)

    def f4r(y):
        return (Bsfc(y) * np.cos(2 * q * np.pi * y / L)).real
    def f4i(y):
        return (Bsfc(y) * np.cos(2 * q * np.pi * y / L)).imag
    b.append(2/L * (quad(f4r, 0, L/2, limit=subdivisions)[0] + quad(f4i, 0, L/2, limit=subdivisions)[0]*1j))

    if q != 0:
        final_system.append(equation2)
        b.append(0)
    final_system.append(equation3)
    b.append(0)

final_system = np.array(final_system)
b = np.array(b)
#LU solver
P, Ls, U = scipy.linalg.lu(final_system)
Bl = np.linalg.inv(P) @ b
Z = np.linalg.solve(Ls, Bl)
X = np.linalg.solve(U, Z)
print(np.allclose(final_system @ X, b))
#Getting the values for Ak, Ck and Dk
strings = []
for k in range(-K, K+1):
    if k != 0:
        strings.append('A')
    strings.append('R')
    strings.append('I')

Ak = []
Rk = []
Ik = []
for k in range(0, len(X)):
    if 'A' in strings[k]:
        Ak.append(X[k])
    if 'R' in strings[k]:
        Rk.append(X[k])
    if 'I' in strings[k]:
        Ik.append(X[k])

Ck = []
for k in range(0, len(Rk)):
    Ck.append(Rk[k] + Ik[k] * 1j)
Ck = np.array(Ck)
Dk = Ck.conjugate()
Ak = np.array(Ak)

#Getting the Buoyancy value
z = np.arange(0, 2010, 10)
y = np.arange(-L, L+10, 10)
Y, Z = np.meshgrid(y, z)
B = np.ones_like(Y)*[0]
for k in range(-K, K+1):
    R = 2 * N**2 * np.cos(alpha)**2 / (visc * diff) * (k * np.pi / L)**2
    Q = N**2 * np.sin(alpha)**2 / (3 * visc * diff)
    S1 = abs(R + np.sqrt(Q**3 + R**2))**(1/3)
    S2 = - abs(np.sqrt(Q**3 + R**2) - R)**(1/3)
    phi = np.sqrt(S1**2 + S2**2 - S1*S2)
    Lk = np.arccos(- (S1 + S2) / (2 * phi))
    m1 = - np.sqrt(S1 + S2)
    m2 = -np.sqrt(phi) * np.exp(1j * Lk/2)
    m3 = m2.conjugate()
    if k != 0:
        B = B + (Ak[Aki.index(k)] * np.exp(m1 * Z) * np.exp(2j * (k) * np.pi * Y / L))
    B = B + ((Ck[Cki.index(k)] * np.exp(m2 * Z) + Dk[Dki.index(k)] * np.exp(m3 * Z)) * np.exp(2j * (k) * np.pi * Y / L))

for k in range(0, B.shape[0]):
    for t in range(0, B.shape[1]):
        if Z[k][t] < H(Y[k][t]):
            B[k][t] = np.nan
        if Z[k][t] == H(Y[k][t]):
            print(B[k][t], "B value at the ground")
        if abs(Z[k][t] - H(Y[k][t])) < 0.1:
            if B[k][t] > 0.101:
                print(B[k][t], 'error -------------------------------------------------')
                # print (B[k][t], Z[k][t], H(Y[k][t]), Y[k][t], '-----------------------------------------------------------------------------' )

Bp = Bsfc(Y) * np.exp(-Z * np.sqrt(N * np.sin(alpha)) / (4*visc*diff)**(1/4)) * np.cos(np.sqrt(N*np.sin(alpha)) / ((4*visc*diff)**(1/4)) * Z)

##Plotting the buoyancy
fig = plt.figure(figsize=(10,10))  # create a figure
plt.rcParams.update({'font.size': 16})
plt.title('Buoyancy')
plt.contourf(Y, Z, B, np.arange(-0.2, 0.201, 0.001), cmap='seismic')
#plt.contourf(Y,Z,B,cmap='seismic')
plt.colorbar(label='1/s')
plt.xlabel("Y axis")
plt.ylabel("Height")
plt.xlim([-L, L])
plt.ylim([0, 1500])
plt.show()
The following plot shows a run that yielded a good result:
[buoyancy plot from a stable run]
However, when I increase the "height" parameter, I start getting unstable results, which I suspect is caused by numerical instabilities:
[buoyancy plot from an unstable run]
Is there a way to increase numerical precision in Python? I have experimented a bit with numpy.double, but without success so far.
Thanks
I guess you'll find your answer here on Stack Overflow:
In the standard library, the decimal module may be what you're looking for. Also, I have found mpmath to be quite helpful...
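For example, a minimal sketch of both options (the precision settings and values are arbitrary):

from decimal import Decimal, getcontext
from mpmath import mp, mpf

# Standard-library decimal: 50 significant digits.
getcontext().prec = 50
print(Decimal(1) / Decimal(7))

# mpmath: 50 decimal places of working precision.
mp.dps = 50
print(mpf(1) / mpf(7))

Note that neither of these plugs directly into the numpy/scipy routines used above (quad, scipy.linalg.lu), so the precision-critical parts would have to be rewritten with mpmath's own integration and linear-algebra helpers.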

Python: Write-Length Control

I am trying to create a specific output pattern for a text file that I want to load into C++ later.
I wrote a Python script that creates random coordinates and movement values in a circle.
The output is supposed to follow this pattern:
Place_1 Place_2 Place_3 Movement_1 Movement_2 Movement_3\n
Place_1 Place_2 Place_3 Movement_1 Movement_2 Movement_3\n
Place_1 Place_2 Place_3 Movement_1 Movement_2 Movement_3
The code I use is:
import numpy as np

file = open('log.txt', 'a')

def f(n, center, radius, ecc):
    pos = np.zeros((n,6))
    r = ecc * radius
    for i in range(n):
        while 1:
            x_1 = -1 + 2 * np.random.rand(1)
            x_2 = -1 + 2 * np.random.rand(1)
            if (x_1*x_1 + x_2*x_2) < 1:
                pos[i,0] = center[0] + r * 2 * x_1 * np.sqrt(1 - x_1*x_1 - x_2*x_2)
                pos[i,1] = center[1] + r * 2 * x_2 * np.sqrt(1 - x_1*x_1 - x_2*x_2)
                pos[i,2] = center[2] + r * (1 - 2 * (x_1*x_1 + x_2*x_2))
                pos[i,3] = (-1 + 2 * np.random.rand(1))
                pos[i,4] = (-1 + 2 * np.random.rand(1))
                pos[i,5] = (-1 + 2 * np.random.rand(1))
                break
        string = str(pos[i,:]).strip('[]').rstrip('\n')
        file.write(string)
    return

f(10000, np.array((127,127,127)), 92, 0.9)
file.close()
However, the log I create is very badly formatted. How can I get the required format?
You're going to a lot of trouble here that you don't need. This seems to solve the problem simply:
import numpy as np

file = open('log.txt', 'a')

def f(n, center, radius, ecc):
    r = ecc * radius
    for i in range(n):
        while 1:
            pos = [0]*6
            x_1 = -1 + 2 * np.random.rand(1)[0]
            x_2 = -1 + 2 * np.random.rand(1)[0]
            if (x_1*x_1 + x_2*x_2) < 1:
                pos[0] = center[0] + r * 2 * x_1 * np.sqrt(1 - x_1*x_1 - x_2*x_2)
                pos[1] = center[1] + r * 2 * x_2 * np.sqrt(1 - x_1*x_1 - x_2*x_2)
                pos[2] = center[2] + r * (1 - 2 * (x_1*x_1 + x_2*x_2))
                pos[3] = (-1 + 2 * np.random.rand(1)[0])
                pos[4] = (-1 + 2 * np.random.rand(1)[0])
                pos[5] = (-1 + 2 * np.random.rand(1)[0])
                break
        print(*pos, file=file)

f(10000, np.array((127,127,127)), 92, 0.9)
file.close()
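If you also want fixed-width columns (which can make the file easier to parse on the C++ side), you could format each value explicitly instead of the plain print; a minimal sketch, assuming the same pos list as above:

# Space-separated values with fixed width and precision, one row per line.
file.write(" ".join("{:12.6f}".format(v) for v in pos) + "\n")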
A solution without numpy:
import math
import random

file = open('log.txt', 'a')

def f(n, center, radius, ecc):
    r = ecc * radius
    for _ in range(n):
        pos = [0]*6
        while 1:
            x_1 = 2 * random.random() - 1
            x_2 = 2 * random.random() - 1
            vector = x_1*x_1 + x_2*x_2
            if vector < 1:
                break
        pos[0] = center[0] + r * 2 * x_1 * math.sqrt(1 - vector)
        pos[1] = center[1] + r * 2 * x_2 * math.sqrt(1 - vector)
        pos[2] = center[2] + r * (1 - 2 * vector)
        pos[3] = 2 * random.random() - 1
        pos[4] = 2 * random.random() - 1
        pos[5] = 2 * random.random() - 1
        print(*pos, file=file)

f(10000, (127,127,127), 92, 0.9)
file.close()
Use np.savetxt:
import numpy as np

def f(n, center, radius, ecc):
    pos = np.zeros((n,6))
    r = ecc * radius
    for i in range(n):
        while 1:
            x_1 = -1 + 2 * np.random.rand(1)
            x_2 = -1 + 2 * np.random.rand(1)
            if (x_1*x_1 + x_2*x_2) < 1:
                pos[i,0] = center[0] + r * 2 * x_1 * np.sqrt(1 - x_1*x_1 - x_2*x_2)
                pos[i,1] = center[1] + r * 2 * x_2 * np.sqrt(1 - x_1*x_1 - x_2*x_2)
                pos[i,2] = center[2] + r * (1 - 2 * (x_1*x_1 + x_2*x_2))
                pos[i,3] = (-1 + 2 * np.random.rand(1))
                pos[i,4] = (-1 + 2 * np.random.rand(1))
                pos[i,5] = (-1 + 2 * np.random.rand(1))
                break
    np.savetxt('file.txt', pos, delimiter=';')
    return

f(100, np.array((127,127,127)), 92, 0.9)
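The delimiter and number format can be adjusted to match the space-separated pattern in the question, for example (inside f, where pos is defined):

# Space-separated columns with 6 decimal places, one row per line.
np.savetxt('file.txt', pos, fmt='%.6f', delimiter=' ')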
For the formatting issue, you could map the coordinates to characters (that is, turn each coordinate into a character such as _ or / that gets printed). You can also pad a print with spaces, like print(" Hello."), and move to a new line with print("\n Hello."). This is probably not what you were looking for, but I hope it helps somewhat.

Neural net decision boundaries are perpendicular to true boundaries

I have built a small neural net that takes two inputs, with two neurons in the hidden layer and one neuron in the output layer. The results are "mirror images", i.e. the decision boundaries are perpendicular to the true boundaries. Does anyone know how this might be happening, or what mistake I may have made?
[plots: linear data, nonlinear data]
import random, numpy, math

lr = 0.1       # learning rate
dt = '4'       # data type 1: linear 2: curve 3: box 4: XORish
epochs = 100000
tda = 50       # training data amount

def step(x):  # step function
    if x > 0:
        x = 1
    else:
        x = 0
    return x

def error(truth, output):
    return 0.5 * (truth - output)**2

def sig(x):  # sigmoid activation
    return 1/(1+numpy.exp(-x))

# weights
w = [random.random(),random.random(),random.random(),random.random(),random.random(),random.random()]
# biases
b = [random.random(),random.random(),random.random()]

def Net(x, y, t):  # t is truth (or target)
    h1 = x*w[0]+y*w[1]+b[0]  # summation in h1, first neuron in hidden layer
    h1out = sig(h1)          # sigmoid activation
    h2 = x*w[2]+y*w[3]+b[1]
    h2out = sig(h2)
    z = h1out*w[4]+h2out*w[5]+b[2]  # z is output neuron
    zout = sig(z)
    e = error(t, zout)  # e is error
    # backpropagation, partial differentiations to find error at each weight and bias
    e5 = (zout-t) * (zout * (1 - zout)) * h1out  # e5 is error at weight 5 etc
    e6 = (zout-t) * (zout * (1 - zout)) * h2out
    e1 = (zout-t) * (zout * (1 - zout)) * w[4] * (h1out * (1 - h1out)) * x
    e2 = (zout-t) * (zout * (1 - zout)) * w[4] * (h1out * (1 - h1out)) * y
    e3 = (zout-t) * (zout * (1 - zout)) * w[5] * (h2out * (1 - h2out)) * x
    e4 = (zout-t) * (zout * (1 - zout)) * w[5] * (h2out * (1 - h2out)) * y
    be3 = (zout-t) * (zout * (1 - zout))
    be1 = (zout-t) * (zout * (1 - zout)) * w[4] * (h1out * (1 - h1out))
    be2 = (zout-t) * (zout * (1 - zout)) * w[5] * (h2out * (1 - h2out))
    # updating weights and biases
    w[0] = w[0] - (e1 * lr)
    w[1] = w[1] - (e2 * lr)
    w[2] = w[2] - (e3 * lr)
    w[3] = w[3] - (e4 * lr)
    w[4] = w[4] - (e5 * lr)
    w[5] = w[5] - (e6 * lr)
    b[2] = b[2] - (be3 * lr)
    b[0] = b[0] - (be1 * lr)
    b[1] = b[1] - (be2 * lr)

train_data = []
while len(train_data) < tda:  # makes training data
    x = random.randrange(100)
    y = random.randrange(100)
    if dt == '1':
        if x+y > 100:
            truth = 1
        else:
            truth = 0
    elif dt == '2':
        if x*y > 1000:
            truth = 1
        else:
            truth = 0
    elif dt == '3':
        if x > 50 or y > 50:
            truth = 1
        else:
            truth = 0
    elif dt == '4':
        if x+y > 60 and x+y < 140:
            truth = 1
        else:
            truth = 0
    if [x,y,truth] not in train_data:
        train_data.append([x,y,truth])

for n in range(epochs):  # EPOCHS
    for i in train_data:
        Net(i[0], i[1], i[2])
    if n%1000 == 0 and n != 0:
        print (n, 'epochs')
    if n%5000 == 0:
        # every 5000 epochs the net is fed new test data and results are plotted
        test_data = []
        while len(test_data) < (50):  # makes test data
            x = random.randrange(100)
            y = random.randrange(100)
            if [x, y] not in test_data and [x, y, 0] not in train_data and [x, y, 1] not in train_data:
                test_data.append([x, y])
        # classifying the test data with the net
        for i in test_data:
            x = i[0]
            y = i[1]
            h1 = x*w[0]+y*w[1]+b[0]
            h1out = sig(h1)
            h2 = x*w[2]+y*w[3]+b[1]
            h2out = sig(h2)
            z = h1out*w[4]+h2out*w[5]+b[2]
            zout = step(z)
            i.append(zout)
        print (test_data)
        print (w, b)
        # plotting results
        import matplotlib.pyplot as plt
        x0 = []
        y0 = []
        x1 = []
        y1 = []
        xt0 = []
        yt0 = []
        xt1 = []
        yt1 = []
        for i in train_data:
            if i[2] == 0:
                x0.append(i[0])
                y0.append(i[1])
            else:
                x1.append(i[0])
                y1.append(i[1])
        for i in test_data:
            if i[2] == 0:
                xt0.append(i[0])
                yt0.append(i[1])
            else:
                xt1.append(i[0])
                yt1.append(i[1])
        plt.clf()
        plt.scatter(xt0, yt0, 30, color='red')
        plt.scatter(xt1, yt1, 30, color='blue')
        plt.scatter(x0, y0, 10, color='orange')
        plt.scatter(x1, y1, 10, color='green')
        plt.title(str(tda)+" training data, "+str(epochs)+" epochs \n Red/Orange are small things, Blue/Green are big things \n Orange/Green are training data, Red/Blue are test data")
        plt.xlabel("Width")
        plt.ylabel("Height")
        plt.savefig('plot.png')
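To see the learned boundary itself (rather than inferring it from the classified test points), one option is to evaluate the trained net on a grid of inputs and contour the output; a minimal sketch, assuming the w, b and sig defined above:

import numpy
import matplotlib.pyplot as plt

# Network output over the 0..100 input grid; the 0.5 contour is the decision boundary.
xs = numpy.arange(0, 101)
ys = numpy.arange(0, 101)
zs = numpy.zeros((len(ys), len(xs)))
for yi, yv in enumerate(ys):
    for xi, xv in enumerate(xs):
        h1out = sig(xv*w[0] + yv*w[1] + b[0])
        h2out = sig(xv*w[2] + yv*w[3] + b[1])
        zs[yi, xi] = sig(h1out*w[4] + h2out*w[5] + b[2])
plt.clf()
plt.contourf(xs, ys, zs, levels=[0, 0.5, 1], alpha=0.3)
plt.savefig('boundary.png')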

Understanding timesteps in scipy.integrate.odeint

I am trying to solve a PDE using odeint and the method of lines. My code is definitely wrong, and I'm trying to figure out where.
I am calling the ODE solver using odeint(odefunc, y0, tspan), where tspan = np.linspace(0.0, 0.5, 5) and y0 = 1.0*np.ones(3).
I tried printing t within odefunc and am confused by the output. Despite the fact that I am solving up to t = 0.5, the last t-value printed is 0.015081203121127767. The number of outputs matches tspan, but I cannot see how it could possibly be solving up to t = 0.5 when the last time seen inside the ODE function is 0.015. What am I missing?
My DE is time dependent, which makes it very hard to figure out where things are going wrong, because I don't seem to be seeing the times where everything fails.
Edited to add: this is failing, but when I run it without some of the irrelevant parts I get the warning ODEintWarning: Excess work done on this call (perhaps wrong Dfun type). Run with full_output = 1 to get quantitative information. I assume this is part of the issue, but it doesn't appear to halt the code.
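As the warning suggests, the solver diagnostics can be inspected by passing full_output; a minimal sketch, using odefunc, init and tspan from the MWE below:

# full_output=True returns an info dict with per-output-time solver diagnostics.
sol, info = odeint(odefunc, init, tspan, full_output=True)
print(info['message'])  # textual status of the integration
print(info['tcur'])     # time actually reached by the solver at each output time
print(info['hu'])       # internal step sizes used
print(info['nst'])      # cumulative number of internal time steps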
MWE
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
import math
import sys

plt.interactive(False)

sigma = 2320
rho = 1000
gravity = 9.81               # [m/s^2]
g = gravity*3600*3600        # [m/hour^2]
S = 0.01
settlingVelocity = 0.02      # [m/s]
ws = settlingVelocity*3600   # [m/hour]
n = 0.04                     # [SI]
J = 400                      # [Ws/m]
k = 0.02
Cstar = 0.2 * sigma          # [kg/m^3]
W = 2                        # [m]
D0 = 1.2
Lw = 20
L = 100
tend = 0.5                   # in hours
tspan = np.linspace(0.0, tend, 5)

def d(t):  # metres
    if t < 50:  # hours
        return 0.5
    else:
        return 0.05

def Q(t):
    return 3600 * (math.sqrt(S)/n)*((W*d(t))**(5/3))/((2*d(t) + W)**(2/3))

def h(t):
    return d(t)/2

def beta(t):
    return (sigma - rho) * g * h(t)/sigma

def Omega(t):
    return rho * g * S * Q(t)  # [W/m]

def PsiTime(t):
    return rho * g * Q(t) * (D0 - d(t))/(Lw)

N = 10
X = np.linspace(0, L, N)
delX = L / (N-1)

def odefunc(y, t):
    def zetaEh(t):
        return k * (PsiTime(t) + Omega(t)) / (J + beta(t))
    def zetaEW(t):
        return (2*d(t)/(W + 2*d(t))) * k * Omega(t)/(J + beta(t))
    def zetaR(t):
        return (W/(W + 2*d(t))) * k*Omega(t)/(beta(t))
    def zetaEF(t, i):
        return (W/(W + 2*d(t))) * k * Omega(t) / (J + beta(t))

    C = y[:N]
    M = y[N:]
    print("time: ", t)
    dCdt = np.zeros(X.shape)
    dMdt = np.zeros(X.shape)
    dCdt[0] = (  # forward difference for dCdx
        -Q(t) / (W*d(t)) * (C[1] - C[0]) / delX
        + (zetaEh(t) / (W * d(t))) * ((Cstar - C[0]) / Cstar)
        - (ws * C[0] * (beta(t))) / (d(t) * (J + beta(t)))
    )
    dMdt[0] = 0
    # gully channel
    for i in range(1, N-1):  # central difference
        if M[i] + W * C[i] * ws - zetaR(t) * (Cstar - C[i]) / Cstar < 0:
            reMass = M[i] + W * C[i] * ws
            dCdt[i] = (
                -Q(t) / (W*d(t)) * (C[i+1] - C[i - 1]) / (2*delX)
                + 1 / (W * d(t)) * ((zetaEW(t) + zetaEF(t, i)) * (Cstar - C[i]) / Cstar
                + reMass * (1 - (beta(t)) / (J + beta(t))))
                - C[i] * ws/d(t)
            )
            dMdt[i] = -M[i]
        else:
            dCdt[i] = (
                -Q(t) / (W*d(t)) * (C[i+1] - C[i - 1]) / (2*delX)
                + 1 / (W * d(t)) * (zetaEW(t) + zetaR(t)) * (Cstar - C[i]) / Cstar
                - C[i] * ws / d(t)
            )
            dMdt[i] = W * C[i] * ws - zetaR(t) * (Cstar - C[i]) / Cstar
    # Final node - backward difference
    if M[N-1] + W * C[N-1] * ws - zetaR(t) * (Cstar - C[N-1]) / Cstar < 0:
        reMass = M[N-1] + W * C[N-1] * ws
        dCdt[N-1] = (
            -Q(t) / (W * d(t)) * (C[N-1] - C[N-2]) / delX
            + 1 / (W * d(t)) * ((zetaEW(t) + zetaEF(t, i)) * (Cstar - C[N-1]) / Cstar
            + reMass * (1 - (beta(t)) / (J + beta(t))))
            - C[i] * ws / d(t)
        )
        dMdt[N-1] = -M[N-1]
    else:
        dCdt[N-1] = (
            -Q(t) / (W * d(t)) * (C[N-2] - C[N - 1]) / delX
            + 1 / (W * d(t)) * (zetaEW(t) + zetaR(t)) * (Cstar - C[N-1]) / Cstar
            - C[N-1] * ws / d(t)
        )
        dMdt[N-1] = W * C[N-1] * ws - zetaR(t) * (Cstar - C[N-1]) / Cstar

    dydt = np.ravel([dCdt, dMdt])
    return dydt

init_C = 0.0 * np.ones(X.shape)
init_M = 0.0 * np.ones(X.shape)
init = np.ravel([init_C, init_M])
sol = odeint(odefunc, init, tspan)
conc = sol[:, :N]

Why doesn't my handmade numpy neural network learn?

As an exercise I was building a neural network in numpy from scratch.
For simplicity I wanted to use it to solve the XOR problem. I derived all the equations and put everything together, but it looks like my network doesn't learn. I've spent some time trying to spot the mistake, but without success. Maybe you notice something I'm missing here?
X = [(0,0), (1,0), (0,1), (1,1)]
Y = [0, 1, 1, 0]

w1 = 2 * np.random.random(size=(2,3)) - 1
w2 = 2 * np.random.random(size=(3,1)) - 1
b1 = 2 * np.random.random(size=(1,3)) - 1
b2 = 2 * np.random.random(size=(1,1)) - 1

def sigmoid(x):
    return 1./(1 + np.exp(-x))

def dsigmoid(y):
    return y*(1-y)

N = 1000
error = np.zeros((N,1))
for n in range(N):
    Dw_1 = np.zeros((2,3))
    Dw_2 = np.zeros((3,1))
    Db_1 = np.zeros((1,3))
    Db_2 = np.zeros((1,1))
    for i in range(len(X)):  # iterate over all examples
        x = np.array(X[i])
        y = np.array(Y[i])
        # Forward pass, 1st layer
        act1 = np.dot(w1.T, x) + b1
        lay1 = sigmoid(act1)
        # Forward pass, 2nd layer
        act2 = np.dot(w2.T, lay1.T) + b2
        lay2 = sigmoid(act2)
        # Computing error
        E = 0.5*(lay2 - y)**2
        error[n] += E[0]
        # Backprop, 2nd layer
        delta_l2 = (y-lay2) * dsigmoid(lay2)
        corr_w2 = (delta_l2 * lay1).T
        corr_b2 = delta_l2 * 1
        # Backprop, 1st layer
        delta_l1 = np.dot(w2, delta_l2) * dsigmoid(lay1).T
        corr_w1 = np.outer(x, delta_l1)
        corr_b1 = (delta_l1 * 1).T
        Dw_2 += corr_w2
        Dw_1 += corr_w1
        Db_2 += corr_b2
        Db_1 += corr_b1
        if n % 1000 == 0:
            print y, lay2,
    if n % 1000 == 0:
        print
    w2 = w2 - eta * Dw_2
    b2 = b2 - eta * Db_2
    w1 = w1 - eta * Dw_1
    b1 = b1 - eta * Db_1
    error[n] /= len(X)
There were a few small mistakes in it; I hope this helps you:
import numpy as np
import matplotlib.pyplot as plt

X = [(0, 0), (1, 0), (0, 1), (1, 1)]
Y = [0, 1, 1, 0]
eta = 0.7

w1 = 2 * np.random.random(size=(2, 3)) - 1
w2 = 2 * np.random.random(size=(3, 1)) - 1
b1 = 2 * np.random.random(size=(1, 3)) - 1
b2 = 2 * np.random.random(size=(1, 1)) - 1

def sigmoid(x):
    return 1. / (1 + np.exp(-x))

def dsigmoid(y):
    return y * (1 - y)

N = 2000
error = []
for n in range(N):
    Dw_1 = np.zeros((2, 3))
    Dw_2 = np.zeros((3, 1))
    Db_1 = np.zeros((1, 3))
    Db_2 = np.zeros((1, 1))
    tmp_error = 0
    for i in range(len(X)):  # iterate over all examples
        x = np.array(X[i]).reshape(1, 2)
        y = np.array(Y[i])
        layer1 = sigmoid(np.dot(x, w1) + b1)
        output = sigmoid(np.dot(layer1, w2) + b2)
        tmp_error += np.mean(np.abs(output - y))
        d_w2 = np.dot(layer1.T, ((output - y) * dsigmoid(output)))
        d_b2 = np.dot(1, ((output - y) * dsigmoid(output)))
        d_w1 = np.dot(x.T, (np.dot((output - y) * dsigmoid(output), w2.T) * dsigmoid(layer1)))
        d_b1 = np.dot(1, (np.dot((output - y) * dsigmoid(output), w2.T) * dsigmoid(layer1)))
        Dw_2 += d_w2
        Dw_1 += d_w1
        Db_1 += d_b1
        Db_2 += d_b2
    w2 = w2 - eta * Dw_2
    w1 = w1 - eta * Dw_1
    b1 = b1 - eta * Db_1
    b2 = b2 - eta * Db_2
    error.append(tmp_error)

error = np.array(error)
print(error.shape)
plt.plot(error)
plt.show()
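As a quick sanity check, one could also print the predictions for the four XOR inputs after training, reusing the same forward pass as above:

# Final predictions for each XOR input after training.
for xy, t in zip(X, Y):
    xv = np.array(xy).reshape(1, 2)
    layer1 = sigmoid(np.dot(xv, w1) + b1)
    output = sigmoid(np.dot(layer1, w2) + b2)
    print(xy, '->', output[0, 0], '(target', t, ')')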
