Why is my MLP always outputting -1 no matter what? - python

I need to take two dimensional input data generated uniformly from the unit square, and then label the data either 1 or -1 based on the XOR of two hypothesis functions, h1(x) = w1^T*x and h2(x) = w2^T*x where w1 = [0, 1, -1] and w2 = [0, 1, 1]. From there, I am supposed to run the data through a three-layer multilayer perceptron using the sign function for theta. For some reason, my MLP outputs -1 for all points no matter what. Where is my error? Here's my code:
import numpy as np
import matplotlib.pyplot as plt
# Draw 100 points uniformly from the square [-0.5, 0.5]^2.
# Column 0 is overwritten with the constant bias input 1.
x = np.random.uniform(-.5, .5, (100, 3))
x[:, 0] = 1

# Weight vectors of the two linear hypotheses h1 and h2 (bias weight first).
w1 = np.array([0, 1, -1])
w2 = np.array([0, 1, 1])
h1 = np.sign(x @ w1)
h2 = np.sign(x @ w2)

# Target label: +1 where exactly one hypothesis is positive (XOR), else -1.
# The product is negative precisely when h1 and h2 have opposite signs.
y = np.where(h1 * h2 < 0, 1.0, -1.0)

# nodes 21 and 31 are just 1 with weights -1.5 and 1.5, respectively
# Layer 2: each node applies sign() to its linear signal. Without the sign the
# thresholds of +/-1.5 in the next layer can never be crossed, and the network
# outputs -1 for every point.
node22 = np.sign(x @ w1).reshape(-1, 1)
node23 = np.sign(x @ w2).reshape(-1, 1)
# Layer 3: node32 fires only for (h1, h2) = (+1, -1), node33 only for (-1, +1).
node32 = np.sign(-1.5 + node22 - node23)
node33 = np.sign(-1.5 - node22 + node23)
# Output: +1 as soon as either layer-3 node fires (logical OR).
out = np.sign(1.5 + node32 + node33)

print("Layer 2, Node 2:")
print(node22.ravel())
print("Layer 2, Node 3:")
print(node23.ravel())
print("Layer 3, Node 2:")
print(node32.ravel())
print("Layer 3, Node 3:")
print(node33.ravel())

# Fraction of points where the network output disagrees with the label.
error = float(np.mean(y != out[:, 0]))

I figured it out. For nodes 22 and 23, the calculation needs to be inside a sign function, so the j for loop should look like this:
# Forward pass, one sample at a time; every node applies sign() to its signal.
for j, sample in enumerate(x):
    # Layer 2: sign of the two linear hypotheses.
    node22[j] = np.sign(w1 @ sample)
    node23[j] = np.sign(w2 @ sample)
    # Layer 3: bias -1.5 plus the signed difference of the layer-2 nodes.
    spread = node22[j] - node23[j]
    node32[j] = np.sign(-1.5 + spread)
    node33[j] = np.sign(-1.5 - spread)
    # Output node: bias 1.5 plus both layer-3 nodes.
    out[j] = np.sign(1.5 + node32[j] + node33[j])


Perlin noise - hashing function doesn't return the same vector

I'm trying to implement perlin noise using permutation table and hashing function.
The hashing function should return same vector for a grid point, but it seems that it's not doing it.
I've made some tests with a small permutation table
P = [0, 3, 1, 2], where the values 0 to 3 are then used to assign a vector
def get_constant_vector(p):
    """Return one of four diagonal unit-step vectors for a permutation value.

    Args:
        p: Integer value read from the permutation table.

    Returns:
        A new two-element list: ``[1, 1]``, ``[-1, 1]``, ``[1, -1]`` or
        ``[-1, -1]`` for ``p & 3`` equal to 0, 1, 2 or 3 respectively.
    """
    # Keep the two low bits so that all four vectors are reachable. The
    # previous ``p % 3`` only produced 0..2, so [-1, -1] was never returned.
    h = p & 3
    # The table is rebuilt on each call so callers get a list they may mutate.
    vectors = (
        [1.0, 1.0],
        [-1.0, 1.0],
        [1.0, -1.0],
        [-1.0, -1.0],
    )
    return vectors[h]
So if the value from the table is the same, the vector will be the same.
This is how I determine the value from the permutation table for each corner of the bounding box around a point.
P = [0, 3, 1, 2]
# Doubling the table is enough: the largest index used below is
# P[X + 1] + Y + 1 <= 3 + 3 + 1 = 7.
P = P + P


def corner_hash(table, gx, gy):
    """Return the permutation value hashed from integer grid point (gx, gy)."""
    return table[table[gx] + gy]


# Print the hashed value at the four corners of the cell around each sample.
# With this indexing "top" means Y + 1 and "right" means X + 1, so the grid
# point shared by four neighbouring cells is the TR corner of cell (X, Y),
# the BR corner of cell (X, Y + 1), the TL corner of cell (X + 1, Y) and the
# BL corner of cell (X + 1, Y + 1).
for cell_x in range(5):
    x = cell_x + 0.5
    for cell_y in range(5):
        y = cell_y + 0.5
        X = int(x) % 4
        Y = int(y) % 4
        value_TL = corner_hash(P, X, Y + 1)  # top left corner
        value_TR = corner_hash(P, X + 1, Y + 1)  # top right corner
        value_BR = corner_hash(P, X + 1, Y)  # bottom right corner
        value_BL = corner_hash(P, X, Y)  # bottom left corner
        print(f"x={x}, y={y}, BL = {value_BL}, BR = {value_BR}, TL = {value_TL}, TR = {value_TR}")
This produces the following:
x=1.5, y=1.5, BL = 0, BR = 1, TL = 3, TR = 2 vector here is BR 1
x=1.5, y=2.5, BL = 3, BR = 2, TL = 1, TR = 0 vector here is TR 0
x=2.5, y=1.5, BL = 1, BR = 2, TL = 2, TR = 0 vector here is BL 1
x=2.5, y=2.5, BL = 2, BR = 0, TL = 0, TR = 3 vector here is TL 0
They are supposed to be the same, but they're not.
Where am I wrong in my assumptions?

Python simple Convolution in numpy

I have written this simple convolution function in numpy. But the final array values are all still zero.
Please help me correct this function.
def convolve(a_prev, w, b):
    """Slide each filter over each example with no padding and unit stride.

    Args:
        a_prev: Input array indexed as (example, row, column, ...).
        w: Filter bank indexed as (filter, row, column, ...); filters are
            assumed square, with side length ``w.shape[1]``.
        b: Per-filter bias, read as ``b[z, 0]``.

    Returns:
        float32 array of shape (examples, out_rows, out_cols, filters).
    """
    m, in_h, in_w = a_prev.shape[:3]
    n_c, f = w.shape[0], w.shape[1]
    n_h = in_h - f + 1
    n_w = in_w - f + 1
    a = np.zeros((m, n_h, n_w, n_c), dtype=np.float32)
    for n in range(m):
        for z in range(n_c):
            # Iterating over output positions restarts the column index on
            # every row. The earlier while-loops never reset x and compared
            # against the output size, so the output stayed all zero.
            for y in range(n_h):
                for x in range(n_w):
                    window = a_prev[n, y:y + f, x:x + f]
                    a[n, y, x, z] = np.sum(np.multiply(window, w[z])) + b[z, 0]
    return a
The shape of a_prev is [num_examples, height, width, 3] and w is [num_filters, 3, 3, 3].
I found the reason why it was not working: I made a programming error and forgot to initialize x = 0 before the inner while loop.
It's working fine now.
Below is the correct function.
def convolve(a_prev, kernel, b, pad=0, stride=1):
    """Convolve a batch of inputs with a bank of square filters.

    Args:
        a_prev: Input array indexed as (example, row, column, ...).
        kernel: Filter bank indexed as (filter, row, column, ...); the side
            length is taken from ``kernel.shape[1]``.
        b: Per-filter bias, read as ``b[z, 0]``.
        pad: Number of zero rows/columns added on each side of the row and
            column axes before filtering.
        stride: Step between consecutive filter positions.

    Returns:
        float32 array of shape (examples, new_h, new_w, filters).
    """
    if pad:
        # Only the row and column axes are padded. Previously ``pad`` enlarged
        # the output shape but the input itself was never padded.
        widths = [(0, 0), (pad, pad), (pad, pad)]
        widths += [(0, 0)] * (a_prev.ndim - 3)
        a_prev = np.pad(a_prev, widths, mode="constant")
    m, prev_h, prev_w = a_prev.shape[:3]
    n_c, f = kernel.shape[0], kernel.shape[1]
    new_h = (prev_h - f) // stride + 1
    new_w = (prev_w - f) // stride + 1
    az = np.zeros((m, new_h, new_w, n_c), dtype=np.float32)
    for n in range(m):
        for z in range(n_c):
            # Loop over output indices and derive the input offset from them,
            # so that stride > 1 neither skips output cells nor indexes past
            # the end of the output array.
            for i in range(new_h):
                top = i * stride
                for j in range(new_w):
                    left = j * stride
                    window = a_prev[n, top:top + f, left:left + f]
                    az[n, i, j, z] = np.sum(window * kernel[z]) + b[z, 0]
    return az
There is an easier way to do convolution without using the previous input as a function input.
import numpy as np
def convolution(x, h):
    """Return the full discrete convolution of two 1-D signals.

    Implements y[k] = sum over m of x[m] * h[k - m] directly from the
    definition.

    Args:
        x: Array-like signal; flattened before use.
        h: Array-like signal; flattened before use.

    Returns:
        Float array of length ``len(x) + len(h) - 1``, or an empty array when
        either input is empty.
    """
    x = np.ravel(x)
    h = np.ravel(h)
    M, N = x.size, h.size
    if M == 0 or N == 0:
        # M + N - 1 would be negative or misleading here, so return no samples.
        return np.zeros(0)
    # Initialise y with the length of the output signal
    y = np.zeros(M + N - 1)
    for m in range(M):
        # Sample x[m] contributes a copy of h scaled by x[m], shifted by m.
        y[m:m + N] += x[m] * h
    return y
This function uses the basic definition of convolution for discrete signals

autograd differentiation example in PyTorch - should be 9/8?

In the example for the Torch tutorial for Python, they use the following graph:
x = [[1, 1], [1, 1]]
y = x + 2
z = 3y^2
o = mean( z ) # 1/4 * z.sum()
Thus, the forward pass gets us this:
x_i = 1, y_i = 3, z_i = 27, o = 27
In code this looks like:
import torch
# define graph
x = torch.ones(2, 2, requires_grad=True)
y = x + 2
z = y * y * 3
out = z.mean()
# if we don't do this, torch will only retain gradients for leaf nodes, ie: x
y.retain_grad()
z.retain_grad()
# show the values produced by the forward pass
print(z, out)
however, I get confused at the gradients computed:
# now let's run our backward prop & get gradients
# z is not a leaf tensor, so its gradient is only kept if requested before
# backward(); calling retain_grad() again on a tensor that already retains
# its gradient is harmless.
z.retain_grad()
out.backward()
print(f'do/dz = {z.grad[0,0]}')
# x is a leaf created with requires_grad=True, so x.grad is always populated.
print(f'do/dx = {x.grad[0,0]}')
which outputs:
do/dx = 4.5
By chain rule, do/dx = do/dz * dz/dy * dy/dx, where:
dy/dx = 1
dz/dy = 9/2 given x_i=1
do/dz = 1/4 given x_i=1
which means:
do/dx = 1/4 * 9/2 * 1 = 9/8
However this doesn't match the gradients returned by Torch (9/2 = 4.5). Perhaps I have a math error (something with the do/dz = 1/4 term?), or I don't understand autograd in Torch.
Any pointers?
do/dz = 1 / 4
dz/dy = 6y = 6 * 3 = 18
dy/dx = 1
therefore, do/dx = do/dz * dz/dy * dy/dx = 1/4 * 18 * 1 = 9/2

Error in Optimization with Lagrange Multiplier

I'm trying to maximize/minimize a function with two variables using Lagrange Multiplier method, below is my code
import numpy as np
from scipy.optimize import fsolve
# Problem constants read by func() when it evaluates the constraint.
Sa, Sm = 200, 100
# Value the constraint expression is compared against.
n = 90
# (sigma * u + mu) maps each unknown u1, u2 to the quantity used in func().
mu1, mu2 = 500, 150
sigma1, sigma2 = 25, 10
f = 0.9
def func(X):
    """Evaluate the Lagrangian f(u1, u2) - L * g(u1, u2).

    function   --> f(u1, u2) = u1**2 + u2**2
    constraint --> g(u1, u2) = (Snf / a)**(1 / b) - n = 0

    Args:
        X: Sequence ``[u1, u2, L]`` where ``L`` is the Lagrange multiplier
            (``lambda`` is a reserved keyword in Python).

    Returns:
        The value of the Lagrangian at ``X``.
    """
    u1, u2, L = X[0], X[1], X[2]
    s1 = sigma1 * u1 + mu1
    s2 = sigma2 * u2 + mu2
    Snf = Sa / (1 - Sm / s1)
    a = (f * s1**2) / s2
    b = -1 / 3 * (f * s1) / s2
    objective = u1**2 + u2**2
    # ``**`` binds tighter than ``/``: the former ``(Snf/a)**1/b`` evaluated
    # ((Snf/a)**1)/b. The exponent 1/b must be parenthesised, and ``- n``
    # belongs inside the term multiplied by L.
    # NOTE(review): a negative Snf / a with a fractional exponent is not a
    # real number; confirm the intended domain of u1 and u2.
    constraint = (Snf / a) ** (1 / b) - n
    return objective - L * constraint
def dfunc(X):
    """Approximate the gradient of ``func`` at ``X`` by central differences.

    Each component i is (func(X + h*e_i) - func(X - h*e_i)) / (2*h), where
    e_i is the i-th unit vector and h = 1e-3 is the step size.
    """
    step = 1e-3
    count = len(X)
    gradient = np.zeros(count)
    # Row i of the scaled identity matrix is the perturbation for component i.
    for idx, offset in enumerate(np.eye(count) * step):
        forward = func(X + offset)
        backward = func(X - offset)
        gradient[idx] = (forward - backward) / (2 * step)
    return gradient
# Find the points where the gradient of the Lagrangian vanishes, starting
# from two different initial guesses [u1, u2, L].
# this is the max
guess_max = [1, 1, 0]
X1 = fsolve(dfunc, guess_max)
print (X1, func(X1))
# this is the min
guess_min = [-1, -1, 0]
X2 = fsolve(dfunc, guess_min)
print (X2, func(X2))
When I use a simple function as the constraint, such as u1+u2=4 or u1^2+u2^2 = 20, it works just fine, but when I use my actual constraint function it always gives the error below. Is there a reason why? Thanks for the help.
RuntimeWarning: The iteration is not making good progress, as measured by the
improvement from the last five Jacobian evaluations.
warnings.warn(msg, RuntimeWarning)

change scaling for the schrodinger equation

I need to change the y-axis scaling of my plot for the Schrodinger equation so that it shows the difference between the theoretical calculation and ours, which is about 0.01 percent. On the plot I am getting, the scale is not fine enough to show the difference. Here is the code from my project.
# -*- coding: utf-8 -*-
# Created on Sat Nov 05 12:25:14 2016
#author: produce
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
# Grid and search parameters for the finite-difference integration of psi.
c = .5 / 500 # c = delta x
x = np.arange(0, .5, c) # creates array of argument values from 0 to 1/2 in increments
# of delta x = c
psi = np.zeros(len(x)) # creates array of zeros which will be replaced by y values
k = 20 # starting energy for calculator of E
ans = 0 # The value of k, when we have y as between 0.004 and 0
# Value of psi recorded together with ans by the search loop.
ansPsi = 0
# The search loop subtracts diff from k and adds increment to k.
diff = 0.001
increment = 0.0001
# Set to True by the search loop when its stopping test succeeds.
done = False
# Search over k: integrate psi across the grid for the current k, then test
# the psi sample reached at the end of the integration.
# NOTE(review): indentation was lost in this listing, so the nesting of the
# two `while 1:` loops, the `for` loops and the `if` tests cannot be read from
# the text. No `break` statement is visible either, although a comment below
# mentions one. Restore the structure from the original script before running.
while 1:
# print k
# Both starting samples are 1 (presumably a zero-slope start at x = 0 -- TODO confirm).
psi[0] = 1
psi[1] = 1
# Recurrence: psi[i+2] = 2*psi[i+1] - psi[i] - 2*k*c^2*psi[i]
# (presumably a finite-difference form of psi'' = -2*k*psi -- TODO confirm).
for i in range(0, len(x) - 2):
psi[i + 2] = psi[i + 1] + (psi[i + 1] - psi[i]) - 2 * k * c * c * psi[i]
# plt.plot(x,psi)
# print(x,psi)
# print (psi[i+2]--->)
# Record k and psi when the tested sample lies strictly between 0 and 0.004.
if (float(psi[i + 2]) < 0.004 and float(psi[i + 2]) > 0):
ans = k
ansPsi = psi[i + 2]
while 1: # would be an infinite loop, but have a break statement
# k = k - 0.00001
# Refinement: raise k in steps of `increment` and integrate again.
k = k + increment
for i in range(0, len(x) - 2):
psi[i + 2] = psi[i + 1] + (psi[i + 1] - psi[i]) - 2 * k * c * c * psi[i]
plt.plot(x, psi, 'r') #red solid line
# Stop condition: the tested sample grew beyond the recorded value or went negative.
if (psi[i + 2] > ansPsi or psi[i + 2] < 0):
done = True
ansPsi = psi[i + 2]
ans = k
# print (k, psi[i+2])
if done:
# Coarse step: lower k by `diff`.
k = k - diff
print("Value of k:", ans, "Value of Y:", ansPsi) # prints our answer for energy and psi[1/2]
# Reference energies; each is integrated with the same recurrence as psi.
# NOTE(review): k2 and k3 are only used by the commented-out code below.
k1 = 10 # 1st Higher Energy Value
k2 = 7 # 2nd Higher Energy Value
k3 = 3 # 1st Lower Energy Value
k4 = 1 # 2nd Lower Energy Value
kt = np.pi * np.pi * .5 # theoretical value
# Curve for k1.
# NOTE(review): no plot call for psi1 is visible in this listing.
psi1 = np.zeros(len(x))
psi1[0] = 1
psi1[1] = 1
for i in range(0, len(x) - 2):
psi1[i + 2] = psi1[i + 1] + (psi1[i + 1] - psi1[i]) - 2 * k1 * c * c * psi1[i]
# psi2 = np.zeros(len(x))
# psi2[0] = 1
# psi2[1] = 1
# for i in range (0,len(x)-2):
# psi2[i+2] = psi2[i+1] + (psi2[i+1] - psi2[i]) - 2*k2*c*c*psi2[i]
# plt.plot(x,psi2,'k')
# psi3 = np.zeros(len(x))
# psi3[0] = 1
# psi3[1] = 1
# for i in range (0,len(x)-2):
# psi3[i+2] = psi3[i+1] + (psi3[i+1] - psi3[i]) - 2*k3*c*c*psi3[i]
# plt.plot(x,psi3,'p')
# Curve for k4.
# NOTE(review): the plot call after this loop draws psi, not psi4.
psi4 = np.zeros(len(x))
psi4[0] = 1
psi4[1] = 1
for i in range(0, len(x) - 2):
psi4[i + 2] = psi4[i + 1] + (psi4[i + 1] - psi4[i]) - 2 * k4 * c * c * psi4[i]
# psi holds the result of the energy search.
plt.plot(x, psi, 'r-', label='Corrected Energy')
# Curve for the theoretical energy kt.
psiT = np.zeros(len(x))
psiT[0] = 1
psiT[1] = 1
for i in range(0, len(x) - 2):
psiT[i + 2] = psiT[i + 1] + (psiT[i + 1] - psiT[i]) - 2 * kt * c * c * psiT[i]
plt.plot(x, psiT, 'b-', label='Theoretical Energy')
# Axis labels and title.
# NOTE(review): no plt.legend() or plt.show() call is visible in this listing,
# so the labels passed to plt.plot may never be displayed.
plt.ylabel("Value of Psi")
plt.xlabel("X value from 0 to 0.5")
plt.title("Schrodingers equation for varying inital energy")
The code you shared fails since plt.yscale() needs an argument. I simply commented that line out.
Because your theoretical energy curve and your corrected energy curve differ by so little, it is not possible to scale the y-axis and still see both curves over the full range of x (ie - from 0 to 0.5). Instead, maybe you should plot the difference of the two curves?
# Plot the pointwise gap between the theoretical and the corrected curve.
correction = psiT - psi
plt.plot(x, correction)
plt.xlabel("X value from 0 to 0.5")
plt.title("Size of Correction for Varying Initial Energy")
Also, it might be nice to tack some units on the x and y labels. :)

