I tried writing a Python code to solve the following optimization problem, but the code has many issues. Please help me fix the errors.
Problem:
max ∑i∈M ∑j∈U (Rij · xij)
x,h
subject to C1 : ∑i∈M xij = 1, ∀j, k
C2 : xij ∈ {0, 1}, ∀j ∈ U, i ∈ M
C3 : ∑j∈U xij ≤ Nj , ∀i ∈ M
C4 : ∑j∈U xij *̃hij ≥ hi_th, ∀i ∈ M
C5 : ∑i∈M Rij ≥ Rjmin , ∀j ∈ U
C6 : Pr {τij ≥ τj,max} ≤ τj, ∀j
where M is the number of base stations and U is the number of users, Rij is the data rate, xij is the association vector, Nj is the maximum number of users that a base station can accept based on its capacity, hij is the channel gain, hi_th is a certain threshold, Rjmin is the minimum acceptable data rate for a user, τij is the delay of data between user and base station, τj,max is the maximum delay that a user can tolerate, τj is a certain delay threshold.
Also, note that Pr {τij ≥ τj,max} is the delay outage probability of user and is equal to e^−(Rij −dj,max) *τj,max ≤ τj where dj,max is the maximum data arrival rate of user j
from SciPy. Optimize import minimize
import numpy as np
import matplotlib.pyplot as plt
# Define the number of base stations and users
M = 10
U = 5
# Define the random variables
w = np.random.uniform(0,10e9, size = (M,U)) # total bandwidth
u = np.random.randint(1,10, size = (M,U)) # number of users
transmitted_power = np.random.uniform(0,100, size = (M,U)) # transmitted power
antenna_gain = np.random.uniform(0,10, size = (M,U)) # antenna gain
distance = np.random.uniform(0,1, size = (M,U)) # distance
noise = np.random.uniform(0,1e-9, size = (M,U)) # noise
interference = np.random.uniform(0,1e-9, size = (M,U)) # interference
# Define the maximum number of users that a base station can accept
N = np.random.randint(1,10,size=M)
# Define the channel gain
h = np.random.rand(M,U)
hi_th = 0.5 # certain threshold
Rjmin = np.random.rand(U)
Rjmin_new = np.zeros((U,1))
Rjmin_new[:,0] = Rjmin
τj_max = np.random.rand(M, U)
τj = np.random.rand(M, U)
τ = np.random.rand(M, U)
# Define the data rate function
def data_rate(h,transmitted_power,antenna_gain,distance,w,u,noise,interference):
SINR = (transmitted_power*antenna_gain*distance*h)/(noise+interference)
rate = (w/u)*np.log2(1+SINR)
return rate
# Define the objective function
def objective(x, h,transmitted_power,antenna_gain,distance,w,u,noise,interference):
x = x.reshape(M, U)
R = data_rate(h,transmitted_power,antenna_gain,distance,w,u,noise,interference)
obj = np.sum(R * x)
return obj
def constraint1(x, h,transmitted_power,antenna_gain,distance,w,u,noise,interference):
x = x.reshape(M, U)
return np.ravel(np.sum(x, axis=0) - 1)
def constraint2(x):
return np.ravel(x.astype(int))
def constraint3(x, h,transmitted_power,antenna_gain,distance,w,u,noise,interference):
x = x.reshape(M, U)
return np.ravel(np.sum(x, axis=1) - N)
def constraint4(x, h,transmitted_power,antenna_gain,distance,w,u,noise,interference):
x = x.reshape(M, U)
R = data_rate(h,transmitted_power,antenna_gain,distance,w,u,noise,interference)
return np.ravel(Rjmin_new - np.min(R * x, axis=1))
def constraint5(x, h,transmitted_power,antenna_gain,distance,w,u,noise,interference):
x = x.reshape(M, U)
τj = np.random.rand(M, U)
τ = np.random.rand(M, U)
return np.ravel(τj - np.max(τ * x, axis=1, keepdims=True)[:, np.newaxis])
def constraint6(x, h,transmitted_power,antenna_gain,distance,w,u,noise,interference):
x = x.reshape(M, U)
τj_max = np.random.rand(M, U)
return np.ravel(np.max(τj_max * x, axis=1, keepdims=True)[:, np.newaxis] - τj)
# Define the optimization problem
x0 = np.random.rand(M, U)
x0 = x0.flatten()
b = (1,1)
bnds = (b, b)
bnds = tuple(bnds for _ in range(M*U))
# Define the constraints
con1 = {'type':'eq','fun': constraint1,'args': (h,transmitted_power,antenna_gain,distance,w,u,noise,interference)}
con2 = {'type':'eq','fun': constraint2,'args': (h,transmitted_power,antenna_gain,distance,w,u,noise,interference)}
con3 = {'type':'eq','fun': constraint3,'args': (h,transmitted_power,antenna_gain,distance,w,u,noise,interference)}
con4 = {'type':'eq','fun': constraint4,'args': (h,transmitted_power,antenna_gain,distance,w,u,noise,interference)}
con5 = {'type':'eq','fun': constraint5,'args': (h,transmitted_power,antenna_gain,distance,w,u,noise,interference)}
con6 = {'type':'eq','fun': constraint6,'args': (h,transmitted_power,antenna_gain,distance,w,u,noise,interference)}
cons = ([con1, con2, con3, con4, con5, con6])
solution = minimize(objective, x0, args=(h,transmitted_power,antenna_gain,distance,w,u,noise,interference), bounds=bnds, constraints=cons)
print(solution)
x = solution.x.reshape(M, U)
plt.imshow(x, cmap='gray', interpolation='nearest')
plt.show()`
I tried reshaping the function because there was a mismatch in sizes. I need to correct the code
Related
As mentioned above, the function below works, however its very slow. I am very interested in using faster/optimised numpy (or other) vectorized alternatives. I have not posted the entire script here due to it being too large.
My specific question is - are there suitable numpy (or other) functions that I can use to 1) reduce run time and 2) reduce code volume of this function, specifically the for loop?
Edit: mass, temp, U and dpdh are functions that carry out simple algebraic calculations and return constants
def my_system(t, y, n, hIn, min, mAlumina, cpAlumina, sa, V):
dydt = np.zeros(3 * n) #setting up zeros array for solution (solving for [H0,Ts0,m0,H1,Ts1,m1,H2,Ts2,m2,..Hn,Tsn,mn])
# y = [h_0, Ts_0, m_0, ... h_n, Ts_n, m_n]
# y[0] = hin
# y[1] = Ts0
# y[2] = minL
i=0
## Using thermo
T = temp(y[i],P) #initial T
m = mass(y[i],P) #initial m
#initial values
dydt[i] = (min * (hIn - y[i]) + (U(hIn,P,min) * sa * (y[i + 1] - T))) / m # dH/dt (eq. 2)
dydt[i + 1] = -(U(hIn,P,min) * sa * (y[i + 1] - T)) / (mAlumina * cpAlumina) # dTs/dt from eq.3
dmdt = dydt[i] * dpdh(y[i], P) * V # dm/dt (holdup variation) eq. 4b
dydt[i + 2] = min - dmdt # mass flow out (eq.4a)
for i in range(3, 3 * n, 3): #starting at index 3, and incrementing by 3 because we are solving for 'triplets' [h,Ts,m] in each loop
## Using thermo
T = temp(y[i],P)
m = mass(y[i],P)
# [h, TS, mdot]
dydt[i] = (dydt[i-1] * (y[i - 3] - y[i]) + (U(y[i-3], P, dydt[i-1]) * sa * (y[i + 1] - T))) / m # dH/dt (eq.2), dydt[i-1] is the mass of the previous tank
dydt[i + 1] = -(U(y[i-3], P, dydt[i-1]) * sa * (y[i + 1] - T)) / (mAlumina * cpAlumina) # dTs/dt eq. (3)
dmdt = dydt[i] * dpdh(y[i], P) * V # Equation 4b
dydt[i + 2] = dydt[i-1] - dmdt # Equation 4a
return dydt
The functions mass, temp, U, and dpdh used inside the my_system function all take numbers as input, perform some simple algebraic operation and return a number (no need to optimise these I am just providing them for further context)
def temp(H,P):
# returns temperature given enthalpy (after processing function)
T = flasher.flash(H=H, P=P, zs=zs, retry=True).T
return T
def mass(H, P):
# returns mass holdup in mol
m = flasher.flash(H=H, P=P, zs=zs, retry=True).rho()*V
return m
def dpdh(H, P):
res = flasher.flash(H=H, P=P, zs=zs, retry=True)
if res.phase_count == 1:
if res.phase == 'L':
drho_dTf = res.liquid0.drho_dT()
else:
drho_dTf = res.gas.drho_dT()
else:
drho_dTf = res.bulk._equilibrium_derivative(of='rho', wrt='T', const='P')
dpdh = drho_dTf/res.dH_dT_P()
return dpdh
def U(H,P,m):
# Given T, P, m
air = Mixture(['nitrogen', 'oxygen'], Vfgs=[0.79, 0.21], H=H, P=P)
mu = air.mu*1000/mWAir #mol/m.s
cp = air.Cpm #J/mol.K
kg = air.k #W/m.K
g0 = m/areaBed #mol/m2.s
a = sa*n/vTotal #m^2/m^3 #QUESTIONABLE
psi = 1
beta = 10
pr = (mu*cp)/kg
re = (6*g0)/(a*mu*psi)
hfs = ((2.19*(re**1/3)) + (0.78*(re**0.619)))*(pr**1/3)*(kg)/diameterParticle
h = 1/((1/hfs) + ((diameterParticle/beta)/kAlumina))
return h
Reference Image:
enter image description here
For improving the speed, you can see Numba, which is useable if you use NumPy a lot but not every code can be used with Numba. Apart from that, the formulation of the equation system is confusing. You are solving 3 equations and adding the result to a single dydt list by 3 elements each. You can simply create three lists, solve each equation and add them to their respective list. For this, you need to re-write my_system as:
import numpy as np
def my_system(t, RHS, hIn, Ts0, minL, mAlumina, cpAlumina, sa, V):
# get initial boundary condition values
y1 = RHS[0]
y2 = RHS[1]
y3 = RHS[2]
## Using thermo
T = # calculate T
m = # calculate m
# [h, TS, mdot] solve dy1dt for h, dy2dt for TS and dy3dt for mdot
dy1dt = # dH/dt (eq.2), y1 corresponds to initial or previous value of dy1dt
dy2dt = # dTs/dt eq. (3), y2 corresponds to initial or previous value of dy2dt
dmdt = # Equation 4b
dy3dt = # Equation 4a, y3 corresponds to initial or previous value of dy3dt
# Left-hand side of ODE
LHS = np.zeros([3,])
LHS[0] = dy1dt
LHS[1] = dy2dt
LHS[2] = dy3dt
return LHS
In this function, you can pass RHS as a list with initial values ([dy1dt, dy2dt, dy3dt]) which will be unpacked as y1, y2, and y3 respectively and use them for respective differential equations. The solved equations (next values) will be saved to dy1dt, dy2dt, and dy3dt which will be returned as a list LHS.
Now you can solve this using scipy.integrate.odeint. Therefore, you can leave the for loop structure and solve the equations by using this method as follows:
hIn = #some val
Ts0 = #some val
minL = #some val
mAlumina = #some vaL
cpAlumina = #some val
sa = #some val
V = #some val
P = #some val
## Using thermo
T = temp(hIn,P) #initial T
m = mass(hIn,P) #initial m
#initial values
y01 = # calculate dH/dt (eq. 2)
y02 = # calculate dTs/dt from eq.3
dmdt = # calculate dm/dt (holdup variation) eq. 4b
y03 = # calculatemass flow out (eq.4a)
n = # time till where you want to solve the equation system
y0 = [y01, y02, y03]
step_size = 1
t = np.linspace(0, n, int(n/step_size)) # use that start time to which initial values corresponds
res = odeint(my_sytem, y0, t, args=(hIn, Ts0, minL, mAlumina, cpAlumina, sa, V,), tfirst=True)
print(res[:,0]) # print results for dH/dt
print(res[:,1]) # print results for dTs/dt
print(res[:,2]) # print results for Equation 4a
Here, I have passed all the initial values as y0 and chosen a step size of 1 which you can change as per your need.
I'm trying to solve the next numerical optimization problem: find the vector x such that minimizes the cost function 0.5 * norm(Bx - v, 2)^2, where B is matrix and v is a vector. I have implemented two gradient descent algorithms. In one of them I manually tune the step-size, and in the other I calculate it automatically with equation (2.5) from ftp://lsec.cc.ac.cn/pub/yyx/papers/p0504.pdf. The gradient of the cost function is B^T(B*x - v).
Additionally, I compare my implementations with the solve(A, B) function from numpy.linalg, noting that the solution of the optimization problem is the solution of the linear system A*x = b, where A = B^T * B, b = B^T * v. So far, I'm getting poor results: large errors and long running times. I don't know it there is an error in my implementation or this is how these algorithms work in the computational experiments that I set up.
In the computational experiments, I generate random "solution" vectors x, and matrices B. Then compute A and b accordingly.
Any feedback is appreciated.
This is my code:
import numpy as np
import matplotlib.pyplot as plt
from numpy import linalg as LA
import time
def residue(x, B, v):
aux = np.dot(B, x) - v
aux = pow(LA.norm(aux, 2), 2)
aux = aux / pow(LA.norm(v, 2), 2)
return aux
def gradGD(x, B, v):
aux = np.dot(B, x) - v
return np.dot(B.T, aux)
def gradientDescent(B, v, alpha, tol, x0):
A = np.dot(B.T, B)
b = np.dot(B.T, v)
x = x0
while True:
res = residue(x, B, v)
print('Residue ', res)
if (res < tol):
break
x = x - alpha * gradGD(x, B, v)
return x
# Gradient descent with auto step-size
def gradientDescentBB(B, v, tol, x0):
x = x0
xpre = np.zeros((N, 1))
flag = 0
while True:
res = residue(x, B, v)
#print('Residue ', res)
if (res < tol):
break
if (flag == 0):
grad = gradGD(x, B, v)
x = x - (1e-06) * grad
flag = 1
continue
gradpre = grad
grad = gradGD(x, B, v)
y = grad - gradpre
s = x - xpre
# print('dot', np.dot(s.T, y))
# print('||y||_2 = ', LA.norm(y, 2))
alpha = np.dot(s.T, y) / pow(LA.norm(y, 2), 2)
# print("alpha = ", alpha)
xpre = x
x = x - alpha * grad
return x
# Solves the optimization problem via Ax * b
def solver(B, v):
A = np.dot(B.T, B)
b = np.dot(B.T, v)
return np.linalg.solve(A, b)
# Main routine
N = 1000
epsilon = 1.0e-6
a = 1/N - epsilon
iter = 20
mytime_iter = []
time2_iter = []
myeabs_iter = []
myerel_iter = []
myepercent_iter = []
cgseabs_iter = []
cgserel_iter = []
cgsepercent_iter = []
# Running the experiment many times
for i in range(iter):
print('Iteration: ', i)
B = a * np.random.randn(N, N) + np.ones((N, N))
#print(B)
x0 = np.random.randn(N, 1) # Real solution of the optmization problem
v = np.dot(B, x0)
mystart = time.time()
# x = gradientDescent(B, v, alpha=1999100e-09, tol=1e-05, x0=np.zeros((N, 1))) # Gradient Descent: Method 1
x = gradientDescentBB(B, v, tol=1e-05, x0=np.zeros((N, 1))) # Gradient Descent: Method 2
myend = time.time()
mytime = myend - mystart
start2 = time.time()
xalt = solver(B, v) # Solution of the optimization problem by solving A*x = b
end2 = time.time()
time2 = start2 - end2
myeabs = LA.norm(x - x0, 2)
myerel = myeabs / LA.norm(x0, 2)
myepercent = myerel * 100
cgseabs = LA.norm(xalt - x0, 2)
cgserel = cgseabs / LA.norm(x0, 2)
cgsepercent = cgserel * 100
mytime_iter.append(mytime)
time2_iter.append(time2)
myeabs_iter.append(myeabs)
myerel_iter.append(myerel)
myepercent_iter.append(myepercent)
cgseabs_iter.append(cgseabs)
cgserel_iter.append(cgserel)
cgsepercent_iter.append(cgsepercent)
plt.figure(1)
plt.plot(mytime_iter, 'bo', label="GD")
plt.plot(time2_iter, 'ro', label="solve()")
plt.legend(loc="upper right")
plt.xlabel("# Iteration")
plt.ylabel("Time (s)")
# plt.ylim(-1.5, 2.0) --
plt.figure(2)
plt.plot(myeabs_iter, "-b", label="GD")
plt.plot(cgseabs_iter, "-r", label="solve()")
plt.legend(loc="upper right")
plt.xlabel("# Iteration")
plt.ylabel("Absolute error")
plt.figure(3)
plt.plot(myerel_iter, "-b", label="GD")
plt.plot(cgserel_iter, "-r", label="solve()")
plt.legend(loc="upper right")
plt.xlabel("# Iteration")
plt.ylabel("Relative error")
plt.figure(4)
plt.plot(myepercent_iter, "-b", label="GD")
plt.plot(cgsepercent_iter, "-r", label="solve()")
plt.legend(loc="upper right")
plt.ylabel("Relative error (%)")
plt.show()
I was making Baysien model Type-II to finding the maximum likelihood to estimate “most probable” values for hyper-parameters. After I try to run the code below I got some of error that shown in compute_posterior function. It might be something wrong with the shape.
def compute_posterior(PHI, t, alph, s2):
M = PHI.shape[1]
beta = 1/s2
H = beta*(PHI.T # PHI) + alph*np.eye(M)
SIGMA = np.linalg.inv(H)
Mu = beta * (SIGMA # (PHI.T # t))
#
return Mu, SIGMA
# Marginal log likelihood
#
# Version 1 Log Marginal (ideal)
#
def compute_log_marginal(PHI, t, alph, s2):
#
# Exploit the shape of C and the fact that M < N (usually)
#
N, M = PHI.shape
beta = 1 / s2
Mu, SIGMA = compute_posterior(PHI, t, alph, s2)
#
# Constant factor
#
logML = -N * np.log(2 * np.pi)
#
# log determinant factor (log|C|)
#
# If SIGMA becomes singular, sgn<0
#
sgn, logdet = np.linalg.slogdet(SIGMA)
#
if sgn < 0:
print("Error with alpha={0}, s2={1}".format(alph, s2))
raise np.linalg.LinAlgError("logdet sign is negative - something is wrong!")
#
logML += logdet + N*np.log(beta) + M*np.log(alph)
#
# data term (t'Ct)
#
logML -= beta * (t.T # (t - PHI # Mu))
#
logML = logML / 2.0
#
return logML
log_alph = np.linspace(-3, 6, n)
log_s2 = np.linspace(-4, 4, n)
x = df.values[:, 0:8]
t = df.values[:, 8]
n = 100
Z = np.empty((n, n))
Z_max = []
al = []
rr = []
for i, a in enumerate(log_alph):
for j, r in enumerate(log_s2):
Z[i, j] = compute_log_marginal(x,t,log_alph,log_s2)
Z_max.append(Z[i, j])
maximum = max(Z_max)
print(maximum)
# if Z[i, j] == -11.627510277032485:
# al.append(a)
# rr.append(r)
# maximum = max(Z_max)
# print(al)
# print(rr)
# print(maximum)
# maximum = -11.627510277032485
plt.contourf(log_a, log_r, Z.T)
print('The maximum point is:',maximum )
print('The max log_alpha is:',al[0] )
print('The max log_r is:',rr[0] )
plt.xlabel('log alpha')
plt.ylabel('log r')
plt.title('Contour of log alpha and log r')
After I compile I got this error, I still don't know how to figure it out
'
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-260-c7f0396b4046> in <module>
14 for i, a in enumerate(log_alph):
15 for j, r in enumerate(log_s2):
---> 16 Z[i, j] = compute_log_marginal(x,t,log_alph,log_s2)
17 Z_max.append(Z[i, j])
18 maximum = max(Z_max)
<ipython-input-233-45cfae272a38> in compute_log_marginal(PHI, t, alph, s2)
19 N, M = PHI.shape
20 beta = 1 / s2
---> 21 Mu, SIGMA = compute_posterior(PHI, t, alph, s2)
22 #
23 # Constant factor
<ipython-input-233-45cfae272a38> in compute_posterior(PHI, t, alph, s2)
2 M = PHI.shape[1]
3 beta = 1/s2
----> 4 H = beta*(PHI.T # PHI) + alph*np.eye(M)
5 SIGMA = np.linalg.inv(H)
6 Mu = beta * (SIGMA # (PHI.T # t))
ValueError: operands could not be broadcast together with shapes (100,) (8,8)
'
H = beta*(PHI.T # PHI) + alph*np.eye(M)
split operations in this line and figure out which one is throwing errors
If your logic is ok, check if all matrices have the shape that you expect.
If shapes are ok note that # is matrix multiplication and * is elementwise multiplication.
Nex time you post code try to include smaller part that is more readable or include code that everyone will be able to execute, to debug it.
I am trying to implement GMM Clustering for both 24 Dimension feature vector and 32 dimension feature vector, where assignment of initial parameters are done by Kmeans algorightm (K mean clustering is providing cluster centers - MU - only).
I am following this link, where it's implemented only for 2D feature vector and predefined Mu and sigma.
If anyone have the code for GMM clustering kindly post.
Predefined Lib for GMM is also there in sklearn, but it's not giving me likelyhood for each iteration. sklearn GMM
def kmeans(dataSet, k, c):
# 1. Randomly choose clusters
rng = np.random.RandomState(c)
p = rng.permutation(dataSet.shape[0])[:k]
centers = dataSet[p]
while True:
labels = pairwise_distances_argmin(dataSet, centers)
new_centers = np.array([dataSet[labels == i].mean(0) for i in range(k)]
if np.all(centers == new_centers):
break
centers = new_centers
cluster_data = [dataSet[labels == i] for i in range(k)]
l = []
covs = []
for i in range(k):
l.append(len(cluster_data[i]) * 1.0 / len(dataSet))
covs.append(np.cov(np.array(cluster_data[i]).T))
return centers, l, covs, cluster_data
return new_mu, new_covs, cluster_data
class gaussian_Mix_Model:
def __init__(self, k = 8, eps = 0.0000001):
self.k = k ## number of clusters
self.eps = eps ## threshold to stop `epsilon`
def calculate_Exp_Maxim(self, X, max_iters = 1000):
# n = number of data-points, d = dimension of data points
n, d = X.shape
mu, Cov = [], []
for i in range(1,k):
new_mu, new_covs, cluster_data = kmeans(dataSet, k, c)
# Initialize new
mu[k] = new_mu
Cov[k]= new_cov
# initialize the weights
w = [1./self.k] * self.k
R = np.zeros((n, self.k))
### LLhoods
LLhoods = []
P = lambda mu, s: np.linalg.det(s) ** -.5 ** (2 * np.pi) ** (-X.shape[1]/2.) \
* np.exp(-.5 * np.einsum('ij, ij -> i',\
X - mu, np.dot(np.linalg.inv(s) , (X - mu).T).T ) )
# Iterate till max_iters iterations
while len(LLhoods) < max_iters:
# Expectation Calcultion
## membership for each of K Clusters
for k in range(self.k):
R[:, k] = w[k] * P(mu[k], Cov[k])
# Finding the log likelihood
LLhood = np.sum(np.log(np.sum(R, axis = 1)))
# Now store the log likelihood to the list.
LLhoods.append(LLhood)
# Number of data points to each clusters
R = (R.T / np.sum(R, axis = 1)).T
N_ks = np.sum(R, axis = 0)
# Maximization and calculating the new parameters.
for k in range(self.k):
# Calculate the new means
mu[k] = 1. / N_ks[k] * np.sum(R[:, k] * X.T, axis = 1).T
x_mu = np.matrix(X - mu[k])
# Calculate new cov
Cov[k] = np.array(1 / N_ks[k] * np.dot(np.multiply(x_mu.T, R[:, k]), x_mu))
# Calculate new PiK
w[k] = 1. / n * N_ks[k]
# check for convergence
if (np.abs(LLhood - LLhoods[-2]) < self.eps) and (iteration < max_iters): break
else:
Continue
from collections import namedtuple
self.params = namedtuple('params', ['mu', 'Cov', 'w', 'LLhoods', 'num_iters'])
self.params.mu = mu
self.params.Cov = Cov
self.params.w = w
self.params.LLhoods = LLhoods
self.params.num_iters = len(LLhoods)
return self.params
# Call the GMM to find the model
gmm = gaussian_Mix_Model(3, 0.000001)
params = gmm.fit_EM(X, max_iters= 150)
# Plotting of Log-Likelihood VS Iterations.
plt.plot(LLhoods[0])
plt.savefig('Dataset_2A_GMM_Class_1_K_16.png')
plt.clf()
plt.plot(LLhoods[1])
plt.savefig('Dataset_2A_GMM_Class_2_K_16.png')
plt.clf()
plt.plot(LLhoods[2])
plt.savefig('Dataset_2A_GMM_Class_3_K_16.png')
plt.clf()
I am trying to make my own implementation of a simple neural network to classify points. I heard about a specific type of activation function that I am interested in testing, the Gaussian. I do not just want to use relus or sigmoids, I am trying to build a network that takes as input about 300 x and y values, then in the first layer computes the Gaussian function on these values with about 50 neurons which each have a separate x and y value as their means (I will keep the sigma constant). Mathematically I anticipate this to look like
exp(- [(x-Mx)^2 + (y-My)^2] / (2 * sigma^2) ) / (sqrt(2*pi*sigma))
then I will perform a weighted sum of these terms over all the neurons in the first layer, add a bias, and pass it through a sigmoid to get my prediction. I will perform this step for each training example and get a list of predictions. I think that I do the forward propagation but I will include the code for that in case someone can spot an obvious error in my implementation. Then I perform the back-propogation. I have tested my updating of the weights and bias, and I believe that they are not the problem. I think that there is something wrong with my implementation of the gradient for the means however because they always cluster to a single point which clearly does not maximize the cost function. I have already tried using a couple of different data sets, and varying some hyper parameters, all to no avail. Can anyone figure out what the problem is?
Here is my code.
# libraries
import matplotlib.patches as patches
import seaborn as sns; sns.set()
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pdb
# functions
def gaussian(sq_error, sigma):
return ((1/np.sqrt(2*np.pi*sigma**2))) * np.exp(-(sq_error)/(2*sigma**2))
def calc_X1(X0, Mx, My, m, sigma):
X1 = [] # shape will be (10, m)
for ex in range(0, m):
sq_error = (X0[0][ex] - Mx) **2 + (X0[1][ex] - My) **2
X1.append(gaussian(sq_error, sigma))
X1 = np.array(X1)
return X1.T
def sigmoid(Z):
return 1 / (1 + np.exp(-Z))
def calc_X2(W2, X1, b2):
return sigmoid(np.dot(W2, X1) + b2)
def cost(X2, Y, m):
return -1/m * ( np.dot(Y, np.log(X2.T)) + np.dot(1-Y, np.log(1-X2.T))) [0]
def calc_dZ2(X2, Y):
return X2 - Y
def calc_dM(dZ2, W2, X1, sigma, M, m, xOrY, X0):
cur_dM = np.zeros(M.shape)
for i in range(0, m):
# pdb.set_trace()
cur_dM += dZ2[0][i] * float(np.dot(W2, X1.T[i])) * 1/sigma**2 * (X0[xOrY][i] - M)
return cur_dM / m
def train_correct(X2, Y, m):
ct = 0
for i in range(0, m):
if np.round(X2[0][i]) == Y[i]:
ct += 1
return ct / m
# graphing functions
def plot_train_data(X, Y, m, ax):
for ex in range(0, m):
xCur = X[0][ex]
yCur = X[1][ex]
if Y[ex] == 1:
color=(1, 0, 0)
else:
color=(0,0,1)
ax.scatter(xCur, yCur, c=color)
def probability_hash(pr):
return (float(pr), float(np.round(pr)), float(1-pr))
def probability_hash_1d(pr):
return float(pr)
def plot_boundary(Mx, My, sigma, W2, b2, ax):
boundsx = [-5, 5]
boundsy = [-5, 5]
samples = [10, 10]
width = (boundsx[1] - boundsx[0]) / samples[0]
height = (boundsy[1] - boundsy[0]) / samples[1]
pt = np.zeros((2,1))
for x in np.linspace(boundsx[0], boundsx[1], samples[0]):
for y in np.linspace(boundsy[0], boundsy[1], samples[1]):
pt[0][0] = x
pt[1][0] = y
X1_cur = calc_X1(pt, Mx, My, 1, sigma)
X2_cur = calc_X2(W2, X1_cur, b2)
# ax.add_patch(patches.Rectangle((x, y), width, height, facecolor=probability_hash(X2_cur)))
ax.scatter(x, y, c=probability_hash(X2_cur))
def cool_plot_boundary(Mx, My, sigma, W2, b2, ax):
boundsx = [-2, 2]
boundsy = [-2, 2]
samples = [50, 50]
width = (boundsx[1] - boundsx[0]) / samples[0]
height = (boundsy[1] - boundsy[0]) / samples[1]
pt = np.zeros((2,1))
heats = []
xs = np.linspace(boundsx[0], boundsx[1], samples[0])
ys = np.linspace(boundsy[0], boundsy[1], samples[1])
for x in xs:
heats.append([])
for y in ys:
pt[0][0] = x
pt[1][0] = y
X1_cur = calc_X1(pt, Mx, My, 1, sigma)
X2_cur = calc_X2(W2, X1_cur, b2)
heats[-1].append(probability_hash_1d(X2_cur))
# xticks = []
# yticks = []
# for i in range(0, len(xs)):
# if i % 3 == 0:
# xticks.append(round(xs[i], 2))
# for i in range(0, len(ys)):
# if i % 3 == 0:
# yticks.append(round(ys[i], 2))
xticks = []
yticks = []
sns.heatmap(heats, ax=ax, cbar=True, xticklabels=xticks, yticklabels=yticks)
def plot_m(Mx, My, n1, ax):
for i in range(0, n1):
ax.scatter(Mx[i], My[i], c="k")
# initialize parameters
file = "data/disk2.csv"
df = pd.read_csv(file)
sigma = 2
itterations = 10000
learning_rate = 0.9
n0 = 2 # DO NOT CHANGE, formality
X0 = np.row_stack((df["0"], df["1"])) # shape is (2, m)
Y = np.array(df["2"])
m = len(Y)
n1 = 50
Mx = np.random.randn(n1)
My = np.random.randn(n1)
X1 = calc_X1(X0, Mx, My, m, sigma)
n2 = 1 # DO NOT CHANGE, formality
small_number = 0.01
W2 = np.random.randn(1, n1) * small_number
b2 = 0
X2 = calc_X2(W2, X1, b2)
J = cost(X2, Y, m)
Js = []
itters = []
fig = plt.figure()
plotGap = 200
for i in range(0, itterations):
# forward propogation
X1 = calc_X1(X0, Mx, My, m, sigma)
X2 = calc_X2(W2, X1, b2)
J = cost(X2, Y, m)
if i % plotGap == 0:
fig.clear()
costAx = fig.add_subplot(311)
plotAx = fig.add_subplot(312)
pointsAx = fig.add_subplot(313)
cool_plot_boundary(Mx, My, sigma, W2, b2, plotAx)
# plot_boundary(Mx, My, sigma, W2, b2, plotAx)
plot_train_data(X0, Y, m, pointsAx)
Js.append(J)
itters.append(i)
costAx.plot(itters, Js, c="k")
print("cost = " + str(J) + "\ttraining correct = " + str(train_correct(X2, Y, m)))
plot_m(Mx, My, n1, pointsAx)
plt.pause(0.1)
# back propogation
dZ2 = calc_dZ2(X2, Y)
dW2 = np.dot(dZ2, X1.T) / m
db2 = np.sum(dZ2) / m
dMx = calc_dM(dZ2, W2, X1, sigma, Mx, m, 0, X0)
dMy = calc_dM(dZ2, W2, X1, sigma, My, m, 1, X0)
b2 -= learning_rate * db2
W2 -= learning_rate * dW2
Mx -= learning_rate * dMx
My -= learning_rate * dMy
For data I have a csv with a bunch of point locations and labels. You can use this code to generate a similar csv. (Make sure you have a folder called data in the folder you run this from).
# makes data in R2 to learn
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
n = 2
# number of exaples
m = 300
X = []
Y = []
# hyperparamers for data
rApprox = 1
error = 0.4
noise = 0.1
name = "data/disk2"
plt.cla()
for ex in range(0, m):
xCur = np.random.randn(2)
X.append(xCur)
if abs(np.linalg.norm(xCur) + np.random.randn()*noise - rApprox) < error:
Y.append(1)
color="r"
else:
Y.append(0)
color="b"
plt.scatter(xCur[0], xCur[1], c=color)
if abs(np.random.randn()) < 0.01:
plt.pause(0.1)
plt.pause(1)
plt.savefig(name + ".png")
X = np.array(X)
Y = np.array(Y)
df = pd.DataFrame(X)
df[2] = Y
df.to_csv(name + ".csv", index=False)
Thanks for your help.
Substitute this function for the calculate dm function. You must be careful when multiplying, it is not just enough that the dimensions work out.
def calculuate_dMs(X0, X1, X2, Mx, My, W2, dZ2, sigma, m, n1):
# pdb.set_trace()
X0x_big = np.dot(np.ones((n1, 1)), X0[0].reshape(1, m))
X0y_big = np.dot(np.ones((n1, 1)), X0[1].reshape(1, m))
Mx_big = np.dot(Mx.reshape(n1, 1), np.ones((1, m)))
My_big = np.dot(My.reshape(n1, 1), np.ones((1, m)))
W2_big = np.dot(W2.reshape(n1, 1), np.ones((1, m)))
dZ2_big = np.dot(np.ones((n1, 1)), dZ2.reshape(1, m))
dxTemp = np.multiply(np.multiply(np.multiply((X0x_big - Mx_big), X1), W2_big), dZ2_big)
dyTemp = np.multiply(np.multiply(np.multiply((X0y_big - My_big), X1), W2_big), dZ2_big)
return (np.sum(dxTemp, axis=1)/m, np.sum(dyTemp, axis=1)/m)