I'm trying to solve the following numerical optimization problem: find the vector x that minimizes the cost function 0.5 * norm(Bx - v, 2)^2, where B is a matrix and v is a vector. I have implemented two gradient descent algorithms. In one of them I manually tune the step size, and in the other I calculate it automatically with equation (2.5) from ftp://lsec.cc.ac.cn/pub/yyx/papers/p0504.pdf. The gradient of the cost function is B^T(B*x - v).
Additionally, I compare my implementations with the solve(A, b) function from numpy.linalg, noting that the solution of the optimization problem is the solution of the linear system A*x = b, where A = B^T * B and b = B^T * v. So far, I'm getting poor results: large errors and long running times. I don't know if there is an error in my implementation or whether this is simply how these algorithms behave in the computational experiments I set up.
In the computational experiments, I generate random "solution" vectors x, and matrices B. Then compute A and b accordingly.
Any feedback is appreciated.
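For reference, here is a tiny sanity check of the normal-equations relationship I'm relying on (illustrative only, separate from my implementation below):

import numpy as np

# The minimizer of 0.5*||B x - v||^2 has zero gradient B^T(B x - v),
# i.e. it solves (B^T B) x = B^T v.
rng = np.random.default_rng(0)
B = rng.standard_normal((5, 5))
v = rng.standard_normal((5, 1))
x_star = np.linalg.solve(B.T @ B, B.T @ v)
print(np.linalg.norm(B.T @ (B @ x_star - v)))  # ~0 up to round-off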
This is my code:
import numpy as np
import matplotlib.pyplot as plt
from numpy import linalg as LA
import time
def residue(x, B, v):
    aux = np.dot(B, x) - v
    aux = pow(LA.norm(aux, 2), 2)
    aux = aux / pow(LA.norm(v, 2), 2)
    return aux
def gradGD(x, B, v):
    aux = np.dot(B, x) - v
    return np.dot(B.T, aux)
def gradientDescent(B, v, alpha, tol, x0):
    A = np.dot(B.T, B)
    b = np.dot(B.T, v)
    x = x0
    while True:
        res = residue(x, B, v)
        print('Residue ', res)
        if (res < tol):
            break
        x = x - alpha * gradGD(x, B, v)
    return x
# Gradient descent with auto step-size
def gradientDescentBB(B, v, tol, x0):
    x = x0
    xpre = np.zeros((N, 1))
    flag = 0
    while True:
        res = residue(x, B, v)
        #print('Residue ', res)
        if (res < tol):
            break
        if (flag == 0):
            grad = gradGD(x, B, v)
            xpre = x  # remember the previous iterate so that s = x_k - x_{k-1}
            x = x - (1e-06) * grad
            flag = 1
            continue
        gradpre = grad
        grad = gradGD(x, B, v)
        y = grad - gradpre
        s = x - xpre
        # print('dot', np.dot(s.T, y))
        # print('||y||_2 = ', LA.norm(y, 2))
        alpha = np.dot(s.T, y) / pow(LA.norm(y, 2), 2)
        # print("alpha = ", alpha)
        xpre = x
        x = x - alpha * grad
    return x
# Solves the optimization problem via the normal equations A*x = b
def solver(B, v):
    A = np.dot(B.T, B)
    b = np.dot(B.T, v)
    return np.linalg.solve(A, b)
# Main routine
N = 1000
epsilon = 1.0e-6
a = 1/N - epsilon
iter = 20
mytime_iter = []
time2_iter = []
myeabs_iter = []
myerel_iter = []
myepercent_iter = []
cgseabs_iter = []
cgserel_iter = []
cgsepercent_iter = []
# Running the experiment many times
for i in range(iter):
    print('Iteration: ', i)
    B = a * np.random.randn(N, N) + np.ones((N, N))
    #print(B)
    x0 = np.random.randn(N, 1)  # Real solution of the optimization problem
    v = np.dot(B, x0)
    mystart = time.time()
    # x = gradientDescent(B, v, alpha=1999100e-09, tol=1e-05, x0=np.zeros((N, 1)))  # Gradient Descent: Method 1
    x = gradientDescentBB(B, v, tol=1e-05, x0=np.zeros((N, 1)))  # Gradient Descent: Method 2
    myend = time.time()
    mytime = myend - mystart
    start2 = time.time()
    xalt = solver(B, v)  # Solution of the optimization problem by solving A*x = b
    end2 = time.time()
    time2 = end2 - start2
    myeabs = LA.norm(x - x0, 2)
    myerel = myeabs / LA.norm(x0, 2)
    myepercent = myerel * 100
    cgseabs = LA.norm(xalt - x0, 2)
    cgserel = cgseabs / LA.norm(x0, 2)
    cgsepercent = cgserel * 100
    mytime_iter.append(mytime)
    time2_iter.append(time2)
    myeabs_iter.append(myeabs)
    myerel_iter.append(myerel)
    myepercent_iter.append(myepercent)
    cgseabs_iter.append(cgseabs)
    cgserel_iter.append(cgserel)
    cgsepercent_iter.append(cgsepercent)
plt.figure(1)
plt.plot(mytime_iter, 'bo', label="GD")
plt.plot(time2_iter, 'ro', label="solve()")
plt.legend(loc="upper right")
plt.xlabel("# Iteration")
plt.ylabel("Time (s)")
# plt.ylim(-1.5, 2.0)
plt.figure(2)
plt.plot(myeabs_iter, "-b", label="GD")
plt.plot(cgseabs_iter, "-r", label="solve()")
plt.legend(loc="upper right")
plt.xlabel("# Iteration")
plt.ylabel("Absolute error")
plt.figure(3)
plt.plot(myerel_iter, "-b", label="GD")
plt.plot(cgserel_iter, "-r", label="solve()")
plt.legend(loc="upper right")
plt.xlabel("# Iteration")
plt.ylabel("Relative error")
plt.figure(4)
plt.plot(myepercent_iter, "-b", label="GD")
plt.plot(cgsepercent_iter, "-r", label="solve()")
plt.legend(loc="upper right")
plt.ylabel("Relative error (%)")
plt.show()
I am writing a program that operates out of a main() function. I am unable to change the main function (this is part of a class). How would I go about carrying over a variable from one function to the next, without changing the main()?
def read_data(fname) :
    file = fname
    x = []
    y = []
    with open(file) as f:
        for line in f:
            xi, yi = [float(x) for x in line.split(",")]
            x.append(xi)
            y.append(yi)
    return x, y
def compute_m_and_b(x, y) :
    sx, sy, sx2, sxy, sy2 = 0, 0, 0, 0, 0
    for i in range(len(x)):
        sx += x[i]
        sy += y[i]
        sx2 += (x[i] ** 2)
        sy2 += (y[i] ** 2)
        sxy += (x[i] * y[i])
    m = (sxy * len(x) - sx * sy) / (sx2 * len(x) - sx**2)
    b = (sy - m * sx) / len(x)
    return m, b
def compute_fx_residual(x, y, m, b) :
    fx = []
    for xi in x:
        fx.append(m * xi + b)
    residual = []
    for i in range(len(y)):
        residual.append(y[i] - fx[i])
    return fx, residual
def compute_sum_of_squared_residuals(residual) :
    least_squares_r = 0
    for i in range(len(y)) :
        least_squares_r += (residual[i]) ** 2
    return least_squares_r
def compute_total_sum_of_squares(y) :
    sum_squares = 0
    ymean = sum(y) / len(y)
    for i in range(len(y)) :
        sum_squares += (yi - ymean) ** 2
    return sum_squares
As you can see, I am restricted to using only the variables listed in the parentheses of each function definition. This leads to variables calculated in prior functions being undefined. How can I access them without needing to change main()?
Please let me know if I should be more clear. I can provide more examples, but I wanted to maintain some brevity.
EDIT: here is the main function:
def main():
    fname = input("Enter Input Filename: ")
    x, y = regress.read_data(fname)
    print()
    print("Input File: ", fname)
    print("Data points: ", len(x))
    #compute coefficients m and b
    m, b = regress.compute_m_and_b(x, y)
    #compute fx and residual
    fx, residual = regress.compute_fx_residual(x, y, m, b)
    #compute sum of squared residuals
    least_squares_r = regress.compute_sum_of_squared_residuals(residual)
    #compute sum of squares
    sum_squares = regress.compute_total_sum_of_squares(y)
    #compute coefficient of determination
    coeff_of_d = regress.compute_coeff_of_determination(least_squares_r, sum_squares)
    regress.print_least_squares(x, y, m, b, fx, residual, least_squares_r, sum_squares, coeff_of_d)
    #compute pearson coefficient
    pearson_r = regress.compute_pearson_coefficient(x, y)
    regress.print_pearson(pearson_r)
    return

main()
You haven't provided the main function so it's unclear how you're currently using it.
Looks to me like you can just take the variables returned by each function and pass them into the next:
fname = "some/path/to/file.txt"
x, y = read_data(fname)
m, b = compute_m_and_b(x, y)
fx, residual = compute_fx_residual(x, y, m, b)
least_squares_r = compute_sum_of_squared_residuals(residual)
sum_squares = compute_total_sum_of_squares(y)
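If you want to carry the chain through the rest of main() in the same way, it just keeps going. compute_coeff_of_determination is not shown in the question, so the body below is only a sketch using the usual R^2 = 1 - SSR/SST definition; adjust it to whatever your assignment specifies:

# Sketch only: continue passing each result into the next call.
def compute_coeff_of_determination(least_squares_r, sum_squares):
    # standard coefficient of determination
    return 1 - least_squares_r / sum_squares

coeff_of_d = compute_coeff_of_determination(least_squares_r, sum_squares)
print("R^2 =", coeff_of_d)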
I am trying to fit a simple straight line of the y = mx + c type to some synthetic data using parallel-tempered MCMC. My goal is just to understand how to use it, so that I can apply it to some more complex models later. The example I am trying to reproduce is what has already been done with a simple emcee code:
http://dfm.io/emcee/current/user/line/
but instead of using mcmc, I want to use parallel-tempered mcmc:
http://dfm.io/emcee/current/user/pt/
Here is a working code:
import numpy as np
from emcee import PTSampler
import emcee
# Choose the "true" parameters.
m_true = -0.9594
b_true = 4.294
f_true = 0.534
# Generate some synthetic data from the model.
N = 50
x = np.sort(10*np.random.rand(N))
yerr = 0.1+0.5*np.random.rand(N)
y = m_true*x+b_true
y += np.abs(f_true*y) * np.random.randn(N)
y += yerr * np.random.randn(N)
def lnlike(theta, x, y, yerr):
    m, b, lnf = theta
    model = m * x + b
    inv_sigma2 = 1.0/(yerr**2 + model**2*np.exp(2*lnf))
    return -0.5*(np.sum((y-model)**2*inv_sigma2 - np.log(inv_sigma2)))

def lnprior(theta):
    m, b, lnf = theta
    if -5.0 < m < 0.5 and 0.0 < b < 10.0 and -10.0 < lnf < 1.0:
        return 0.0
    return -np.inf

def lnprob(theta, x, y, yerr):
    lp = lnprior(theta)
    if not np.isfinite(lp):
        return -np.inf
    return lp + lnlike(theta, x, y, yerr)
import scipy.optimize as op
nll = lambda *args: -lnlike(*args)
result = op.minimize(nll, [m_true, b_true, np.log(f_true)], args=(x, y, yerr))
m_ml, b_ml, lnf_ml = result["x"]
init = [0.5, m_ml, b_ml, lnf_ml]
ntemps = 10
nwalkers = 100
ndim = 3
from multiprocessing import Pool
pos = np.random.uniform(low=-1, high=1, size=(ntemps, nwalkers, ndim))
for i in range(ntemps):
    #initialize parameters near scipy optima
    pos[i:,] = np.array([result["x"] + 1e-4*np.random.randn(ndim) for i in range(nwalkers)])
pool = Pool(processes=4)
sampler=PTSampler(ntemps,nwalkers, ndim, lnlike, lnprior, loglargs=(x, y, yerr), pool=pool)# args=(x, y, yerr))
#burn-in
sampler.run_mcmc(pos, 1000)
sampler.reset()
sampler.run_mcmc(pos, 10000, thin=10)
samples = sampler.chain.reshape((-1, ndim))
print('Number of posterior samples is {}'.format(samples.shape[0]))
#print best fit value together with errors
print(list(map(lambda v: (v[1], v[2]-v[1], v[1]-v[0]),
               zip(*np.percentile(samples, [16, 50, 84], axis=0)))))
import corner
fig = corner.corner(samples, labels=["$m$", "$b$", "$\ln\,f$"],
truths=[m_true, b_true, np.log(f_true)])
fig.savefig("triangle.png")
The only problem is that when I run this code, the optimal parameter values I get are way off the true values, and increasing the number of walkers or samples does not help at all. Can anyone please advise why tempered MCMC is not working here?
Update:
I found a useful package called ptemcee (https://pypi.org/project/ptemcee/#description), although the documentation of this package is non-existent. It seems that this package might be useful; any help on how to implement the same linear fit with this package would also be highly appreciated.
I have modified some lines
import time
import numpy as np
from emcee import PTSampler
import corner
import matplotlib.pyplot as plt
import scipy.optimize as op
t1 = time.time()
np.random.seed(6) # To reproduce results
# Choose the "true" parameters.
m_true = -0.9594
b_true = 4.294
f_true = 0.534
# Generate some synthetic data from the model.
N = 50
x = np.sort(10 * np.random.rand(N))
yerr = 0.1 + 0.5 * np.random.rand(N)
y_1 = m_true * x + b_true
y = np.abs(f_true * y_1) * np.random.randn(N) + y_1
y += yerr * np.random.randn(N)
plt.plot(x, y, 'o')
# With emcee
def lnlike(theta, x, y, yerr):
    m, b, lnf = theta
    model = m * x + b
    inv_sigma2 = 1.0/(yerr**2 + model**2*np.exp(2*lnf))
    return -0.5*(np.sum((y-model)**2*inv_sigma2 - np.log(inv_sigma2)))

def lnprior(theta):
    m, b, lnf = theta
    if -5.0 < m < 0.5 and 0.0 < b < 10.0 and -10.0 < lnf < 1.0:
        return 0.0
    return -np.inf

def lnprob(theta, x, y, yerr):
    lp = lnprior(theta)
    if not np.isfinite(lp):
        return -np.inf
    return lp + lnlike(theta, x, y, yerr)
nll = lambda *args: -lnlike(*args)
result = op.minimize(nll, [m_true, b_true, np.log(f_true)], args=(x, y, yerr))
m_ml, b_ml, lnf_ml = result["x"]
init = [0.5, m_ml, b_ml, lnf_ml]
ntemps = 10
nwalkers = 100
ndim = 3
pos = np.random.uniform(low=-1, high=1, size=(ntemps, nwalkers, ndim))
for i in range(ntemps):
    pos[i:, :] = np.array([result["x"] + 1e-4*np.random.randn(ndim) for i in range(nwalkers)])
sampler = PTSampler(ntemps, nwalkers, ndim, lnlike, lnprior, loglargs=(x, y, yerr), threads=4) # args=(x, y, yerr))
#burn-in
print(pos.shape)
sampler.run_mcmc(pos, 100)
sampler.reset()
sampler.run_mcmc(pos, 5000, thin=10)
samples = sampler.chain.reshape((-1, ndim))
print('Number of posterior samples is {}'.format(samples.shape[0]))
#print best fit value together with errors
p1, p2, p3 = map(lambda v: (v[1], v[2]-v[1], v[1]-v[0]),
zip(*np.percentile(samples, [16, 50, 84],
axis=0)))
print(p1, '\n', p2, '\n', p3)
fig = corner.corner(samples, labels=["$m$", "$b$", "$\ln\,f$"],
truths=[m_true, b_true, np.log(f_true)])
t2 = time.time()
print('It took {:.3f} s'.format(t2 - t1))
plt.show()
The figure I get with corner is:
The important line is
sampler = PTSampler(ntemps, nwalkers, ndim, lnlike, lnprior, loglargs=(x, y, yerr), threads=4)
I have used threads=4 instead of Pool.
Look closely at this line: print(p1, '\n', p2, '\n', p3). It prints the values you get for m_true, b_true and f_true:
(-1.277782877669762, 0.5745273177144817, 2.0813620981463297)
(4.800481378230051, 3.1747356851201163, 2.245189235990341)
(-0.9391847529845194, 1.1196053087321716, 3.6017609114364273)
For f, you need np.exp(-0.93918), which is 0.3909 and is close to 0.534. The values you get are close (-1.277 compared to -0.9594, and 4.8 compared to 4.294), and the error bars are not bad (except for f). I mean, are you expecting to get the exact numbers? With this method, on my computer, it takes 111 s to complete; is that normal?
Let's try something different. Let's be clear: the problem is not easy when f_true is added. I will use pymc3 (you don't need to know how to use pymc3, I want to check the results found by emcee).
import time
import numpy as np
import corner
import matplotlib.pyplot as plt
import pymc3 as pm
t1 = time.time()
np.random.seed(6)
# Choose the "true" parameters.
m_true = -0.9594
b_true = 4.294
f_true = 0.534
# Generate some synthetic data from the model.
N = 50
x = np.sort(10 * np.random.rand(N))
yerr = 0.1 + 0.5 * np.random.rand(N)
y_1 = m_true * x + b_true
y = np.abs(f_true * y_1) * np.random.randn(N) + y_1
y += yerr * np.random.randn(N)
plt.plot(x, y, 'o')
with pm.Model() as model:  # model specifications in PyMC3 are wrapped in a with-statement
    # Define priors
    f = pm.HalfCauchy('f', beta=5)
    m = pm.Normal('m', 0, sd=20)
    b = pm.Normal('b', 0, sd=20)
    mu2 = b + m * x
    sigma2 = yerr**2 + f**2 * (y_1)**2
    post = pm.Normal('y', mu=mu2, sd=pm.math.sqrt(sigma2), observed=y)
with model:
    trace = pm.sample(2000, tune=2000)
print(pm.summary(trace))
pm.traceplot(trace)
all_values = np.stack([trace.get_values('b'), trace.get_values('m'), trace.get_values('f')], axis=1)
fig2 = corner.corner(all_values, labels=["$b$", "$m$", "$f$"],
truths=[b_true, m_true, f_true])
t2 = time.time()
print('It took {:.3f} s'.format(t2 - t1))
plt.show()
The summary is
mean sd mc_error hpd_2.5 hpd_97.5 n_eff Rhat
m -0.995545 0.067818 0.001174 -1.123187 -0.857653 2685.610018 1.000121
b 4.398158 0.332526 0.005585 3.767336 5.057909 2746.736563 1.000201
f 0.425442 0.063884 0.000904 0.311037 0.554446 4195.591204 1.000309
The important part is the mean column: you can see that the values found by pymc3 are close to the true values. The hpd_2.5 and hpd_97.5 columns are the error bounds for f, b and m. And it took 14 s.
The figure I get with corner is
You will say that the results of emcee are not quite good, but if you really want more accuracy, you have to modify this function:
def lnprior(theta):
    m, b, lnf = theta
    if -5.0 < m < 0.5 and 0.0 < b < 10.0 and -10.0 < lnf < 1.0:
        return 0.0
    return -np.inf
The famous prior. In this case it is flat, and since there are many possible priors, you could try a more informative one.
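As a sketch of what a non-flat prior could look like (my own illustration, not from the original code; the centres and widths below are arbitrary assumptions you would adapt to your problem), independent Gaussian log-priors on m, b and lnf would be:

def lnprior_gaussian(theta):
    # Illustrative weakly-informative Gaussian priors; the centres (0, 5, -0.5)
    # and widths (2, 3, 2) are assumptions, not values taken from the data.
    m, b, lnf = theta
    lp = -0.5 * ((m - 0.0) / 2.0) ** 2
    lp += -0.5 * ((b - 5.0) / 3.0) ** 2
    lp += -0.5 * ((lnf + 0.5) / 2.0) ** 2
    return lp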
I am trying to implement GMM clustering for both 24-dimensional and 32-dimensional feature vectors, where the initial parameters are assigned by the k-means algorithm (k-means clustering provides only the cluster centers, MU).
I am following this link, where it is implemented only for a 2D feature vector with predefined Mu and sigma.
If anyone has code for GMM clustering, kindly post it.
There is also a predefined GMM in sklearn, but it does not give me the likelihood for each iteration. sklearn GMM
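(As an aside, a per-iteration log-likelihood can apparently be approximated with sklearn by running one EM step at a time with warm_start and recording lower_bound_ after each call; the sketch below only illustrates that idea with stand-in data and is separate from my own implementation:)

import numpy as np
from sklearn.mixture import GaussianMixture

X_demo = np.random.randn(500, 24)  # stand-in for the 24-D feature vectors
gm = GaussianMixture(n_components=8, max_iter=1, warm_start=True)
lls = []
for _ in range(100):
    gm.fit(X_demo)               # one EM iteration per call because max_iter=1
    lls.append(gm.lower_bound_)  # log-likelihood lower bound after this iteration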
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import pairwise_distances_argmin

def kmeans(dataSet, k, c):
    # 1. Randomly choose clusters
    rng = np.random.RandomState(c)
    p = rng.permutation(dataSet.shape[0])[:k]
    centers = dataSet[p]
    while True:
        labels = pairwise_distances_argmin(dataSet, centers)
        new_centers = np.array([dataSet[labels == i].mean(0) for i in range(k)])
        if np.all(centers == new_centers):
            break
        centers = new_centers
    cluster_data = [dataSet[labels == i] for i in range(k)]
    l = []
    covs = []
    for i in range(k):
        l.append(len(cluster_data[i]) * 1.0 / len(dataSet))
        covs.append(np.cov(np.array(cluster_data[i]).T))
    return centers, l, covs, cluster_data
class gaussian_Mix_Model:
    def __init__(self, k=8, eps=0.0000001):
        self.k = k      ## number of clusters
        self.eps = eps  ## threshold to stop `epsilon`

    def calculate_Exp_Maxim(self, X, max_iters=1000):
        # n = number of data-points, d = dimension of data points
        n, d = X.shape
        # Initialize means and covariances from a single k-means run over all k clusters
        mu, l, Cov, cluster_data = kmeans(X, self.k, c=0)
        # initialize the weights
        w = [1. / self.k] * self.k
        R = np.zeros((n, self.k))
        ### LLhoods
        LLhoods = []
        P = lambda mu, s: np.linalg.det(s) ** -.5 * (2 * np.pi) ** (-X.shape[1]/2.) \
            * np.exp(-.5 * np.einsum('ij, ij -> i',
                                     X - mu, np.dot(np.linalg.inv(s), (X - mu).T).T))
        # Iterate till max_iters iterations
        while len(LLhoods) < max_iters:
            # Expectation Calculation
            ## membership for each of K Clusters
            for k in range(self.k):
                R[:, k] = w[k] * P(mu[k], Cov[k])
            # Finding the log likelihood
            LLhood = np.sum(np.log(np.sum(R, axis=1)))
            # Now store the log likelihood to the list.
            LLhoods.append(LLhood)
            # Normalize responsibilities; number of data points assigned to each cluster
            R = (R.T / np.sum(R, axis=1)).T
            N_ks = np.sum(R, axis=0)
            # Maximization and calculating the new parameters.
            for k in range(self.k):
                # Calculate the new means
                mu[k] = 1. / N_ks[k] * np.sum(R[:, k] * X.T, axis=1).T
                x_mu = np.matrix(X - mu[k])
                # Calculate new cov
                Cov[k] = np.array(1 / N_ks[k] * np.dot(np.multiply(x_mu.T, R[:, k]), x_mu))
                # Calculate new PiK
                w[k] = 1. / n * N_ks[k]
            # check for convergence
            if len(LLhoods) > 1 and np.abs(LLhood - LLhoods[-2]) < self.eps:
                break
        from collections import namedtuple
        self.params = namedtuple('params', ['mu', 'Cov', 'w', 'LLhoods', 'num_iters'])
        self.params.mu = mu
        self.params.Cov = Cov
        self.params.w = w
        self.params.LLhoods = LLhoods
        self.params.num_iters = len(LLhoods)
        return self.params
# Call the GMM to find the model
gmm = gaussian_Mix_Model(3, 0.000001)
params = gmm.calculate_Exp_Maxim(X, max_iters=150)  # X is the (n_samples, n_features) feature array
# Plotting of Log-Likelihood VS Iterations.
# (The three figures below assume LLhoods collects the per-iteration log-likelihood
#  curve of a separately fitted GMM for each class, e.g. LLhoods[0] = params.LLhoods
#  of the class-1 model.)
plt.plot(LLhoods[0])
plt.savefig('Dataset_2A_GMM_Class_1_K_16.png')
plt.clf()
plt.plot(LLhoods[1])
plt.savefig('Dataset_2A_GMM_Class_2_K_16.png')
plt.clf()
plt.plot(LLhoods[2])
plt.savefig('Dataset_2A_GMM_Class_3_K_16.png')
plt.clf()
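A minimal, self-contained usage sketch with synthetic stand-in data (my real 24-dimensional features would replace X):

import numpy as np
import matplotlib.pyplot as plt

rng = np.random.RandomState(0)
X = np.vstack([rng.randn(200, 24) + 3.0,   # three synthetic 24-D blobs
               rng.randn(200, 24),
               rng.randn(200, 24) - 3.0])

gmm = gaussian_Mix_Model(k=3, eps=1e-6)
params = gmm.calculate_Exp_Maxim(X, max_iters=150)

plt.plot(params.LLhoods)   # the log-likelihood should increase and level off
plt.xlabel('EM iteration')
plt.ylabel('Log-likelihood')
plt.show()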
I am trying to make my own implementation of a simple neural network to classify points. I heard about a specific type of activation function that I am interested in testing: the Gaussian. I do not want to just use ReLUs or sigmoids. I am trying to build a network that takes as input about 300 (x, y) values, and whose first layer computes the Gaussian function of these values with about 50 neurons, each of which has a separate x and y value as its mean (I will keep the sigma constant). Mathematically I anticipate this to look like
exp(- [(x - Mx)^2 + (y - My)^2] / (2 * sigma^2) ) / sqrt(2 * pi * sigma^2)
Then I perform a weighted sum of these terms over all the neurons in the first layer, add a bias, and pass it through a sigmoid to get my prediction. I perform this step for each training example and get a list of predictions. I think that my forward propagation is correct, but I will include the code for it in case someone can spot an obvious error in my implementation. Then I perform back-propagation. I have tested my updating of the weights and bias, and I believe they are not the problem. I think there is something wrong with my implementation of the gradient for the means, however, because they always cluster to a single point, which clearly does not minimize the cost function. I have already tried using a couple of different data sets and varying some hyperparameters, all to no avail. Can anyone figure out what the problem is?
Here is my code.
# libraries
import matplotlib.patches as patches
import seaborn as sns; sns.set()
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pdb

# functions
def gaussian(sq_error, sigma):
    return ((1/np.sqrt(2*np.pi*sigma**2))) * np.exp(-(sq_error)/(2*sigma**2))

def calc_X1(X0, Mx, My, m, sigma):
    X1 = []  # shape will be (10, m)
    for ex in range(0, m):
        sq_error = (X0[0][ex] - Mx) **2 + (X0[1][ex] - My) **2
        X1.append(gaussian(sq_error, sigma))
    X1 = np.array(X1)
    return X1.T

def sigmoid(Z):
    return 1 / (1 + np.exp(-Z))

def calc_X2(W2, X1, b2):
    return sigmoid(np.dot(W2, X1) + b2)

def cost(X2, Y, m):
    return -1/m * ( np.dot(Y, np.log(X2.T)) + np.dot(1-Y, np.log(1-X2.T))) [0]

def calc_dZ2(X2, Y):
    return X2 - Y

def calc_dM(dZ2, W2, X1, sigma, M, m, xOrY, X0):
    cur_dM = np.zeros(M.shape)
    for i in range(0, m):
        # pdb.set_trace()
        cur_dM += dZ2[0][i] * float(np.dot(W2, X1.T[i])) * 1/sigma**2 * (X0[xOrY][i] - M)
    return cur_dM / m

def train_correct(X2, Y, m):
    ct = 0
    for i in range(0, m):
        if np.round(X2[0][i]) == Y[i]:
            ct += 1
    return ct / m
# graphing functions
def plot_train_data(X, Y, m, ax):
    for ex in range(0, m):
        xCur = X[0][ex]
        yCur = X[1][ex]
        if Y[ex] == 1:
            color = (1, 0, 0)
        else:
            color = (0, 0, 1)
        ax.scatter(xCur, yCur, c=color)

def probability_hash(pr):
    return (float(pr), float(np.round(pr)), float(1-pr))

def probability_hash_1d(pr):
    return float(pr)

def plot_boundary(Mx, My, sigma, W2, b2, ax):
    boundsx = [-5, 5]
    boundsy = [-5, 5]
    samples = [10, 10]
    width = (boundsx[1] - boundsx[0]) / samples[0]
    height = (boundsy[1] - boundsy[0]) / samples[1]
    pt = np.zeros((2, 1))
    for x in np.linspace(boundsx[0], boundsx[1], samples[0]):
        for y in np.linspace(boundsy[0], boundsy[1], samples[1]):
            pt[0][0] = x
            pt[1][0] = y
            X1_cur = calc_X1(pt, Mx, My, 1, sigma)
            X2_cur = calc_X2(W2, X1_cur, b2)
            # ax.add_patch(patches.Rectangle((x, y), width, height, facecolor=probability_hash(X2_cur)))
            ax.scatter(x, y, c=probability_hash(X2_cur))

def cool_plot_boundary(Mx, My, sigma, W2, b2, ax):
    boundsx = [-2, 2]
    boundsy = [-2, 2]
    samples = [50, 50]
    width = (boundsx[1] - boundsx[0]) / samples[0]
    height = (boundsy[1] - boundsy[0]) / samples[1]
    pt = np.zeros((2, 1))
    heats = []
    xs = np.linspace(boundsx[0], boundsx[1], samples[0])
    ys = np.linspace(boundsy[0], boundsy[1], samples[1])
    for x in xs:
        heats.append([])
        for y in ys:
            pt[0][0] = x
            pt[1][0] = y
            X1_cur = calc_X1(pt, Mx, My, 1, sigma)
            X2_cur = calc_X2(W2, X1_cur, b2)
            heats[-1].append(probability_hash_1d(X2_cur))
    # xticks = []
    # yticks = []
    # for i in range(0, len(xs)):
    #     if i % 3 == 0:
    #         xticks.append(round(xs[i], 2))
    # for i in range(0, len(ys)):
    #     if i % 3 == 0:
    #         yticks.append(round(ys[i], 2))
    xticks = []
    yticks = []
    sns.heatmap(heats, ax=ax, cbar=True, xticklabels=xticks, yticklabels=yticks)

def plot_m(Mx, My, n1, ax):
    for i in range(0, n1):
        ax.scatter(Mx[i], My[i], c="k")
# initialize parameters
file = "data/disk2.csv"
df = pd.read_csv(file)
sigma = 2
itterations = 10000
learning_rate = 0.9
n0 = 2  # DO NOT CHANGE, formality
X0 = np.row_stack((df["0"], df["1"]))  # shape is (2, m)
Y = np.array(df["2"])
m = len(Y)
n1 = 50
Mx = np.random.randn(n1)
My = np.random.randn(n1)
X1 = calc_X1(X0, Mx, My, m, sigma)
n2 = 1  # DO NOT CHANGE, formality
small_number = 0.01
W2 = np.random.randn(1, n1) * small_number
b2 = 0
X2 = calc_X2(W2, X1, b2)
J = cost(X2, Y, m)
Js = []
itters = []
fig = plt.figure()
plotGap = 200

for i in range(0, itterations):
    # forward propagation
    X1 = calc_X1(X0, Mx, My, m, sigma)
    X2 = calc_X2(W2, X1, b2)
    J = cost(X2, Y, m)
    if i % plotGap == 0:
        fig.clear()
        costAx = fig.add_subplot(311)
        plotAx = fig.add_subplot(312)
        pointsAx = fig.add_subplot(313)
        cool_plot_boundary(Mx, My, sigma, W2, b2, plotAx)
        # plot_boundary(Mx, My, sigma, W2, b2, plotAx)
        plot_train_data(X0, Y, m, pointsAx)
        Js.append(J)
        itters.append(i)
        costAx.plot(itters, Js, c="k")
        print("cost = " + str(J) + "\ttraining correct = " + str(train_correct(X2, Y, m)))
        plot_m(Mx, My, n1, pointsAx)
        plt.pause(0.1)
    # back propagation
    dZ2 = calc_dZ2(X2, Y)
    dW2 = np.dot(dZ2, X1.T) / m
    db2 = np.sum(dZ2) / m
    dMx = calc_dM(dZ2, W2, X1, sigma, Mx, m, 0, X0)
    dMy = calc_dM(dZ2, W2, X1, sigma, My, m, 1, X0)
    b2 -= learning_rate * db2
    W2 -= learning_rate * dW2
    Mx -= learning_rate * dMx
    My -= learning_rate * dMy
For data, I have a CSV with a bunch of point locations and labels. You can use this code to generate a similar CSV. (Make sure you have a folder called data in the folder you run this from.)
# makes data in R2 to learn
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

n = 2
# number of examples
m = 300
X = []
Y = []
# hyperparameters for data
rApprox = 1
error = 0.4
noise = 0.1
name = "data/disk2"
plt.cla()
for ex in range(0, m):
    xCur = np.random.randn(2)
    X.append(xCur)
    if abs(np.linalg.norm(xCur) + np.random.randn()*noise - rApprox) < error:
        Y.append(1)
        color = "r"
    else:
        Y.append(0)
        color = "b"
    plt.scatter(xCur[0], xCur[1], c=color)
    if abs(np.random.randn()) < 0.01:
        plt.pause(0.1)
plt.pause(1)
plt.savefig(name + ".png")
X = np.array(X)
Y = np.array(Y)
df = pd.DataFrame(X)
df[2] = Y
df.to_csv(name + ".csv", index=False)
Thanks for your help.
Substitute this function for the calc_dM function. You must be careful when multiplying; it is not enough that the dimensions merely work out.
def calculuate_dMs(X0, X1, X2, Mx, My, W2, dZ2, sigma, m, n1):
    # pdb.set_trace()
    X0x_big = np.dot(np.ones((n1, 1)), X0[0].reshape(1, m))
    X0y_big = np.dot(np.ones((n1, 1)), X0[1].reshape(1, m))
    Mx_big = np.dot(Mx.reshape(n1, 1), np.ones((1, m)))
    My_big = np.dot(My.reshape(n1, 1), np.ones((1, m)))
    W2_big = np.dot(W2.reshape(n1, 1), np.ones((1, m)))
    dZ2_big = np.dot(np.ones((n1, 1)), dZ2.reshape(1, m))
    dxTemp = np.multiply(np.multiply(np.multiply((X0x_big - Mx_big), X1), W2_big), dZ2_big)
    dyTemp = np.multiply(np.multiply(np.multiply((X0y_big - My_big), X1), W2_big), dZ2_big)
    return (np.sum(dxTemp, axis=1)/m, np.sum(dyTemp, axis=1)/m)
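For completeness, this is how the function above would slot into the back-propagation step of the question's training loop (a sketch only; all variable names come from the code in the question):

# back propagation, using the vectorized gradient for the means
dZ2 = calc_dZ2(X2, Y)
dW2 = np.dot(dZ2, X1.T) / m
db2 = np.sum(dZ2) / m
dMx, dMy = calculuate_dMs(X0, X1, X2, Mx, My, W2, dZ2, sigma, m, n1)
b2 -= learning_rate * db2
W2 -= learning_rate * dW2
Mx -= learning_rate * dMx
My -= learning_rate * dMy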
I am trying to write a program using the Lotka-Volterra equations for predator-prey interactions, solving the ODEs
dx/dt = a*x - b*x*y
dy/dt = g*x*y - s*y
with the 4th-order Runge-Kutta method.
I need to plot a graph showing both x and y as a function of time from t = 0 to t=30.
a = alpha = 1
b = beta = 0.5
g = gamma = 0.5
s = sigma = 2
initial conditions x = y = 2
Here is my code so far, but it does not display anything on the graph. Some help would be nice.
#!/usr/bin/env python
from __future__ import division, print_function
import matplotlib.pyplot as plt
import numpy as np

def rk4(f, r, t, h):
    """ Runge-Kutta 4 method """
    k1 = h*f(r, t)
    k2 = h*f(r+0.5*k1, t+0.5*h)
    k3 = h*f(r+0.5*k2, t+0.5*h)
    k4 = h*f(r+k3, t+h)
    return (k1 + 2*k2 + 2*k3 + k4)/6

def f(r, t):
    alpha = 1.0
    beta = 0.5
    gamma = 0.5
    sigma = 2.0
    x, y = r[2], r[2]
    fxd = x*(alpha - beta*y)
    fyd = -y*(gamma - sigma*x)
    return np.array([fxd, fyd], float)

tpoints = np.linspace(0, 30, 0.1)
xpoints = []
ypoints = []
r = np.array([2, 2], float)
for t in tpoints:
    xpoints += [r[2]]
    ypoints += [r[2]]
    r += rk4(f, r, t, h)

plt.plot(tpoints, xpoints)
plt.plot(tpoints, ypoints)
plt.xlabel("Time")
plt.ylabel("Population")
plt.title("Lotka-Volterra Model")
plt.savefig("Lotka_Volterra.png")
plt.show()
A simple check of your variable tpoints after running your script shows it's empty:
In [7]: run test.py
In [8]: tpoints
Out[8]: array([], dtype=float64)
This is because you're using np.linspace incorrectly. The third argument is the number of elements desired in the output. You've requested an array of length 0.1.
Take a look at np.linspace's docstring. You won't have a problem figuring out how to adjust your code.
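For illustration (my addition, not part of the original answer), the fix is to pass the number of points rather than the step size:

# np.linspace's third argument is the number of samples, not the spacing.
tpoints = np.linspace(0, 30, 301)  # 301 evenly spaced points, i.e. a 0.1 time step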
1) define 'h' variable.
2) use
tpoints = np.arange(30)  # array([0, 1, 2, ..., 29])
not
np.linspace()
and don't forget to set time step size equal to h:
h=0.1
tpoints = np.arange(0, 30, h)
3) be careful with indexes:
def f(r, t):
    ...
    x, y = r[0], r[1]
    ...

for t in tpoints:
    xpoints += [r[0]]
    ypoints += [r[1]]
    ...
and better use .append(x):
for t in tpoints:
    xpoints.append(r[0])
    ypoints.append(r[1])
    ...
Here's tested code for Python 3.7 (I've set h=0.001 for more precision):
import matplotlib.pyplot as plt
import numpy as np

def rk4(r, t, h):  # edited; no need for input f
    """ Runge-Kutta 4 method """
    k1 = h*f(r, t)
    k2 = h*f(r+0.5*k1, t+0.5*h)
    k3 = h*f(r+0.5*k2, t+0.5*h)
    k4 = h*f(r+k3, t+h)
    return (k1 + 2*k2 + 2*k3 + k4)/6

def f(r, t):
    alpha = 1.0
    beta = 0.5
    gamma = 0.5
    sigma = 2.0
    x, y = r[0], r[1]
    fxd = x*(alpha - beta*y)
    fyd = -y*(gamma - sigma*x)
    return np.array([fxd, fyd], float)

h = 0.001  # edited
tpoints = np.arange(0, 30, h)  # edited
xpoints, ypoints = [], []
r = np.array([2, 2], float)
for t in tpoints:
    xpoints.append(r[0])  # edited
    ypoints.append(r[1])  # edited
    r += rk4(r, t, h)  # edited; no need for input f

plt.plot(tpoints, xpoints)
plt.plot(tpoints, ypoints)
plt.xlabel("Time")
plt.ylabel("Population")
plt.title("Lotka-Volterra Model")
plt.savefig("Lotka_Volterra.png")
plt.show()
You can also try to plot "cycles":
plt.xlabel("Prey")
plt.ylabel("Predator")
plt.plot(xpoints, ypoints)
plt.show()
(Phase-plane plot: https://i.stack.imgur.com/NB9lc.png)