I'm trying to implement a Gaussian fit using TensorFlow Probability's Nelder-Mead optimizer, tfp.optimizer.nelder_mead_minimize(). It does not converge, while scipy.optimize.minimize() gives a good result in less than 1 second of computation time. I am probably doing something wrong, but I can't figure out what. Can someone help me with this?
I am using:
python 3.7.3
tensorflow-probability 0.8
tensorflow 2.0
Here's my code:
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
import matplotlib.pyplot as plt

# Define the gaussian model : y = 1/(sigma * sqrt(2 pi)) * exp(- (x-mu)²/(2*sigma²))
pi = np.pi

def model(x, theta):
    y = 1/(theta[1]*tf.sqrt(2*pi)) * tf.exp(-(x-theta[0])**2 / (2*theta[1]**2))
    return y

# Define the loss (least mean square)
def loss_function(theta, y, x, callback=False, n_iterations=1):
    global n_epochs_cb
    loss = tf.losses.mean_squared_error(y, model(x, theta))
    if callback:
        if n_epochs_cb % (n_iterations/10.) == 0:
            print('{0:4d} {1:} {2: 3.6f}'.format(n_epochs_cb, theta, loss))
        n_epochs_cb = n_epochs_cb + 1
    return loss
# Generate some data
theta_true = [3, 2]
X = np.arange(-10, 10, 0.5)
Y = model(X, theta_true)
# fig, ax = plt.subplots(1, 1, figsize = [20, 10])
# ax.scatter(X, Y, label='data', alpha=0.5)
# initialize parameters
theta = tf.constant(tf.random.uniform([2], 0, 10), dtype=tf.float32, name='theta')
n_iterations = 100
n_epochs_cb = 1
# minimization
print('{0:4s} {1:9s} {2:9s}'.format('Iter', 'theta', 'loss'))
optim_results = tfp.optimizer.nelder_mead_minimize(lambda theta: loss_function(theta, X, Y, True, n_iterations),
initial_vertex=theta,
func_tolerance=1e-8,
position_tolerance=1e-8,
max_iterations=n_iterations)
print("theta_true", theta_true)
print("theta_est", optim_results.position.numpy())
print("convergenced:", optim_results.converged.numpy())
print("number of function evaluation", optim_results.num_objective_evaluations.numpy())
print("number of iterations", optim_results.num_iterations.numpy())
print("objective value", optim_results.objective_value.numpy())
The optimization stops after about 50 iterations and returns:
Iter theta loss
10 [0.1448533 6.7525005] 33.408031
20 [-0.2385819 28.76061 ] 33.382130
30 [ -4.1879644 260.84622 ] 33.375771
40 [ -34.722183 2053.5083 ] 33.375099
50 [ -418.6432 24589.836 ] 33.375008
theta_true [3, 2]
theta_est [ -488.44122 28687.352 ]
converged: True
number of function evaluation 55
number of iterations 35
objective value 33.375008
I ran the same problem using minimize() from scipy.optimize with the 'Nelder-Mead' method, and it gives:
Iter theta loss
10 [4.61612335 4.40795762] 0.007583
20 [3.19502416 2.09290338] 0.001023
30 [3.01845636 1.99504269] 0.000091
40 [2.99843397 2.00065615] 0.000010
Optimization terminated successfully.
Current function value: 0.000010
Iterations: 44
Function evaluations: 96
computation time 0.046 seconds
I would expect similar performance from TensorFlow Probability's Nelder-Mead optimizer and scipy.optimize's Nelder-Mead optimizer.
What am I doing wrong?
Edit:
Found the mistake: in the lambda passed to the optimizer, the data arguments were swapped (X was passed as y and Y as x), and the data also needed an explicit float32 dtype. The following code now converges:
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
import matplotlib.pyplot as plt

# Define the gaussian model : y = 1/(sigma * sqrt(2 pi)) * exp(- (x-mu)²/(2*sigma²))
pi = np.pi

def model(x, theta):
    y = 1/(theta[1]*tf.sqrt(2*pi)) * tf.exp(-(x-theta[0])**2 / (2*theta[1]**2))
    return y

# Define the loss (least mean square)
def loss_function(theta, y, x, callback=False, n_iterations=1):
    global n_epochs_cb
    loss = tf.losses.mean_squared_error(y, model(x, theta))
    if callback:
        if n_epochs_cb % (n_iterations/10.) == 0:
            print('{0:4d} {1:} {2: 3.6f}'.format(n_epochs_cb, theta, loss))
        n_epochs_cb = n_epochs_cb + 1
    return loss
# Generate some data
theta_true = [3, 2]
X = np.arange(-10, 10, 0.5, dtype=np.float32)
Y = model(X, theta_true)
# fig, ax = plt.subplots(1, 1, figsize = [20, 10])
# ax.scatter(X, Y, label='data', alpha=0.5)
# initialize parameters
theta = tf.constant(tf.random.uniform([2], 0, 10), dtype=tf.float32, name='theta')
print("theta_true", theta_true)
print("theta_init", theta.numpy())
n_iterations = 100
n_epochs_cb = 1
# minimization
print('{0:4s} {1:9s} {2:9s}'.format('Iter', 'theta', 'loss'))
optim_results = tfp.optimizer.nelder_mead_minimize(lambda theta: loss_function(theta, Y, X, True, n_iterations),
initial_vertex=theta,
func_tolerance=1e-8,
position_tolerance=1e-8,
max_iterations=n_iterations)
print("theta_est", optim_results.position.numpy())
print("convergenced:", optim_results.converged.numpy())
print("number of function evaluation", optim_results.num_objective_evaluations.numpy())
print("number of iterations", optim_results.num_iterations.numpy())
print("objective value", optim_results.objective_value.numpy())
Related
I want to develop a Physics-Informed Neural Network model in PyTorch. My network should be trained based on two losses: the boundary condition (BC) and the partial differential equation (PDE). I am adding these two losses, but the problem is that the BC is controlling the main loss, like the following figure:
First, I make a simple finite difference calculation for my 1D heat conduction problem:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np
from pyDOE import lhs

######### Finite difference solution
# geometry:
L = 1  # length of the rod
# mesh:
dx = 0.01
nx = int(L/dx) + 1
x = np.linspace(0, L, nx)
# temporal grid:
t_sim = 1
dt = 0.01
nt = int(t_sim/dt)
# parametrization
alpha = 0.14340344168260039
# IC
t_ic = 4
# BC
t_left = 5   # left side held at 5 °C
t_right = 3  # right side held at 3 °C
# Results
T = np.ones(nx) * t_ic
all_T = []
for i in range(0, nt):
    Tn = T.copy()
    T[1:-1] = Tn[1:-1] + dt/(dx**2) * alpha * (Tn[2:] - 2*Tn[1:-1] + Tn[0:-2])
    T[0] = t_left
    T[-1] = t_right
    all_T.append(Tn)
Then, data is prepared for the PINN model through the next block of code:
x = torch.linspace(0, L, nx, dtype=torch.float32)
t = torch.linspace(0, t_sim, nt, dtype=torch.float32)
T, X = torch.meshgrid(t,x)
Temps = np.concatenate (all_T).reshape(nt,nx)
x_test = torch.hstack((X.transpose(1,0).flatten()[:,None], T.transpose(1,0).flatten()[:,None]))
y_test = torch.from_numpy(Temps) # I suppose it is the ground truth
lb = x_test[0] # lower boundary
ub = x_test[-1] # upper boundary
left_x = torch.hstack((X[:,0][:,None], T[:,0][:,None])) # x and t of left boundary
left_y = torch.ones(left_x.shape[0], 1) * t_left # Temperature of left boundary
left_y[0,0] = t_ic
right_x = torch.hstack((X[:,-1][:,None], T[:,0][:,None])) # x and t of right boundary
right_y = torch.ones(right_x.shape[0], 1) * t_right # Temperature of right boundary
right_y[0,0] = t_ic
bottom_x = torch.hstack((X[0,1:-1][:,None], T[0,1:-1][:,None])) # x and t of IC
bottom_y = torch.ones(bottom_x.shape[0], 1) * t_ic # Temperature of IC
No_BC = 1 # fraction of the BC data used for training (1 = use all of it)
No_IC = 1 # fraction of the IC data used for training (1 = use all of it)
idx_l = np.random.choice(left_x.shape[0], int (left_x.shape[0]*No_BC), replace=False)
idx_r = np.random.choice(right_x.shape[0], int (right_x.shape[0]*No_BC), replace=False)
idx_b = np.random.choice(bottom_x.shape[0], int (bottom_x.shape[0]*No_IC), replace=False)
X_train_No = torch.vstack([left_x[idx_l,:], right_x[idx_r,:], bottom_x[idx_b,:]])
Y_train_No = torch.vstack([left_y[idx_l,:], right_y[idx_r,:], bottom_y[idx_b,:]])
N_f = 5000
X_train_Nf = lb + (ub-lb)*lhs(2,N_f)
f_hat = torch.zeros(X_train_Nf.shape[0], 1, dtype=torch.float32) # zero array for loss of PDE
This is my script for PINN and I very much appreciate your help:
# `device` was not defined in the original snippet; assuming CPU here so no tensors need moving
device = torch.device('cpu')

class FCN(nn.Module):
    ## Neural network
    def __init__(self, layers):
        super().__init__()  # call __init__ from parent class
        self.activation = nn.Tanh()
        self.loss_function = nn.MSELoss(reduction='mean')
        # Initialise neural network as a list using nn.ModuleList
        self.linears = nn.ModuleList([nn.Linear(layers[i], layers[i+1]) for i in range(len(layers)-1)])
        self.iter = 0
        # Xavier normal initialization
        for i in range(len(layers)-1):
            nn.init.xavier_normal_(self.linears[i].weight.data, gain=1.0)
            nn.init.zeros_(self.linears[i].bias.data)

    # forward pass
    def forward(self, x):
        if not torch.is_tensor(x):
            x = torch.from_numpy(x)
        a = x.float()
        for i in range(len(self.linears)-1):  # was range(len(layers)-2), which relied on the global `layers`
            z = self.linears[i](a)
            a = self.activation(z)
        a = self.linears[-1](a)
        return a

    # Loss functions
    # BC loss
    def lossBC(self, x_BC, y_BC):
        loss_BC = self.loss_function(self.forward(x_BC), y_BC)
        return loss_BC.float()

    # PDE loss
    def lossPDE(self, x_PDE):
        g = x_PDE.clone()
        g.requires_grad = True  # enable differentiation
        f = self.forward(g)
        f_x_t = torch.autograd.grad(f, g, torch.ones([g.shape[0], 1]).to(device), retain_graph=True, create_graph=True)[0]  # first derivatives
        f_xx_tt = torch.autograd.grad(f_x_t, g, torch.ones(g.shape).to(device), create_graph=True)[0]  # second derivatives
        f_t = f_x_t[:, [1]]
        f_xx = f_xx_tt[:, [0]]
        f = f_t - alpha * f_xx
        return self.loss_function(f, f_hat).float()

    def loss(self, x_BC, y_BC, x_PDE):
        loss_bc = self.lossBC(x_BC.float(), y_BC.float())
        loss_pde = self.lossPDE(x_PDE.float())
        return loss_bc.float() + loss_pde.float()
And this is how I make the model, arrays representing losses and finally the plot:
layers = np.array([2, 50, 50, 50, 50, 50, 1])
PINN = FCN(layers)
optimizer = torch.optim.Adam(PINN.parameters(), lr=0.001)
def closure():
    optimizer.zero_grad()
    loss_p = PINN.lossPDE(X_train_Nf)
    loss_p.backward()
    loss_b = PINN.lossBC(X_train_No, Y_train_No)
    loss_b.backward()
    return loss_b + loss_p
total_l = np.array([])
BC_l = np.array([])
PDE_l = np.array([])
test_BC_l = np.array([])
for i in range(10000):
    loss = optimizer.step(closure)
    total_l = np.append(total_l, loss.cpu().detach().numpy())
    PDE_l = np.append(PDE_l, PINN.lossPDE(X_train_Nf).cpu().detach().numpy())
    BC_l = np.append(BC_l, PINN.lossBC(X_train_No, Y_train_No).cpu().detach().numpy())
    with torch.no_grad():
        # the original referenced undefined X_test/Y_test; x_test/y_test are defined above
        test_loss = PINN.lossBC(x_test, y_test.flatten().view(-1, 1))
    test_BC_l = np.append(test_BC_l, test_loss.cpu().detach().numpy())
import matplotlib.pyplot as plt
fig,ax=plt.subplots(1,1, figsize=(9,9))
ax.plot(PDE_l, c = 'g', lw=2, label='PDE loss in train')
ax.plot(BC_l, c = 'k', lw=2, label='BC loss in train')
ax.plot(test_BC_l, c = 'r', lw=2, label='BC loss in test')
ax.plot(total_l, c = 'b', lw=2, label='total loss in train')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
plt.legend()
plt.show()
You should not add the boundary and PDE-based losses together when performing the backpropagation. Backpropagate iteratively on the PDE loss and on each of the different boundary conditions used (Dirichlet or Neumann). When you add both of them, the network does not learn anything about the BC, as the majority of the loss is being generated from the PDE. So the network learns more about the PDE-based loss and nothing about the BC, as is clearly evident from your graph.
The loss handling should be something like this:
for _ in different_loss_types: 1) PDE loss (backprop on the PDE), 2) BC loss (backprop on the BC), as in the sketch below.
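A minimal sketch of that loop, reusing the PINN, optimizer, and training tensors defined in the question (how often each loss gets its own step, and any relative weighting, are open choices here, not something the answer prescribes):
# Take a separate backward pass and optimizer step per loss type instead of summing.
for step in range(10000):
    # 1) backprop on the PDE residual loss alone
    optimizer.zero_grad()
    loss_p = PINN.lossPDE(X_train_Nf)
    loss_p.backward()
    optimizer.step()
    # 2) backprop on the BC/IC data loss alone
    optimizer.zero_grad()
    loss_b = PINN.lossBC(X_train_No, Y_train_No)
    loss_b.backward()
    optimizer.step()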
This is the code that creates the problem:
def cost_func(x=None, y=None):
    if not x:
        tf.compat.v1.disable_eager_execution()
        x = tf.compat.v1.placeholder(tf.float32, shape=[None, 1])
    if not y:
        tf.compat.v1.disable_eager_execution()
        y = tf.compat.v1.placeholder(tf.float32, shape=[None, 1])
    # two local minima near (0, 0)
    # z = __f1(x, y)
    # 3rd local minimum at (-0.5, -0.8)
    z = -1 * __f2(x, y, x_mean=-0.5, y_mean=-0.8, x_sig=0.35, y_sig=0.35)
    # one steep gaussian trench at (0, 0)
    # z -= __f2(x, y, x_mean=0, y_mean=0, x_sig=0.2, y_sig=0.2)
    # three steep gaussian trenches
    z -= __f2(x, y, x_mean=1.0, y_mean=-0.5, x_sig=0.2, y_sig=0.2)
    z -= __f2(x, y, x_mean=-1.0, y_mean=0.5, x_sig=0.2, y_sig=0.2)
    z -= __f2(x, y, x_mean=-0.5, y_mean=-0.8, x_sig=0.2, y_sig=0.2)
    return x, y, z
My goal is:
For visualizing the contour plot, call f() and collect the placeholder nodes for fast GPU calculation.
To incorporate variables to optimize, pass them in as arguments to attach as x and y.
Args:
x: None if a placeholder tensor is used as input. Specify x to use x as the input tensor.
y: None if a placeholder tensor is used as input. Specify y to use y as the input tensor.
Returns:
Tuple (x, y, z), where x and y are the input tensors and z is the output tensor.
The error is:
using a tf.Tensor as a Python bool is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function.
The full code is:
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
# cost function
def cost_func(x=None, y=None):
    '''Cost function.

    For visualizing contour plot, call f() and collect placeholder nodes for fast GPU calc.
    To incorporate variables to optimize, pass them in as argument to attach as x and y.

    Args:
        x: None if placeholder tensor is used as input. Specify x to use x as input tensor.
        y: None if placeholder tensor is used as input. Specify y to use y as input tensor.

    Returns:
        Tuple (x, y, z) where x and y are input tensors and z is output tensor.
    '''
    if not x:
        tf.compat.v1.disable_eager_execution()
        x = tf.compat.v1.placeholder(tf.float32, shape=[None, 1])
    if not y:
        tf.compat.v1.disable_eager_execution()
        y = tf.compat.v1.placeholder(tf.float32, shape=[None, 1])
    # two local minima near (0, 0)
    # z = __f1(x, y)
    # 3rd local minimum at (-0.5, -0.8)
    z = -1 * __f2(x, y, x_mean=-0.5, y_mean=-0.8, x_sig=0.35, y_sig=0.35)
    # one steep gaussian trench at (0, 0)
    # z -= __f2(x, y, x_mean=0, y_mean=0, x_sig=0.2, y_sig=0.2)
    # three steep gaussian trenches
    z -= __f2(x, y, x_mean=1.0, y_mean=-0.5, x_sig=0.2, y_sig=0.2)
    z -= __f2(x, y, x_mean=-1.0, y_mean=0.5, x_sig=0.2, y_sig=0.2)
    z -= __f2(x, y, x_mean=-0.5, y_mean=-0.8, x_sig=0.2, y_sig=0.2)
    return x, y, z
# noisy hills of the cost function
def __f1(x, y):
    return -1 * tf.sin(x * x) * tf.cos(3 * y * y) * tf.exp(-(x * y) * (x * y)) - tf.exp(-(x + y) * (x + y))

# bivar gaussian hills of the cost function
def __f2(x, y, x_mean, y_mean, x_sig, y_sig):
    normalizing = 1 / (2 * np.pi * x_sig * y_sig)
    x_exp = (-1 * tf.square(x - x_mean)) / (2 * tf.square(x_sig))
    y_exp = (-1 * tf.square(y - y_mean)) / (2 * tf.square(y_sig))
    return normalizing * tf.exp(x_exp + y_exp)
# pyplot settings
plt.ion()
fig = plt.figure(figsize=(3, 2), dpi=300)
ax = fig.add_subplot(111, projection='3d')
plt.subplots_adjust(left=0, bottom=0, right=1, top=1, wspace=0, hspace=0)
params = {'legend.fontsize': 3,
'legend.handlelength': 3}
plt.rcParams.update(params)
plt.axis('off')
# input (x, y) and output (z) nodes of cost-function graph
x, y, z = cost_func()
# visualize cost function as a contour plot
x_val = y_val = np.arange(-1.5, 1.5, 0.005, dtype=np.float32)
x_val_mesh, y_val_mesh = np.meshgrid(x_val, y_val)
x_val_mesh_flat = x_val_mesh.reshape([-1, 1])
y_val_mesh_flat = y_val_mesh.reshape([-1, 1])
with tf.compat.v1.Session() as sess:
    z_val_mesh_flat = sess.run(z, feed_dict={x: x_val_mesh_flat, y: y_val_mesh_flat})
z_val_mesh = z_val_mesh_flat.reshape(x_val_mesh.shape)
levels = np.arange(-10, 1, 0.05)
# ax.contour(x_val_mesh, y_val_mesh, z_val_mesh, levels, alpha=.7, linewidths=0.4)
# ax.plot_wireframe(x_val_mesh, y_val_mesh, z_val_mesh, alpha=.5, linewidths=0.4, antialiased=True)
ax.plot_surface(x_val_mesh, y_val_mesh, z_val_mesh, alpha=.4, cmap=cm.coolwarm)
plt.draw()
# starting location for variables
x_i = 0.75
y_i = 1.0
# create variable pair (x, y) for each optimizer
x_var, y_var = [], []
for i in range(7):
    x_var.append(tf.Variable(x_i, [1], dtype=tf.float32))
    y_var.append(tf.Variable(y_i, [1], dtype=tf.float32))
# create separate graph for each variable pairs
cost = []
for i in range(7):
    cost.append(cost_func(x_var[i], y_var[i])[2])
# define method of gradient descent for each graph
# optimizer label name, learning rate, color
ops_param = np.array([['Adadelta', 50.0, 'b'],
['Adagrad', 0.10, 'g'],
['Adam', 0.05, 'r'],
['Ftrl', 0.5, 'c'],
['GD', 0.05, 'm'],
['Momentum', 0.01, 'y'],
['RMSProp', 0.02, 'k']])
ops = []
ops.append(tf.compat.v1.train.AdadeltaOptimizer(float(ops_param[0, 1])).minimize(cost[0]))
ops.append(tf.compat.v1.train.AdagradOptimizer(float(ops_param[1, 1])).minimize(cost[1]))
ops.append(tf.compat.v1.train.AdamOptimizer(float(ops_param[2, 1])).minimize(cost[2]))
ops.append(tf.compat.v1.train.FtrlOptimizer(float(ops_param[3, 1])).minimize(cost[3]))
ops.append(tf.compat.v1.train.GradientDescentOptimizer(float(ops_param[4, 1])).minimize(cost[4]))
ops.append(tf.compat.v1.train.MomentumOptimizer(float(ops_param[5, 1]), momentum=0.95).minimize(cost[5]))
ops.append(tf.compat.v1.train.RMSPropOptimizer(float(ops_param[6, 1])).minimize(cost[6]))
# 3d plot camera zoom, angle
xlm = ax.get_xlim3d()
ylm = ax.get_ylim3d()
zlm = ax.get_zlim3d()
ax.set_xlim3d(xlm[0] * 0.5, xlm[1] * 0.5)
ax.set_ylim3d(ylm[0] * 0.5, ylm[1] * 0.5)
ax.set_zlim3d(zlm[0] * 0.5, zlm[1] * 0.5)
azm = ax.azim
ele = ax.elev + 40
ax.view_init(elev=ele, azim=azm)
with tf.compat.v1.Session() as sess:  # was tf.Session(), which does not exist in TF2
    sess.run(tf.compat.v1.global_variables_initializer())
    # use last location to draw a line to the current location
    last_x, last_y, last_z = [], [], []
    plot_cache = [None for _ in range(len(ops))]
    # loop over each step of the optimization algorithm
    steps = 1000
    for iter in range(steps):
        for i, op in enumerate(ops):
            # run a step of optimization and collect new x and y variable values
            _, x_val, y_val, z_val = sess.run([op, x_var[i], y_var[i], cost[i]])
            # move dot to the current value
            if plot_cache[i]:
                plot_cache[i].remove()
            plot_cache[i] = ax.scatter(x_val, y_val, z_val, s=3, depthshade=True,
                                       label=ops_param[i, 0], color=ops_param[i, 2])
            # draw a line from the previous value
            if iter == 0:
                last_z.append(z_val)
                last_x.append(x_i)
                last_y.append(y_i)
            ax.plot([last_x[i], x_val], [last_y[i], y_val], [last_z[i], z_val],
                    linewidth=0.5, color=ops_param[i, 2])
            last_x[i] = x_val
            last_y[i] = y_val
            last_z[i] = z_val
        if iter == 0:
            legend = np.vstack((ops_param[:, 0], ops_param[:, 1])).transpose()
            plt.legend(plot_cache, legend)
        plt.savefig('figures/' + str(iter) + '.png')
        print('iteration: {}'.format(iter))
        plt.pause(0.0001)
print("done")
The intent is: if x and y are not passed in, create placeholder tensors for them inside the function; otherwise use the tensors passed in. I have tried converting x and y to other types (such as strings) when they are placeholder tensors and outputting them directly when they are not, but that didn't work. Then I tried not using keyword arguments and making the determination within the function, and that didn't work either.
As I understand it, this problem would go away if I used TensorFlow v1, but I want to use TensorFlow v2. Please help me!
To resolve this error, you can either use eager execution or decorate the function with @tf.function. Eager execution is enabled by default in TensorFlow 2.x; in TensorFlow 1.x you need to enable it explicitly by adding the following line of code:
tf.enable_eager_execution()
The @tf.function decorator allows a Python function to be converted into a TensorFlow graph. It accepts parameters, such as input_signature and autograph, that can be used to control how the graph is generated. For more information, you can refer to the official TensorFlow documentation.
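Note also that in this particular cost_func the error comes from the truthiness tests if not x: / if not y: on placeholder tensors. Checking against None instead never evaluates a tensor as a Python bool, whatever the execution mode. A minimal sketch of that change (same function as in the question, only the None-checks altered; __f2 is the question's own helper):

import tensorflow as tf

def cost_func(x=None, y=None):
    # `x is None` inspects the Python object, so a tf.Tensor is never used as a bool
    if x is None:
        tf.compat.v1.disable_eager_execution()
        x = tf.compat.v1.placeholder(tf.float32, shape=[None, 1])
    if y is None:
        tf.compat.v1.disable_eager_execution()
        y = tf.compat.v1.placeholder(tf.float32, shape=[None, 1])
    z = -1 * __f2(x, y, x_mean=-0.5, y_mean=-0.8, x_sig=0.35, y_sig=0.35)
    return x, y, z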
I want to find the parameters of a Weibull distribution by minimizing the Kullback-Leibler divergence. I found a code here which did the same thing for Normal distributions. I replaced the Normal distributions in the original code with Weibull distributions. I do not know why I get "NaN" parameters and a "NaN" Kullback-Leibler divergence value. Can anyone please help?
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import seaborn as sns
sns.set()
from scipy.stats import weibull_min
learning_rate = 0.001
epochs = 100
x = np.arange(0, 2000,0.001)
p_pdf=weibull_min.pdf(x, 1.055,0, 468).reshape(1, -1)
p = tf.placeholder(tf.float64, shape=p_pdf.shape)
alpha = tf.Variable(np.zeros(1))
beta = tf.Variable(np.eye(1))
weibull=(beta / alpha) * ((x / alpha)**(beta - 1)) * tf.exp(-((x / alpha)**beta))
q = weibull
kl_divergence = tf.reduce_sum(tf.where(p == 0, tf.zeros(p_pdf.shape, tf.float64), p * tf.log(p / q)))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(kl_divergence)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    history = []
    alphas = []
    betas = []
    for i in range(epochs):
        sess.run(optimizer, {p: p_pdf})
        if i % 10 == 0:
            history.append(sess.run(kl_divergence, {p: p_pdf}))
            alphas.append(sess.run(alpha)[0])
            betas.append(sess.run(beta)[0][0])
    for a, b in zip(alphas, betas):
        q_pdf = weibull_min.pdf(x, b, 0, a)
        plt.plot(x, q_pdf.reshape(-1, 1), c='red')
    plt.title('KL(P||Q) = %1.3f' % history[-1])
    plt.plot(x, p_pdf.reshape(-1, 1), linewidth=3)
    plt.show()
    plt.plot(history)
    plt.show()
Try initialising your alpha to something other than 0, for example np.ones(1) instead.
If you use an alpha of zero you will get a NaN with scipy as well:
from scipy.stats import weibull_min
weibull_min.pdf(100, 0, 0, 2.), weibull_min.pdf(100, 1, 0, 2.)
# (nan, 9.643749239819589e-23)
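In the question's script that is a one-line change. A minimal sketch (np.ones(1) is just one safe non-zero starting point, not the only option):
alpha = tf.Variable(np.ones(1))  # start away from 0 so beta/alpha and x/alpha stay finite
beta = tf.Variable(np.eye(1))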
I am very new to Gaussian processes and Python as well.
I am trying to produce a very simple Gaussian process regression for a 3D model.
I have a very simple Python code for a function:
import numpy as np

def exponential_cov(x, y, params):
    return params[0] * np.exp(-0.5 * params[1] * np.subtract.outer(x, y)**2)

def conditional(x_new, x, y, params):
    B = exponential_cov(x_new, x, params)
    C = exponential_cov(x, x, params)
    A = exponential_cov(x_new, x_new, params)
    mu = np.linalg.inv(C).dot(B.T).T.dot(y)
    sigma = A - B.dot(np.linalg.inv(C).dot(B.T))
    return (mu.squeeze(), sigma.squeeze())
import matplotlib.pylab as plt
# GP PRIOR
tu = [1, 10]
Si_tu = exponential_cov(0, 0, tu)
xpts = np.arange(-5, 5, step=0.01)
plt.errorbar(xpts, np.zeros(len(xpts)), yerr=Si_tu, capsize=0, color='#95daed', alpha=0.5, label='error') #error
plt.plot(xpts, np.zeros(len(xpts)), linestyle='dashed', color='#3105b2', linewidth=2.5, label='mu'); #mu
# GP FOR 1ST POINT
x = [1.]
y = np.sin(x)+np.cos(np.sqrt(15)*x)
Si_1 = exponential_cov(x, x, tu)
def predict(x, data, kernel, params, sigma, t):
    k = [kernel(x, y, params) for y in data]
    Sinv = np.linalg.inv(sigma)
    y_pred = np.dot(k, Sinv).dot(t)
    sigma_new = kernel(x, x, params) - np.dot(k, Sinv).dot(k)
    return y_pred, sigma_new
x_pred = np.linspace(-5, 5, 1000) #change step here!!
print "x_pred="
print(x_pred)
predictions = [predict(i, x, exponential_cov, tu, Si_1, y) for i in x_pred]
y_pred, sigmas = np.transpose(predictions)
print "y_pred ="
print(y_pred )
print "sigmas ="
print(sigmas )
# GP FOR 2ND POINT
m, s = conditional([-1], x, y, tu)
y2 = np.sin(-1)+np.cos(np.sqrt(15)*(-1))
x.append(-1)
y=np.append(y,y2)
Si_2 = exponential_cov(x, x, tu)
predictions = [predict(i, x, exponential_cov, tu, Si_2, y) for i in x_pred]
y_pred, sigmas = np.transpose(predictions)
print "y_pred ="
print(y_pred )
print "sigmas ="
print(sigmas )
By using this code I get very nice fitting results for the function np.sin(x) + np.cos(np.sqrt(15) * x), but what I really want to do is to try the same Gaussian process for the function Z = np.sin(2*X) * np.cos(2*Y) / 2.
I know that the idea is basically the same, but I cannot adapt my python code to the [x,y] input to obtain z.
I will really appreciate your help, hints or links!
In your current code the input of the function is 1-D, while the new function is 2-D, so you have to change the covariance function: for example, use an ARD-based kernel (see the kernel cookbook). You can also use an isotropic kernel for 2-D inputs; just make sure you choose a suitable distance function (e.g. the L2 norm) and a single lengthscale.
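As a rough sketch of the isotropic option (one way to adapt the question's exponential_cov to 2-D inputs, not the only choice), treat each input as an [x, y] point and replace the 1-D squared difference with a pairwise squared Euclidean distance:

import numpy as np

def exponential_cov_2d(X1, X2, params):
    # X1: (n, 2), X2: (m, 2) arrays of [x, y] points;
    # params[0] is the variance, params[1] an inverse squared lengthscale
    d2 = ((X1[:, None, :] - X2[None, :, :])**2).sum(axis=-1)  # (n, m) squared L2 distances
    return params[0] * np.exp(-0.5 * params[1] * d2)

# example: kernel matrix for three training points of Z = np.sin(2*X) * np.cos(2*Y) / 2
pts = np.array([[0.0, 0.0], [1.0, -1.0], [0.5, 0.5]])
K = exponential_cov_2d(pts, pts, [1.0, 10.0])
print(K.shape)  # (3, 3)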
I am using the below code for logistic regression with regularization in Python. It's giving me 80% accuracy on the training set.
I am using the minimize method 'TNC'. With BFGS the accuracy is only 50%.
What is the ideal method (equivalent to fminunc in Octave) to use for gradient descent?
How can I increase or decrease the number of iterations?
What is the default number of iterations?
Any other suggestion/approach to improve performance?
The same algorithm in Octave with fminunc gives 83% accuracy on the training set.
import numpy as np
import scipy.optimize as op
from sklearn import preprocessing
import matplotlib.pyplot as plt
from matplotlib import style
from pylab import scatter, show, legend, xlabel, ylabel
from numpy import loadtxt, where
from sklearn.preprocessing import PolynomialFeatures
def sigmoid(z):
    return 1/(1 + np.exp(-z))

def Gradient(theta, X, y, l):
    m, n = X.shape
    theta = theta.reshape((n, 1))
    thetaR = theta[1:n, :]
    y = y.reshape((m, 1))
    h = sigmoid(X.dot(theta))
    nonRegGrad = ((np.sum(((h-y)*X), axis=0))/m).reshape(n, 1)
    reg = np.insert((l/m)*thetaR, 0, 0, axis=0)
    grad = nonRegGrad + reg
    return grad.flatten()

def CostFunc(theta, X, y, l):
    m, n = X.shape
    theta = theta.reshape((n, 1))
    thetaR = theta[1:n, :]
    y = y.reshape((m, 1))
    h = sigmoid(X.dot(theta))  # compute h after reshaping theta so h is (m, 1), matching y
    cost = np.sum((np.multiply(-y, np.log(h)) - np.multiply((1-y), np.log(1-h))))/m
    reg = (l/(2*m)) * np.sum(np.square(thetaR))
    J = cost + reg
    return J

def predict(theta, X):
    m, n = X.shape
    return np.round(sigmoid(X.dot(theta.reshape(n, 1))))
data = np.loadtxt(open("ex2data2.txt","rb"),delimiter=",",skiprows=1)
nr,nc = data.shape
X=data[:,0:nc - 1]
#X=preprocessing.scale(X)
#X=np.insert(X,0,1,axis=1)
y= data[:,[nc - 1]]
pos = where(y == 1)
neg = where(y == 0)
scatter(X[pos, 0], X[pos, 1], marker='o', c='b')
scatter(X[neg, 0], X[neg, 1], marker='x', c='r')
xlabel('Microchip Test 1')
ylabel('Microchip Test 2')
legend(['Passed', 'Failed'])
show()
storeX=X
poly = PolynomialFeatures(6)
X=poly.fit_transform(X)
#print(X.shape)
m , n = X.shape;
initial_theta = np.zeros((n,1));
#initial_theta = zeros(shape=(it.shape[1], 1))
l = 1
# Compute and display initial cost and gradient for regularized logistic
# regression
#cost, grad = cost_function_reg(initial_theta, X, y, l)
#def decorated_cost(theta):
# return cost_function_reg(theta, X, y, l)
#print fmin_bfgs(decorated_cost, initial_theta, maxfun=400)
print("Calling optimization")
Result = op.minimize(fun = CostFunc,
x0 = initial_theta,
args = (X, y,l),
method = 'TNC',
jac = Gradient);
optimal_theta = Result.x;
print(Result.x.shape)
print("optimal theta")
print(optimal_theta)
p=predict(optimal_theta,X)
accuracy = np.mean(np.double(p==y))
print("accuracy")
print(accuracy)