2d laplacian computation in tensorflow - python

I want to do a toy code computing the laplacian of the function f(x,y) = sin(pi (x+1)/2)*sin(pi (y+1)/2) for (x,y) in [-1,1]^2.
I have tried multiple methods:
The first one works.
The second one has a bug when calculating u_xx.
The third one gives wrong results; I don't even know what is in the hess variable.
The fourth one is an attempt at using the tf.hessians function, which doesn't work.
If someone can shed some light, I would be indebted.
# numpy was never imported although `pi = np.pi` needs it; the second
# (duplicated) tensorflow import is replaced by the numpy import.
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt

pi = np.pi
# function that will be passed to autmoatic differentiation
#tf.function
def sol(X):
    """Evaluate sin(pi*(x+1)/2) * sin(pi*(y+1)/2) on the rows of ``X``.

    Column 0 of ``X`` is read as x and column 1 as y.
    """
    x_factor = tf.sin(pi * (X[:, 0] + 1) / 2)
    y_factor = tf.sin(pi * (X[:, 1] + 1) / 2)
    return x_factor * y_factor
#tf.function
def sol2(X, Y):
    """Evaluate sin(pi*(X+1)/2) * sin(pi*(Y+1)/2) for separately passed X and Y."""
    x_factor = tf.sin(pi * (X + 1) / 2)
    y_factor = tf.sin(pi * (Y + 1) / 2)
    return x_factor * y_factor
# true laplacian for comparison
def f(X):
    """Closed-form comparison value: -pi**2/2 times the product of sines.

    Column 0 of ``X`` is read as x and column 1 as y.
    """
    scale = -pi**2 / 2
    x_col = X[:, 0]
    y_col = X[:, 1]
    return scale * tf.sin(pi * (x_col + 1) / 2) * tf.sin(pi * (y_col + 1) / 2)
def fgrid(X, Y):
    """Closed-form comparison value for separately passed X and Y arrays."""
    scale = -pi**2 / 2
    return scale * tf.sin(pi * (X + 1) / 2) * tf.sin(pi * (Y + 1) / 2)
#######"MESH OVER THE SQUARE"
# n sample values per axis between x1 and x2.
n = 500
x1 , x2 = -1 , 1
vec = tf.linspace(x1,x2,n)
# 2-D coordinate grids built from the same 1-D vector on both axes.
xgrid,ygrid = tf.meshgrid(vec,vec)
# Flatten each grid to a single column ...
xrow,yrow = tf.reshape(xgrid,(-1,1)),tf.reshape(ygrid,(-1,1))
# ... and join them side by side: one (x, y) pair per row, stored as a tf.Variable.
Xdata = tf.Variable(tf.concat((xrow,yrow),axis=1))
#######COMPUTING LAPLACIAN
# FIRST WORKING METHOD
"""
with tf.GradientTape(persistent=True) as tape:
xx = tf.reshape(Xdata[:,0],(-1,1))
yy = tf.reshape(Xdata[:,1],(-1,1))
tape.watch(xx)
tape.watch(yy)
u = sol(tf.concat([xx,yy],axis=1))
u_x = tape.gradient(u,xx)
u_xx = tape.gradient(u_x,xx)
u_y = tape.gradient(u,yy)
u_yy = tape.gradient(u_y,yy)
lapl = (u_xx+u_yy)
del(tape)
#displaying results
plt.contourf(xgrid,ygrid,lapl.numpy().reshape(n,n))
#"""
# SECOND METHOD NOT WORKING : error in u_xx computation
"""
with tf.GradientTape(persistent=True) as tape:
xx = tf.Variable(Xdata[:,0])
yy = tf.Variable(Xdata[:,1])
u = sol(Xdata)
u_x = tape.gradient(u,xx)
u_xx = tape.gradient(u_x,xx)
u_y = tape.gradient(u,yy)
u_yy = tape.gradient(u_y,yy)
lapl = u_xx + u_yy
del(tape)
#"""
#plt.contourf(xgrid,ygrid,lapl.numpy().reshape(n,n))
# THIRD METHOD NOT WORKING : wrong results
"""
with tf.GradientTape(persistent=True) as tape:
u = sol(Xdata)
grads = tape.gradient(u,Xdata)
hess = tape.gradient(grads,Xdata) # shape (-1,2)
lapl = hess[:,0] + hess[:,1]
del(tape)
plt.contourf(xgrid,ygrid,lapl.numpy().reshape(n,n))
#"""
# FOURTH METHOD NOT WORKING : tf.hessians (use Gradient.tape instead) or 'GradientTape' object has no attribute 'hessians'
"""
with tf.GradientTape() as tape:
u = sol(Xdata)
hess = tape.hessians(u,Xdata)
lapl = hess[:,0] + hess[:,1]
plt.contourf(xgrid,ygrid,lapl.numpy().reshape(n,n))
#"""
# second try with tf.hessians
#"""
hess = tf.hessians(sol(Xdata),Xdata) #tf.gradients is not supported when eager execution is enabled. Use tf.GradientTape instead.
#""" ```

Related

How can I implement interface conditions using DeepXDE for solving differential equations?

I have been using DeepXDE (a framework for solving differential equations). I am particularly interested in implementing interface conditions, for example, to represent perfect thermal contact and heat flux continuity at an interface between two different solids.
Problem:
So far, I've considered a simple heat transfer problem, as if it were a rod composed of two different materials, with Dirichlet conditions at x=0 and x=L:
from x=0 to x=L/2, we have conductivity coefficient a_1 and temperature T_1(x,t);
from x=L/2 to x=L, we have coefficient a_2 and temperature T_2(x,t);
at the interface, we have to meet both T_1 - T_2 = 0 and a_1dT_1/dx + a_2dT_2/dx = 0 for x=L/2 and t>0.
Although I did not find a concise solution, I tried to implement this problem. But, I have some questions:
I found a way to enforce the heat flux continuity using geom.boundary_normal( ). But, the respective loss is not decreasing (in fact, it is constant). Is it correct to use geom.boundary_normal( )? Is there an alternative way?
I am struggling to come up with a way to enforce T_1 - T_2 = 0. How could I get the values of T_1 and T_2 at x=L/2 during model training?
My code is as follows:
# Libraries to import
import deepxde as dde
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
# pamaters and geometries
a1 = 1.14 # conductivity coefficient 1
a2 = 0.01 # conductivity coefficient 2 # fairly different from a1
L = 1.0 # total length
T0 = 0.0 # temperature specified at x=0
TL = 1.0 # temperature specified at x=L
tend = 1.0 # final time for the simulation
geom1 = dde.geometry.Interval(0, L/2) # first solid
geom2 = dde.geometry.Interval(L/2, L) # second solid
timedomain = dde.geometry.TimeDomain(0, tend)
geomtime = dde.geometry.GeometryXTime(geom1|geom2, timedomain)
# Models and the respective domains
def pde_T1(x, y, _):
    """Residual ``dy/dt - a1 * d2y/dx2`` for the first solid.

    Input column 1 of ``x`` is differentiated once (``j=1``) and input
    column 0 twice; the third argument is ignored.
    """
    time_derivative = dde.grad.jacobian(y, x, i=0, j=1)
    second_space_derivative = dde.grad.hessian(y, x, i=0, j=0)
    residual = time_derivative - a1 * second_space_derivative
    return residual
def pde_T2(x, y, _):
    """Residual ``dy/dt - a2 * d2y/dx2`` for the second solid.

    Input column 1 of ``x`` is differentiated once (``j=1``) and input
    column 0 twice; the third argument is ignored.
    """
    time_derivative = dde.grad.jacobian(y, x, i=0, j=1)
    second_space_derivative = dde.grad.hessian(y, x, i=0, j=0)
    residual = time_derivative - a2 * second_space_derivative
    return residual
def on_domain1(x, on_domain):
    """Return the first entry of ``geom1.inside(x)``; ``on_domain`` is ignored."""
    inside_flags = geom1.inside(x)
    return inside_flags[0]
def on_domain2(x, on_domain):
    """Return the first entry of ``geom2.inside(x)``; ``on_domain`` is ignored."""
    inside_flags = geom2.inside(x)
    return inside_flags[0]
# Boundary and initial conditions
def on_boundary1(x, on_boundary):
    """Select boundary points whose first coordinate is (numerically) 0."""
    if not on_boundary:
        return on_boundary
    return np.isclose(x[0], 0)
def on_boundary2(x, on_boundary):
    """Select boundary points whose first coordinate is (numerically) ``L``."""
    if not on_boundary:
        return on_boundary
    return np.isclose(x[0], L)
def boundary_initial(x, on_initial):
    """Select initial points whose second coordinate is (numerically) 0."""
    if not on_initial:
        return on_initial
    return np.isclose(x[1], 0)
# interface conditions
def on_interf(x, on_boundary):
    """Select boundary points whose first coordinate is (numerically) ``L/2``."""
    if not on_boundary:
        return on_boundary
    return np.isclose(x[0], L/2)
def flux_int(x,y,X):
# Interface flux term: a1 and a2 weight the boundary normals that geom1 and
# geom2 report for X, reshaped to a single column.
# NOTE(review): the returned expression reads only X and the constants a1/a2;
# it never uses the network output y or any derivative of it. That would
# explain a loss term that stays constant during training -- confirm.
# I need help here.
return (a1*geom1.boundary_normal(X) + a2*geom2.boundary_normal(X)).reshape(-1,1)
def Temp_int(x,y,X):
# Placeholder for the temperature-continuity condition at the interface;
# the body is empty, so calling it returns None.
# I need help here.
# T1_int: how to get from geom1 at x=L/2?
# T2_int = how to get from geom2 at x=L/2?
pass
# Setting the IC
def init_func(X):
    """Initial temperature profile evaluated at the points ``X``.

    Column 0 of ``X`` is the position. Points with ``x < L/2`` start at
    ``T0``; all other points follow the linear ramp
    ``T0 + 2*(TL - T0)*(x - L/2)``, which reaches ``TL`` at ``x = L`` for
    the ``L = 1`` used in this script.

    Returns:
        Array of shape ``(len(X), 1)``.
    """
    x = X[:, 0:1]
    # The original referenced an undefined name ``Ts`` here (NameError as
    # soon as a point with x >= L/2 is evaluated); the temperature
    # prescribed at x = L in this script is ``TL``.
    ramp = T0 + 2 * (TL - T0) * (x - L / 2)
    return np.where(x < L / 2, T0, ramp).astype(float)
ic = dde.IC(geomtime, init_func, boundary_initial)
# Setting the BCs
# geomtime1/geomtime2 were referenced below without ever being defined
# (NameError). They are built here as the space-time domains of the two
# solids, mirroring how `geomtime` is built for the union geometry.
geomtime1 = dde.geometry.GeometryXTime(geom1, timedomain)
geomtime2 = dde.geometry.GeometryXTime(geom2, timedomain)
# The two PDE residuals are registered as operator conditions restricted to
# their own sub-domain through on_domain1 / on_domain2.
pde1 = dde.OperatorBC(geomtime1, pde_T1, on_boundary = on_domain1)
pde2 = dde.OperatorBC(geomtime2, pde_T2, on_boundary = on_domain2)
bc1 = dde.icbc.DirichletBC(geomtime1, lambda x: T0*np.ones((len(x),1)), on_boundary1)
bc2 = dde.icbc.DirichletBC(geomtime2, lambda x: TL*np.ones((len(x),1)), on_boundary2) # not used in loss
# Setting the BC at the interface with 500 points
X = np.hstack( (np.full((500), L/2).reshape(-1,1), timedomain.random_points(500))).reshape(-1, 2)
FluxInterf = dde.icbc.PointSetOperatorBC(X,
np.zeros((X.shape[0],1)), # fluxes must add up to zero at x=L/2.
lambda x, y, X : flux_int(x, y, X[:,0]))
# Setting the problem
loss = [pde1, pde2, bc1, ic, FluxInterf]
data = dde.data.TimePDE(
geomtime,
None,
loss,
num_domain=1000,
num_boundary=500,
num_initial=500,
num_test=500)
loss_weights = [10, 10, 0.1, 0.1, 100]
net = dde.nn.FNN([2] + 4 * [50] + [1], "tanh", "Glorot normal")
# Enforcing BC at x=L
def output_transform(x, y):
    """Hard-constrain the network output at ``x = L``.

    The factor ``(L - xx)`` vanishes at ``x = L``, so the transformed output
    equals ``TL`` there whatever the raw output ``y`` is.
    """
    xx = x[:, 0:1]
    # ``Ts`` was undefined in the original (NameError on first call);
    # ``TL`` is the temperature this script prescribes at x = L.
    return (L - xx) * y + TL
net.apply_output_transform(output_transform)
model = dde.Model(data, net)
model.compile("adam", lr=1.0e-3, loss_weights = loss_weights)
losshistory, train_state = model.train(iterations=25000)
model.compile("L-BFGS")
losshistory, train_state = model.train()
dde.saveplot(losshistory, train_state, issave=True, isplot=True)
Thank you for your time and consideration.
Best regards.

Pytorch: multiplication between parameters is inplace for LBFGS optimizer?

I am trying to solve a kind of inverse problem by backward propagation with pytorch. I am trying to recover the parameters (r, theta) that generate a vector field U(r,theta).
As I intended to use the LBFGS optimizer from pytorch, I realize that the operation
r*theta
is detected as inplace and thus not supported for the backward computation of the gradient, whereas
r+theta is not.
How can I overcome this ? I actually need to recover fields that use transformations of the form r*theta.
Here is an example of a code that reproduces the error: it is running fine if you change
field = Wrong_U_param(r, theta, positions)
by
field = U_param(r, theta, positions)
in the loop. It also works if you replace the r*theta operation by r.item()*theta (but then it does not optimize over r, since the gradient no longer depends on r).
I tried to use torch.mul() to run the product but it also fails.
The error message is the following
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation
and the automatic detection points towards this very product.
Thank you for your help !
import numpy as np
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
import torch.optim as optim
from geomloss import SamplesLoss
torch.autograd.set_detect_anomaly(True)
def model(field):
    """Identity model: hand the field back unchanged."""
    output = field
    return output
def U_param(r, theta, pos):
    """Field ``r + theta`` broadcast to the shape of ``pos``.

    ``pos`` only contributes its shape: it is multiplied by zero.
    """
    offset = r + theta
    return offset + 0. * pos
def Wrong_U_param(r, theta, pos):
    """Field ``r * theta`` broadcast to the shape of ``pos``.

    ``pos`` only contributes its shape: it is multiplied by zero.
    """
    product = r * theta
    return product + 0. * pos
def learn_U_param(Zobs, ngrad, params, r_guess=0., theta_guess=0., lambd=1.):
    """Fit the scalars ``r`` and ``theta`` to ``Zobs`` with L-BFGS.

    Args:
        Zobs: observed field the model output is compared against.
        ngrad: number of ``optimizer.step`` calls.
        params: sequence whose first entry is the number of points.
        r_guess: initial value of ``r``.
        theta_guess: initial value of ``theta``.
        lambd: unused; kept so existing callers keep working.

    Returns:
        ``(r_hist, theta_hist, loss_hist)``: the parameter values (starting
        with the initial guess) and the loss recorded at every step.
    """
    Npts = params[0]
    positions = torch.tensor(np.arange(0, 1, 1 / Npts) + 1 / 2 / Npts).reshape((Npts, 1))
    lab = torch.tensor(np.arange(0, Npts))
    r = torch.tensor(float(r_guess)).to(device)
    r.requires_grad = True
    theta = torch.tensor(float(theta_guess)).to(device)
    theta.requires_grad = True
    r_hist = [r.item()]
    theta_hist = [theta.item()]
    loss_hist = []
    optimizer = optim.LBFGS([r, theta])
    # The loss object does not change between iterations; build it once.
    Loss = SamplesLoss(loss="sinkhorn", p=2, blur=.05)

    def closure():
        # L-BFGS calls the closure several times per step and updates r and
        # theta in place between calls. The forward pass therefore has to be
        # rebuilt on every call: re-running backward() on a graph recorded
        # earlier fails for r * theta, because that product saved the old
        # values of r and theta ("modified by an inplace operation").
        optimizer.zero_grad()
        field = Wrong_U_param(r, theta, positions)
        Z = model(field)
        Wass = Loss(lab, Z, positions, lab, Zobs, positions)
        Wass.backward()
        return Wass

    for _ in range(ngrad):
        # step() returns the loss from the first closure evaluation, i.e.
        # the loss at the parameters the step started from.
        Wass = optimizer.step(closure)
        optimizer.zero_grad()
        r_hist.append(r.item())
        theta_hist.append(theta.item())
        loss_hist.append(Wass.item())
    return r_hist, theta_hist, loss_hist
# Driver: build synthetic observations and run the fit.
N=100
r = 2
theta = 2
params = [N]
# N cell-centre positions in (0, 1), as a column.
positions = torch.tensor(np.arange(0, 1, 1 / N) + 1 / 2 / N).reshape((N, 1))
# Observations are generated with U_param (r + theta).
Zobs = U_param(r, theta, positions)
ngrad = 10
print(learn_U_param(Zobs, ngrad, params, r_guess=0.1, theta_guess=0.1, lambd=1.))

SKlearn Gaussian Process with constant, manually set correlation

I want to use the Gaussian Process approximation for a simple 1D test function to illustrate a few things. I want to iterate over a few different values for the correlation matrix (since this is 1D it is just a single value) and show what effect different values have on the approximation. My understanding is, that "theta" is the parameter for this. Therefore I want to set the theta value manually and don't want any optimization/changes to it. I thought the constant kernel and the clone_with_theta function might get me what I want but I didn't get it to work. Here is what I have so far:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as ConstantKernel
def f(x):
    """The function to predict."""
    numerator = (1/10 + x) * np.sin(5*x - 1)
    denominator = 1 + x**2 * (np.sin(x - (1/2))**2)
    return x/2 + numerator/denominator
# ----------------------------------------------------------------------
# Data Points
X = np.atleast_2d(np.delete(np.linspace(-1,1, 7),4)).T
y = f(X).ravel()
# Instantiate a Gaussian Process model with manually fixed hyperparameters.
# A ConstantKernel on its own has no correlation parameter; the correlation
# length belongs to the RBF kernel, k(d) = exp(-d**2 / (2 * length_scale**2)).
# Marking the bounds 'fixed' removes the hyperparameters from kernel.theta,
# so clone_with_theta is not needed, and optimizer=None disables tuning.
length_scale = 0.5
kernel = ConstantKernel(constant_value=1, constant_value_bounds='fixed') \
    * RBF(length_scale=length_scale, length_scale_bounds='fixed')
gp = GaussianProcessRegressor(kernel=kernel, optimizer=None)
# Fit to data (the hyperparameters stay at the values set above)
gp.fit(X, y)
# Make the prediction on the meshed x-axis (ask for the standard deviation as well).
# `x` was never defined in the original snippet.
x = np.atleast_2d(np.linspace(-1, 1, 200)).T
y_pred, sigma = gp.predict(x, return_std=True)
# Plot
# ...
I programmed a simple implementation myself now, which allows to set correlation (here 'b') manually:
import numpy as np
from numpy.linalg import inv
def f(x):
    """The function to predict."""
    trend = x/2
    oscillation = ((1/10 + x) * np.sin(5*x - 1))/(1 + x**2 * (np.sin(x - (1/2))**2))
    return trend + oscillation
def kriging_approx(x, xt, yt, b, mu, R_inv):
    """Kriging prediction at the single point ``x``.

    Args:
        x: scalar location to predict at.
        xt: training locations (any shape; flattened).
        yt: training values (array or matrix; flattened), same count as ``xt``.
        b: correlation parameter of ``exp(-b * (xi - xj)**2)``.
        mu: scalar mean.
        R_inv: inverse of the training correlation matrix.

    Returns:
        The scalar prediction ``mu + r^T R_inv (yt - mu)``.
    """
    # Plain arrays replace the deprecated np.matrix, and the element-wise
    # loop that filled the correlation vector becomes one vectorised call.
    xt = np.asarray(xt, dtype=float).ravel()
    residual = np.asarray(yt, dtype=float).ravel() - mu
    r = np.exp(-b * (xt - x) ** 2)
    return mu + np.dot(np.dot(r, np.asarray(R_inv)), residual)
def calc_R(x, b):
    """Build the correlation matrix of the points ``x`` and its inverse.

    Args:
        x: sample locations (flattened to 1-D).
        b: correlation parameter.

    Returns:
        ``(R, R_inv)`` with ``R[i, j] = exp(-b * (x[i] - x[j])**2)``.
    """
    x = np.asarray(x, dtype=float).ravel()
    # Broadcasting forms all pairwise differences at once instead of the
    # original double Python loop.
    diff = x[:, None] - x[None, :]
    R = np.exp(-b * diff ** 2)
    R_inv = np.linalg.inv(R)
    return R, R_inv
def calc_mu_sig(yt, R_inv):
    """Estimate the mean and variance from the training values.

    Args:
        yt: training values (array or matrix; flattened).
        R_inv: inverse of the training correlation matrix.

    Returns:
        ``(mu, sig2)`` with ``mu = (1^T R_inv yt) / (1^T R_inv 1)`` and
        ``sig2 = (yt - mu)^T R_inv (yt - mu) / N``, both scalars.
    """
    # Plain 1-D arrays replace the deprecated np.matrix; the quadratic
    # forms then evaluate directly to scalars.
    y = np.asarray(yt, dtype=float).ravel()
    R_inv = np.asarray(R_inv)
    N = y.size
    ones = np.ones(N)
    mu = np.dot(ones, np.dot(R_inv, y)) / np.dot(ones, np.dot(R_inv, ones))
    residual = y - mu
    sig2 = np.dot(residual, np.dot(R_inv, residual)) / N
    return mu, sig2
# ----------------------------------------------------------------------
# Correlation parameter, set manually. The original snippet used `b`
# without defining it; 1.0 is only an example value.
b = 1.0
# Data Points
xt = np.linspace(-1,1, 7)
yt = np.matrix((f(xt))).T
# Calc R
R, R_inv = calc_R(xt, b)
# Calc mu and sigma
mu_dach, sig_dach2 = calc_mu_sig(yt, R_inv)
# Point to get approximation for
x = 1
y_approx = kriging_approx(x, xt, yt, b, mu_dach, R_inv)

Computing the partial derivatives of a deep neural network with respect to Inputs

I am trying to compute the derivative of a neural network with 2 or more hidden layers with respect to its inputs. So not "standard backpropagation" since I am not interested in how the output varies with respect to the weights. And I am not looking to train my network using it (if this warrants removing the backpropagation tag, let me know, but I suspect that what I need is not too different)
The reason for my interest in the derivative here, is that I have a test set which sometimes provides me with a matching [x1, x2] : [y] pair, and sometimes a [x1, x2] : [d(y)/dx1] or [x1, x2] : [d(y)/dx2]. I then use a particle swarm algorithm to train my network.
I like diagrams, so to save a few words here is my network:
and what I would like is for the compute_derivative method to return a numpy array of the form below:
This is my attempt so far, but I can't seem to get an array matching my number of inputs at the end. I can't figure what I am doing wrong.
def compute_derivative(self):
"""Computes the network derivative and returns an array with the change in output with respect to each input"""
# Fills in layers[0].derivatives; the return value is not used here.
self.compute_layer_derivative(0)
for l in np.arange(1,self.size):
dl = self.compute_layer_derivative(l)
dprev = self.layers[l-1].derivatives
# NOTE(review): output_derivatives is overwritten on every iteration with
# the product of the current and the immediately preceding layer only; the
# result of earlier iterations is not carried forward.
self.output_derivatives = dl.T.dot(dprev)
return self.output_derivatives
def compute_layer_derivative(self, l_id):
    """Store and return the weighted activation slopes of layer ``l_id``.

    The activation's first derivative at the layer output is tiled once per
    weight row and multiplied element-wise with the weights.
    """
    layer = self.layers[l_id]
    slope = layer.f(layer.output, div=1)
    tiled_slope = np.repeat(slope, layer.w.shape[0], axis=0)
    layer.derivatives = np.multiply(tiled_slope, layer.w)
    return layer.derivatives
If you want to run the entire code I have made a cut down, commented version, which will work with a copy paste (see below). Thank you for your help !
# -*- coding: utf-8 -*-
import numpy as np
def sigmoid(x, div = 0):
    """Logistic sigmoid or one of its derivatives.

    Args:
        x: scalar or array input.
        div: 0 for f, 1 for the first derivative f', 2 for the second
            derivative f''.

    Returns:
        The requested quantity, evaluated element-wise.
    """
    # log(1 + exp(-x)) via logaddexp never overflows. The original formulas
    # evaluated exp(-x) (or exp(x)) directly and produced inf/inf = nan for
    # inputs of large magnitude.
    s = np.exp(-np.logaddexp(0., -x))
    if div == 1: #first derivative f' = s (1 - s)
        return s * (1. - s)
    if div == 2: # second derivative f'' = s (1 - s) (1 - 2 s)
        return s * (1. - s) * (1. - 2. * s)
    return s # f
def linear(x, div = 0):
    """Identity activation or one of its derivatives.

    Args:
        x: array input (its ``shape`` is used for the derivatives).
        div: 0 for f, 1 for f', 2 or more for higher derivatives.

    Returns:
        ``x`` itself, an array of ones, or an array of zeros.
    """
    if div == 1: #first derivative f'
        return np.full(x.shape,1)
    # The original tested `div > 2`, so the second derivative (div == 2)
    # fell through and returned x instead of zeros.
    if div >= 2: # second and higher derivatives
        return np.zeros(x.shape)
    return x # f
class Layer():
    """Fully connected layer with weights ``w``, optional bias ``b`` and an activation ``f``."""

    def __init__(self, in_n, h_n, activation, bias = True, debug = False):
        """Create random weights of shape ``(in_n, h_n)`` (all ones when ``debug``)."""
        weight_shape = (in_n, h_n)
        bias_shape = (1, h_n)
        # synaptic weights drawn from [-1, 1), i.e. zero mean
        self.w = 2*np.random.random(weight_shape) - 1
        self.f = activation
        self.output = None
        self.activation = None
        # one row of in_n + 1 placeholders (+1 for global dev)
        self.derivatives = np.array([[None] * (in_n + 1)])
        self.b = 2*np.random.random(bias_shape) - 1 if bias else None
        if debug:
            self.w = np.full(weight_shape, 1.)
            if self.b is not None:
                self.b = np.full(bias_shape, 1.)

    def compute(self, inputs):
        """Store the pre-activation in ``output`` and ``f(output)`` in ``activation``."""
        if inputs.shape[1] != self.w.shape[0]:
            raise ValueError("Inputs dimensions do not match test data dim.")
        pre_activation = np.dot(inputs, self.w)
        if self.b is not None:
            pre_activation = pre_activation + self.b
        self.output = pre_activation
        self.activation = self.f(self.output)
# Feed-forward network: nb_layers sigmoid hidden layers of h_density neurons
# followed by one bias-free linear output layer.
class NeuralNetwork():
def __init__(self, nb_layers, in_NN, h_density, out_NN, debug = False):
self.debug = debug
self.layers = []
# total number of layers: the hidden layers plus the output layer
self.size = nb_layers+1
self.output_derivatives = None
self.output = None
self.in_N = in_NN
self.out_N = out_NN
if debug:
print("Input Layer with {} inputs.".format(in_NN))
#create hidden layers
current_inputs = in_NN
for l in range(self.size - 1):
self.layers.append(Layer(current_inputs, h_density, sigmoid, debug = debug))
current_inputs = h_density
if debug:
print("Hidden Layer {} with {} inputs and {} neurons.".format(l+1, self.layers[l].w.shape[0], self.layers[l].w.shape[1]))
#create output layer
self.layers.append(Layer(current_inputs, out_NN, linear, bias=False, debug = debug))
if debug:
print("Output Layer with {} inputs and {} outputs.".format(self.layers[-1].w.shape[0], self.layers[-1].w.shape[1]))
#print("with w: {}".format(self.layers[l].w))
print("ANN size = {}, with {} Layers\n\n".format( self.size, len(self.layers)))
# Forward pass: each layer's activation is the next layer's input; the last
# activation is stored in self.output and returned.
def compute(self, point):
curr_inputs = point
for l in range(self.size):
self.layers[l].compute(curr_inputs)
curr_inputs = self.layers[l].activation
self.output = curr_inputs
if self.debug: print("ANN output: ",curr_inputs)
return self.output
# Walks the layers from the output layer back to the first one, combining
# per-layer derivative terms into self.output_derivatives.
def compute_derivative(self, order, point):
""" If the network has not been computed, compute it before getting
the derivative. This might be a bit expensive..."""
# `point` is only used when the last layer has no stored output yet;
# otherwise the values of the previous compute() call are reused.
if self.layers[self.size-1].output is None:
self.compute(point)
#Compute output layer total derivative
self.compute_layer_derivative(self.size-1, order)
self.output_derivatives = self.get_partial_derivatives_to_outputs(self.size-1)
print(self.output_derivatives)
for l in np.arange(1,self.size):
# convert the loop counter into a layer index counting down from size-2 to 0
l = self.size-1 - l
self.compute_layer_derivative(l, order)
if l > 0: #if we are not at first hidden layer compute the total derivative
# scales by a single scalar: the sum of all entries of the layer's derivatives
self.output_derivatives *= self.get_total_derivative_to_inputs(l)
else:# get the each output derivative with respect to each input
backprop_dev_to_outs = np.repeat(np.matrix(self.output_derivatives),self.in_N, axis=0).T
dev_to_inputs = np.repeat(np.matrix(self.get_partial_derivatives_to_inputs(l)).T,self.out_N, axis=1).T
self.output_derivatives = np.multiply(backprop_dev_to_outs, dev_to_inputs)
if self.debug: print("output derivatives: ",self.output_derivatives)
return self.output_derivatives
# Same body as get_total_derivative_to_inputs below.
def get_total_derivative(self,l_id):
return np.sum(self.get_partial_derivatives_to_inputs(l_id))
def get_total_derivative_to_inputs(self,l_id):
return np.sum(self.get_partial_derivatives_to_inputs(l_id))
# Row sums of the stored layer derivatives (axis=1).
def get_partial_derivatives_to_inputs(self,l_id):
return np.sum(self.layers[l_id].derivatives, axis=1)
# Column sums of the stored layer derivatives (axis=0).
def get_partial_derivatives_to_outputs(self,l_id):
return np.sum(self.layers[l_id].derivatives, axis=0)
# Stores and returns, for layer l_id, the activation derivative of the given
# order at the layer output, repeated once per weight row and multiplied
# element-wise with the weights.
def compute_layer_derivative(self, l_id, order):
if self.debug: print("\n\ncurrent layer is ", l_id)
wL = self.layers[l_id].w
zL = self.layers[l_id].output
daL = self.layers[l_id].f(zL, order)
daLM = np.repeat(daL,wL.shape[0], axis=0)
self.layers[l_id].derivatives = np.multiply(daLM,wL)
if self.debug:
print("L_id: {}, a_f: {}".format(l_id, self.layers[l_id].f))
print("L_id: {}, dev: {}".format(l_id, self.get_total_derivative_to_inputs(l_id)))
return self.layers[l_id].derivatives
#nb_layers, in_NN, h_density, out_NN, debug = False
# One hidden layer, 2 inputs, 2 hidden neurons, 1 output; debug=True sets
# every weight and bias to 1 so the numbers can be checked by hand.
nn = NeuralNetwork(1,2,2,1, debug= True)
nn.compute(np.array([[1,1]]))# head value
nn.compute_derivative(1,np.array([[1,1]])) #first derivative
EDITED ANSWER BASED ON SIRGUY's REPLY:
# Here we assume that the layer has sigmoid activation
def Jacobian(x = np.array([[1,1]]), w = np.array([[1,1],[1,1]]), b = np.array([[1,1]])):
    """Sigmoid slope at ``x.dot(w) + b`` multiplied element-wise with ``w``.

    Args:
        x: row vector of layer inputs.
        w: weight matrix of the layer.
        b: bias row vector of the layer.
    """
    # `sigmoid_d` is not defined anywhere in this listing; the first
    # derivative of the sigmoid is available as sigmoid(z, div=1).
    return sigmoid(x.dot(w) + b, div=1) * w # J(S, x)
In the case of a network with 2 hidden layers with sigmoid activation and one output layer with sigmoid activation (so that we can just use the same function as above) we have:
J_L1 = Jacobian(x = np.array([[1,1]])) # where [1,1] are the inputs of to the network (i.e. values of the neuron in the input layer)
J_L2 = Jacobian(x = np.array([[3,3]])) # where [3,3] are the neuron values of layer 1 before activation
# in the output layer the weights and biases are adjusted as there is 1 neuron rather than 2
J_Lout = Jacobian(x = np.array([[2.90514825, 2.90514825]]), w = np.array([[1],[1]]), b = np.array([[1]]))# where [2.905,2.905] are the neuron values of layer 2 before activation
J_out_to_in = J_Lout.T.dot(J_L2).dot(J_L1)
Here's how I derived what your example should give:
# i'th component of vector-valued function S(x) (sigmoid-weighted layer)
S_i(x) = 1 / (1 + exp(-(w_i . x + b_i))) # . for matrix multiplication here
# i'th component of vector-valued function L(x) (linear-weighted layer)
L_i(x) = w_i . x # different weights than S.
# as it happens our L(x) output 1 value, so is in fact a scalar function
F(x) = L(S(x)) # final output value
#derivative of F, denoted as J(F, x) to mean the Jacobian of the function F, evaluated at x.
J(F, x) = J(L(S(x)), x) = J(L, S(x)) . J(S, x) # chain rule for multivariable, vector-valued functions
#First, what's the derivative of L?
J(L, S(x)) = L
This is usually a surprising result, but you can verify this yourself by computing partial derivatives of M . x for some random matrix M. If you compute all the derivatives and put them into the Jacobian you will get back M.
#Now what's the derivative of S? Compute via formula
d(S_i(x))/dx_j = w_ij * exp(-(w_i.x+b_i)) / (1 + exp(-(w_i.x+b_i)))**2 #w_ij is the j'th component of the vector w_i
#For the gradient of a S_i (which is just one component of S), we get
J(S_i, x) = (exp(-(w_i . x + b_i)) / (1 + exp(-(w_i . x + b_i)))**2) * w_i # remember this is a vector because w_i is a vector
Now to take your debug example of 1's everywhere.
w_i = b = x = [1, 1]
#define a to make this less cluttered
a = exp(-(w_i . x + b)) = exp(-3)
J(S_i, x) = a / (1 + a)^2 * [1, 1]
J(S, x) = a / (1 + a)^2 * [[1, 1], [1, 1]]
J(L, S(x)) = [1, 1] #Doesn't depend on S(x)
J(F, x) = J(L, S(x)) . J(S, x) = (a / (1 + a)**2) * [1, 1] . [[1, 1], [1, 1]]
J(F, x) = (a / (1 + a)**2) * [2, 2] = (2 * a / (1 + a)**2) * [1, 1]
J(F, x) = [0.0903533, 0.0903533]
Hopefully this will help you reorganise your code a bit. You can't evaluate the derivatives here with just the value of w_i . x, you will need w_i and x separately to properly compute everything.
EDIT
Because I find this stuff interesting, here is my python script for
computing the value and first derivative of a neural network:
import numpy as np
class Layer:
    """Affine map ``W x + b``, optionally followed by an element-wise sigmoid."""

    def __init__(self, weights_matrix, bias_vector, sigmoid_activation = True):
        self.weights_matrix = weights_matrix
        self.bias_vector = bias_vector
        self.sigmoid_activation = sigmoid_activation

    def compute_value(self, x_vector):
        """Return the layer output for ``x_vector``."""
        affine = np.add(np.dot(self.weights_matrix, x_vector), self.bias_vector)
        if not self.sigmoid_activation:
            return affine
        return 1 / (1 + np.exp(-affine))

    def compute_value_and_derivative(self, x_vector):
        """Return ``(output, jacobian)`` of the layer at ``x_vector``."""
        if not self.sigmoid_activation:
            # a purely affine layer has its weights as Jacobian
            return (self.compute_value(x_vector), self.weights_matrix)
        decay = np.exp(-np.add(np.dot(self.weights_matrix, x_vector), self.bias_vector))
        value = 1.0 / (1 + decay)
        row_scale = decay / (1 + decay)**2
        # left-multiplying by diag(row_scale) scales the i-th weight row by
        # the i-th sigmoid slope
        jacobian = np.dot(np.diag(row_scale), self.weights_matrix)
        return (value, jacobian)
class Network:
    """Chain of layers applied one after another."""

    def __init__(self, layers):
        self.layers = layers

    def compute_value(self, x_vector):
        """Feed ``x_vector`` through every layer and return the final output."""
        value = x_vector
        for layer in self.layers:
            value = layer.compute_value(value)
        return value

    def compute_value_and_derivative(self, x_vector):
        """Return ``(output, jacobian)``; the Jacobian is the chain-rule product."""
        value, jacobian = self.layers[0].compute_value_and_derivative(x_vector)
        for layer in self.layers[1:]:
            value, layer_jacobian = layer.compute_value_and_derivative(value)
            # each later layer's Jacobian multiplies from the left
            jacobian = np.dot(layer_jacobian, jacobian)
        return value, jacobian
#first weights
l1w = np.array([[1,1],[1,1]])
l1b = np.array([1,1])
l2w = np.array([[1,1],[1,1]])
l2b = np.array([1,1])
l3w = np.array([1, 1])
l3b = np.array([0])
# two sigmoid layers followed by a linear output layer
nn = Network([Layer(l1w, l1b),
              Layer(l2w, l2b),
              Layer(l3w, l3b, False)])
r = nn.compute_value_and_derivative(np.array([1,1]))
# print(...) with a single argument works on Python 2 and Python 3; the
# original `print r` statement is a syntax error on Python 3.
print(r)

How to get accurate predictions from Neural Network?

I'm doing a project on water quality prediction using an Artificial Neural Network, implemented in Python. I have completed my prediction model, but the generated predictions are not very accurate.
I have collected daily data from a river for the past four and a half years, and I'm predicting the pattern of a specific parameter from past records. Simply put, I need to predict the "Turbidity level" of the water in 2015 by feeding in turbidity data from 2012-2014.
The model I have created is not very accurate when compared to the real data I gathered for 2015. Please help me solve this. I have tried changing the hidden layer size and the Lambda value.
//This is my code
import xlrd
import numpy as np
from numpy import zeros
from scipy.optimize import minimize
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy import optimize
#Neural Network
class Neural_Network(object):
    """Sigmoid network with one hidden layer and L2 weight regularisation.

    Layout: 2 inputs, 10 hidden units, 1 output, no bias terms.
    """

    def __init__(self,Lambda):
        """Draw random weights and store the regularisation strength ``Lambda``."""
        #Define Hyperparameters
        self.inputLayerSize = 2
        self.outputLayerSize = 1
        self.hiddenLayerSize = 10
        #Weights (parameters)
        self.W1 = np.random.randn(self.inputLayerSize,self.hiddenLayerSize)
        self.W2 = np.random.randn(self.hiddenLayerSize,self.outputLayerSize)
        #Regularization Parameter:
        self.Lambda = Lambda

    def forward(self, arrayInput):
        """Propagate inputs through the network; caches z2, a2 and z3 on self."""
        self.z2 = np.dot(arrayInput, self.W1)
        self.a2 = self.sigmoid(self.z2)
        self.z3 = np.dot(self.a2, self.W2)
        yHat = self.sigmoid(self.z3)
        return yHat

    def sigmoid(self, z):
        """Apply the sigmoid activation to a scalar, vector, or matrix."""
        return 1/(1+np.exp(-z))

    def sigmoidPrime(self,z):
        """Gradient of the sigmoid."""
        return np.exp(-z)/((1+np.exp(-z))**2)

    def costFunction(self, arrayInput, arrayOutput):
        """Return the scalar cost for the weights currently stored.

        cost = 0.5 * sum((y - yHat)**2) / N
               + (Lambda / 2) * (sum(W1**2) + sum(W2**2))
        """
        self.yHat = self.forward(arrayInput)
        # np.sum reduces over every element and yields a scalar. The original
        # used the builtin sum(a, b), whose second argument is a start value,
        # and returned a shape-(1,) array.
        data_term = 0.5*np.sum((arrayOutput-self.yHat)**2)/arrayInput.shape[0]
        penalty = (self.Lambda/2)*(np.sum(self.W1**2)+np.sum(self.W2**2))
        return data_term + penalty

    def costFunctionPrime(self, arrayInput, arrayOutput):
        """Return ``(dJdW1, dJdW2)``, the gradient of ``costFunction``."""
        self.yHat = self.forward(arrayInput)
        n_samples = arrayInput.shape[0]
        delta3 = np.multiply(-(arrayOutput-self.yHat), self.sigmoidPrime(self.z3))
        # The 1/N factor and the regularisation term are required for this
        # gradient to match costFunction; the original omitted both.
        dJdW2 = np.dot(self.a2.T, delta3)/n_samples + self.Lambda*self.W2
        delta2 = np.dot(delta3, self.W2.T)*self.sigmoidPrime(self.z2)
        dJdW1 = np.dot(arrayInput.T, delta2)/n_samples + self.Lambda*self.W1
        return dJdW1, dJdW2

    #Helper Functions for interacting with other classes:
    def getParams(self):
        """Return W1 and W2 unrolled into a single vector."""
        params = np.concatenate((self.W1.ravel(), self.W2.ravel()))
        return params

    def setParams(self, params):
        """Set W1 and W2 from a single parameter vector (inverse of getParams)."""
        W1_start = 0
        W1_end = self.hiddenLayerSize * self.inputLayerSize
        self.W1 = np.reshape(params[W1_start:W1_end], (self.inputLayerSize , self.hiddenLayerSize))
        W2_end = W1_end + self.hiddenLayerSize*self.outputLayerSize
        self.W2 = np.reshape(params[W1_end:W2_end], (self.hiddenLayerSize, self.outputLayerSize))

    def computeGradients(self, arrayInput, arrayOutput):
        """Return the analytic gradient as one flat vector (getParams order)."""
        dJdW1, dJdW2 = self.costFunctionPrime(arrayInput, arrayOutput)
        return np.concatenate((dJdW1.ravel(), dJdW2.ravel()))

    def computeNumericalGradient(self,N, X, y):
        """Central-difference gradient of ``N.costFunction`` at N's parameters.

        ``N`` is the network to differentiate; its parameters are restored
        before returning.
        """
        paramsInitial = N.getParams()
        numgrad = np.zeros(paramsInitial.shape)
        perturb = np.zeros(paramsInitial.shape)
        e = 1e-4
        for p in range(len(paramsInitial)):
            #Set perturbation vector
            perturb[p] = e
            N.setParams(paramsInitial + perturb)
            loss2 = N.costFunction(X, y)
            N.setParams(paramsInitial - perturb)
            loss1 = N.costFunction(X, y)
            #Compute Numerical Gradient
            numgrad[p] = (loss2 - loss1) / (2*e)
            #Return the value we changed to zero:
            perturb[p] = 0
        #Return Params to original value:
        N.setParams(paramsInitial)
        return numgrad
#Trainer class
class trainer(object):
    """Fit a network's flat parameter vector with SciPy's BFGS minimiser."""

    def __init__(self, N):
        #Keep a handle on the network being trained:
        self.N = N

    def costFunctionWrapper(self, params, arrayInput, arrayOutput):
        """Return ``(cost, gradient)`` at ``params``; the gradient is numerical."""
        network = self.N
        network.setParams(params)
        cost = network.costFunction(arrayInput, arrayOutput)
        grad = network.computeNumericalGradient(network, arrayInput, arrayOutput)
        return cost, grad

    def callbackF(self, params):
        """Record the training and test cost after each optimiser iteration."""
        network = self.N
        network.setParams(params)
        train_cost = network.costFunction(self.arrayInput, self.arrayOutput)
        self.J.append(train_cost)
        test_cost = network.costFunction(self.TestInput, self.TestOutput)
        self.testJ.append(test_cost)

    def train(self, arrayInput, arrayOutput,TestInput,TestOutput):
        """Minimise the cost on the training data and store the result on self."""
        #Data the callback needs to see:
        self.arrayInput, self.arrayOutput = arrayInput, arrayOutput
        self.TestInput, self.TestOutput = TestInput, TestOutput
        #Cost histories filled in by the callback:
        self.J = []
        self.testJ= []
        start_params = self.N.getParams()
        solver_options = {'maxiter': 200, 'disp' : True}
        _res = optimize.minimize(
            self.costFunctionWrapper,
            start_params,
            jac=True,
            method='BFGS',
            args=(arrayInput, arrayOutput),
            options=solver_options,
            callback=self.callbackF,
        )
        self.N.setParams(_res.x)
        self.optimizationResults = _res
#Main Program
# Raw string: the original literal contained the invalid escape "\p".
# The resulting path is unchanged.
path = r"F:\prototype\newdata\tody\turbidity\c.xlsx"
book = xlrd.open_workbook(path)

#training data set: every column but the last is an input, the last is the target
first_sheet = book.sheet_by_index(1)
input1 = [first_sheet.col_values(col) for col in range(first_sheet.ncols-1)]
output = [first_sheet.col_values(col) for col in range(first_sheet.ncols-1, first_sheet.ncols)]
# transpose so that each spreadsheet row becomes one sample
arrayInput = np.asarray(input1).T
arrayOutput = np.asarray(output).T

#testing data set (same column layout as the training sheet)
first_sheet1 = book.sheet_by_index(0)
testinput = [first_sheet1.col_values(col) for col in range(first_sheet1.ncols-1)]
testoutput = [first_sheet1.col_values(col) for col in range(first_sheet1.ncols-1, first_sheet1.ncols)]
TestInput = np.asarray(testinput).T
TestOutput = np.asarray(testoutput).T

#2016: every column is an input
first_sheet2 = book.sheet_by_index(2)
input2016 = [first_sheet2.col_values(col) for col in range(first_sheet2.ncols)]
Input = np.asarray(input2016).T

# Scaling
# NOTE(review): every array is divided by its own column maximum, so the
# training, test and 2016 data end up on different scales -- confirm that
# this is intended.
arrayInput = arrayInput / np.amax(arrayInput, axis=0)
arrayOutput = arrayOutput / np.amax(arrayOutput, axis=0)
TestInput = TestInput / np.amax(TestInput, axis=0)
Input = Input / np.amax(Input, axis=0)
TestOutput = TestOutput / np.amax(TestOutput, axis=0)

NN=Neural_Network(Lambda=0.00000000000001)
T = trainer(NN)
T.train(arrayInput,arrayOutput,TestInput,TestOutput)
# print(...) with a single argument behaves the same on Python 2 and 3.
print(NN.costFunctionPrime(arrayInput,arrayOutput))
Output = NN.forward(Input)
print(Output)
print('----------')
#print TestOutput
#plt.plot(T.J)
# NOTE(review): the curve plotted is the network prediction, while the axis
# labels below read 'Iterations' / 'cost'.
plt.plot(Output)
plt.grid(1)
plt.xlabel('Iterations')
plt.ylabel('cost')
plt.show()
//Turbidity means 2015 real data and prediction means data predicted using this code
Some of the comments suggest scaling the output sigmoidal layer to match the correct data. If you look at your predictions, you will see that with some scaling they are pretty accurate. I advise against scaling a sigmoidal function, however.
A sigmoidal output is meant to be interpreted as a probability (given certain constraints are followed), so scaling it would be breaking that contract and could give undefined results. What happens if you scale from 0-100, but then start receiving training targets larger than 100? (assuming you are training an online system, otherwise perhaps that example is not relevant)
I would change your code to use a linear output layer. This would not require any manipulation of the data after training the network. Also given that your cost function is least squares, the linear output layer will be convex (which reduces the number of local optima that your algorithm can get stuck in).

Categories

Resources