Neural network from scratch - predict single example - python

Here is a neural network I've modified from Coursera Deep Learning Specialization to train on a dataset containing a flattened array of training data :
%reset -s -f
import numpy as np
import math
def sigmoid(x):
return 1 / (1 + np.exp(-x))
def initialize_with_zeros(dim):
w = np.zeros(shape=(dim, 1))
b = 0
return w, b
X = np.array([[1,1,1,1],[1,0,1,0] , [1,1,1,0], [0,0,0,0], [0,1,0,0], [0,1,0,1]])
Y = np.array([[1,0,1,1,1,1]])
X = X.reshape(X.shape[0], -1).T
Y = Y.reshape(Y.shape[0], -1).T
print('X shape' , X.shape)
print('Y shape' , Y.shape)
b = 1
w, b = initialize_with_zeros(4)
def propagate(w, b, X, Y) :
m = X.shape[1]
A = sigmoid(np.dot(w.T, X) + b) # compute activation
cost = (- 1 / m) * np.sum(Y * np.log(A) + (1 - Y) * (np.log(1 - A))) # compute cost
dw = (1./m)*np.dot(X,((A-Y).T))
db = (1./m)*np.sum(A-Y, axis=1)
cost = np.squeeze(cost)
grads = {"dw": dw,
"db": db}
return grads, cost
propagate(w , b , X , Y)
learning_rate = .001
costs = []
def optimize(w , b, X , Y) :
for i in range(2):
grads, cost = propagate(w=w, b=b, X=X, Y=Y)
dw = grads["dw"]
db = grads["db"]
w = w - learning_rate*dw
b = b - learning_rate*db
if i % 100 == 0:
costs.append(cost)
return w , b
w , b = optimize(w , b , X , Y)
def predict(w, b, X):
m = 6
Y_prediction = np.zeros((1,m))
# w = w.reshape(X.shape[0], 1)
A = sigmoid(np.dot(w.T, X) + b)
for i in range(A.shape[1]):
if A[0, i] >= 0.5:
Y_prediction[0, i] = 1
else:
Y_prediction[0, i] = 0
return Y_prediction
predict(w , b, X)
This works as expected but I'm struggling to predict a single example.
If I use :
predict(w , b, X[0])
returns error :
ValueError: shapes (6,4) and (6,) not aligned: 4 (dim 1) != 6 (dim 0)
How to re-arrange matrix operation in order to predict a single instance ?

Try
predict(w, b, X[:1])
It seems like you predict function expects X to be 2-d, when passing only one X it should have a singleton second dimension (i.e., shape=(6,1)) rather than being a single dimension (i.e., shape=(6,)).

The error comes from the fact that predict expect to be called on a batch of data of shape ... * bs. In order to predict on a single element you can create a batch of size 1 using np.expand_dims :
predict(w, b, np.expand_dims(X[0], axis=1)
should work.

Related

How can I reshape two different shapes to be able to compute them?

I am trying to get this code to work but I keep getting value errors for shapes. when I tried np.reshape(w,(103,126)) I got another error saying the size was 103 and could not reshape. Any help would be greatly appreciated.
def run_epoch(X,Y,w,b):
print("w",w.shape)
for i in range(len(X)):
print(len(X))
print("X[i] shape",X[i].shape)
print("X[i] shape",X[102].shape)
a = np.sum(np.reshape(w,(126,103)) * X[i]) + b
#a = np.sum(w * X[i]) + b
if(Y[i]*a <= 0):
w = w + Y[i] * X[i]
b = b + Y[i]
#print("a,w,b: ",a, w, b)
return w,b
def perceptron_train(X,Y):
w = np.zeros(X.shape[0])
b = 0.0
temp_w, temp_b = run_epoch(X,Y,w,b)
n_epoch = 1
while( n_epoch <= 50 ):
#print("Epoch: ", n_epoch)
w,b = run_epoch(X,Y,w,b)
n_epoch += 1
return w,b
def perceptron_test(X_test, Y_test, w, b):
a = np.sum(X_test * w, axis = 1) + b
y = np.where(a>0, 1,-1)
acc = (y == Y_test).sum()/len(Y_test)
return acc
I get the first error when I try to reshape
and the second says it can't broadcast.

Neural Networks Using Python and NumPy

I am newbie to NN and I am trying to implement NN with Python/Numpy from the code I found at:
"Create a Simple Neural Network in Python from Scratch"
enter link description here
My input array is:
array([[5.71, 5.77, 5.94],
[5.77, 5.94, 5.51],
[5.94, 5.51, 5.88],
[5.51, 5.88, 5.73]])
Output array is:
array([[5.51],
[5.88],
[5.73],
[6.41]])
after running the code, I see following results which are not correct:
synaptic_weights after training
[[1.90625275]
[2.54867698]
[1.07698312]]
outputs after training
[[1.]
[1.]
[1.]
[1.]]
Here is the core of the code:
for iteration in range(1000):
input_layer = tr_input
outputs = sigmoid(np.dot(input_layer, synapic_weights))
error = tr_output - outputs
adjustmnets = error * sigmoid_derivative(outputs)
synapic_weights +=np.dot(input_layer.T, adjustmnets )
print('synaptic_weights after training')
print(synapic_weights)
print('outputs after training')
print(outputs)
What should I change in this code so it works for my data? Or shall I take different method? Any help is highly appreciated.
That's because you are using a wrong activation function (i.e. sigmoid). The main reason why we use sigmoid function is because it exists between (0 to 1). Therefore, it is especially used for models where we have to predict the probability as an output.Since probability of anything exists only between the range of 0 and 1, sigmoid is the right choice.
If you want to train a model to predict the values in your array, you should use a regression model. Otherwise, you can convert your output into labels (for example are 5.x to 0 and 6.x to 1) and retrain your model.
These are the steps involved in my neural network implementation.
Randomly initialize weights (θ Theta)
Implement forward propagation
Compute cost function
Implement back propagation to compute partial derivative
Use gradient descent
def forward_prop(X, theta_list):
m = X.shape[0]
a_list = []
z_list = []
a_list.append(np.insert(X, 0, values=np.ones(m), axis=1))
idx = 0
for idx, thera in enumerate(theta_list):
z_list.append(a_list[idx] * (theta_list[idx].T))
if idx != (len(theta_list)-1):
a_list.append(np.insert(sigmoid(z_list[idx]), 0, values=np.ones(m), axis=1))
else:
a_list.append(sigmoid(z_list[idx]))
return a_list, z_list
def back_prop(params, input_size, hidden_layers, num_labels, X, y, regularization, regularize):
m = X.shape[0]
X = np.matrix(X)
y = np.matrix(y)
theta_list = []
startCount = 0
idx = 0
for idx, val in enumerate(hidden_layers):
if idx == 0:
startCount = val * (input_size + 1)
theta_list.append(np.matrix(np.reshape(params[:startCount], (val, (input_size + 1)))))
if idx != 0:
tempCount = startCount
startCount += (val * (hidden_layers[idx-1] + 1))
theta_list.append(np.matrix(np.reshape(params[tempCount:startCount], (val, (hidden_layers[idx-1] + 1)))))
if idx == (len(hidden_layers)-1):
theta_list.append(np.matrix(np.reshape(params[startCount:], (num_labels, (val + 1)))))
a_list, z_list= forward_prop(X, theta_list)
J = cost(X, y, a_list[len(a_list)-1], theta_list, regularization, regularize)
d_list = []
d_list.append(a_list[len(a_list)-1] - y)
idx = 0
while idx < (len(theta_list)-1):
d_temp = np.multiply(d_list[idx] * theta_list[len(a_list) - 2 - idx], sigmoid_gradient(a_list[len(a_list) - 2 - idx]))
d_list.append(d_temp[:,1:])
idx += 1
delta_list = []
for theta in theta_list:
delta_list.append(np.zeros(theta.shape))
for idx, delta in enumerate(delta_list):
delta_list[idx] = delta_list[idx] + ((d_list[len(d_list) - 1 -idx].T) * a_list[idx])
delta_list[idx] = delta_list[idx] / m
if regularize:
for idx, delta in enumerate(delta_list):
delta_list[idx][:, 1:] = delta_list[idx][:, 1:] + (theta_list[idx][:, 1:] * regularization)
grad_list = np.ravel(delta_list[0])
idx = 1
while idx < (len(delta_list)):
grad_list = np.concatenate((grad_list, np.ravel(delta_list[idx])), axis=None)
idx += 1
return J, grad_list
def cost(X, y, h, theta_list, regularization, regularize):
m = X.shape[0]
X = np.matrix(X)
y = np.matrix(y)
J = (np.multiply(-y, np.log(h)) - np.multiply((1 - y), np.log(1 - h))).sum() / m
if regularize:
regularization_value = 0.0
for theta in theta_list:
regularization_value += np.sum(np.power(theta[:, 1:], 2))
J += (float(regularization) / (2 * m)) * regularization_value
return J
Implementation

Gradient Descent is not converging for very large values in a small dataset

I am trying to write a program to calculate the slope and the intercept of a linear regression model but when I am running more than 10 iterations, the gradient descent function gives the np.nan value for both intercept as well as slope.
Below is my implementation
def get_gradient_at_b(x, y, b, m):
N = len(x)
diff = 0
for i in range(N):
x_val = x[i]
y_val = y[i]
diff += (y_val - ((m * x_val) + b))
b_gradient = -(2/N) * diff
return b_gradient
def get_gradient_at_m(x, y, b, m):
N = len(x)
diff = 0
for i in range(N):
x_val = x[i]
y_val = y[i]
diff += x_val * (y_val - ((m * x_val) + b))
m_gradient = -(2/N) * diff
return m_gradient
def step_gradient(b_current, m_current, x, y, learning_rate):
b_gradient = get_gradient_at_b(x, y, b_current, m_current)
m_gradient = get_gradient_at_m(x, y, b_current, m_current)
b = b_current - (learning_rate * b_gradient)
m = m_current - (learning_rate * m_gradient)
return [b, m]
def gradient_descent(x, y, learning_rate, num_iterations):
b = 0
m = 0
for i in range(num_iterations):
b, m = step_gradient(b, m, x, y, learning_rate)
return [b,m]
I am running it on the following data:
a=[3.87656018e+11, 4.10320300e+11, 4.15730874e+11, 4.52699998e+11,
4.62146799e+11, 4.78965491e+11, 5.08068952e+11, 5.99592902e+11,
6.99688853e+11, 8.08901077e+11, 9.20316530e+11, 1.20111177e+12,
1.18695276e+12, 1.32394030e+12, 1.65661707e+12, 1.82304993e+12,
1.82763786e+12, 1.85672212e+12, 2.03912745e+12, 2.10239081e+12,
2.27422971e+12, 2.60081824e+12]
b=[3.3469950e+10, 3.4784980e+10, 3.3218720e+10, 3.6822490e+10,
4.4560290e+10, 4.3826720e+10, 5.2719430e+10, 6.3842550e+10,
8.3535940e+10, 1.0309053e+11, 1.2641405e+11, 1.6313218e+11,
1.8529536e+11, 1.7875143e+11, 2.4981555e+11, 3.0596392e+11,
3.0040058e+11, 3.1440530e+11, 3.1033848e+11, 2.6229109e+11,
2.7585243e+11, 3.0352616e+11]
print(gradient_descent(a, b, 0.01, 100))
#result --> [nan, nan]
When I run the gradient_descent function on a dataset with smaller values, it gives the correct answers. Also I was able to obtain the intercept and slope for the above data with from sklearn.linear_model import LinearRegression
Any help will be appreciated in figuring out why the result is [nan, nan] instead of giving me the correct intercept and slope.
You need to reduce the learning rate. Since the values in a and b are so large (>= 1e11), the learning rate needs be approximately 1e-25 for this to even do the gradient descent, else it will randomly overshoot because of large gradients of a and b.
b, m = gradient_descent(a, b, 5e-25, 100)
print(b, m)
Out: -3.7387067636195266e-13 0.13854551291084335

Pricing American Stock Option with TensorFlow Neural Network , Simulate by Monte Carlo

so what I try to do is to simulate with Monte Carlo a American Option (Stock) and use TensorFlow to price it.
I use two helper function , get_continuation_function to create the TF operators. And the pricing_function to create the computational graph for the pricing.
The npv operator is sum of the optimal exercise decisions. At each time I check if the exercise value is greater than the predicted continuation value (in other words, whether the option is in the money).
And the actual pricing function is american_tf. I execute the function to create the paths, the exercise values for the training path. Then, I iterate backward through the training_functions and learn the value and decision on each exercise date.
def get_continuation_function():
X = tf.placeholder(tf.float32, (None,1),name="X")
y = tf.placeholder(tf.float32, (None,1),name="y")
w = tf.Variable(tf.random_uniform((1,1))*0.1,,name="w")
b = tf.Variable(initial_value = tf.ones(1)*1,name="b")
y_hat = tf.add(tf.matmul(X, w), b)
pre_error = tf.pow(y-y_hat,2)
error = tf.reduce_mean(pre_error)
train = tf.train.AdamOptimizer(0.1).minimize(error)
return(X, y, train, w, b, y_hat)
def pricing_function(number_call_dates):
S = tf.placeholder(tf.float32,name="S")
# First excerise date
dts = tf.placeholder(tf.float32,name="dts")
# 2nd exersice date
K = tf.placeholder(tf.float32,name="K")
r = tf.placeholder(tf.float32,,name="r")
sigma = tf.placeholder(tf.float32,name="sigma")
dW = tf.placeholder(tf.float32,name="dW")
S_t = S * tf.cumprod(tf.exp((r-sigma**2/2) * dts + sigma * tf.sqrt(dts) * dW), axis=1)
E_t = tf.exp(-r * tf.cumsum(dts)) * tf.maximum(K-S_t, 0)
continuationValues = []
training_functions = []
previous_exersies = 0
npv = 0
for i in range(number_call_dates-1):
(input_x, input_y, train, w, b, y_hat) = get_continuation_function()
training_functions.append((input_x, input_y, train, w, b, y_hat))
X = tf.keras.activations.relu(S_t[:, i])
contValue = tf.add(tf.matmul(X, w),b)
continuationValues.append(contValue)
inMoney = tf.cast(tf.greater(E_t[:,i], 0.), tf.float32)
exercise = tf.cast(tf.greater(E_t[:,i], contValue[:,0]), tf.float32) * inMoney * (1-previous_exersies)
previous_exersies += exercise
npv += exercise*E_t[:,i]
# Last exercise date
inMoney = tf.cast(tf.greater(E_t[:,-1], 0.), tf.float32)
exercise = inMoney * (1-previous_exersies)
npv += exercise*E_t[:,-1]
npv = tf.reduce_mean(npv)
return([S, dts, K, r, sigma,dW, S_t, E_t, npv, training_functions])
def american_tf(S_0, strike, M, impliedvol, riskfree_r, random_train, random_pricing):
n_exercise = len(M)
with tf.Session() as sess:
S,dts,K,r,sigma,dW,S_t,E_t,npv,training_functions = pricing_function(n_exercise)
sess.run(tf.global_variables_initializer())
paths, exercise_values = sess.run([S_t,E_t], {
S: S_0,
dts: M,
K: strike,
r: riskfree_r,
sigma: impliedvol,
dW: random_train
})
for i in range(n_exercise-1)[::-1]:
(input_x,input_y,train,w,b,y_hat) = training_functions[i]
y= exercise_values[:,i+1:i+2]
X = paths[:,i]
print(input_x.shape)
print((exercise_values[:,i]>0).shape)
for epochs in range(100):
_ = sess.run(train, {input_x:X[exercise_values[:,i]>0],
input_y:y[exercise_values[:,i]>0]})
cont_value = sess.run(y_hat, {input_x:X, input_y:y})
exercise_values[:,i+1:i+2] = np.maximum(exercise_values[:,i+1:i+2], cont_value)
npv = sess.run(npv, {S: S_0, K: strike, r: riskfree_r, sigma: impliedvol, dW: N_pricing})
return npv
N_samples_learn = 1000
N_samples_pricing = 1000
calldates = 12
N = np.random.randn(N_samples_learn,calldates)
N_pricing = np.random.randn(N_samples_pricing,calldates)
american_tf(100., 90., [1.]*calldates, 0.25, 0.05, N, N_pricing)
Calldates is the number of steps
training sample set = 1000
test sample size = 1000
But my error is very weird
---> 23 nput_y:y[exercise_values[:,i]>0]})
ValueError: Cannot feed value of shape (358,) for Tensor 'Placeholder_441:0', which has shape '(?, 1)'
There are a bunch of things discussed in comment with #hallo12. I just want to upload a working version incorporating all the changes. The code is tested and runs without error. But to make sure the final training output is correct, you may want to compare against some benchmark.
General comment: It's good to separate the variable and time dimension in this type of application, especially when you only have 1 variable. For example, your input array should be 3D with
[time, training sample, input variable]
rather than 2D with [training sample, time]. This way when you iterate over the time dimension, the rest of the dimensions are kept unchanged.
import tensorflow as tf
import numpy as np
def get_continuation_function():
X = tf.placeholder(tf.float32, (None,1),name="X")
y = tf.placeholder(tf.float32, (None,1),name="y")
w = tf.Variable(tf.random_uniform((1,1))*0.1,name="w")
b = tf.Variable(initial_value = tf.ones(1)*1,name="b")
y_hat = tf.add(tf.matmul(X, w), b)
pre_error = tf.pow(y-y_hat,2)
error = tf.reduce_mean(pre_error)
train = tf.train.AdamOptimizer(0.1).minimize(error)
return(X, y, train, w, b, y_hat)
def pricing_function(number_call_dates):
S = tf.placeholder(tf.float32,name="S")
# First excerise date
dts = tf.placeholder(tf.float32,name="dts")
# 2nd exersice date
K = tf.placeholder(tf.float32,name="K")
r = tf.placeholder(tf.float32,name="r")
sigma = tf.placeholder(tf.float32,name="sigma")
dW = tf.placeholder(tf.float32,name="dW")
S_t = S * tf.cumprod(tf.exp((r-sigma**2/2) * dts + sigma * tf.sqrt(dts) * dW), axis=1)
E_t = tf.exp(-r * tf.cumsum(dts)) * tf.maximum(K-S_t, 0)
continuationValues = []
training_functions = []
previous_exersies = 0
npv = 0
for i in range(number_call_dates-1):
(input_x, input_y, train, w, b, y_hat) = get_continuation_function()
training_functions.append((input_x, input_y, train, w, b, y_hat))
X = tf.keras.activations.relu(S_t[:, i:i+1])
contValue = tf.add(tf.matmul(X, w),b)
continuationValues.append(contValue)
inMoney = tf.cast(tf.greater(E_t[:,i], 0.), tf.float32)
exercise = tf.cast(tf.greater(E_t[:,i], contValue[:,0]), tf.float32) * inMoney * (1-previous_exersies)
previous_exersies += exercise
npv += exercise*E_t[:,i]
# Last exercise date
inMoney = tf.cast(tf.greater(E_t[:,-1], 0.), tf.float32)
exercise = inMoney * (1-previous_exersies)
npv += exercise*E_t[:,-1]
npv = tf.reduce_mean(npv)
return([S, dts, K, r, sigma,dW, S_t, E_t, npv, training_functions])
def american_tf(S_0, strike, M, impliedvol, riskfree_r, random_train, random_pricing):
n_exercise = len(M)
with tf.Session() as sess:
S,dts,K,r,sigma,dW,S_t,E_t,npv,training_functions = pricing_function(n_exercise)
sess.run(tf.global_variables_initializer())
paths, exercise_values = sess.run([S_t,E_t], {
S: S_0,
dts: M,
K: strike,
r: riskfree_r,
sigma: impliedvol,
dW: random_train
})
for i in range(n_exercise-1)[::-1]:
(input_x,input_y,train,w,b,y_hat) = training_functions[i]
y= exercise_values[:,i+1:i+2]
X = paths[:,i]
print(input_x.shape)
print((exercise_values[:,i]>0).shape)
for epochs in range(100):
_ = sess.run(train, {input_x:(X[exercise_values[:,i]>0]).reshape(len(X[exercise_values[:,i]>0]),1),
input_y:(y[exercise_values[:,i]>0]).reshape(len(y[exercise_values[:,i]>0]),1)})
cont_value = sess.run(y_hat, {input_x:X.reshape(len(X),1), input_y:y.reshape(len(y),1)})
exercise_values[:,i+1:i+2] = np.maximum(exercise_values[:,i+1:i+2], cont_value)
npv = sess.run(npv, {S: S_0, K: strike, dts:M, r: riskfree_r, sigma: impliedvol, dW: N_pricing})
return npv
N_samples_learn = 1000
N_samples_pricing = 1000
calldates = 12
N = np.random.randn(N_samples_learn,calldates)
N_pricing = np.random.randn(N_samples_pricing,calldates)
print(american_tf(100., 90., [1.]*calldates, 0.25, 0.05, N, N_pricing))

Trying to plot a simple function - python

I implemented a simple linear regression and I want to try it out by fitting a non linear model
specifically I am trying to fit a model for the function y = x^3 + 5 for example
this is my code
import numpy as np
import numpy.matlib
import matplotlib.pyplot as plt
def predict(X,W):
return np.dot(X,W)
def gradient(X, Y, W, regTerm=0):
return (-np.dot(X.T, Y) + np.dot(np.dot(X.T,X),W))/(m*k) + regTerm * W /(n*k)
def cost(X, Y, W, regTerm=0):
m, k = Y.shape
n, k = W.shape
Yhat = predict(X, W)
return np.trace(np.dot(Y-Yhat,(Y-Yhat).T))/(2*m*k) + regTerm * np.trace(np.dot(W,W.T)) / (2*n*k)
def Rsquared(X, Y, W):
m, k = Y.shape
SSres = cost(X, Y, W)
Ybar = np.mean(Y,axis=0)
Ybar = np.matlib.repmat(Ybar, m, 1)
SStot = np.trace(np.dot(Y-Ybar,(Y-Ybar).T))
return 1-SSres/SStot
m = 10
n = 200
k = 1
trX = np.random.rand(m, n)
trX[:, 0] = 1
for i in range(2, n):
trX[:, i] = trX[:, 1] ** i
trY = trX[:, 1] ** 3 + 5
trY = np.reshape(trY, (m, k))
W = np.random.rand(n, k)
numIter = 10000
learningRate = 0.5
for i in range(0, numIter):
W = W - learningRate * gradient(trX, trY, W)
domain = np.linspace(0,1,100000)
powerDomain = np.copy(domain)
m = powerDomain.shape[0]
powerDomain = np.reshape(powerDomain, (m, 1))
powerDomain = np.matlib.repmat(powerDomain, 1, n)
for i in range(1, n):
powerDomain[:, i] = powerDomain[:, 0] ** i
print(Rsquared(trX, trY, W))
plt.plot(trX[:, 1],trY,'o', domain, predict(powerDomain, W),'r')
plt.show()
the R^2 I'm getting is very close to 1, meaning I found a very good fit to the training data, but it isn't shown on the plots. When I plot the data, it usually looks like this:
it looks as if I'm underfitting the data, but with such a complex hypothesis, with 200 features (meaning i allow polynomials up to x^200) and only 10 training examples, I should very clearly be overfitting data, so I expect the red line to pass through all the blue points and go wild between them.
This isn't what I'm getting which is confusing to me.
What's wrong?
You forgot to set powerDomain[:,0]=1, that's why your plot goes wrong at 0. And yes you are over fitting: look how quickly your plot fires up as soon as you get out of your training domain.

Categories

Resources