I'm working on a flood-prediction project. I have a dataset with 10 attributes, and the code below is the call method of a TensorFlow layer class; it takes an input tensor and computes the Cartesian product of the elements in the tensor.
def call(self, input_):
    CP = []
    # a Tensor is not assignable, so you cannot use it on the left-hand side of an
    # assignment; instead, build a Python list of tensors and tf.stack() them after the loop
    for batch in range(self.batch_size):
        xd_shape = [self.m]
        c_shape = [1]
        cp = input_[batch, :, 0]
        for d in range(1, self.n):
            # prepend shape indices
            c_shape.insert(0, self.m)
            xd_shape.insert(0, 1)
            # get the Cartesian product for each dimension
            xd = tf.reshape(input_[batch, :, d], xd_shape)
            c = tf.reshape(cp, c_shape)
            cp = tf.matmul(c, xd)
        flat_cp = tf.reshape(cp, (1, self.m**self.n))
        CP.append(flat_cp)
    return tf.reshape(tf.stack(CP), (self.batch_size, self.m**self.n))
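For what it's worth, here is a sketch of a batch-size-agnostic variant (my own rewrite, under the assumption that the rest of the layer tolerates a dynamic batch dimension); it builds the same flattened products with tf.einsum and never indexes a fixed self.batch_size:

def call(self, input_):
    # hedged sketch, not the original author's code: vectorized over the whole
    # (possibly partial) batch, so a short final batch cannot go out of bounds
    cp = input_[:, :, 0]                                   # (batch, m)
    for d in range(1, self.n):
        cp = tf.einsum('bi,bj->bij', cp, input_[:, :, d])  # (batch, m**d, m)
        cp = tf.reshape(cp, (tf.shape(input_)[0], -1))     # (batch, m**(d+1))
    return cp                                              # (batch, m**n)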
When I try to run the code, it gives me the following error:
InvalidArgumentError: slice index 3 of dimension 0 out of bounds.
    [[node firstAnfis/ruleLayer/strided_slice_31
      (defined at C:\Users\USER\PycharmProjects\FYPtest3\myanfis.py:269)
    ]] [Op:__inference_train_function_12784]
Here are the parameters:
param = myanfis.fis_parameters(
    n_input=10,            # no. of regressors
    n_memb=3,              # no. of fuzzy memberships
    batch_size=16,         # 16 / 32 / 64 / ...
    memb_func='gaussian',  # 'gaussian' / 'gbellmf' / 'sigmoid'
    optimizer='sgd',       # sgd / adam / ...
    loss=tf.keras.losses.MeanAbsoluteError(),  # mse / mae / huber_loss / mean_absolute_percentage_error / ...
    n_epochs=15            # 10 / 25 / 50 / 100 / ...
)
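The error says slice index 3 of dimension 0 is out of bounds; my guess from the traceback is that the last batch of an epoch has fewer than batch_size=16 samples (here apparently 3 or fewer), so input_[batch, :, 0] runs past the real batch dimension inside call(). A minimal sketch of one workaround, where X_train, y_train, and model are hypothetical stand-ins for your own training arrays and compiled ANFIS model, is to drop the short remainder batch:

import tensorflow as tf

# X_train, y_train, model are placeholder names for your own objects
dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
dataset = dataset.batch(16, drop_remainder=True)  # every batch is exactly batch_size
model.fit(dataset, epochs=15)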
Is there any operation that can achieve the following:
import torch
batch_size = 2
seq_len = 2
dim = 3
# batch of sequences of embedding vectors:
x = torch.rand([batch_size, seq_len, dim])
# batch of target embedding vectors:
y = torch.rand([batch_size, dim])
# the computation I want to achieve:
print(torch.outer(x[0][0], y[0]))
print(torch.outer(x[0][1], y[0]))
print(torch.outer(x[1][0], y[1]))
print(torch.outer(x[1][1], y[1]))
print()
What I've tried but failed: torch.einsum('bij,bj->bij', x, y).
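For what it's worth, a sketch that appears to produce exactly those four outer products, assuming the desired result shape is (batch_size, seq_len, dim, dim) (my assumption, since the question only shows the per-pair prints):

out = torch.einsum('bsd,be->bsde', x, y)
# out[b, s] == torch.outer(x[b][s], y[b]); an equivalent broadcasting form:
out = x.unsqueeze(-1) * y[:, None, None, :]

The failed attempt 'bij,bj->bij' keeps only three output axes; an outer product needs both the x-dimension and the y-dimension to survive in the output subscripts.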
I'm new to ML. I've been trying to implement a neural network in Python, but when I use scipy's minimize function with the tnc method I get the following error:
ValueError: tnc: invalid gradient vector.
I looked it up a bit and found this in the SciPy source code:
arr_grad = (PyArrayObject *)PyArray_FROM_OTF((PyObject *)py_grad, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
if (arr_grad == NULL)
{
    PyErr_SetString(PyExc_ValueError, "tnc: invalid gradient vector.");
    goto failure;
}
Edit: This is my implementation of backpropagation and the cost function as methods of the Network class I created. I am currently using a [400 25 10] structure similar to the one used in Andrew Ng's ML Coursera course.
def cost_function(self, theta, x, y):
    u = self.num_layers
    m = len(x)
    Reg = 0  # regularization term init and calculation
    for i in range(u - 1):
        k = np.power(theta[i], 2)
        Reg = np.sum(Reg + np.sum(k))
    Reg = lmbda / (2 * m) * Reg
    h = self.forwardprop(x)[-1]  # activation of the last layer
    J = (-1 / m) * np.sum(np.multiply(y, np.log(h)) + np.multiply((1 - y), np.log(1 - h))) + Reg  # cost function
    return J
def backprop(self, theta, x, y):
    m = len(x)  # number of training examples
    theta = np.asmatrix(theta)
    theta = self.rollPara(theta)  # roll weights into matrices; original shape (1, 10285), after rolling [(25, 401), (26, 10)]
    tot_delta = list(range(self.num_layers - 1))  # accumulated error init
    delta = list(range(self.num_layers - 1))      # error from each example init
    for i in range(m):  # loop for calculating the error
        a = self.forwardprop(x[i:i+1, :])  # activation of each layer for the ith example
        delta[-1] = a[-1] - y[i]  # error of the output layer for the ith example
        for j in range(1, self.num_layers - 1):  # error of each layer for the ith example
            theta_ = theta[-1-j+1][:, 1:]  # weights of the jth layer, counted from the back ('-1' is the last element; the slice excludes the bias units)
            act = (a[:-1])[-1-j+1][:, 1:]  # activation of the current layer (output layer excluded, bias units excluded)
            delta_prv = delta[-1-j+1]      # error of the previous layer
            delta[-1-j] = np.multiply(delta_prv @ theta_, act)  # error of the current layer
        delta = delta[::-1]  # reverse the order of elements, since BP runs from back to front
        for j in range(self.num_layers - 1):  # add the ith example's error to the accumulated error
            tot_delta[j] = tot_delta[j] + np.transpose(delta[j]) @ a[self.num_layers-2-j]  # add the jth layer error from the ith example to the jth layer accumulated error
    ThetaGrad = np.add((1/m) * np.asarray(tot_delta[::-1]), (lmbda/m) * np.asarray(theta))  # gradient
    grad = self.unrollPara(ThetaGrad)
    return grad
maxiter = 500
options = {'maxiter': maxiter}
initTheta = N.unrollPara(N.weights)  # flatten the weights into a vector
res = op.minimize(fun=N.cost_function, x0=initTheta, jac=N.backprop, method='tnc', args=(x, Y), options=options)  # x, Y are the training set, already initialized
The C snippet above is from the SciPy source code.
Thanks in advance.
After carefully reading the source I realized the grad vector has to be a list and not a NumPy array. I'm not sure my implementation works properly yet, but the error is gone.
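For reference, a hedged sketch of that conversion, using the names from the code above; the last lines of backprop would become something like:

# flatten the (1, 10285) np.matrix gradient into a plain Python list of floats
grad = self.unrollPara(ThetaGrad)
return np.asarray(grad, dtype=np.float64).ravel().tolist()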
I have the following loop where I calculate a softmax transform for batches of different sizes, as below:
import numpy as np
from numba import prange  # prange behaves like range outside numba-jitted code

def softmax(Z, arr):
    """
    :param Z: numpy array of any shape (output from the hidden layer)
    :param arr: numpy array holding the (start, end) column indices of each batch
    :return A: output of multinum_logit(Z, arr), same shape as Z
    :return cache: returns Z as well, useful during back propagation
    """
    A = np.zeros(Z.shape)
    for i in prange(len(arr)):
        shiftx = Z[:, arr[i, 1]:arr[i, 2]+1] - np.max(Z[:, arr[i, 1]:arr[i, 2]+1])
        A[:, arr[i, 1]:arr[i, 2]+1] = np.exp(shiftx) / np.exp(shiftx).sum()
    cache = Z
    return A, cache
Since this for loop is not vectorized, it is the bottleneck in my code. What is a possible way to make it faster? I have tried numba's @jit, which makes it a little faster, but not enough. I was wondering if there is another way to make it faster or to vectorize/parallelize it.
Sample input data for the function:
Z = np.random.random([1, 10000])
arr = np.zeros([100, 3])
arr[:, 0] = 1
temp = int(Z.shape[1] / arr.shape[0])
for i in range(arr.shape[0]):
    arr[i, 1] = i * temp
    arr[i, 2] = (i + 1) * temp - 1
arr = arr.astype(int)
EDIT:
I forgot to stress that my number of classes varies. For example, batch 1 may have 10 classes and batch 2 may have 15 classes. Therefore I am passing the array arr, which keeps track of which rows belong to batch 1 and so on. These batches are different from the batches in a traditional neural-network framework.
In the above example, arr keeps track of the starting and ending indices of the rows, so the denominator in the softmax is the sum over only those observations whose index lies between the starting and ending index.
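One possible vectorized rewrite (my own sketch, assuming the segments in arr are sorted, contiguous, and cover all columns of Z, as in the sample data above) replaces the loop with ufunc.reduceat plus np.repeat:

import numpy as np

def softmax_segments(Z, arr):
    starts = arr[:, 1]                    # start column of each segment
    lengths = arr[:, 2] - arr[:, 1] + 1   # segment lengths
    seg_max = np.maximum.reduceat(Z, starts, axis=1)  # per-segment max
    shift = Z - np.repeat(seg_max, lengths, axis=1)   # broadcast max back over each segment
    e = np.exp(shift)
    seg_sum = np.add.reduceat(e, starts, axis=1)      # per-segment normalizer
    return e / np.repeat(seg_sum, lengths, axis=1), Z # (A, cache), matching the original

One subtlety: for a multi-row Z the original loop takes the max and sum over the whole 2-D slice, while this version reduces per row; with the single-row Z in the sample the two agree.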
Here's a vectorized softmax function. It's the implementation of an assignment from Stanford's cs231n course on conv nets.
The function takes in optimizable parameters, input data, targets, and a regularizer. (You can ignore the regularizer as that references another class exclusive to some cs231n assignments).
It returns a loss and gradients of the parameters.
def softmax_loss_vectorized(W, X, y, reg):
    """
    Softmax loss function, vectorized version.
    Inputs and outputs are the same as softmax_loss_naive.
    """
    # Initialize the loss and gradient to zero.
    loss = 0.0
    dW = np.zeros_like(W)

    num_train = X.shape[0]
    scores = X.dot(W)
    shift_scores = scores - np.amax(scores, axis=1).reshape(-1, 1)
    softmax = np.exp(shift_scores) / np.sum(np.exp(shift_scores), axis=1).reshape(-1, 1)
    loss = -np.sum(np.log(softmax[range(num_train), list(y)]))
    loss /= num_train
    loss += 0.5 * reg * np.sum(W * W)

    dSoftmax = softmax.copy()
    dSoftmax[range(num_train), list(y)] += -1
    dW = (X.T).dot(dSoftmax)
    dW = dW / num_train + reg * W
    return loss, dW
For comparison's sake, here is a naive (non-vectorized) implementation of the same method.
def softmax_loss_naive(W, X, y, reg):
    """
    Softmax loss function, naive implementation (with loops).
    Inputs have dimension D, there are C classes, and we operate on minibatches
    of N examples.
    Inputs:
    - W: A numpy array of shape (D, C) containing weights.
    - X: A numpy array of shape (N, D) containing a minibatch of data.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c means
      that X[i] has label c, where 0 <= c < C.
    - reg: (float) regularization strength
    Returns a tuple of:
    - loss as a single float
    - gradient with respect to weights W; an array of the same shape as W
    """
    loss = 0.0
    dW = np.zeros_like(W)

    num_train = X.shape[0]
    num_classes = W.shape[1]
    for i in range(num_train):  # range, not Python 2's xrange
        scores = X[i].dot(W)
        shift_scores = scores - max(scores)
        loss_i = -shift_scores[y[i]] + np.log(sum(np.exp(shift_scores)))
        loss += loss_i
        for j in range(num_classes):
            softmax = np.exp(shift_scores[j]) / sum(np.exp(shift_scores))
            if j == y[i]:
                dW[:, j] += (-1 + softmax) * X[i]
            else:
                dW[:, j] += softmax * X[i]
    loss /= num_train
    loss += 0.5 * reg * np.sum(W * W)
    dW = dW / num_train + reg * W  # note: "dW /= num_train + reg * W" would divide by the wrong quantity
    return loss, dW
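A quick sanity check (my own addition, not part of the cs231n handout) confirming the two implementations agree on small random data:

import numpy as np

np.random.seed(0)
W = 0.01 * np.random.randn(5, 3)  # D=5 features, C=3 classes
X = np.random.randn(8, 5)         # N=8 examples
y = np.random.randint(0, 3, size=8)

loss_v, dW_v = softmax_loss_vectorized(W, X, y, reg=0.1)
loss_n, dW_n = softmax_loss_naive(W, X, y, reg=0.1)
assert np.isclose(loss_v, loss_n) and np.allclose(dW_v, dW_n)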
Below I've attached the error, which originally came as four pictures.
Generally, I'm training my neural network (a 2-3-1 architecture) that consists of two input neurons, 3 neurons in the hidden layer, and 1 output neuron.
So, I trained my network using back propagation, and I am having a small error (specified in the picture).
Can someone help me with that, please?
Error: shapes (200,200) and (1,3) not aligned: 200 (dim 1) != 1 (dim 0)
import numpy as np
import random

# generating the training data set according to the function z = x1^2 + x2^2
input1_train = np.random.uniform(low=-1, high=1, size=(200,))
input2_train = np.random.uniform(low=-1, high=1, size=(200,))
input1_sq_train = input1_train ** 2
input2_sq_train = input2_train ** 2
input_merge = np.column_stack((input1_train, input2_train))
# normalized input data
input_merge = input_merge / np.amax(input_merge, axis=0)
# output of the training data
y_output_train = input1_sq_train + input2_sq_train
# normalized output data
y_output_train = y_output_train / 100

# generating the test data set according to the same function
input1_test = np.random.uniform(low=-1, high=1, size=(100,))
input2_test = np.random.uniform(low=-1, high=1, size=(100,))
input1_sq_test = input1_test ** 2
input2_sq_test = input2_test ** 2
y_output_test = input1_sq_test + input2_sq_test
# merging the two test inputs into one matrix
input_merge1 = np.column_stack((input1_test, input2_test))
# normalized input test data
input_merge1 = input_merge1 / np.amax(input_merge1, axis=0)
# normalized output test data
y_output_test = y_output_test / 100

# generating the validation data set according to the same function
input1_validation = np.random.uniform(low=-1, high=1, size=(50,))
input2_validation = np.random.uniform(low=-1, high=1, size=(50,))
input1_sq_validation = input1_validation ** 2
input2_sq_validation = input2_validation ** 2
input_merge2 = np.column_stack((input1_validation, input2_validation))
# normalized input validation data
input_merge2 = input_merge2 / np.amax(input_merge2, axis=0)
y_output_validation = input1_sq_validation + input2_sq_validation
# normalized output validation data
y_output_validation = y_output_validation / 100
class Neural_Network(object):
    def __init__(self):
        # parameters
        self.inputSize = 2
        self.outputSize = 1
        self.hiddenSize = 3
        # weights
        self.W1 = np.random.randn(self.inputSize, self.hiddenSize)   # (2x3) weight matrix from input to hidden layer
        self.W2 = np.random.randn(self.hiddenSize, self.outputSize)  # (3x1) weight matrix from hidden to output layer

    def forward(self, input_merge):
        # forward propagation through our network
        self.z = np.dot(input_merge, self.W1)  # dot product of X (input) and the first set of 2x3 weights
        self.z2 = self.sigmoid(self.z)         # activation function
        self.z3 = np.dot(self.z2, self.W2)     # dot product of the hidden layer (z2) and the second set of 3x1 weights
        o = self.sigmoid(self.z3)              # final activation function
        return o

    def costFunction(self, input_merge, y_output_train):
        # compute the cost for given X, y, using the weights already stored in the class
        self.o = self.forward(input_merge)
        J = 0.5 * sum((y_output_train - self.o) ** 2)  # note: was self.yHat, which is never defined
        return J

    def costFunctionPrime(self, input_merge, y_output_train):
        # compute the derivative with respect to W1 and W2 for a given X and y
        self.o = self.forward(input_merge)
        delta3 = np.multiply(-(y_output_train - self.o),  # note: was self.yHat
                             self.sigmoidPrime(self.z3))  # note: sigmoidPrime expects an already-activated value, so passing z3 here looks inconsistent
        dJdW2 = np.dot(self.z2.T, delta3)  # note: was self.a2, which is never defined
        delta2 = np.dot(delta3, self.W2.T) * self.sigmoidPrime(self.z2)
        dJdW1 = np.dot(input_merge.T, delta2)
        return dJdW1, dJdW2

    def sigmoid(self, s):
        # activation function
        return 1 / (1 + np.exp(-s))

    def sigmoidPrime(self, s):
        # derivative of the sigmoid, assuming s is already a sigmoid output
        return s * (1 - s)

    def backward(self, input_merge, y_output_train, o):
        # backward propagation through the network
        self.o_error = y_output_train - o                    # error in output
        self.o_delta = self.o_error * self.sigmoidPrime(o)   # applying the derivative of sigmoid to the error
        self.z2_error = self.o_delta.dot(self.W2.T)          # z2 error: how much the hidden-layer weights contributed to the output error
        self.z2_delta = self.z2_error * self.sigmoidPrime(self.z2)  # applying the derivative of sigmoid to the z2 error
        self.W1 += input_merge.T.dot(self.z2_delta)          # adjusting the first set (input -> hidden) of weights
        self.W2 += self.z2.T.dot(self.o_delta)               # adjusting the second set (hidden -> output) of weights

    def train(self, input_merge, y_output_train):
        o = self.forward(input_merge)
        self.backward(input_merge, y_output_train, o)
NN = Neural_Network()
for i in range(1000):  # trains the NN 1,000 times
    # print("Actual Output for training data: \n" + str(y_output_train))
    # print("Predicted Output for training data: \n" + str(NN.forward(input_merge)))
    print("Loss for training: \n"
          + str(np.mean(np.square(y_output_train - NN.forward(input_merge)))))  # mean sum squared loss
    NN.train(input_merge, y_output_train)

# NN.test(input_merge1, y_output_test)
# NN.validation(input_merge2, y_output_validation)
"having small error" is actually a major issue on mat/vec-dimensions:
so, first, it is a fair practice to post MCVE-based formulation of StackOverflow presented problems.
Here, that would mean to also copy the complete Error-Traceback, including the row numbers, where the Traceback has been thrown. Ok, you will get it right next time.
Your problem is not a small error -- your code is principally wrong, as it tries ( at an unknown location ) to process a pair of arrays, that do not match in shape for a yet unknown operation ( it just seems that a .multiply() is the right suspect, but not sure, where it could get called, as there is no clear request to ask for a .costFunctionPrime() method ).
Nevertheless, an attempt was done, somewhere, to process the pair of matrix/vector arrays,
one, being [200,200], the other, being [1,3] simply do not make their processing possible.
So, the error is in your code / syntax. Check it, possibly using a pre-printed shape-checks:
def aFormatSHAPE( anArray ):
    return "[{0: >4d},{1: >4d}]".format( anArray.shape[0],
                                         anArray.shape[1]
                                         )

def aHelperPrintSHAPE( anArray1, anArray2 ):
    try:
        print( "CHK:{0:}-(op)-{1:}".format( aFormatSHAPE( anArray1 ),
                                            aFormatSHAPE( anArray2 )
                                            )
               )
    except:
        pass
    return
Once you repair your code so that it meets all the common matrix-vector algebra rules (how additions, subtractions, multiplications, and dot-products are applied to arrays/vectors), your small error is solved.
You should never see anything like:
CHK:[200,200]-(op)-[1,3]
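For instance (my guess at where the mismatch arises, given the traceback), dropped in just before the suspect line:

# hypothetical placement inside Neural_Network.backward(), before the .dot()
aHelperPrintSHAPE( self.o_delta, self.W2.T )  # would print CHK:[ 200, 200]-(op)-[   1,   3]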
It seems to me your matrix dimensions don't fit: you cannot multiply a (200,200) matrix with a (1,3) one. In simple terms, the number of columns of the first matrix must match the number of rows of the second. Hope this helps.
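Building on both answers, one plausible concrete fix (my assumption, inferred from the shapes in the message): y_output_train has shape (200,) while the network output o has shape (200, 1), so y_output_train - o in backward() broadcasts to (200, 200), and the following .dot(self.W2.T) then fails against the (1, 3) transposed weights. Reshaping the targets into column vectors keeps every delta 2-D and aligned:

# reshape the 1-D targets to column vectors before training
y_output_train = y_output_train.reshape(-1, 1)            # (200,) -> (200, 1)
y_output_test = y_output_test.reshape(-1, 1)              # (100,) -> (100, 1)
y_output_validation = y_output_validation.reshape(-1, 1)  # (50,)  -> (50, 1)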
Hello
I just want to try binary classification with simple logistic regression. I've got output data labeled as {1,0} (whether or not he/she passed the exam).
The cost function returns NaN. What is wrong?
learning_rate = 0.05
total_iterator = 1500
display_per = 100

data = numpy.loadtxt("ex2data1.txt", dtype=numpy.float32, delimiter=",")
training_X = numpy.asarray(data[:, [0, 1]])  # 100 x 2
training_X contains a 100 x 2 matrix of the exam scores, e.g. [98.771 4.817].
training_Y = numpy.asarray(data[:, [2]], dtype=numpy.int)  # 100 x 1
training_Y contains a 100 x 1 array such as [1] [0] [0] [1] (I can't write it line by line due to Stack Overflow formatting).
m = data.shape[0]

x_i = tf.placeholder(tf.float32, [None, 2])  # None x 2
y_i = tf.placeholder(tf.float32, [None, 1])  # None x 1
W = tf.Variable(tf.zeros([2, 1]))  # 2 x 1
b = tf.Variable(tf.zeros([1]))     # 1 x 1
h = tf.nn.softmax(tf.matmul(x_i, W) + b)
cost = tf.reduce_sum(tf.add(tf.multiply(y_i, tf.log(h)),
                            tf.multiply(1 - y_i, tf.log(1 - h)))) / -m
I tried to use a simple logistic cost function and it returned NaN. I thought my cost function was totally garbage, so I used the cost function from TensorFlow's example:
cost = tf.reduce_mean(-tf.reduce_sum(y_i * tf.log(h), reduction_indices=1))
but it didn't work either.
initializer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)  # (despite the name, this is the training op)
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    print("cost: ", sess.run(cost, feed_dict={x_i: training_X, y_i: training_Y}),
          "w: ", sess.run(W), "b: ", sess.run(b))
The function tf.nn.softmax expects the number of logits (the last dimension) to equal the number of classes (2 in your case: {1,0}). Since the last dimension in your case is 1, softmax always returns 1 (the probability of being in the only available class is always 1, since no other class exists). Therefore h is a tensor filled with 1's, and tf.log(1-h) returns negative infinity. Infinity multiplied by zero (1 - y_i in some rows) returns NaN.
You should replace tf.nn.softmax with tf.nn.sigmoid.
A possible fix is:
h = tf.nn.sigmoid(tf.matmul(x_i, W) + b)
cost = tf.reduce_sum(tf.add(tf.multiply(y_i, tf.log(h)),
                            tf.multiply(1 - y_i, tf.log(1 - h)))) / -m
Or better, you can use tf.nn.sigmoid_cross_entropy_with_logits.
In that case, it should be done as follows:
h = tf.matmul(x_i, W) + b
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y_i, logits=h))
This function is more numerically stable than using tf.nn.sigmoid followed by the cross-entropy function, which can return NaN when tf.nn.sigmoid gets near 0 or 1 due to the imprecision of float32.