import numpy as np
import pandas as pd
import numpy as np
from matplotlib import pyplot as pt
def computeCost(X,y,theta):
m=len(y)
predictions= X*theta-y
sqrerror=np.power(predictions,2)
return 1/(2*m)*np.sum(sqrerror)
def gradientDescent(X, y, theta, alpha, num_iters):
m = len(y)
jhistory = np.zeros((num_iters,1))
for i in range(num_iters):
h = X * theta
s = h - y
theta = theta - (alpha / m) * (s.T*X).T
jhistory_iter = computeCost(X, y, theta)
return theta,jhistory_iter
data = open(r'C:\Users\Coding\Desktop\machine-learning-ex1\ex1\ex1data1.txt')
data1=np.array(pd.read_csv(r'C:\Users\Coding\Desktop\machine-learning-ex1\ex1\ex1data1.txt',header=None))
y =np.array(data1[:,1])
m=len(y)
y=np.asmatrix(y.reshape(m,1))
X = np.array([data1[:,0]]).reshape(m,1)
X = np.asmatrix(np.insert(X,0,1,axis=1))
theta=np.zeros((2,1))
iterations = 1500
alpha = 0.01;
print('Testing the cost function ...')
J = computeCost(X, y, theta)
print('With theta = [0 , 0]\nCost computed = ', J)
print('Expected cost value (approx) 32.07')
theta=np.asmatrix([[-1,0],[1,2]])
J = computeCost(X, y, theta)
print('With theta = [-1 , 2]\nCost computed =', J)
print('Expected cost value (approx) 54.24')
theta,JJ = gradientDescent(X, y, theta, alpha, iterations)
print('Theta found by gradient descent:')
print(theta)
print('Expected theta values (approx)')
print(' -3.6303\n 1.1664\n')
predict1 = [1, 3.5] *theta
print(predict1*10000)
Result:
Testing the cost function ...
With theta = [0 , 0]
Cost computed = 32.072733877455676
Expected cost value (approx) 32.07
With theta = [-1 , 2]
Cost computed = 69.84811062494227
Expected cost value (approx) 54.24
Theta found by gradient descent:
[[-3.70304726 -3.64357517]
[ 1.17367146 1.16769684]]
Expected theta values (approx)
-3.6303
1.1664
[[4048.02858742 4433.63790186]]
There are two problems, the first Cost computed was right, but the second one was wrong. And there are 4 element in my gradient descent(suppose to be two)
When you mention "With theta = [-1 , 2]"
and you enter
theta=np.asmatrix([[-1,0],[1,2]])
I think this is incorrect. Assuming that you have single feature and you added a column of 1, and you are trying to do simple linear regression
The correct way should be
np.array([-1,2])
Also where have
predictions= X*theta-y
It would be better if you did
np.dot(X,theta)-y
When you multiply, it's not doing the same thing.
Related
X = df.drop(columns="Math")
y = df.iloc[:, 4]
theta = np.array([0]*len(X.columns))
def hypothesis(theta, X):
return theta*X
def computeCost(X, y, theta):
y1 = hypothesis(theta, X)
y1=np.sum(y1, axis=1)
return sum(np.sqrt((y1-y)**2))/(2*47)
def gradientDescent(X, y, theta, alpha, i):
J = [] #cost function in each iterations
k = 0
while k < i:
y1 = hypothesis(theta, X)
y1 = np.sum(y1, axis=1)
for c in range(0, len(X.columns)):
theta[c] = theta[c] - alpha*(sum((y1-y)*X.iloc[:,c])/len(X))
j = computeCost(X, y, theta)
J.append(j)
k += 1
return J, j, theta
J, j, theta = gradientDescent(X, y, theta, 0.05, 10000)
The dataset is consists of five columns. The first is the column of ones for the bias term. The second until the last are int64 consisting numerical value from 1-100. The second field represents the Physics scores, the third represents the Science scores, the fourth represents the Statistics scores, while the last one represents the Math scores. I am trying to use the 1st until the 4th column to predict the 5th column (Math)
The error will appear as follows:
OverflowError Traceback (most recent call last)
<ipython-input-26-d17a8fb83984> in <module>()
----> 1 J, j, theta = gradientDescent(X, y, theta, 0.05, 10000)
<ipython-input-25-bfec0d0edcfa> in gradientDescent(X, y, theta, alpha, i)
6 y1 = np.sum(y1, axis=1)
7 for c in range(0, len(X.columns)):
----> 8 theta[c] = theta[c] - alpha*(sum((y1-y)*X.iloc[:,c])/len(X))
9 j = computeCost(X, y, theta)
10 J.append(j)
OverflowError: Python int too large to convert to C
You ran into the error most likely due to a combination of:
Setting theta to be integer with theta = np.array([0]*len(X.columns)). You can do something like np.zeros(np.shape(X)[1])
Setting a learning rate that is too high, you can check your cost or J, it might be increasing indicating the learning rate is too high
Not very sure about your bias terms as 1, this might depend on your range of values.
So if I test your code with a simple example:
import pandas as pd
import numpy as np
np.random.seed(111)
df = pd.DataFrame(np.random.randint(0,100,(50,4)),
columns=['const','Physics','Science','Stats'])
df['const'] = 1
df['Math'] = 0.2*df['Physics'] + 0.4*df['Science'] + 0.5*df['Stats']
Then initialize:
X = df.drop(columns="Math")
y = df.iloc[:, 4]
theta = np.ones(X.shape[1])
Then run with a smaller learning rate:
J, j, theta = gradientDescent(X, y, theta, 0.0001,100)
theta
array([0.98851902, 0.1950524 , 0.39639991, 0.49143374])
I am trying to implement gradient descent in python. Though my code is returning result by I think results I am getting are completely wrong.
Here is the code I have written:
import numpy as np
import pandas
dataset = pandas.read_csv('D:\ML Data\house-prices-advanced-regression-techniques\\train.csv')
X = np.empty((0, 1),int)
Y = np.empty((0, 1), int)
for i in range(dataset.shape[0]):
X = np.append(X, dataset.at[i, 'LotArea'])
Y = np.append(Y, dataset.at[i, 'SalePrice'])
X = np.c_[np.ones(len(X)), X]
Y = Y.reshape(len(Y), 1)
def gradient_descent(X, Y, theta, iterations=100, learningRate=0.000001):
m = len(X)
for i in range(iterations):
prediction = np.dot(X, theta)
theta = theta - (1/m) * learningRate * (X.T.dot(prediction - Y))
return theta
theta = np.random.randn(2,1)
theta = gradient_descent(X, Y, theta)
print('theta',theta)
The result I get after running this program is:
theta [[-5.23237458e+228]
[-1.04560188e+233]]
Which are very high values. Can someone point out the mistake I have made in implementation.
Also, 2nd problem is I have to set value of learning rate very low (in this case i have set to 0.000001) to work other wise program throws an error.
Please help me in diagnosis the problem.
try to reduce the learning rate with iteration otherwise it wont be able to reach the optimal lowest.try this
import numpy as np
import pandas
dataset = pandas.read_csv('start.csv')
X = np.empty((0, 1),int)
Y = np.empty((0, 1), int)
for i in range(dataset.shape[0]):
X = np.append(X, dataset.at[i, 'R&D Spend'])
Y = np.append(Y, dataset.at[i, 'Profit'])
X = np.c_[np.ones(len(X)), X]
Y = Y.reshape(len(Y), 1)
def gradient_descent(X, Y, theta, iterations=50, learningRate=0.01):
m = len(X)
for i in range(iterations):
prediction = np.dot(X, theta)
theta = theta - (1/m) * learningRate * (X.T.dot(prediction - Y))
learningRate/=10;
return theta
theta = np.random.randn(2,1)
theta = gradient_descent(X, Y, theta)
print('theta',theta)
I'm currently working on Andrew Ng's gradient descent exercise using python but keeps getting me the wrong optimal theta. I followed this vectorization cheatsheet for gradient descent --- https://medium.com/ml-ai-study-group/vectorized-implementation-of-cost-functions-and-gradient-vectors-linear-regression-and-logistic-31c17bca9181.
Here is my code:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
def cost_func(X, Y, theta):
m = len(X)
H = X.dot(theta)
J = 1/(2*m) * (H - Y).T.dot(H - Y)
return J
def gradient_descent(X, Y, alpha=0.01, iterations=1500):
#initializing theta as a zero vector
theta = np.zeros(X.shape[1])
#initializing the a list of cost function value
J_list = [cost_func(X, Y, theta)]
m = len(X)
while iterations > 0:
H = X.dot(theta)
delta = (1/m)*X.T.dot(H - Y)
theta = theta - alpha * delta
iterations -= 1
J_list.append(cost_func(X, Y, theta))
return theta, J_list
def check_convergence(J_list):
plt.plot(range(len(J_list)), J_list)
plt.xlabel('Iterations')
plt.ylabel('Cost J')
plt.show()
file_name_1 = 'https://raw.githubusercontent.com/kaleko/CourseraML/master/ex1/data/ex1data1.txt'
df1 = pd.read_csv(file_name_1, header=None)
X = df1.values[:, 0]
Y = df1.values[:, 1]
m = len(X)
X = np.column_stack((np.ones(m), X))
theta_optimal, J_list = gradient_descent(X, Y, 0.01, 1500)
print(theta_optimal)
check_convergence(J_list)
My theta output is [-3.63029144 1.16636235], which is incorrect.
Here is my cost function graph. As you see, it converges way too quickly.
The correct graph should look like.
Thank you.
I am trying to write a program to calculate the slope and the intercept of a linear regression model but when I am running more than 10 iterations, the gradient descent function gives the np.nan value for both intercept as well as slope.
Below is my implementation
def get_gradient_at_b(x, y, b, m):
N = len(x)
diff = 0
for i in range(N):
x_val = x[i]
y_val = y[i]
diff += (y_val - ((m * x_val) + b))
b_gradient = -(2/N) * diff
return b_gradient
def get_gradient_at_m(x, y, b, m):
N = len(x)
diff = 0
for i in range(N):
x_val = x[i]
y_val = y[i]
diff += x_val * (y_val - ((m * x_val) + b))
m_gradient = -(2/N) * diff
return m_gradient
def step_gradient(b_current, m_current, x, y, learning_rate):
b_gradient = get_gradient_at_b(x, y, b_current, m_current)
m_gradient = get_gradient_at_m(x, y, b_current, m_current)
b = b_current - (learning_rate * b_gradient)
m = m_current - (learning_rate * m_gradient)
return [b, m]
def gradient_descent(x, y, learning_rate, num_iterations):
b = 0
m = 0
for i in range(num_iterations):
b, m = step_gradient(b, m, x, y, learning_rate)
return [b,m]
I am running it on the following data:
a=[3.87656018e+11, 4.10320300e+11, 4.15730874e+11, 4.52699998e+11,
4.62146799e+11, 4.78965491e+11, 5.08068952e+11, 5.99592902e+11,
6.99688853e+11, 8.08901077e+11, 9.20316530e+11, 1.20111177e+12,
1.18695276e+12, 1.32394030e+12, 1.65661707e+12, 1.82304993e+12,
1.82763786e+12, 1.85672212e+12, 2.03912745e+12, 2.10239081e+12,
2.27422971e+12, 2.60081824e+12]
b=[3.3469950e+10, 3.4784980e+10, 3.3218720e+10, 3.6822490e+10,
4.4560290e+10, 4.3826720e+10, 5.2719430e+10, 6.3842550e+10,
8.3535940e+10, 1.0309053e+11, 1.2641405e+11, 1.6313218e+11,
1.8529536e+11, 1.7875143e+11, 2.4981555e+11, 3.0596392e+11,
3.0040058e+11, 3.1440530e+11, 3.1033848e+11, 2.6229109e+11,
2.7585243e+11, 3.0352616e+11]
print(gradient_descent(a, b, 0.01, 100))
#result --> [nan, nan]
When I run the gradient_descent function on a dataset with smaller values, it gives the correct answers. Also I was able to obtain the intercept and slope for the above data with from sklearn.linear_model import LinearRegression
Any help will be appreciated in figuring out why the result is [nan, nan] instead of giving me the correct intercept and slope.
You need to reduce the learning rate. Since the values in a and b are so large (>= 1e11), the learning rate needs be approximately 1e-25 for this to even do the gradient descent, else it will randomly overshoot because of large gradients of a and b.
b, m = gradient_descent(a, b, 5e-25, 100)
print(b, m)
Out: -3.7387067636195266e-13 0.13854551291084335
I'm trying to implement regularized logistic regression using python for the coursera ML class but I'm having a lot of trouble vectorizing it. Using this repository:
I've tried many different ways but never get the correct gradient or cost heres my current implementation:
h = utils.sigmoid( np.dot(X, theta) )
J = (-1/m) * ( y.T.dot( np.log(h) ) + (1 - y.T).dot( np.log( 1 - h ) ) ) + ( lambda_/(2*m) ) * np.sum( np.square(theta[1:]) )
grad = ((1/m) * (h - y).T.dot( X )).T + grad_theta_reg
Here are the results:
Cost : 0.693147
Expected
cost: 2.534819
Gradients:
[-0.100000, -0.030000, -0.080000, -0.130000]
Expected gradients:
[0.146561, -0.548558, 0.724722, 1.398003]
Any help from someone who knows whats going on would be much appreciated.
Bellow a working snippet of a vectorized version of Logistic Regression. You can see more here https://github.com/hzitoun/coursera_machine_learning_matlab_python
Main
theta_t = np.array([[-2], [-1], [1], [2]])
data = np.arange(1, 16).reshape(3, 5).T
X_t = np.c_[np.ones((5,1)), data/10]
y_t = (np.array([[1], [0], [1], [0], [1]]) >= 0.5) * 1
lambda_t = 3
J, grad = lrCostFunction(theta_t, X_t, y_t, lambda_t), lrGradient(theta_t, X_t, y_t, lambda_t, flattenResult=False)
print('\nCost: f\n', J)
print('Expected cost: 2.534819\n')
print('Gradients:\n')
print(' f \n', grad)
print('Expected gradients:\n')
print(' 0.146561\n -0.548558\n 0.724722\n 1.398003\n')
lrCostFunction
from sigmoid import sigmoid
import numpy as np
def lrCostFunction(theta, X, y, reg_lambda):
"""LRCOSTFUNCTION Compute cost and gradient for logistic regression with
regularization
J = LRCOSTFUNCTION(theta, X, y, lambda) computes the cost of using
theta as the parameter for regularized logistic regression and the
gradient of the cost w.r.t. to the parameters.
"""
m, n = X.shape #number of training examples
theta = theta.reshape((n,1))
prediction = sigmoid(X.dot(theta))
cost_y_1 = (1 - y) * np.log(1 - prediction)
cost_y_0 = -1 * y * np.log(prediction)
J = (1.0/m) * np.sum(cost_y_0 - cost_y_1) + (reg_lambda/(2.0 * m)) * np.sum(np.power(theta[1:], 2))
return J
lrGradient
from sigmoid import sigmoid
import numpy as np
def lrGradient(theta, X,y, reg_lambda, flattenResult=True):
m,n = X.shape
theta = theta.reshape((n,1))
prediction = sigmoid(np.dot(X, theta))
errors = np.subtract(prediction, y)
grad = (1.0/m) * np.dot(X.T, errors)
grad_with_regul = grad[1:] + (reg_lambda/m) * theta[1:]
firstRow = grad[0, :].reshape((1,1))
grad = np.r_[firstRow, grad_with_regul]
if flattenResult:
return grad.flatten()
return grad
Hope that helped!