I'm trying to implement gradient descent for simple linear regression. Whenever I run the code, I get the error shown below.
The code I am using is this:
def get_data(df, feature, predict):
    X = df[feature]
    Y = df[predict]
    X = np.float64(X)
    Y = np.float64(Y)
    return X, Y

def average(X, Y, b, m, length):
    temp1 = 0
    temp2 = 0
    for i in range(length):
        temp1 += (b + m * X[i]) - Y[i]
        temp2 += ((b + m * X[i]) - Y[i]) * X[i]
    return temp1 / float(length), temp2 / float(length)

def gradient_descent(b, m, alpha, length, num_iterations, X, Y):
    for i in range(num_iterations):
        temp1, temp2 = average(X, Y, b, m, length)
        b_temp = b - alpha * temp1
        m_temp = m - alpha * temp2
        b = b_temp
        m = m_temp
    return b, m
def run(b, m, alpha, feature, predict, df, num_iterations):
    X, Y = get_data(df, feature, predict)
    length = np.alen(X)
    final_b, final_m = gradient_descent(b, m, alpha, length, num_iterations, X, Y)
    return final_b, final_m

b, m = run(0, 0, 0.05, 'sqft_living', 'price', df, 1000)
The error it gives me is this:
/Users/*****/anaconda2/lib/python2.7/site-packages/ipykernel_launcher.py:15: RuntimeWarning: overflow encountered in double_scalars
  from ipykernel import kernelapp as app
/Users/*****/anaconda2/lib/python2.7/site-packages/ipykernel_launcher.py:25: RuntimeWarning: invalid value encountered in double_scalars
I'm not able to identify which part of the code is causing the error. I tried converting the NumPy arrays to float64, and my code is not running into a divide-by-zero error. Can someone identify the error, and how can it be rectified?
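For what it's worth, a minimal sketch of one common fix, assuming the overflow comes from the large raw sqft_living values making the gradients huge, so that alpha = 0.05 overshoots: standardize the feature before running the descent. The scaling step is an illustration, not part of the original code.

import numpy as np

X, Y = get_data(df, 'sqft_living', 'price')
X = (X - X.mean()) / X.std()  # standardize so the gradients stay O(1)

b, m = gradient_descent(0, 0, 0.05, len(X), 1000, X, Y)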
I am writing a program that runs entirely from a main() function. I am unable to change main() (this is for a class). How would I go about carrying a variable over from one function to the next without changing main()?
def read_data(fname):
    file = fname
    x = []
    y = []
    with open(file) as f:
        for line in f:
            xi, yi = [float(x) for x in line.split(",")]
            x.append(xi)
            y.append(yi)
    return x, y

def compute_m_and_b(x, y):
    sx, sy, sx2, sxy, sy2 = 0, 0, 0, 0, 0
    for i in range(len(x)):
        sx += x[i]
        sy += y[i]
        sx2 += (x[i] ** 2)
        sy2 += (y[i] ** 2)
        sxy += (x[i] * y[i])
    m = (sxy * len(x) - sx * sy) / (sx2 * len(x) - sx**2)
    b = (sy - m * sx) / len(x)
    return m, b

def compute_fx_residual(x, y, m, b):
    fx = []
    for xi in x:
        fx.append(m * xi + b)
    residual = []
    for i in range(len(y)):
        residual.append(y[i] - fx[i])
    return fx, residual

def compute_sum_of_squared_residuals(residual):
    least_squares_r = 0
    for i in range(len(y)):
        least_squares_r += (residual[i]) ** 2
    return least_squares_r

def compute_total_sum_of_squares(y):
    sum_squares = 0
    ymean = sum(y) / len(y)
    for i in range(len(y)):
        sum_squares += (yi - ymean) ** 2
    return sum_squares
As you can see, I am restricted to using only the variables listed in each function's parameter list. This leads to variables calculated in prior functions being undefined (for example, y inside compute_sum_of_squared_residuals and yi inside compute_total_sum_of_squares). How can I access them without needing to change main()?
Please let me know if I should be more clear. I can provide more examples, but I wanted to maintain some brevity.
EDIT: here is the main function:
def main():
    fname = input("Enter Input Filename: ")
    x, y = regress.read_data(fname)
    print()
    print("Input File: ", fname)
    print("Data points: ", len(x))
    # compute coefficients m and b
    m, b = regress.compute_m_and_b(x, y)
    # compute fx and residual
    fx, residual = regress.compute_fx_residual(x, y, m, b)
    # compute sum of squared residuals
    least_squares_r = regress.compute_sum_of_squared_residuals(residual)
    # compute sum of squares
    sum_squares = regress.compute_total_sum_of_squares(y)
    # compute coefficient of determination
    coeff_of_d = regress.compute_coeff_of_determination(least_squares_r, sum_squares)
    regress.print_least_squares(x, y, m, b, fx, residual, least_squares_r, sum_squares, coeff_of_d)
    # compute pearson coefficient
    pearson_r = regress.compute_pearson_coefficient(x, y)
    regress.print_pearson(pearson_r)
    return

main()
You haven't provided the main function so it's unclear how you're currently using it.
Looks to me like you can just get the variable for each consecutive function and pass them into the next:
fname = "some/path/to/file.txt"
x, y = read_data(fname)
m, b = compute_m_and_b(x, y)
fx, residual = compute_fx_residual(x, y, m, b)
least_squares_r = compute_sum_of_squared_residuals(residual)
sum_squares = compute_total_sum_of_squares(y)
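Since the stray names live inside the helper functions themselves, they can also be fixed without touching main(). A sketch of the two offending functions, rewritten to use only their own parameters:

def compute_sum_of_squared_residuals(residual):
    least_squares_r = 0
    for i in range(len(residual)):  # loop over residual, not the undefined y
        least_squares_r += residual[i] ** 2
    return least_squares_r

def compute_total_sum_of_squares(y):
    sum_squares = 0
    ymean = sum(y) / len(y)
    for i in range(len(y)):
        sum_squares += (y[i] - ymean) ** 2  # y[i], not the undefined yi
    return sum_squares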
X = df.drop(columns="Math")
y = df.iloc[:, 4]
theta = np.array([0]*len(X.columns))
def hypothesis(theta, X):
    return theta*X

def computeCost(X, y, theta):
    y1 = hypothesis(theta, X)
    y1 = np.sum(y1, axis=1)
    return sum(np.sqrt((y1-y)**2))/(2*47)

def gradientDescent(X, y, theta, alpha, i):
    J = []  # cost function in each iteration
    k = 0
    while k < i:
        y1 = hypothesis(theta, X)
        y1 = np.sum(y1, axis=1)
        for c in range(0, len(X.columns)):
            theta[c] = theta[c] - alpha*(sum((y1-y)*X.iloc[:,c])/len(X))
        j = computeCost(X, y, theta)
        J.append(j)
        k += 1
    return J, j, theta
J, j, theta = gradientDescent(X, y, theta, 0.05, 10000)
The dataset consists of five columns. The first is a column of ones for the bias term. The second through the fifth are int64 columns holding numerical values from 1-100: the second holds the Physics scores, the third the Science scores, the fourth the Statistics scores, and the last the Math scores. I am trying to use the first through fourth columns to predict the fifth (Math).
The error appears as follows:
OverflowError Traceback (most recent call last)
<ipython-input-26-d17a8fb83984> in <module>()
----> 1 J, j, theta = gradientDescent(X, y, theta, 0.05, 10000)
<ipython-input-25-bfec0d0edcfa> in gradientDescent(X, y, theta, alpha, i)
6 y1 = np.sum(y1, axis=1)
7 for c in range(0, len(X.columns)):
----> 8 theta[c] = theta[c] - alpha*(sum((y1-y)*X.iloc[:,c])/len(X))
9 j = computeCost(X, y, theta)
10 J.append(j)
OverflowError: Python int too large to convert to C long
You most likely ran into the error due to a combination of:
- Setting theta to an integer array with theta = np.array([0]*len(X.columns)). You can use something like np.zeros(np.shape(X)[1]) instead (see the sketch after this list).
- Setting a learning rate that is too high. Check your cost J: if it is increasing, the learning rate is too high.
- I'm not very sure about your bias terms being 1; this might depend on your range of values.
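A quick illustration of the first point (my own example, not from the question): assigning a float update into an integer array silently truncates toward zero, so theta can never take fractional steps.

import numpy as np

theta = np.array([0, 0])          # integer dtype, like np.array([0]*len(X.columns))
theta[0] = theta[0] - 0.05 * 3.7  # the float update -0.185 is truncated to 0
print(theta)                      # [0 0] -- the parameters never move

theta = np.zeros(2)               # float64 initialization keeps the fraction
theta[0] = theta[0] - 0.05 * 3.7
print(theta)                      # [-0.185  0.   ]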
So if I test your code with a simple example:
import pandas as pd
import numpy as np

np.random.seed(111)
df = pd.DataFrame(np.random.randint(0, 100, (50, 4)),
                  columns=['const', 'Physics', 'Science', 'Stats'])
df['const'] = 1
df['Math'] = 0.2*df['Physics'] + 0.4*df['Science'] + 0.5*df['Stats']
Then initialize:
X = df.drop(columns="Math")
y = df.iloc[:, 4]
theta = np.ones(X.shape[1])
Then run with a smaller learning rate:
J, j, theta = gradientDescent(X, y, theta, 0.0001, 100)
theta
array([0.98851902, 0.1950524 , 0.39639991, 0.49143374])
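These estimates are close to the 0.2, 0.4 and 0.5 coefficients used to generate Math above; the bias term starts at 1 and, at such a small learning rate, moves only slowly over 100 iterations.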
I have started machine learning and wrote this code, but for some reason I am getting a zig-zag error curve instead of a smoothly decreasing, logarithm-like one. For now, form_binary_classes does nothing but take the start and end indices of two similar datasets with different labels. The error function returns the error at every iteration (most probably this is where the bug is), and get_acc returns the accuracy. gradient_descent is used to return the trained weights and bias term. I am looking only for the bug, not for a more efficient method.
def hypothesis(x, theta, b):
    h = np.dot(x, theta) + b
    return sigmoid(h)

def sigmoid(z):
    return 1.0/(1.0 + np.exp(-1.0*z))

def error(y_true, x, w, b):
    m = x.shape[0]
    err = 0.0
    for i in range(m):
        hx = hypothesis(x[i], w, b)
        if hx == 0:
            err += (1 - y_true[i])*np.log2(1 - hx)
        elif hx == 1:
            err += y_true[i]*np.log2(hx)
        else:
            err += y_true[i]*np.log2(hx) + (1 - y_true[i])*np.log2(1 - hx)
    return -err/m

def get_gradient(y_true, x, w, b):
    grad_w = np.zeros(w.shape)
    grad_b = 0.0
    m = x.shape[0]
    for i in range(m):
        hx = hypothesis(x[i], w, b)
        grad_w += (y_true[i] - hx)*x[i]
        grad_b += (y_true[i] - hx)
    grad_w /= m
    grad_b /= m
    return [grad_w, grad_b]

def gradient_descent(y_true, x, w, b, learning_rate=0.1):
    err = error(y_true, x, w, b)
    grad_w, grad_b = get_gradient(y_true, x, w, b)
    w = w + learning_rate*grad_w
    b = b + learning_rate*grad_b
    return err, w, b

def predict(x, w, b):
    confidence = hypothesis(x, w, b)
    if confidence < 0.5:
        return 0
    else:
        return 1

def get_acc(x_tst, y_tst, w, b):
    y_pred = []
    for i in range(y_tst.shape[0]):
        p = predict(x_tst[i], w, b)
        y_pred.append(p)
    y_pred = np.array(y_pred)
    return float((y_pred == y_tst).sum())/y_tst.shape[0]
def form_binary_classes(a_start, a_end, b_start, b_end):
    x = np.vstack((X[a_start:a_end], X[b_start:b_end]))
    y = np.hstack((Y[a_start:a_end], Y[b_start:b_end]))
    print("{} {}".format(x.shape, y.shape[0]))
    loss = []
    acc = []
    w = 2*np.random.random((x.shape[1],))
    b = 5*np.random.random()
    for i in range(100):
        l, w, b = gradient_descent(y, x, w, b, learning_rate=0.5)
        acc.append(get_acc(X_test, Y_test, w, b))
        loss.append(l)
    plt.plot(loss)
    plt.ylabel("Negative of Log Likelihood")
    plt.xlabel("Time")
    plt.show()
(Both plots omitted.) The loss plot zig-zags up and down over time, whereas it should decrease smoothly and flatten out.
You have an issue in computing the error, and that may well be why your model is not converging. In your code, consider the corner cases: if hx==0 or hx==1, the error you compute is zero either way, even when the prediction is wrong, e.g. hx==0 while y_true=1. In that case we enter the first if, and the error contribution is (1-1)*log2(1) = 0, which is not correct.
You can solve this issue by modifying your first two ifs in this way:
def error(y_true, x, w, b):
    m = x.shape[0]
    err = 0.0
    for i in range(m):
        hx = hypothesis(x[i], w, b)
        if hx == y_true[i]:  # corner cases where the error is exactly zero
            err += 0
        elif (hx == 1 and y_true[i] == 0) or (hx == 0 and y_true[i] == 1):  # corner cases that would take log2 of zero
            err += np.iinfo(np.int32).min  # an approximation of log2(0): penalize the model with the largest error possible
        else:
            err += y_true[i]*np.log2(hx) + (1 - y_true[i])*np.log2(1 - hx)
    return -err/m
In this code I assumed the labels are binary (0 or 1).
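Alternatively, here is a sketch (my variation, not from the original answer) that sidesteps the corner cases entirely by clipping the predictions away from 0 and 1, so the logs are always finite; eps is an arbitrary small constant:

def error(y_true, x, w, b, eps=1e-12):
    # Clip predictions into (eps, 1 - eps) so log2 never sees exactly 0 or 1.
    m = x.shape[0]
    err = 0.0
    for i in range(m):
        hx = np.clip(hypothesis(x[i], w, b), eps, 1 - eps)
        err += y_true[i]*np.log2(hx) + (1 - y_true[i])*np.log2(1 - hx)
    return -err/m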
I am trying to write a program to calculate the slope and intercept of a linear regression model, but when I run more than 10 iterations, the gradient descent function gives np.nan for both the intercept and the slope.
Below is my implementation:
def get_gradient_at_b(x, y, b, m):
    N = len(x)
    diff = 0
    for i in range(N):
        x_val = x[i]
        y_val = y[i]
        diff += (y_val - ((m * x_val) + b))
    b_gradient = -(2/N) * diff
    return b_gradient

def get_gradient_at_m(x, y, b, m):
    N = len(x)
    diff = 0
    for i in range(N):
        x_val = x[i]
        y_val = y[i]
        diff += x_val * (y_val - ((m * x_val) + b))
    m_gradient = -(2/N) * diff
    return m_gradient

def step_gradient(b_current, m_current, x, y, learning_rate):
    b_gradient = get_gradient_at_b(x, y, b_current, m_current)
    m_gradient = get_gradient_at_m(x, y, b_current, m_current)
    b = b_current - (learning_rate * b_gradient)
    m = m_current - (learning_rate * m_gradient)
    return [b, m]

def gradient_descent(x, y, learning_rate, num_iterations):
    b = 0
    m = 0
    for i in range(num_iterations):
        b, m = step_gradient(b, m, x, y, learning_rate)
    return [b, m]
I am running it on the following data:
a=[3.87656018e+11, 4.10320300e+11, 4.15730874e+11, 4.52699998e+11,
4.62146799e+11, 4.78965491e+11, 5.08068952e+11, 5.99592902e+11,
6.99688853e+11, 8.08901077e+11, 9.20316530e+11, 1.20111177e+12,
1.18695276e+12, 1.32394030e+12, 1.65661707e+12, 1.82304993e+12,
1.82763786e+12, 1.85672212e+12, 2.03912745e+12, 2.10239081e+12,
2.27422971e+12, 2.60081824e+12]
b=[3.3469950e+10, 3.4784980e+10, 3.3218720e+10, 3.6822490e+10,
4.4560290e+10, 4.3826720e+10, 5.2719430e+10, 6.3842550e+10,
8.3535940e+10, 1.0309053e+11, 1.2641405e+11, 1.6313218e+11,
1.8529536e+11, 1.7875143e+11, 2.4981555e+11, 3.0596392e+11,
3.0040058e+11, 3.1440530e+11, 3.1033848e+11, 2.6229109e+11,
2.7585243e+11, 3.0352616e+11]
print(gradient_descent(a, b, 0.01, 100))
#result --> [nan, nan]
When I run the gradient_descent function on a dataset with smaller values, it gives the correct answers. I was also able to obtain the intercept and slope for the above data with sklearn.linear_model.LinearRegression.
Any help in figuring out why the result is [nan, nan] instead of the correct intercept and slope would be appreciated.
You need to reduce the learning rate. Since the values in a and b are so large (>= 1e11), the learning rate needs to be approximately 1e-25 for this to even perform gradient descent; otherwise it overshoots wildly because the gradients are enormous. Roughly, the m-gradient is on the order of mean(a*b) ≈ 1e23, so a learning rate near 1e-25 keeps each step at a sensible size (~0.01-0.1).
b, m = gradient_descent(a, b, 5e-25, 100)
print(b, m)
Out: -3.7387067636195266e-13 0.13854551291084335
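An alternative sketch, if a 1e-25 learning rate feels too fragile (my suggestion, not from the original answer): rescale the data to roughly unit magnitude, fit with an ordinary learning rate, then undo the scaling on the fitted coefficients. The x and y names below are my renaming of the a and b lists, since b also names the intercept; 0.5 and 1000 are illustrative choices.

import numpy as np

x = np.array(a)
y = np.array(b)  # the data list b from the question, copied before the name is reused

sx, sy = x.max(), y.max()
bs, ms = gradient_descent(x / sx, y / sy, 0.5, 1000)

# Undo the scaling: y = sy*(ms*(x/sx) + bs)  =>  slope = ms*sy/sx, intercept = bs*sy
slope = ms * sy / sx
intercept = bs * sy
print(intercept, slope)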
I'm getting hung up trying to create a contour plot from three [1, m] NumPy matrices. When I try to plot:
plt.contour(reg.theta[0,:], reg.theta[1,:], reg.J[0,:])
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
I think this has something to do with the fact that
In[167]: shape(reg.theta[0,:])
Out[167]: (1, 15000)
and contour wants something along the lines of (15000,)
I've tried to create a meshgrid similar to how I would in MATLAB, using

X, Y = meshgrid(reg.theta[0,:], reg.theta[1,:])

which raises

ValueError: total size of new array must be unchanged
However, I have no idea how to interpret this error. Please advise, Stack Overflow!
Here is my complete code, tested on Python 2.7:
from numpy import *
import matplotlib.pyplot as plt

class LinearRegression:
    def __init__(self, data):
        self.data_loc = data
        self.max_it = 15000
        self.theta = matrix(ones((2, self.max_it)))
        self.alpha = .01
        self.J = matrix(zeros((1, self.max_it)))

    def importData(self):
        Data = loadtxt(self.data_loc, delimiter=',')
        rawData = matrix(Data)
        x = rawData[:,0]
        y = rawData[:,1]
        [m, n] = shape(x)
        x_0 = matrix(ones((m, 1)))
        x = concatenate((x_0, x), axis=1)
        return x, y, m, n

    def center(self, x):
        x = x / mean(x)
        return x

    def scale(self, x):
        x = x / std(x)
        return x

    def funct(self, m, b):
        return lambda x: m*x + b

    def cost(self, x, y, m, mx, b):
        f = self.funct(mx, b)
        err = power((f(x) - y), 2).sum()
        err = err / (2*len(x))
        return err

    def gradientDesc(self, x, y, m):
        for i in range(0, self.max_it - 1):
            error = (1.0/m) * transpose(x) * ((x * self.theta[:,i]) - y)
            delta = self.theta[:,i] - self.alpha*error
            self.J[0,i] = self.cost(x, y, m, self.theta[1,i], self.theta[0,i])
            self.theta[:, i+1] = delta
        print('Calculation Complete')
        return self.theta, error

reg = LinearRegression('/users/michael/documents/machine_learning/ex1/ex1data1.txt')
x, y, m, n = reg.importData()
theta, error = reg.gradientDesc(x, y, m)
print(reg.theta[:, reg.max_it - 1])

XX = linspace(0, m, m)
J = reg.theta[0, reg.max_it-1] + reg.theta[1, reg.max_it-1] * XX
plt.plot(XX, J)
plt.scatter(x[:,1], y)
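A sketch of one way to get the contour plot, assuming the script above has already run (my suggestion, not part of the original post): plt.contour needs a 2-D Z evaluated on a meshgrid, so flatten the (1, 15000) matrix rows with np.asarray(...).ravel() and compute the cost over a grid of (theta_0, theta_1) pairs. mse_cost is my straightforward reimplementation of the squared-error cost, since the class's cost() applies m*x + b to the whole design matrix.

import numpy as np
import matplotlib.pyplot as plt

# Flatten the (1, 15000) matrix rows into plain (15000,) arrays.
t0_path = np.asarray(reg.theta[0, :]).ravel()
t1_path = np.asarray(reg.theta[1, :]).ravel()

# Plain 1-D copies of the data for the cost evaluation.
xv = np.asarray(x[:, 1]).ravel()
yv = np.asarray(y).ravel()

def mse_cost(t0, t1):
    # Standard squared-error cost for y ~ t0 + t1*x.
    return ((t0 + t1*xv - yv) ** 2).sum() / (2.0 * m)

# Evaluate the cost on a grid spanning the descent path (grid size arbitrary).
t0_grid, t1_grid = np.meshgrid(np.linspace(t0_path.min(), t0_path.max(), 100),
                               np.linspace(t1_path.min(), t1_path.max(), 100))
J_grid = np.vectorize(mse_cost)(t0_grid, t1_grid)

plt.contour(t0_grid, t1_grid, J_grid, 30)
plt.plot(t0_path, t1_path, 'r.', markersize=2)  # overlay the descent path
plt.xlabel('theta_0')
plt.ylabel('theta_1')
plt.show()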