Related
I implement the log-polar transform and its inversion. The forward transform is ok but the inversion picture shows the incorrect result. I think the issue is on the normalization of the grid coordinates. But I can't figure out how this happend.
LPT:
def log_polar_transform(x, radius_factor = tf.sqrt(2.)):
b, h, w, c = x.shape
grid = make_grid(h, w) #(2, hw), represent log-polar coordinate system
grid = tf.repeat(grid[None, ...], b, axis=0) #(b, 2, hw)
X, Y = grid[:, 0], grid[:, 1]
#theta
theta = (Y + 1) * math.pi #[0, 2pi]
#radius
maxR = max(h, w) * radius_factor
r = tf.exp((X + 1)/2 * tf.math.log(maxR)) #[1, h]
r = (r-1)/(maxR -1) #[0, h]-->[0, 1]
r = r * (maxR/h) #scale factorize
#map to cartesian coordinate system
xs = tf.reshape(r * tf.math.cos(theta), [b, h, w])
ys = tf.reshape(r * tf.math.sin(theta), [b, h, w])
output = interpolate(x, xs, ys)
output = tf.reshape(output, [b, h, w, c])
return output
Inverse LPT:
def inverse_log_polar_transform(x):
b, h, w, c = x.shape
grid = make_grid(h, w)
grid = tf.repeat(grid[None, ...], b, axis=0) #(b, 2, hw)
X, Y = grid[:, 0], grid[:, 1]
rs = tf.sqrt(X**2 + Y **2)/tf.sqrt(2.)
ts = (tf.atan2(-Y, -X))/math.pi #[-1., 1.]
rs = tf.reshape(rs, [b, h, w])
ts = tf.reshape(ts, [b, h, w])
output = interpolate(x, rs, ts)
output = tf.reshape(output, [b, h, w, c])
return output
Here is the full code link:
https://www.kaggle.com/code/tom99763/log-polar-transfomr-inverse
and the reusults:
Results Image
I have to compute score = dot(a, LeakyReLU(x_i+y_j)) for each i, j in [N], where a, x_i, y_j is the D-dimensional vecotr, and dot() is the dot-product that outputs a scalar value. So finally, I have to get NxN score.
In keras, I implemented as:
#given X (N x D), Y(N x D), A (D x 1)
X = tf.expand_dims(X, axis=1) #(N x 1 x D)
Y = tf.expand_dims(Y, axis=0) #(1 x N x D)
feature_sum = X+ Y #(N x N x D) broadcast automatically
dense = K.dot(LeakyReLU(alpha=0.1)(feature_sum), A) # (N x N x 1)
The problem is that feature_sum is GPU-memory expensive, where N,D>1000. Then any other efficient implementation?
The dot product is a commutative operation with respect to the sum. Therefore:
dot(LRelu(X + Y), A) = dot(LRelu(X), A) + dot(LRelu(Y), A)
So, you can do:
dense_x = K.dot(LRelu(X), A)
dense_y = K.dot(LRelu(Y), A)
dense_x = tf.expand_dims(dense_x, axis=1)
dense_y = tf.expand_dims(dense_y, axis=0)
dense = dense_x + dense_y
In this way, all operations are done at most on N x D elements and you only have to store a maximum of N x N elements (assuming N > D).
Quantitative comparisons. N=1000, D=500
def timeit(func):
def run(*args, **kwargs):
start = time.time()
out = func(*args, **kwargs)
end = time.time()
print(f"Exec: {(end-start)*1000:.4f}ms")
return out
return run
#timeit
def fast(X, Y, A, N, D):
X = X.reshape(N, 1, D)
Y = Y.reshape(1, N, D)
feature_sum = X + Y
dense = feature_sum # A
return dense
#timeit
def fast(X, Y, A, N, D):
dense_x = X # A
dense_y = Y # A
dense_x = dense_x.reshape(N, 1, 1)
dense_y = dense_y.reshape(1, N, 1)
dense = dense_x + dense_y
return dense
def main():
N = 1000
D = 500
X = np.random.rand(N, D)
Y = np.random.rand(N, D)
A = np.random.rand(D, 1)
dense1 = slow(X, Y, A, N, D)
dense2 = fast(X, Y, A, N, D)
print("Same result: ", np.allclose(dense1, dense2))
Output:
Exec: 1547.9290ms # slow
Exec: 2.9860ms # fast
Same result: True
I am trying to fit a line to a couple of points using gradient descent. I am no expert on this and tried to write down the mathematical algorithm for it in python. It runs for a couple of iterations, but my predictions seem to explode at some point. Here is the code:
import numpy as np
import matplotlib.pyplot as plt
def mean_squared_error(n, A, b, m, c):
e = 0
for i in range(n):
e += (b[i] - (m*A[i] + c)) ** 2
return e/n
def der_wrt_m(n,A,b,m,c):
d = 0
for i in range(n):
d += (2 * (b[i] - (m*A[i] + c)) * (-A[i]))
return d/n
def der_wrt_c(n,A,b,m,c):
d = 0
for i in range(n):
d += (2 * (b[i] - (m*A[i] + c)))
return d/n
def update(n,A,b,m,c,descent_rate):
return descent_rate * der_wrt_m(n,A,b,m,c)), descent_rate * der_wrt_c(n,A,b,m,c))
A = np.array(((0,1),
(1,1),
(2,1),
(3,1)))
x = A.T[0]
b = np.array((1,2,0,3), ndmin=2 ).T
y = b.reshape(4)
def descent(x,y):
m = 0
c = 0
descent_rate = 0.00001
iterations = 100
n = len(x)
plt.scatter(x, y)
u = np.linspace(0,3,100)
prediction = 0
for itr in range(iterations):
print(m,c)
prediction = prediction + m * x + c
m,c = update(n,x,y,m,c,descent_rate)
plt.plot(u, u * m + c, '-')
descent(x,y)
And that's my output:
0 0
19.25 -10.5
-71335.1953125 24625.9453125
5593771382944640.0 -2166081169939480.2
-2.542705027685638e+48 9.692684648057364e+47
2.40856742196228e+146 -9.202614421953049e+145
-inf inf
nan nan
nan nan
nan nan
nan nan
nan nan
nan nan
etc...
Update: The values aren't exploding anymore, but it's still not converging in a nice manner:
# We could also solve it using gradient descent
import numpy as np
import matplotlib.pyplot as plt
def mean_squared_error(n, A, b, m, c):
e = 0
for i in range(n):
e += ((b[i] - (m * A[i] + c)) ** 2)
#print("mse:",e/n)
return e/n
def der_wrt_m(n,A,b,m,c):
d = 0
for i in range(n):
# d += (2 * (b[i] - (m*A[i] + c)) * (-A[i]))
d += (A[i] * (b[i] - (m*A[i] + c)))
#print("Dm",-2 * d/n)
return (-2 * d/n)
def der_wrt_c(n,A,b,m,c):
d = 0
for i in range(n):
d += (2 * (b[i] - (m*A[i] + c)))
#print("Dc",d/n)
return d/n
def update(n,A,b,m,c, descent_rate):
return (m - descent_rate * der_wrt_m(n,A,b,m,c)),(c - descent_rate * der_wrt_c(n,A,b,m,c))
A = np.array(((0,1),
(1,1),
(2,1),
(3,1)))
x = A.T[0]
b = np.array((1,2,0,3), ndmin=2 ).T
y = b.reshape(4)
def descent(x,y):
m = 0
c = 0
descent_rate = 0.0001
iterations = 10000
n = len(x)
plt.scatter(x, y)
u = np.linspace(0,3,100)
prediction = 0
for itr in range(iterations):
prediction = prediction + m * x + c
m,c = update(n,x,y,m,c,descent_rate)
loss = mean_squared_error(n, A, b, m, c)
print(loss)
print(m,c)
plt.plot(u, u * m + c, '-')
descent(x,y)
And now the graph looks like this after about 10000 iterations with a learning rate of 0.0001:
[4.10833186 5.21468937]
1.503547594304175 -1.9947003678083184
Whereas the least square fit shows something like this:
In your update function, you should subtract calculated gradients from current m and c
def update(n,A,b,m,c,descent_rate):
return m - (descent_rate * der_wrt_m(n,A,b,m,c)), c - (descent_rate * der_wrt_c(n,A,b,m,c))
Update: Here is the working version. I got rid of A matrix after obtaining x,y since it confuses me =). For example in your gradient calculations you have an expression d += (A[i] * (b[i] - (m*A[i] + c))) but it should be d += (x[i] * (b[i] - (m*x[i] + c))) since x[i] gives you a single element whereas A[i] gives you a list.
Also you forgot a minus sign while calculating derivative with respect to c. If your expression is (y - (m*x + c))^2) than derivative with respect to c should be 2 * (-1) * (y - (m*x + c)) since there is a minus in front of c.
# We could also solve it using gradient descent
import numpy as np
import matplotlib.pyplot as plt
def mean_squared_error(n, x, y, m, c):
e = 0
for i in range(n):
e += (m*x[i]+c - y[i])**2
e = e/n
return e/n
def der_wrt_m(n, x, y, m, c):
d = 0
for i in range(n):
d += x[i] * (y[i] - (m*x[i] + c))
d = -2 * d/n
return d
def der_wrt_c(n, x, y, m, c):
d = 0
for i in range(n):
d += (y[i] - (m*x[i] + c))
d = -2 * d/n
return d
def update(n,x,y,m,c, descent_rate):
return (m - descent_rate * der_wrt_m(n,x,y,m,c)),(c - descent_rate * der_wrt_c(n,x,y,m,c))
A = np.array(((0,1),
(1,1),
(2,1),
(3,1)))
x = A.T[0]
b = np.array((1,2,0,3), ndmin=2 ).T
y = b.reshape(4)
print(x)
print(y)
def descent(x,y):
m = 0.0
c = 0.0
descent_rate = 0.01
iterations = 10000
n = len(x)
plt.scatter(x, y)
u = np.linspace(0,3,100)
prediction = 0
for itr in range(iterations):
prediction = prediction + m * x + c
m,c = update(n,x,y,m,c,descent_rate)
loss = mean_squared_error(n, x, y, m, c)
print(loss)
print(loss)
print(m,c)
plt.plot(u, u * m + c, '-')
plt.show()
descent(x,y)
I have two multivariate equations that determine the inverse kinematics of a robot. These equations depend on the variables theta1 and theta2 (the other variables are geometric constants)
import numpy as np
def x(theta1, theta2, w, h, L1, L2):
sint1 = np.sin(theta1)
cost1 = np.cos(theta1)
sint2 = np.sin(theta2)
cost2 = np.cos(theta2)
i1 = L1 * (cost1 + cost2) + w
j1 = L1 * (sint1 - sint2) - h
D = np.sqrt((L1*(cost2-cost1)+w)**2+(L1*(sint2-sint1)+h)**2)
a = (0.25)*np.sqrt((4*L2**2-D**2)*D**2)
return i1/2 + 2*j1*a/(D**2)
def y(theta1, theta2, w, h, L1, L2):
sint1 = np.sin(theta1)
cost1 = np.cos(theta1)
sint2 = np.sin(theta2)
cost2 = np.cos(theta2)
i2 = L1 * (sint1 + sint2) + h
j2 = L1 * (cost1 - cost2) - w
D = np.sqrt((L1*(cost2-cost1)+w)**2+(L1*(sint2-sint1)+h)**2)
a = (0.25)*np.sqrt((4*L2**2-D**2)*D**2)
return i2/2 - 2*j2*a/(D**2)
using these equations I calculate the determinant of the Jacobian matrix (partial derivative matrix) using the second order finite difference method
def det_jacobian(theta1, theta2, w, h, L1, L2,eps):
dxdt1 = (-x(theta1+eps, theta2, w, h, L1, L2)+4*x(theta1, theta2, w, h, L1, L2)-3*x(theta1-eps, theta2, w, h, L1, L2))/(2*eps)
dxdt2 = (-x(theta1, theta2+eps, w, h, L1, L2)+4*x(theta1, theta2, w, h, L1, L2)-3*x(theta1, theta2-eps, w, h, L1, L2))/(2*eps)
dydt1 = (-y(theta1+eps, theta2, w, h, L1, L2)+4*y(theta1, theta2, w, h, L1, L2)-3*y(theta1-eps, theta2, w, h, L1, L2))/(2*eps)
dydt2 = (-y(theta1, theta2+eps, w, h, L1, L2)+4*y(theta1, theta2, w, h, L1, L2)-3*y(theta1, theta2-eps, w, h, L1, L2))/(2*eps)
return dxdt1,dxdt2,dydt1,dydt2
Evaluated for the values of theta1 and theta2 belonging to an interval
theta1 = np.linspace(theta1_min,theta1_max,n)
theta2 = np.linspace(theta2_min,theta2_max,n)
theta1, theta2 = np.meshgrid(theta1,theta2)
What I want to know is if there is an efficient way (using numpy arrays) to calculate the values of x and y where the determinant takes values between -tol and tol (tol=1e-08). Currently I'm using two nested for cycles, but it is very slow
I have written a function using for cycles, but it is very slow
def singularidades(theta1_min,theta1_max, theta2_min,theta2_max, n,tol, w, h, L1, L2,eps):
x_s = []
y_s = []
theta1_s = []
theta2_s = []
det = []
theta1 = np.linspace(theta1_min,theta1_max,n)
theta2 = np.linspace(theta2_min,theta2_max,n)
theta1, theta2 = np.meshgrid(theta1,theta2)
det_jac = det_jacobiano(theta1,theta2,w,h,L1,L2,eps)
for i in range(n):
for j in range(n):
if (g_tol[i,j] and l_tol[i,j]):
x_s.append(x(theta1[i,j], theta2[i,j], w, h, L1, L2))
y_s.append(y(theta1[i,j], theta2[i,j], w, h, L1, L2))
theta1_s.append(theta1[i,j])
theta2_s.append(theta2[i,j])
det.append(det_jac[i,j])
return x_s,y_s,theta1_s,theta2_s,det,(g_tol and l_tol)
Edit: I've modified the det_jacobian function to use it wiht scipy.optimize.root
def det_jacobiano(theta, w, h, L1, L2,eps):
theta1,theta2 = theta
dxdt1 = (-x(theta1+eps, theta2, w, h, L1, L2)+4*x(theta1, theta2, w, h, L1, L2)-3*x(theta1-eps, theta2, w, h, L1, L2))/(2*eps)
dxdt2 = (-x(theta1, theta2+eps, w, h, L1, L2)+4*x(theta1, theta2, w, h, L1, L2)-3*x(theta1, theta2-eps, w, h, L1, L2))/(2*eps)
dydt1 = (-y(theta1+eps, theta2, w, h, L1, L2)+4*y(theta1, theta2, w, h, L1, L2)-3*y(theta1-eps, theta2, w, h, L1, L2))/(2*eps)
dydt2 = (-y(theta1, theta2+eps, w, h, L1, L2)+4*y(theta1, theta2, w, h, L1, L2)-3*y(theta1, theta2-eps, w, h, L1, L2))/(2*eps)
return dxdt1*dydt2 - dxdt2*dydt1
and I'm trying to find the roots using
initial_guess = [2.693, 0.4538]
result = optimize.root(det_jacobiano, initial_guess,tol=1e-8,args=(20,0,100,100,1e-10),method='lm')
But I'm getting the error:
TypeError: Improper input: N=2 must not exceed M=1
You don't need a cycle for that. Your function can work with numpy arrays as well as single values:
def f(x,y):
return np.sin(x + y) / np.sqrt(x**2 + y**2)
x = [0.1, 0.2, 0.3, 0.4, 0.5]
y = [0.1, 0.2, 0.3, 0.4, 0.5]
print(f(x, y))
will return:
[1.40480431, 1.37680175, 1.33087507, 1.26811839, 1.19001968]
which is an array of function values for each pair of x and y
One way to do this is to define:
def f(x,y,a):
return np.sin(x + y) / np.sqrt(x**2 + y**2) - a
where a can take the limits of your interval and then calculate the roots x0 and y0 of this function using scipy.optimize.
These roots correspond to the x and y values of your function between which the function returns the desired values.
I'm trying to implement gradient descent in Simple linear regression. Whenever I run the code, I get the error
The code i am using is this:
def get_data(df, feature, predict):
X = df[feature]
Y = df[predict]
X = np.float64(X)
Y = np.float64(Y)
return X, Y
def average(X, Y, b, m, length):
temp1 = 0
temp2 = 0
for i in range(length):
temp1 += (b + m * X[i]) - Y[i]
temp2 += ((b + m * X[i]) - Y[i]) * X[i]
return temp1 / float(length), temp2 / float(length)
def gradient_descent(b, m, alpha, length, num_iterations, X, Y):
for i in range(num_iterations):
temp1, temp2 = average(X, Y, b, m, length)
b_temp = b - alpha * temp1
m_temp = m - alpha * temp2
b = b_temp
m = m_temp
return b, m
def run(b, m, alpha, feature, predict, df, num_iterations):
X, Y = get_data(df, feature, predict)
length = np.alen(X)
final_b, final_m = gradient(b, m, alpha, length, num_iterations, X, Y)
return final_b, final_m
b, m = run(0, 0, 0.05, 'sqft_living', 'price', df, 1000)
The error it gives my is this:
/Users/*****/anaconda2/lib/python2.7/site-packages/ipykernel_launcher.py:15:
RuntimeWarning: overflow encountered in double_scalars from
ipykernel import kernelapp as app
/Users/*****/anaconda2/lib/python2.7/site-packages/ipykernel_launcher.py:25:
RuntimeWarning: invalid value encountered in double_scalars.
I'm not able to identify which part of the code is causing the error. I tried to convert numpy array to float64 also my code is not running into Divide by Zero Error. Can someone identify the error? Also, How can it be rectified?