I implemented the conjugate gradient method using TensorFlow to solve a sparse linear system.
The matrix I used to test the method is well conditioned, as it is the sum of a mass matrix and a stiffness matrix obtained with finite elements.
I compared it with the same method implemented in SciPy, running on the same data.
The solutions obtained with either method are the same; however, TensorFlow is 5 times slower (I tested in the Colab environment).
In the Colab environment, SciPy ran in 0.27 s, while TensorFlow required 1.37 s.
Why is the algorithm so slow under TensorFlow?
I cannot cast to dense matrices, as I want to use the method with matrices of large size (100k × 100k or more).
Thanks,
Cesare
Here is the code I used to test this:
import tensorflow as tf
import numpy as np
from scipy.sparse import coo_matrix,linalg
import os
import sys
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
from time import time
from scipy.spatial import Delaunay
def create_mesh(Lx=1, Ly=1, Nx=100, Ny=100):
    mesh0 = dict()
    dx = Lx/Nx
    dy = Ly/Ny
    XX, YY = np.meshgrid(np.arange(0, Lx+dx, dx), np.arange(0, Ly+dy, dy))
    points = np.vstack((XX.ravel(), YY.ravel())).T
    #np.random.shuffle(points)
    tri = Delaunay(points)
    mesh0['Pts'] = np.copy(points).astype(np.float32)
    mesh0['Tria'] = np.copy(tri.simplices).astype(int)
    return mesh0
def eval_connectivity(mesh0):
    print('computing mesh connectivity')
    npt = mesh0['Pts'].shape[0]
    connectivity = {}
    for jpt in range(npt):
        connectivity[jpt] = []
    for Tria in mesh0['Tria']:
        for ilpt in range(3):
            iglobalPt = Tria[ilpt]
            for jlpt in range(1+ilpt, 3):
                jglobalPt = Tria[jlpt]
                connectivity[iglobalPt].append(jglobalPt)
                connectivity[jglobalPt].append(iglobalPt)
    for key, value in connectivity.items():
        connectivity[key] = np.unique(np.array(value, dtype=int))
    return connectivity
def eval_local_mass(mesh0, iTri):
    lmass = np.zeros(shape=(3, 3), dtype=np.float32)
    Tria = mesh0['Tria'][iTri]
    v10 = mesh0['Pts'][Tria[1], :] - mesh0['Pts'][Tria[0], :]
    v20 = mesh0['Pts'][Tria[2], :] - mesh0['Pts'][Tria[0], :]
    N12 = np.cross(v10, v20)
    Tsurf = 0.5*np.linalg.norm(N12)
    for ipt in range(3):
        lmass[ipt, ipt] = 1.0/12.0
        for jpt in range(1+ipt, 3):
            lmass[ipt, jpt] = 1.0/24.0
            lmass[jpt, ipt] = lmass[ipt, jpt]
    lmass = 2.0*Tsurf*lmass
    return lmass
def eval_local_stiffness(mesh0, iTri):
    Tria = mesh0['Tria'][iTri]
    v10 = mesh0['Pts'][Tria[1], :] - mesh0['Pts'][Tria[0], :]
    v20 = mesh0['Pts'][Tria[2], :] - mesh0['Pts'][Tria[0], :]
    N12 = np.cross(v10, v20)
    Tsurf = 0.5*np.linalg.norm(N12)
    covbT = np.zeros(shape=(3, 3), dtype=np.float32)
    covbT[0, :2] = v10
    covbT[1, :2] = v20
    covbT[2, 2] = N12/(2*Tsurf)
    contrb = np.linalg.inv(covbT)
    v1 = contrb[:, 0]
    v2 = contrb[:, 1]
    a = np.dot(v1, v1)
    b = np.dot(v1, v2)
    c = np.dot(v2, v2)
    gij_c = np.array([[a, b], [b, c]], dtype=np.float32)
    lgrad = np.array([[-1.0, 1.0, 0.0], [-1.0, 0.0, 1.0]], dtype=np.float32)
    lstif = Tsurf*np.matmul(np.matmul(lgrad.T, gij_c), lgrad)
    return lstif
def compute_vectors_sparse_matrices(mesh0):
    npt = mesh0['Pts'].shape[0]
    connect = eval_connectivity(mesh0)
    nzero = 0
    for key, value in connect.items():
        nzero += (1 + value.shape[0])
    I = np.zeros(shape=(nzero), dtype=int)
    J = np.zeros(shape=(nzero), dtype=int)
    VM = np.zeros(shape=(nzero), dtype=np.float32)
    VS = np.zeros(shape=(nzero), dtype=np.float32)
    k0 = np.zeros(shape=(npt+1), dtype=int)
    k0[0] = 0
    k = -1
    for jpt in range(npt):
        loc_con = connect[jpt].tolist()[:]
        loc_con.append(jpt)
        loc_con = np.sort(loc_con)
        k0[jpt+1] = k0[jpt] + loc_con.shape[0]
        for jloc in range(loc_con.shape[0]):
            k = k + 1
            I[k] = jpt
            J[k] = loc_con[jloc]
    for iTr, Tria in enumerate(mesh0['Tria']):
        lstiff = eval_local_stiffness(mesh0, iTr)
        lmass = eval_local_mass(mesh0, iTr)
        for iEntry, irow in enumerate(Tria):
            loc_con = connect[irow].tolist()[:]
            loc_con.append(irow)
            loc_con = np.sort(loc_con)
            for jEntry, jcol in enumerate(Tria):
                indexEntry = k0[irow] + np.where(loc_con == jcol)[0]
                VM[indexEntry] = VM[indexEntry] + lmass[iEntry, jEntry]
                VS[indexEntry] = VS[indexEntry] + lstiff[iEntry, jEntry]
    return I, J, VM, VS
def compute_global_sparse_matrices(mesh0):
    I, J, VM, VS = compute_vectors_sparse_matrices(mesh0)
    npt = mesh0['Pts'].shape[0]
    MASS = coo_matrix((VM, (I, J)), shape=(npt, npt))
    STIFF = coo_matrix((VS, (I, J)), shape=(npt, npt))
    return MASS, STIFF
def compute_global_sparse_tensors(mesh0):
    I, J, VM, VS = compute_vectors_sparse_matrices(mesh0)
    npt = mesh0['Pts'].shape[0]
    indices = np.hstack([I[:, np.newaxis], J[:, np.newaxis]])
    MASS = tf.sparse.SparseTensor(indices=indices, values=VM.astype(np.float32), dense_shape=[npt, npt])
    STIFF = tf.sparse.SparseTensor(indices=indices, values=VS.astype(np.float32), dense_shape=[npt, npt])
    return MASS, STIFF
def compute_matrices_scipy(mesh0):
    MASS, STIFF = compute_global_sparse_matrices(mesh0)
    return MASS, STIFF
def compute_matrices_tensorflow(mesh0):
    MASS, STIFF = compute_global_sparse_tensors(mesh0)
    return MASS, STIFF
def conjgrad_scipy(A, b, x0, niter=100, toll=1.e-5):
    x = np.copy(x0)
    r = b - A * x
    p = np.copy(r)
    rsold = np.dot(r, r)
    for it in range(niter):
        Ap = A * p
        alpha = rsold / np.dot(p, Ap)
        x += alpha * p
        r -= alpha * Ap
        rsnew = np.dot(r, r)
        if np.sqrt(rsnew) < toll:
            break
        p = r + (rsnew / rsold) * p
        rsold = rsnew
    return [x, it, np.sqrt(rsnew)]
def conjgrad_tensorflow(A, b, x0, niter=100, toll=1.e-5):
    x = x0
    r = b - tf.sparse.sparse_dense_matmul(A, x)
    p = r
    rsold = tf.reduce_sum(tf.multiply(r, r))
    for it in range(niter):
        Ap = tf.sparse.sparse_dense_matmul(A, p)
        alpha = rsold / tf.reduce_sum(tf.multiply(p, Ap))
        x += alpha * p
        r -= alpha * Ap
        rsnew = tf.reduce_sum(tf.multiply(r, r))
        if tf.sqrt(rsnew) < toll:
            break
        p = r + (rsnew / rsold) * p
        rsold = rsnew
    return [x, it, tf.sqrt(rsnew)]
mesh = create_mesh(Lx=10, Ly=10, Nx=100, Ny=100)
x0 = tf.constant((mesh['Pts'][:, 0] < 5).astype(np.float32))
nit_time = 10
dcoef = 1.0
maxit = x0.shape[0]//2
stoll = 1.e-6
print('nb of nodes:\t{}'.format(mesh['Pts'].shape[0]))
print('nb of trias:\t{}'.format(mesh['Tria'].shape[0]))
t0 = time()
MASS0, STIFF0 = compute_matrices_scipy(mesh)
elapsed_scipy = time() - t0
print('Matrices; elapsed: {:3.5f} s'.format(elapsed_scipy))
A = MASS0 + dcoef*STIFF0
x = np.copy(np.squeeze(x0.numpy()))
t0 = time()
for jt in range(nit_time):
    b = MASS0*x
    x1, it, tol = conjgrad_scipy(A, b, x, niter=maxit, toll=stoll)
    x = np.copy(x1)
    print('time {}; iters {}; resid: {:3.2f}'.format(1+jt, it, tol))
elapsed_scipy = time() - t0
print('elapsed, scipy: {:3.5f} s'.format(elapsed_scipy))
t0 = time()
MASS, STIFF = compute_matrices_tensorflow(mesh)
elapsed = time() - t0
print('Matrices; elapsed: {:3.5f} s'.format(elapsed))
x = None
x1 = None
A = tf.sparse.add(MASS, tf.sparse.map_values(tf.multiply, STIFF, dcoef))
x = tf.expand_dims(tf.identity(x0), axis=1)
t0 = time()
for jt in range(nit_time):
    b = tf.sparse.sparse_dense_matmul(MASS, x)
    x1, it, tol = conjgrad_tensorflow(A, b, x, niter=maxit, toll=stoll)
    x = x1
    print('time {}; iters {}; resid: {:3.2f}'.format(1+jt, it, tol))
elapsed_tf = time() - t0
print('elapsed, tf: {:3.2f} s'.format(elapsed_tf))
print('elapsed times:')
print('scipy: {:3.2f} s\ttf: {:3.2f} s'.format(elapsed_scipy, elapsed_tf))
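One hypothesis I have (an assumption on my part, not something I have verified) is that the CG loop above runs eagerly, one op at a time, so Python dispatch overhead may dominate each sparse matmul. Here is a minimal sketch of how the same iteration could be traced into a graph with tf.function; it reuses the names defined above and drops the iteration counter from the return value to keep the traced function simple:

@tf.function
def conjgrad_tf_graph(A, b, x, niter=100, toll=1.e-5):
    # same CG iteration as conjgrad_tensorflow, but traced once into a graph
    r = b - tf.sparse.sparse_dense_matmul(A, x)
    p = r
    rsold = tf.reduce_sum(r * r)
    rsnew = rsold
    for _ in tf.range(niter):
        Ap = tf.sparse.sparse_dense_matmul(A, p)
        alpha = rsold / tf.reduce_sum(p * Ap)
        x = x + alpha * p
        r = r - alpha * Ap
        rsnew = tf.reduce_sum(r * r)
        if tf.sqrt(rsnew) < toll:
            break
        p = r + (rsnew / rsold) * p
        rsold = rsnew
    return x, tf.sqrt(rsnew)

# usage, replacing the eager call in the timing loop:
# x1, res = conjgrad_tf_graph(A, b, x, niter=maxit, toll=stoll)

If the overhead hypothesis is right, the traced version should be noticeably faster after the first (tracing) call; I have not measured it.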
I am using this code to implement a region-growing algorithm, but instead of sklearn I want to use Open3D. Below is the original code, followed by the code that I am using.
import math
import numpy as np
from sklearn.neighbors import KDTree
unique_rows=np.loadtxt("test.txt")
tree = KDTree(unique_rows, leaf_size=2)
dist,nn_glob = tree.query(unique_rows[:len(unique_rows)], k=30)
def normalsestimation(pointcloud, nn_glob, VP=[0, 0, 0]):
    ViewPoint = np.array(VP)
    normals = np.empty(np.shape(pointcloud))
    curv = np.empty((len(pointcloud), 1))
    for index in range(len(pointcloud)):
        nn_loc = pointcloud[nn_glob[index]]
        COV = np.cov(nn_loc, rowvar=False)
        eigval, eigvec = np.linalg.eig(COV)
        idx = np.argsort(eigval)
        nor = eigvec[:, idx][:, 0]
        if nor.dot((ViewPoint - pointcloud[index, :])) > 0:
            normals[index] = nor
        else:
            normals[index] = -nor
        curv[index] = eigval[idx][0] / np.sum(eigval)
    return normals, curv
#seed_count = 0
#while seed_count < len(current_seeds)
def regiongrowing1(pointcloud, nn_glob, theta_th='auto', cur_th='auto'):
    normals, curvature = normalsestimation(pointcloud, nn_glob=nn_glob)
    order = curvature[:, 0].argsort().tolist()
    region = []
    if theta_th == 'auto':
        theta_th = 15.0 / 180.0 * math.pi  # in radians
    if cur_th == 'auto':
        cur_th = np.percentile(curvature, 98)
    while len(order) > 0:
        region_cur = []
        seed_cur = []
        poi_min = order[0]  # poi_order[0]
        region_cur.append(poi_min)
        seedval = 0
        seed_cur.append(poi_min)
        order.remove(poi_min)
        # for i in range(len(seed_cur)):  # changed to a while loop
        while seedval < len(seed_cur):
            nn_loc = nn_glob[seed_cur[seedval]]
            for j in range(len(nn_loc)):
                nn_cur = nn_loc[j]
                if all([nn_cur in order,
                        np.arccos(np.abs(np.dot(normals[seed_cur[seedval]], normals[nn_cur]))) < theta_th]):
                    region_cur.append(nn_cur)
                    order.remove(nn_cur)
                    if curvature[nn_cur] < cur_th:
                        seed_cur.append(nn_cur)
            seedval += 1
        region.append(region_cur)
    return region
region1 = regiongrowing1(unique_rows,nn_glob)
This is the code that I want to change, so that I can then make use of the normals estimation and region-growing functions.
import math
import numpy as np
import open3d as o3d
pcd = o3d.io.read_point_cloud("C:0000.ply")
points = np.asarray(pcd.points)
pcd_tree = o3d.geometry.KDTreeFlann(points)
[k, idy, _] = pcd_tree.search_knn_vector_3d(pcd.points[1500], 200)
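What I think the Open3D version needs (my sketch, untested; I am assuming search_knn_vector_3d lists the query point itself as the first neighbour, just as tree.query does) is to run the k-NN query for every point and stack the indices into the same nn_glob array that the sklearn version builds:

pcd_tree = o3d.geometry.KDTreeFlann(pcd)
k_nn = 30  # same neighbourhood size as the sklearn query above
nn_glob = np.zeros((len(points), k_nn), dtype=int)
for i in range(len(points)):
    # search_knn_vector_3d returns (count, neighbour indices, squared distances)
    k, idx, _ = pcd_tree.search_knn_vector_3d(pcd.points[i], k_nn)
    nn_glob[i] = np.asarray(idx)
region1 = regiongrowing1(points, nn_glob)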
I am trying to implement binary addition of two numbers using an RNN from scratch. I worked out the math and implemented the model; it runs without errors, but it does not converge. I read a blog online in which the author used MSE to calculate the cost, whereas I am using cross-entropy. I don't know why, but the model is not converging.
import numpy as np
import matplotlib.pyplot as plt
from progressbar import ProgressBar
from tqdm import tqdm
from scipy.special import expit

def sigmoid_derivative(z):
    return expit(z) * (1 - expit(z))

def tanh_derivative(z):
    return 1 - np.tanh(z)**2

# lookup table: integer -> 8-bit binary representation
values_map = dict()
bin_dimension = 8
bin_values = np.unpackbits(np.array([range(2**bin_dimension)], dtype=np.uint8).T, axis=1)
for i in range(2**bin_dimension):
    values_map[i] = bin_values[i, :]

lr = 0.1
epochs = 20000
wa = 2*np.random.random((13, 13)) - 1
wx = 2*np.random.random((13, 2)) - 1
wy = 2*np.random.random((1, 13)) - 1
d_wa = np.zeros_like(wa)
d_wx = np.zeros_like(wx)
d_wy = np.zeros_like(wy)
za, zy, aa = dict(), dict(), dict()
aa[8] = np.random.random((13, 1))
daprev = np.zeros_like(aa[8])
preds = np.zeros((1, bin_dimension))
for _ in tqdm(range(epochs)):
    a = np.random.randint(2**bin_dimension/2)
    b = np.random.randint(2**bin_dimension/2)
    c = a + b
    a = values_map[a]
    b = values_map[b]
    c = values_map[c]
    # forward pass, from the least significant bit (t = 7) to the most significant
    for t in range(bin_dimension)[::-1]:
        x = np.array([[a[t]], [b[t]]])
        y = np.array([[c[t]]])
        za[t] = np.matmul(wa, aa[t+1]) + np.matmul(wx, x)
        aa[t] = np.tanh(za[t])
        zy[t] = np.matmul(wy, aa[t])
        preds[:, t] = expit(zy[t])
    # backward pass
    for t in range(bin_dimension):
        x = np.array([[a[t]], [b[t]]])
        error = preds[:, t] - np.array([[c[t]]]).astype(int)
        da = (wa.T@daprev + wy.T@error) * tanh_derivative(za[t])
        d_wy += error@aa[t].T
        d_wx += da@x.T
        d_wa += da@aa[t+1].T
        daprev = da
    wa -= d_wa*lr
    wy -= d_wy*lr
    wx -= d_wx*lr
print(np.packbits((np.where(preds > .5, 1, 0)).astype(int)))
print(np.packbits(c))
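For what it is worth, I checked that the error term in my backward pass matches the cross-entropy gradient: with a sigmoid output yhat = sigmoid(z) and loss L = -(y*log(yhat) + (1-y)*log(1-yhat)), the gradient simplifies to dL/dz = yhat - y, which is exactly the preds[:, t] - c[t] term above. A standalone numerical check of that identity:

import numpy as np
from scipy.special import expit

def bce(z, y):
    # binary cross-entropy of a sigmoid output yhat = expit(z)
    yhat = expit(z)
    return -(y*np.log(yhat) + (1 - y)*np.log(1 - yhat))

z, y, eps = 0.3, 1.0, 1e-6
numeric = (bce(z + eps, y) - bce(z - eps, y)) / (2*eps)  # central difference
analytic = expit(z) - y                                  # dL/dz = yhat - y
print(numeric, analytic)  # the two agree to ~1e-9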
I have been trying to convert the following Matlab code into Python and I am having difficulties with the construction of the matrix algebra.
The output of the Python code is vastly different from Matlab's, and I can't figure out the problem (I assume it is in the matrix multiplication).
For some background: using the data set, I was attempting to create a forecast with one dummy variable, 'D1'.
Here is the matlab code:
load 'AUSRetail.csv'
y = AUSRetail(:,1); Q = AUSRetail(:,2);
T = length(y); t = (1:T)';
D4 = (Q == 4);
T0 = 15;
h = 1; % h-step-ahead forecast
syhat = zeros(T-h-T0+1,1);
ytph = y(T0+h:end); % observed y_{t+h}
for t = T0:T-h
    yt = y(1:t);
    D4t = D4(1:t);
    Xt = [ones(t,1) (1:t)' D4t];
    beta2 = (Xt'*Xt)\(Xt'*yt);
    yhat2 = [1 t+h D4(t+h)]*beta2;
    syhat(t-T0+1) = yhat2;
end
MSFE2 = mean((ytph-syhat).^2);
plot([1:T], y)
hold on
plot([T0+h:T], syhat)
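One point worth noting about that Matlab listing before my attempt: the backslash in beta2 = (Xt'*Xt)\(Xt'*yt) performs a linear-system solve, not a matrix product. A minimal NumPy rendering of just that line (with small dummy stand-ins for Xt and yt, since the real ones are built inside the loop):

import numpy as np
# dummy stand-ins shaped like the loop variables: Xt is t x 3, yt is t x 1
t = 5
Xt = np.column_stack((np.ones(t), np.arange(1, t + 1), np.array([0., 1., 0., 1., 0.])))
yt = np.array([2., 3., 5., 7., 11.]).reshape(t, 1)
# Matlab:  beta2 = (Xt'*Xt) \ (Xt'*yt);
beta2 = np.linalg.solve(Xt.T @ Xt, Xt.T @ yt)
# equivalently, a direct least-squares fit:
# beta2, residuals, rank, sv = np.linalg.lstsq(Xt, yt, rcond=None)
print(beta2)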
And here is my Python code attempt:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

data = pd.read_csv("dataretail.csv").dropna()
D4 = np.asarray(data["Dummy4"])
dataValues = np.asarray(data["Value"])
T = len(D4)
T0 = 15
h = 1
syhat = []
ytph = np.asarray(dataValues)  # real values to test against
for t in range(T0, T-1):
    yt = dataValues[:t].reshape(t, 1)
    D4t = D4[:t]
    # construction of Xt
    xt1 = np.ones((t, 1))
    xt2 = np.arange(1, t+1).reshape(t, 1)
    xt3 = D4t.reshape(t, 1)
    Xt = np.column_stack((xt1, xt2, xt3))
    Xt2 = np.transpose(Xt)
    A = Xt2 @ Xt
    b = Xt2 @ yt
    beta2 = np.transpose(A) @ b
    yhat2 = np.array([1, t+h, D4[t+h]]) @ beta2
    syhat.append(int(yhat2))

fig = plt.figure()
axes = fig.add_axes([0.2, 0.2, 0.8, 1])
axes.set_xlabel("T (time)")
axes.set_ylabel("Yhat2")
axes.plot(range(25), abc)
axes.set_title("Model")
This question is a follow-up to my previous question here: Assistance, tips and guidelines for converting Matlab code to Python.
I have converted the Matlab code manually. I am on macOS, running Python from the terminal. How do I run the code below for some value of N, where N is an even number? I should get a graph (produced by the plot code), but when I run the script as is, I get nothing.
My code is below:
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint
def Array(N):
    K00 = np.logspace(0,3,101,10)
    len1 = len(K00)
    y0 = [0]*(3*N/2+3)
    S = [np.zeros((len1,1)) for kkkk in range(N/2+1)]
    KS = [np.zeros((len1,1)) for kkkk in range(N/2)]
    PS = [np.zeros((len1,1)) for kkkk in range(N/2)]
    Splot = [np.zeros((len1,1)) for kkkk in range(N/2+1)]
    KSplot = [np.zeros((len1,1)) for kkkk in range(N/2)]
    PSplot = [np.zeros((len1,1)) for kkkk in range(N/2)]
    Kplot = np.zeros((len1,1))
    Pplot = np.zeros((len1,1))
    for series in range(0,len1):
        K0 = K00[series]
        Q = 10
        r1 = 0.0001
        r2 = 0.001
        d = 0.001
        a = 0.001
        k = 0.999
        P0 = 1
        S10 = 1e5
        tf = 1e10
        time = np.linspace(0,tf,len1)
        y0[0] = S10
        y0[3*N/2+1] = K0
        y0[3*N/2+2] = P0
        for i in range(1,3*N/2+1):
            y0[i] = 0
        [t,y] = odeint(EqnsArray,y0,time, mxstep = 5000)
        for alpha in range(0,(N/2+1)):
            S[alpha] = y[:,alpha]
        for beta in range((N/2)+1,N+1):
            KS[beta-N/2-1] = y[:,beta]
        for gamma in range(N+1,3*N/2+1):
            PS[gamma-N-1] = y[:,gamma]
        for alpha in range(0,(N/2+1)):
            Splot[alpha][series] = y[len1-1,alpha]
        for beta in range((N/2)+1,N+1):
            KSplot[beta-N/2-1][series] = y[len1-1,beta]
        for gamma in range(N+1,3*N/2+1):
            PSplot[gamma-N-1][series] = y[len1-1,gamma]
        for alpha in range(0,(N/2+1)):
            u1 = u1 + Splot[alpha]
        for beta in range((N/2)+1,N+1):
            u2 = u2 + KSplot[beta-N/2-1]
        for gamma in range(N+1,3*N/2+1):
            u3 = u3 + PSplot[gamma-N-1]
        K = soln[:,3*N/2+1]
        P = soln[:,3*N/2+2]
        Kplot[series] = soln[len1-1,3*N/2+1]
        Pplot[series] = soln[len1-1,3*N/2+2]
    utot = u1+u2+u3
    #Plot
    plt.plot(np.log10(K00),utot)
    plt.show()
def EqnsArray(y,t):
    for alpha in range(0,(N/2+1)):
        S[alpha] = y[alpha]
    for beta in range((N/2)+1,N+1):
        KS[beta-N/2-1] = y[beta]
    for gamma in range(N+1,3*N/2+1):
        PS[gamma-N-1] = y[gamma]
    K = y[3*N/2+1]
    P = y[3*N/2+2]
    # The model equations
    ydot = np.zeros((3*N/2+3,1))
    B = range((N/2)+1,N+1)
    G = range(N+1,3*N/2+1)
    runsumPS = 0
    runsum1 = 0
    runsumKS = 0
    runsum2 = 0
    for m in range(0,N/2):
        runsumPS = runsumPS + PS[m]
        runsum1 = runsum1 + S[m+1]
        runsumKS = runsumKS + KS[m]
        runsum2 = runsum2 + S[m]
        ydot[B[m]] = a*K*S[m]-(d+k+r1)*KS[m]
    for i in range(0,N/2-1):
        ydot[G[i]] = a*P*S[i+1]-(d+k+r1)*PS[i]
    for p in range(1,N/2):
        ydot[p] = -S[p]*(r1+a*K+a*P)+k*KS[p-1]+d*(PS[p-1]+KS[p])
    ydot[0] = Q-(r1+a*K)*S[0]+d*KS[0]+k*runsumPS
    ydot[N/2] = k*KS[N/2-1]-(r2+a*P)*S[N/2]+d*PS[N/2-1]
    ydot[G[N/2-1]] = a*P*S[N/2]-(d+k+r2)*PS[N/2-1]
    ydot[3*N/2+1] = (d+k+r1)*runsumKS-a*K*runsum2
    ydot[3*N/2+2] = (d+k+r1)*(runsumPS-PS[N/2-1])- \
        a*P*runsum1+(d+k+r2)*PS[N/2-1]
    ydot_new = []
    for j in range(0,3*N/2+3):
        ydot_new.extend(ydot[j])
    return ydot_new
You have to call your function, like:
Array(12)
You have to add this at the end of your code.
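For completeness, a common pattern (my suggestion; I am assuming the script is run directly with python yourscript.py from the terminal) is to guard the call, so the functions can still be imported from another script without side effects:

if __name__ == "__main__":
    Array(12)  # any even N works the same way; 12 is just an example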