I am using this code to implement a region growing algorithm, but instead of sklearn I want to use Open3D. This is the original code; below you will find the code that I am using.
import math
import numpy as np
from sklearn.neighbors import KDTree

unique_rows = np.loadtxt("test.txt")
tree = KDTree(unique_rows, leaf_size=2)
dist, nn_glob = tree.query(unique_rows, k=30)

def normalsestimation(pointcloud, nn_glob, VP=[0, 0, 0]):
    # Estimate a normal and a curvature value for every point from the
    # covariance of its k nearest neighbours (PCA): the eigenvector of the
    # smallest eigenvalue is the normal, oriented towards the viewpoint.
    ViewPoint = np.array(VP)
    normals = np.empty(np.shape(pointcloud))
    curv = np.empty((len(pointcloud), 1))
    for index in range(len(pointcloud)):
        nn_loc = pointcloud[nn_glob[index]]
        COV = np.cov(nn_loc, rowvar=False)
        eigval, eigvec = np.linalg.eig(COV)
        idx = np.argsort(eigval)
        nor = eigvec[:, idx][:, 0]
        if nor.dot(ViewPoint - pointcloud[index, :]) > 0:
            normals[index] = nor
        else:
            normals[index] = -nor
        curv[index] = eigval[idx][0] / np.sum(eigval)
    return normals, curv

#seed_count = 0
#while seed_count < len(current_seeds)
def regiongrowing1(pointcloud, nn_glob, theta_th='auto', cur_th='auto'):
    # Region growing: seed at the point of lowest curvature, then absorb
    # neighbours whose normals deviate by less than theta_th; neighbours
    # below the curvature threshold become new seeds.
    normals, curvature = normalsestimation(pointcloud, nn_glob=nn_glob)
    order = curvature[:, 0].argsort().tolist()
    region = []
    if theta_th == 'auto':
        theta_th = 15.0 / 180.0 * math.pi  # in radians
    if cur_th == 'auto':
        cur_th = np.percentile(curvature, 98)
    while len(order) > 0:
        region_cur = []
        seed_cur = []
        poi_min = order[0]
        region_cur.append(poi_min)
        seedval = 0
        seed_cur.append(poi_min)
        order.remove(poi_min)
        while seedval < len(seed_cur):
            nn_loc = nn_glob[seed_cur[seedval]]
            for j in range(len(nn_loc)):
                nn_cur = nn_loc[j]
                if all([nn_cur in order,
                        np.arccos(np.abs(np.dot(normals[seed_cur[seedval]], normals[nn_cur]))) < theta_th]):
                    region_cur.append(nn_cur)
                    order.remove(nn_cur)
                    if curvature[nn_cur] < cur_th:
                        seed_cur.append(nn_cur)
            seedval += 1
        region.append(region_cur)
    return region

region1 = regiongrowing1(unique_rows, nn_glob)
This is the code that I want to change so that it uses Open3D for the neighbour search, and then makes use of the normal-estimation and region-growing functions.
import math
import numpy as np
import open3d as o3d

pcd = o3d.io.read_point_cloud("C:0000.ply")
points = np.asarray(pcd.points)
pcd_tree = o3d.geometry.KDTreeFlann(pcd)  # build the tree from the point cloud itself
[k, idy, _] = pcd_tree.search_knn_vector_3d(pcd.points[1500], 200)
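A minimal sketch of how the Open3D tree could replace the sklearn query and feed the two functions above (assuming the same k = 30 neighbourhood as the original; the loop is plain Python, so it will be slow on large clouds):

k = 30
nn_glob = np.zeros((len(points), k), dtype=int)
for i in range(len(points)):
    # search_knn_vector_3d returns (count, indices, squared distances)
    [_, idx_i, _] = pcd_tree.search_knn_vector_3d(pcd.points[i], k)
    nn_glob[i] = np.asarray(idx_i)
region1 = regiongrowing1(points, nn_glob)

Since regiongrowing1 only needs the integer index array, nothing else in the original functions has to change.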
I implemented the conjugate gradient method using TensorFlow to invert a sparse matrix.
The matrix I used to test the method is well-conditioned, as it is the sum of a mass matrix and a stiffness matrix obtained with finite elements.
I compared it with the same method implemented using scipy, on the same data.
The solutions obtained with either method are the same; however, TensorFlow is 5 times slower (I tested in a Colab environment).
In Colab, scipy ran in 0.27 s, while TensorFlow required 1.37 s.
Why is the algorithm so slow under TensorFlow?
I cannot cast to dense matrices, as I want to use the method with matrices of large size (100k × 100k or more).
Thanks,
Cesare
Here is the code I used to test this:
import tensorflow as tf
import numpy as np
from scipy.sparse import coo_matrix, linalg
import os
import sys
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
from time import time
from scipy.spatial import Delaunay

def create_mesh(Lx=1, Ly=1, Nx=100, Ny=100):
    mesh0 = dict()
    dx = Lx/Nx
    dy = Ly/Ny
    XX, YY = np.meshgrid(np.arange(0, Lx+dx, dx), np.arange(0, Ly+dy, dy))
    points = np.vstack((XX.ravel(), YY.ravel())).T
    #np.random.shuffle(points)
    tri = Delaunay(points)
    mesh0['Pts'] = np.copy(points).astype(np.float32)
    mesh0['Tria'] = np.copy(tri.simplices).astype(int)
    return mesh0
def eval_connectivity(mesh0):
    print('computing mesh connectivity')
    npt = mesh0['Pts'].shape[0]
    connectivity = {}
    for jpt in range(npt):
        connectivity[jpt] = []
    for Tria in mesh0['Tria']:
        for ilpt in range(3):
            iglobalPt = Tria[ilpt]
            for jlpt in range(1+ilpt, 3):
                jglobalPt = Tria[jlpt]
                connectivity[iglobalPt].append(jglobalPt)
                connectivity[jglobalPt].append(iglobalPt)
    for key, value in connectivity.items():
        connectivity[key] = np.unique(np.array(value, dtype=int))
    return connectivity
def eval_local_mass(mesh0, iTri):
    lmass = np.zeros(shape=(3, 3), dtype=np.float32)
    Tria = mesh0['Tria'][iTri]
    v10 = mesh0['Pts'][Tria[1], :] - mesh0['Pts'][Tria[0], :]
    v20 = mesh0['Pts'][Tria[2], :] - mesh0['Pts'][Tria[0], :]
    N12 = np.cross(v10, v20)
    Tsurf = 0.5*np.linalg.norm(N12)
    for ipt in range(3):
        lmass[ipt, ipt] = 1.0/12.0
        for jpt in range(1+ipt, 3):
            lmass[ipt, jpt] = 1.0/24.0
            lmass[jpt, ipt] = lmass[ipt, jpt]
    lmass = 2.0*Tsurf*lmass
    return lmass
def eval_local_stiffness(mesh0, iTri):
    Tria = mesh0['Tria'][iTri]
    v10 = mesh0['Pts'][Tria[1], :] - mesh0['Pts'][Tria[0], :]
    v20 = mesh0['Pts'][Tria[2], :] - mesh0['Pts'][Tria[0], :]
    N12 = np.cross(v10, v20)
    Tsurf = 0.5*np.linalg.norm(N12)
    covbT = np.zeros(shape=(3, 3), dtype=np.float32)
    covbT[0, :2] = v10
    covbT[1, :2] = v20
    covbT[2, 2] = N12/(2*Tsurf)
    contrb = np.linalg.inv(covbT)
    v1 = contrb[:, 0]
    v2 = contrb[:, 1]
    a = np.dot(v1, v1)
    b = np.dot(v1, v2)
    c = np.dot(v2, v2)
    gij_c = np.array([[a, b], [b, c]], dtype=np.float32)
    lgrad = np.array([[-1.0, 1.0, 0.0], [-1.0, 0.0, 1.0]], dtype=np.float32)
    lstif = Tsurf*np.matmul(np.matmul(lgrad.T, gij_c), lgrad)
    return lstif
def compute_vectors_sparse_matrices(mesh0):
    npt = mesh0['Pts'].shape[0]
    connect = eval_connectivity(mesh0)
    nzero = 0
    for key, value in connect.items():
        nzero += (1 + value.shape[0])
    I = np.zeros(shape=(nzero), dtype=int)
    J = np.zeros(shape=(nzero), dtype=int)
    VM = np.zeros(shape=(nzero), dtype=np.float32)
    VS = np.zeros(shape=(nzero), dtype=np.float32)
    k0 = np.zeros(shape=(npt+1), dtype=int)
    k0[0] = 0
    k = -1
    for jpt in range(npt):
        loc_con = connect[jpt].tolist()[:]
        loc_con.append(jpt)
        loc_con = np.sort(loc_con)
        k0[jpt+1] = k0[jpt] + loc_con.shape[0]
        for jloc in range(loc_con.shape[0]):
            k = k+1
            I[k] = jpt
            J[k] = loc_con[jloc]
    for iTr, Tria in enumerate(mesh0['Tria']):
        lstiff = eval_local_stiffness(mesh0, iTr)
        lmass = eval_local_mass(mesh0, iTr)
        for iEntry, irow in enumerate(Tria):
            loc_con = connect[irow].tolist()[:]
            loc_con.append(irow)
            loc_con = np.sort(loc_con)
            for jEntry, jcol in enumerate(Tria):
                indexEntry = k0[irow] + np.where(loc_con == jcol)[0]
                VM[indexEntry] = VM[indexEntry] + lmass[iEntry, jEntry]
                VS[indexEntry] = VS[indexEntry] + lstiff[iEntry, jEntry]
    return I, J, VM, VS
def compute_global_sparse_matrices(mesh0):
    I, J, VM, VS = compute_vectors_sparse_matrices(mesh0)
    npt = mesh0['Pts'].shape[0]
    MASS = coo_matrix((VM, (I, J)), shape=(npt, npt))
    STIFF = coo_matrix((VS, (I, J)), shape=(npt, npt))
    return MASS, STIFF

def compute_global_sparse_tensors(mesh0):
    I, J, VM, VS = compute_vectors_sparse_matrices(mesh0)
    npt = mesh0['Pts'].shape[0]
    indices = np.hstack([I[:, np.newaxis], J[:, np.newaxis]])
    MASS = tf.sparse.SparseTensor(indices=indices, values=VM.astype(np.float32), dense_shape=[npt, npt])
    STIFF = tf.sparse.SparseTensor(indices=indices, values=VS.astype(np.float32), dense_shape=[npt, npt])
    return MASS, STIFF

def compute_matrices_scipy(mesh0):
    MASS, STIFF = compute_global_sparse_matrices(mesh0)
    return MASS, STIFF

def compute_matrices_tensorflow(mesh0):
    MASS, STIFF = compute_global_sparse_tensors(mesh0)
    return MASS, STIFF
def conjgrad_scipy(A, b, x0, niter=100, toll=1.e-5):
    x = np.copy(x0)
    r = b - A * x
    p = np.copy(r)
    rsold = np.dot(r, r)
    for it in range(niter):
        Ap = A * p
        alpha = rsold / np.dot(p, Ap)
        x += alpha * p
        r -= alpha * Ap
        rsnew = np.dot(r, r)
        if (np.sqrt(rsnew) < toll):
            break
        p = r + (rsnew / rsold) * p
        rsold = rsnew
    return [x, it, np.sqrt(rsnew)]

def conjgrad_tensorflow(A, b, x0, niter=100, toll=1.e-5):
    x = x0
    r = b - tf.sparse.sparse_dense_matmul(A, x)
    p = r
    rsold = tf.reduce_sum(tf.multiply(r, r))
    for it in range(niter):
        Ap = tf.sparse.sparse_dense_matmul(A, p)
        alpha = rsold / tf.reduce_sum(tf.multiply(p, Ap))
        x += alpha * p
        r -= alpha * Ap
        rsnew = tf.reduce_sum(tf.multiply(r, r))
        if (tf.sqrt(rsnew) < toll):
            break
        p = r + (rsnew / rsold) * p
        rsold = rsnew
    return [x, it, tf.sqrt(rsnew)]
mesh = create_mesh(Lx=10, Ly=10, Nx=100, Ny=100)
x0 = tf.constant((mesh['Pts'][:, 0] < 5).astype(np.float32))
nit_time = 10
dcoef = 1.0
maxit = x0.shape[0]//2
stoll = 1.e-6
print('nb of nodes:\t{}'.format(mesh['Pts'].shape[0]))
print('nb of trias:\t{}'.format(mesh['Tria'].shape[0]))
t0 = time()
MASS0, STIFF0 = compute_matrices_scipy(mesh)
elapsed_scipy = time()-t0
print('Matrices; elapsed: {:3.5f} s'.format(elapsed_scipy))
A = MASS0 + dcoef*STIFF0
x = np.copy(np.squeeze(x0.numpy()))
t0 = time()
for jt in range(nit_time):
    b = MASS0*x
    x1, it, tol = conjgrad_scipy(A, b, x, niter=maxit, toll=stoll)
    x = np.copy(x1)
    print('time {}; iters {}; resid: {:3.2f}'.format(1+jt, it, tol))
elapsed_scipy = time()-t0
print('elapsed, scipy: {:3.5f} s'.format(elapsed_scipy))
t0 = time()
MASS, STIFF = compute_matrices_tensorflow(mesh)
elapsed = time()-t0
print('Matrices; elapsed: {:3.5f} s'.format(elapsed))
x = None
x1 = None
A = tf.sparse.add(MASS, tf.sparse.map_values(tf.multiply, STIFF, dcoef))
x = tf.expand_dims(tf.identity(x0), axis=1)
t0 = time()
for jt in range(nit_time):
    b = tf.sparse.sparse_dense_matmul(MASS, x)
    x1, it, tol = conjgrad_tensorflow(A, b, x, niter=maxit, toll=stoll)
    x = x1
    print('time {}; iters {}; resid: {:3.2f}'.format(1+jt, it, tol))
elapsed_tf = time()-t0
print('elapsed, tf: {:3.2f} s'.format(elapsed_tf))
print('elapsed times:')
print('scipy: {:3.2f} s\ttf: {:3.2f} s'.format(elapsed_scipy, elapsed_tf))
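One factor worth checking is eager execution: the Python loop in conjgrad_tensorflow dispatches every op individually, and for small, cheap ops that per-call overhead can dominate the actual kernel time. A graph-compiled variant of the same iteration is sketched below (the name conjgrad_tf_graph is mine, and this is an untested hypothesis, not a verified fix):

@tf.function
def conjgrad_tf_graph(A, b, x0, niter=100, toll=1.e-5):
    # Identical CG iteration, but traced once into a graph so the per-op
    # Python dispatch cost is paid only at trace time.
    x = x0
    r = b - tf.sparse.sparse_dense_matmul(A, x)
    p = r
    rsold = tf.reduce_sum(r * r)
    rsnew = rsold
    for _ in tf.range(niter):
        Ap = tf.sparse.sparse_dense_matmul(A, p)
        alpha = rsold / tf.reduce_sum(p * Ap)
        x += alpha * p
        r -= alpha * Ap
        rsnew = tf.reduce_sum(r * r)
        if tf.sqrt(rsnew) < toll:
            break
        p = r + (rsnew / rsold) * p
        rsold = rsnew
    return x, tf.sqrt(rsnew)

It can be called as x1, res = conjgrad_tf_graph(A, b, x, niter=maxit, toll=stoll). If the gap persists after compilation, the difference is more likely in the sparse matvec kernels themselves than in dispatch overhead.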
What is the most efficient and scalable way to compute and use the tridiagonal Jacobian of an autonomous system in JAX?
import functools as ft
import jax as jx
import jax.numpy as jnp
import jax.random as jrn
import jax.lax as jlx
def make_T(m):
    # Create a pseudo-random tridiagonal Jacobian and store the band
    T = jnp.zeros((3, m), dtype='f8')
    T = T.at[0, 1:].set(jrn.normal(jrn.PRNGKey(0), shape=(m-1,)))
    T = T.at[1, :].set(jrn.normal(jrn.PRNGKey(1), shape=(m,)))
    T = T.at[2, :-1].set(jrn.normal(jrn.PRNGKey(2), shape=(m-1,)))
    return T

def make_y(m):
    # Create a pseudo-random state array
    y = jrn.normal(jrn.PRNGKey(3), shape=(m,))
    return y

def calc_f_base(y, T):
    # Calculate the rate given the current state
    f = T[1, :]*y
    f = f.at[1:].set(f[1:] + T[0, 1:]*y[:-1])
    f = f.at[:-1].set(f[:-1] + T[2, :-1]*y[1:])
    return f

m = 2**22  # potentially exhausts resources
T = make_T(m)
y = make_y(m)
calc_f = ft.partial(calc_f_base, T=T)
Using jax.jacrev or jax.jacfwd will generate the full, dense Jacobian, which limits the size of the system.
One attempt to overcome this limitation is as follows:
@ft.partial(jx.jit, static_argnums=(0,))
def calc_jacfwd_trid(calc_f, y):
    # Determine the Jacobian (forward-mode) tridiagonal band
    def scan_body(carry, i):
        t, T = carry
        t = t.at[i].set(1.0)
        t = t.at[i-1].set(0.0)  # clear the previous seed before differentiating
        f, dfy = jx.jvp(calc_f, (y,), (t,))
        T = T.at[2, i-1].set(dfy[i-1])
        T = T.at[1, i].set(dfy[i])
        T = T.at[0, i+1].set(dfy[i+1])
        return (t, T), None
    # Initialise
    m = y.size
    t = jnp.zeros_like(y)
    T = jnp.zeros((3, m), dtype=y.dtype)
    # Differentiate wrt y[0]
    t = t.at[0].set(1.0)
    f, dfy = jx.jvp(calc_f, (y,), (t,))
    idxs = jnp.array([1, 0]), jnp.array([0, 1])
    T = T.at[idxs].set(dfy[0:2])
    # Differentiate wrt y[1:-1]
    (t, T), empty = jlx.scan(scan_body, (t, T), jnp.arange(1, m-1))
    # Differentiate wrt y[-1]
    t = t.at[m-2:].set(jnp.array([0.0, 1.0]))
    f, dfy = jx.jvp(calc_f, (y,), (t,))
    idxs = jnp.array([2, 1]), jnp.array([m-2, m-1])
    T = T.at[idxs].set(dfy[-2:])
    return T
which permits

T = calc_jacfwd_trid(calc_f, y)
df = jrn.normal(jrn.PRNGKey(4), shape=y.shape)
dx = jlx.linalg.tridiagonal_solve(*T, df[:, None]).flatten()

Is there a better approach, and/or can the time complexity of calc_jacfwd_trid be reduced further?
EDIT
The following implementation is more compact, but its run times are slightly slower:
@ft.partial(jx.jit, static_argnums=(0,))
def calc_jacfwd_trid_map(calc_f, y):
    # Determine the Jacobian (forward-mode) tridiagonal band with lax map
    def map_body(i, t):
        t = t.at[i].set(1.0)  # t arrives as zeros, so only column i is seeded
        f, dfy = jx.jvp(calc_f, (y,), (t,))
        im1 = jnp.where(i > 0, i-1, 0)
        Ti = jlx.dynamic_slice(dfy, (im1,), (3,))
        Ti = jnp.where(i > 0, Ti, jnp.roll(Ti, shift=+1))
        Ti = jnp.where(i < m-1, Ti, jnp.roll(Ti, shift=-1))
        return Ti
    # Initialise
    m = y.size
    t = jnp.zeros_like(y)
    # Differentiate wrt y[:]
    T = jlx.map(lambda i: map_body(i, t=t), jnp.arange(m))
    # Correct the orientation of T
    T = T.transpose()
    T = jnp.flip(T, axis=0)
    T = T.at[0, :].set(jnp.roll(T[0, :], shift=+1))
    T = T.at[2, :].set(jnp.roll(T[2, :], shift=-1))
    return T
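For comparison, the standard trick for banded Jacobians is colouring: columns j, j+3, j+6, ... of a tridiagonal Jacobian never share a row, so three JVPs recover the whole band regardless of m. A sketch under that structural assumption (the name calc_jacfwd_trid_colored is mine):

@ft.partial(jx.jit, static_argnums=(0,))
def calc_jacfwd_trid_colored(calc_f, y):
    m = y.size
    T = jnp.zeros((3, m), dtype=y.dtype)
    for c in range(3):  # one seed vector per colour, unrolled at trace time
        t = jnp.zeros_like(y).at[c::3].set(1.0)
        _, dfy = jx.jvp(calc_f, (y,), (t,))
        T = T.at[1, c::3].set(dfy[c::3])                      # J[j, j]
        T = T.at[0, (c + 1) % 3::3].set(dfy[(c + 1) % 3::3])  # J[j+1, j]
        T = T.at[2, (c - 1) % 3::3].set(dfy[(c - 1) % 3::3])  # J[j-1, j]
    return T

The unused corner entries T[0, 0] and T[2, m-1] come out as zeros, which is what tridiagonal_solve expects, so T = calc_jacfwd_trid_colored(calc_f, y) is a drop-in replacement that needs three JVPs instead of m.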
I've been trying to solve the water hammer PDEs from the Maple example linked below in Python (numpy/scipy), but I'm getting very unstable results. Can anyone see my mistake? I'm guessing something is wrong with the boundary conditions.
https://www.maplesoft.com/support/help/view.aspx?path=applications/WaterHammer
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt

## Parameters
Dia = 0.1
V = 14.19058741   # Steady state
p = 1000          # Liquid density
u = 0.001         # Viscosity
L = 25
e = 0.0001        # Roughness
Psource = 0.510E6
thick = 0.001
E = 7010*10**9
K = 20010E6
Vsteady = 14.19058741
Ks = 1/((1/K) + (Dia/(E*thick)))  # note the parentheses: D/(E*e), not D/E*e
# Darcy-Weisbach
def Friction(V):
    Rey = (Dia*V*p)/u
    fL = 64/Rey
    fT = 1/((1.8*np.log10((6.9/Rey) + (e/(3.7*Dia))**1.11))**2)
    if Rey >= 0 and Rey < 2000:
        return fL
    if Rey >= 2000 and Rey < 4000:
        return fL + ((fT - fL)*(Rey - 2000))/(4000 - 2000)
    if Rey >= 4000:
        return fT
    return 0
def model(D, t):
    V = D[:N]
    P = D[N:]
    dVdt = np.zeros(N)
    for i in range(1, len(dVdt)-1):
        # central difference: note /(2*dx) -- writing /2*dx multiplies by dx instead
        dVdt[i] = -(1/p)*((P[i+1] - P[i-1])/(2*dx)) - ((Friction(np.abs(V[i]))*(np.abs(V[i])**2))/(2*Dia))
    dPdt = np.zeros(N)
    for i in range(1, len(dPdt)-1):
        dPdt[i] = -((V[i+1] - V[i-1])/(2*dx))*Ks
    if t < 2:
        dVdt[N-1] = 0   # valve end, before closure
    else:
        dVdt[N-1] = -1  # valve closing ramp
        dPdt[N-1] = 0
    dVdt[0] = dVdt[1]
    return np.append(dVdt, dPdt)
N = 30
x = np.linspace(0, L, N)
dx = x[1] - x[0]

## Initial conditions
Vi_0 = np.ones(N)*Vsteady
Pi_0 = np.zeros(N)  # float array; np.arange(N) is integer and truncates on assignment
for i in range(N):
    Pi_0[i] = Psource - (i*dx/L)*Psource

# initial condition
y0 = np.append(Vi_0, Pi_0)
# time points
t = np.linspace(0, 3, 10000)
# solve ODE
y = odeint(model, y0, t)
Vr = y[:, 0:N]
Pr = y[:, N:]
plt.plot(t, Pr[:, 5])
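For reference, a quick sanity check on the parameters: the effective bulk modulus Ks implies a pressure wave speed, and the 2 s valve event should be long compared with the pipe transit time (a sketch using the variables defined above):

a_wave = np.sqrt(Ks/p)  # pressure wave speed implied by Ks and the density
print('wave speed: {:.0f} m/s'.format(a_wave))
print('pipe transit time: {:.4f} s'.format(L/a_wave))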
I am converting some code from MATLAB to Python, and I have encountered an issue I can't resolve. When iterating over the for loop in this section of code, it spits out repeated values that are also incorrect. I believe this has to do with my definitions of "x" and "z", but I am not quite sure. Here is my Python script; the matrices D2A1 and D2A2 are what come out with the repeated blocks of incorrect values.
import sys
import numpy as np
import scipy as sp
import scipy.special as scl
import numpy.matlib as mat
###
#np.set_printoptions(threshold = sys.maxsize)
##
### Constants and Parameters
w = np.array([.09, .089])
a = np.array([0, 3])
coup = np.array([w[0], 0])/10
dE12 = -2*w[0]
gs = np.array([0, 0])
ws = w**2
alpha = a[0]*ws[0]/a[1]/ws[1]
dEp = (dE12 + a[0]**2*ws[0]/2 + a[1]**2*ws[1]/2)/a[1]/ws[1]
ac = np.array([0, 0], dtype='float')
ac[0] = alpha*dEp*ws[1]/(ws[0] + alpha**2*ws[1])
ac[1] = dEp - alpha*ac[0]
iS = 0  ## starting state
z0c = gs[1]
x0c = gs[0]
Mx = 128*2
Mz = 128*2
N = 2
dt = 0.05
# Now we need grid lengths L[1x1]
Lx = 10
Lz = 10
LxT = Lx*2
LzT = Lz*2
# x0-z0 = z0[1xM] = grid of M points from -L to L
x0 = np.array([np.linspace(-Lx, Lx, Mx)])
z0 = np.array([np.linspace(-Lz, Lz, Mz)])
x0op = np.transpose(mat.repmat(x0, Mz, 1))
z0op = mat.repmat(z0, Mx, 1)
## For loop over matrices
VDI = np.zeros((2, 2), dtype='complex')
# Each matrix needs its own allocation: "D2A2 = D2A1" would only create a
# second name for the same array (unlike MATLAB, which copies on assignment).
D2A1 = np.zeros((2, Mx*Mz), dtype='complex')
D2A2 = np.zeros((2, Mx*Mz), dtype='complex')
V1 = np.zeros((2, Mx*Mz), dtype='complex')
V2 = np.zeros((2, Mx*Mz), dtype='complex')
VP1 = np.zeros((2, Mx*Mz), dtype='complex')
VP2 = np.zeros((2, Mx*Mz), dtype='complex')
for ig in range(Mz):
    for jg in range(Mx):
        z = z0[0, ig]
        x = x0[0, jg]
        ### Diabatic matrix ###
        VDI[0, 0] = (w[1]*z)**2/2 + (w[0]*x)**2/2
        VDI[1, 1] = (w[1]*(z - a[1]))**2/2 + (w[0]*(x - a[0]))**2/2 + dE12
        VDI[0, 1] = coup[1]*(z + ac[1]) + coup[0]*(x + ac[0])
        VDI[1, 0] = VDI[0, 1]
        ### Adiabatization ###
        VDt, U = np.linalg.eigh(VDI)  # eigh already returns sorted eigenvalues
        VDt = np.diagflat(VDt)
        UUdVP = U @ sp.linalg.expm(-1.j*dt*VDt) @ U.T
        V = U @ VDt @ U.T
        ixz = jg + ig*Mx  # 0-based: MATLAB's jg+(ig-1)*Mx becomes jg+ig*Mx
        D2A1[:, ixz] = np.conj(U[:, 0])
        D2A2[:, ixz] = np.conj(U[:, 1])
print(D2A1)
Below is the MATLAB loop I am trying to recreate.
VDI = zeros(2,2);
D2A1 = zeros(2,Mx*Mz); D2A2 = D2A1; V1 = D2A1; V2 = V1; VP1 = V1; VP2 = V1;
for ig = 1:Mz
    for jg = 1:Mx
        z = z0(ig); x = x0(jg);
        % diabatic matrix
        VDI(1,1) = (w(2)*z)^2/2 + (w(1)*x)^2/2;
        VDI(2,2) = (w(2)*(z-a(2)))^2/2 + (w(2)*(x-a(1)))^2/2 + dE12;
        VDI(1,2) = coup(2)*(z+ac(2)) + coup(1)*(x+ac(1)); VDI(2,1) = VDI(1,2);
        % adiabatization
        [U,VDt] = eig(VDI);
        [VDt, Ind] = sort(diag(VDt)); U = U(:,Ind);
        UUdVP = U*diag(exp(-1i*dt*VDt))*U';
        V = U*diag(VDt)*U';
        ixz = jg + (ig-1)*Mx;
        D2A1(:,ixz) = conj(U(:,1)); D2A2(:,ixz) = conj(U(:,2));
    end
end
Any help would be greatly appreciated. Thanks!
Fixed. The error was in the definition of the matrices to be generated. From what I gather, in Python you must explicitly allocate each array, whereas in MATLAB you can chain matrix assignments like D2A2=D2A1 through a for loop and still get independent copies.
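To spell the aliasing out with a minimal standalone example (the names A, B, C here are hypothetical):

import numpy as np

A = np.zeros((2, 2))
B = A             # B is a second name for the same array, not a copy
B[0, 0] = 1.0
print(A[0, 0])    # 1.0 -- the write through B shows up in A
C = A.copy()      # an explicit copy behaves like MATLAB's assignment
C[0, 0] = 5.0
print(A[0, 0])    # still 1.0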
I am trying to implement binary addition of two numbers using an RNN from scratch. I worked through the math and implemented the model; it runs without errors, but it is not converging. A blog I read online uses MSE for calculating the cost, while I am using cross-entropy. I don't know why, but the model is not converging.
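For what it's worth, with a sigmoid output the cross-entropy delta at the output reduces to prediction minus target, which is exactly the error term the code below uses, so the loss choice itself should be consistent. A standalone numerical check of that identity:

import numpy as np
from scipy.special import expit

z, y = 0.3, 1.0   # arbitrary logit and target, just for the check
ce = lambda z: -(y*np.log(expit(z)) + (1 - y)*np.log(1 - expit(z)))
eps = 1e-6
print((ce(z + eps) - ce(z - eps))/(2*eps))  # numerical dL/dz
print(expit(z) - y)                         # analytic dL/dz = pred - target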
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from scipy.special import expit

def sigmoid_derivative(z):
    return expit(z) * (1 - expit(z))

def tanh_derivative(z):
    return 1 - np.tanh(z)**2

# Lookup table from integer to its 8-bit binary representation
values_map = dict()
bin_dimension = 8
bin_values = np.unpackbits(np.array([range(2**bin_dimension)], dtype=np.uint8).T, axis=1)
for i in range(2**bin_dimension):
    values_map[i] = bin_values[i, :]

lr = 0.1
epochs = 20000
wa = 2*np.random.random((13, 13)) - 1  # hidden-to-hidden weights
wx = 2*np.random.random((13, 2)) - 1   # input-to-hidden weights
wy = 2*np.random.random((1, 13)) - 1   # hidden-to-output weights
za, zy, aa = dict(), dict(), dict()
aa[bin_dimension] = np.random.random((13, 1))  # initial hidden state
preds = np.zeros((1, bin_dimension))
for _ in tqdm(range(epochs)):
    a = np.random.randint(2**bin_dimension/2)
    b = np.random.randint(2**bin_dimension/2)
    c = a + b
    a = values_map[a]
    b = values_map[b]
    c = values_map[c]
    # Reset the gradient accumulators for every example; letting them
    # accumulate across examples makes the updates grow without bound.
    d_wa = np.zeros_like(wa)
    d_wx = np.zeros_like(wx)
    d_wy = np.zeros_like(wy)
    daprev = np.zeros_like(aa[bin_dimension])
    # Forward pass, from the least significant bit (t = 7) to the most
    for t in range(bin_dimension)[::-1]:
        x = np.array([[a[t]], [b[t]]])
        za[t] = np.matmul(wa, aa[t+1]) + np.matmul(wx, x)
        aa[t] = np.tanh(za[t])
        zy[t] = np.matmul(wy, aa[t])
        preds[:, t] = expit(zy[t])
    # Backward pass, in the reverse order of computation (t = 0 ... 7)
    for t in range(bin_dimension):
        x = np.array([[a[t]], [b[t]]])
        error = preds[:, t] - np.array([[c[t]]]).astype(int)
        da = (wa.T @ daprev + wy.T @ error) * tanh_derivative(za[t])
        d_wy += error @ aa[t].T
        d_wx += da @ x.T
        d_wa += da @ aa[t+1].T
        daprev = da
    wa -= d_wa*lr
    wy -= d_wy*lr
    wx -= d_wx*lr

print(np.packbits(np.where(preds > .5, 1, 0).astype(int)))
print(np.packbits(c))