I have a very large matrix, but I only want to find the eigenvectors (more than 1) with one specific eigenvalue. How can I get this without solving the whole eigenvalues and eigenvectors of this matrix in python?
One option could be perhaps to use shift-invert method. The method eigs in scipy has an optional parameter sigma using which it is possible to specify the value close to which it should search for eigenvalues:
import numpy as np
from scipy.sparse.linalg import eigs
np.random.seed(42)
N = 10
A = np.random.random_sample((N, N))
A += A.T
A += N*np.identity(N)
#get N//2 largest eigenvalues
l,_ = eigs(A, N//2)
print(l)
#get 2 eigenvalues closest in magnitude to 12
l,_ = eigs(A, 2, sigma = 12)
print(l)
This produces:
[ 19.52479260+0.j 12.28842653+0.j 11.43948696+0.j 10.89132148+0.j
10.79397596+0.j]
[ 12.28842653+0.j 11.43948696+0.j]
EDIT:
In case you know the eigenvalues in advance, then you could try to calculate the basis of the corresponding nullspace. For example:
import numpy as np
from numpy.linalg import eig, svd, norm
from scipy.sparse.linalg import eigs
from scipy.linalg import orth
def nullspace(A, atol=1e-13, rtol=0):
A = np.atleast_2d(A)
u, s, vh = svd(A)
tol = max(atol, rtol * s[0])
nnz = (s >= tol).sum()
ns = vh[nnz:].conj().T
return ns
np.random.seed(42)
eigen_values = [1,2,3,3,4,5]
N = len(eigen_values)
D = np.matrix(np.diag(eigen_values))
#generate random unitary matrix
U = np.matrix(orth(np.random.random_sample((N, N))))
#construct test matrix - it has the same eigenvalues as D
A = U.T * D * U
#get eigenvectors corresponding to eigenvalue 3
Omega = nullspace(A - np.eye(N)*3)
_,M = Omega.shape
for i in range(0, M):
v = Omega[:,i]
print(i, norm(A*v - 3*v))
Related
I have coded Isomap function starting with computing the eulidean distance matrix (using scipy.spatial.distance.cdist), next basing on K-nearest neighbors method and Dijkstra algorithm (to determinate the shortest path) I have Computed the full distance matrix over all paths, finally I have did map computations, following by the dimensionality reduction.
BUT, I want to use epsilon instead of K-nearest neighbors like in the following :
Y = isomap (X, epsilon, d)
• X is an n × m matrix which corresponds to n points with m attributes.
• epsilon is an anonymous function of the distance matrix used to find the parameters of neighborhood. (The neighborhood graph must be formed by eliminating the edges whose width is greater to epsilon of the complete distance graph).
• d is a parameter which signifies the output dimension.
• Y is an n × d matrix, which signifies the embedding resulting from isomap.
THANKS in advance
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
def distance_Matrix(X):
return cdist(X,X,'euclidean')
def Dijkstra(h):
q = h.copy()
for i in range(ndata):
for j in range(ndata):
k = np.argmin(q[i,:])
while not(np.isinf(q[i,k])):
q[i,k] = np.inf
for l in neighbours[k,:]:
possible = h[i,l] + h[l,k]
if possible < h[i,k]:
h[i,k] = possible
k = np.argmin(q[i,:])
return h
def MDS(D,newdim=2):
n = D.shape[0]
# Torgerson formula
I = np.eye(n)
J = np.ones(D.shape)
J = I-(1/n)*J
B = (-1/2)*np.dot(np.dot(J,D),np.dot(D,J)) # B = -(1/2).JD²J
#
eigenval, eigenvec = np.linalg.eig(B)
indices = np.argsort(eigenval)[::-1]
eigenval = eigenval[indices]
eigenvec = eigenvec[:, indices]
# dimension reduction
K = eigenvec[:, :newdim]
L = np.diag(eigenval[:newdim])
# result
Y = K # L **(1/2)
return np.real(Y)
def isomap(data,newdim=2,K=12):
ndata = np.shape(data)[0]
ndim = np.shape(data)[1]
d = distance_Matrix(X)
# replace begin
# K-nearest neighbours
indices = d.argsort()
#notneighbours = indices[:,K+1:]
neighbours = indices[:,:K+1]
# replace end
h = np.ones((ndata,ndata),dtype=float)*np.inf
for i in range(ndata):
h[i,neighbours[i,:]] = d[i,neighbours[i,:]]
h = Dijkstra(h)
return MDS(h,newdim)
Try sklearn.neighbors.radius_neighbors_graph for your distance matrix
I am trying to find the elements of a matrix inverse for an ill-conditioned matrix
Consider the complex non-Hermitian matrix M, I know this matrix has one zero eigenvalue, and is therefor singular. However, I need to find the sum of the matrix elements: v#f(M)#u, where u and v are both vectors and f(x)=1/x (effectively the matrix inverse). I know that the zeroth eigenvalue does not contribute to this sum, so there is no explicit issue with the singularity. However, my code is very numerically unstable, I presume this is a consequence of an error in finding the eigenvalues of the system.
Starting by building the preliminary matrices:
import numpy as np
import scipy as sc
g0 = np.array([0,0,1])
g1 = np.array([0,1,0])
e0 = np.array([1,0,0])
sm = np.outer(g0, e0)
sp = np.outer(e0, g0)
def spre(op):
return np.kron(np.eye(op.shape[0]),op)
def spost(op):
return np.kron(op.T,np.eye(op.shape[0]))
def sprepost(op1,op2):
return np.kron(op1.T,op2)
sm_reg = spre(sm)
sp_reg = spre(sp)
spsm_reg=spre(sp#sm)
hil_dim = int(g0.shape[0])
cav_proj= np.eye(hil_dim).reshape(hil_dim**2,)
rho0 =(np.outer(e0,e0)).reshape(hil_dim**2,)
def ham(g):
return g * (np.outer(g1,e0) + np.outer(e0, g1))
def lind_op(A):
L = 2 * sprepost(A,A.conj().T) - spre(A.conj().T # A)
L += - spost(A.conj().T # A)
return L
def JC_lio(g, kappa, gamma):
unit = -1j * (spre(ham(g)) - spost(ham(g)))
lind = gamma * lind_op(np.outer(g0 , e0)) + kappa * lind_op(np.outer(g0 , g1))
return unit + lind
Now define a function that first finds the left and right eigenvalues, and then finds the sum of the matrix elements:
def power_int(g, kappa, gamma):
# Construct the non-Hermitian matrix of interest
lio = JC_lio(g,kappa,gamma)
#Find its left and right eigenvectors:
ev, left, right = scipy.linalg.eig(lio, left=True,right=True)
# Find the appropriate normalisation factors
norm = np.array([(left.conj().T[ii]).dot(right.conj().T[ii]) for ii in range(len(ev))])
#Find the similarity transformation for the problem
P = right
Pinv = (left/norm).conj().T
#find the projectors for the Eigenbasis
Proj = [np.outer(P.conj().T[ii],Pinv[ii]) for ii in range(len(ev))]
#Find the relevant matrix elements between the Eigenbasis and the projectors --- this is where the zero eigenvector gets removed
PowList = [(spsm_reg# Proj[ii] # rho0).dot(cav_proj) for ii in range(len(ev))]
#apply the function
Pow = 0
for ii in range(len(ev)):
if PowList[ii]==0:
Pow = Pow
else:
Pow += PowList[ii]/ev[ii]
return -np.pi * np.real(Pow)
#example run:
grange = np.linspace(0.001,10,40)
dat = np.array([power_int(g, 1, 1) for g in grange])
Running this code leads to extremely oscillatory results where I expect a smooth curve. I suspect this error is due to poor accuracy in determining the eigenvectors, but I can't seem to find any documentation on this. Any insights would be welcome.
I am trying to solve the inverse of a banded sparse matrix in the most efficient way so that I can incorporate this in my real-time system. I am generating sparse-banded matrices which represent a convolution operation. Currently, I am using spsolve from scipy.sparse.linalg library. I found that there is a better way by using solve_banded from the scipy.linalg library. However, solve_banded requires (l,u) which is the number of non-zero lower and upper diagonals and ab which (l + u + 1, M) array like banded matrix. I am not sure how to convert my code so that I can use solve_banded. Any help with this regard is highly appreciated.
import numpy as np
from scipy import linalg
import math
import time
from scipy.sparse import spdiags
from scipy.sparse.linalg import spsolve
def ABC(deg, fc, N):
r"""Generate sparse-banded matrices
"""
omc = 2*math.pi*fc
t = ((1-math.cos(omc))/(1+math.cos(omc)))**deg
p = 1
for k in np.arange(deg):
p = np.convolve(p,np.array([-1,1]),'full')
P = spdiags(np.kron(p,np.ones((N,1))).T, np.arange(deg+1), N-deg, N)
B = P.T.dot(P)
q = np.sqrt(t)
for k in np.arange(deg):
q = np.convolve(q,np.array([1,1]),'full')
Q = spdiags(np.kron(q,np.ones((N,1))).T, np.arange(deg+1), N-deg, N)
C = Q.T.dot(Q)
A = B + C
return A,B,C
if __name__ == '__main__':
mu = 0.1
deg = 3
wc = 0.1
for i in np.arange(1,7,1):
# some dense random vector
x = np.random.rand(10**i,1)
# generate sparse banded matrices
A,_,C = ABC(deg, wc, 10**i)
# another banded matrix
G = mu*A.dot(A.T) + C.dot(C.T)
# SCIPY SPSOLVE
st = time.time()
y = spsolve(G,x)
et = time.time()
print("SCIPY SPSOLVE: N = ", 10**i, "Time taken: ", et-st)
Results
SCIPY SPSOLVE: N = 10 Time taken: 0.0
SCIPY SPSOLVE: N = 100 Time taken: 0.0
SCIPY SPSOLVE: N = 1000 Time taken: 0.015689611434936523
SCIPY SPSOLVE: N = 10000 Time taken: 0.020943641662597656
SCIPY SPSOLVE: N = 100000 Time taken: 0.16722917556762695
SCIPY SPSOLVE: N = 1000000 Time taken: 1.7254831790924072
Solved it using solveh_banded from the scipy library. Very fast matrix inversion technique for extremely large sparse-banded matrices when the matrix is symmetric and positive definite banded matrix.
from scipy.linalg import solveh_banded
def sp_inv(A, x):
A = A.toarray()
N = np.shape(A)[0]
D = np.count_nonzero(A[0,:])
ab = np.zeros((D,N))
for i in np.arange(1,D):
ab[i,:] = np.concatenate((np.diag(A,k=i),np.zeros(i,)),axis=None)
ab[0,:] = np.diag(A,k=0)
y = solveh_banded(ab,x,lower=True)
return y
I have a varimax rotation code from wikipedia
def varimax(Phi, gamma = 1, q = 20, tol = 1e-6):
from numpy import eye, asarray, dot, sum, diag
from numpy.linalg import svd
p,k = Phi.shape
R = eye(k)
d=0
for i in xrange(q):
d_old = d
Lambda = dot(Phi, R)
u,s,vh = svd(dot(Phi.T,asarray(Lambda)**3 - (gamma/p) * dot(Lambda, diag(diag(dot(Lambda.T,Lambda))))))
R = dot(u,vh)
d = sum(s)
if d/d_old < tol: break
return dot(Phi, R)
and I use it this way:
varimax(X) ## X is a numpy array
but it returns numbers like this: 2.4243244e-15 !! that's not my expected answer
should I change other arguments? for example gamma or q??
I'm not familiar with varimax rotation
Can you post an example of what you're using as the inputs for X and what kind of outputs you're expecting?
I tested your code by fixing up the indenting in your code, like this:
from numpy import eye, asarray, dot, sum, diag
from numpy.linalg import svd
def varimax(Phi, gamma = 1, q = 20, tol = 1e-6):
p,k = Phi.shape
R = eye(k)
d=0
for i in xrange(q):
d_old = d
Lambda = dot(Phi, R)
u,s,vh = svd(dot(Phi.T,asarray(Lambda)**3 - (gamma/p) * dot(Lambda, diag(diag(dot(Lambda.T,Lambda))))))
R = dot(u,vh)
d = sum(s)
if d/d_old < tol: break
return dot(Phi, R)
And making some dummy components to test it like this:
import numpy as np
comps = np.linalg.svd(
np.random.randn(100,10),
full_matrices=False
)[0]
rot_comps = varimax(comps)
print("Original components dimension {}".format(comps.shape))
print("Component norms")
print(np.sum(comps**2, axis=0))
print("Rotated components dimension {}".format(rot_comps.shape))
print("Rotated component norms")
print(np.sum(rot_comps**2, axis=0))
The inputs and outputs are 100 x 10 arrays with unit norm, just as you'd expect.
I am trying to evaluate the density of multivariate t distribution of a 13-d vector. Using the dmvt function from the mvtnorm package in R, the result I get is
[1] 1.009831e-13
When i tried to write the function by myself in Python (thanks to the suggestions in this post:
multivariate student t-distribution with python), I realized that the gamma function was taking very high values (given the fact that I have n=7512 observations), making my function going out of range.
I tried to modify the algorithm, using the math.lgamma() and np.linalg.slogdet() functions to transform it to the log scale, but the result I got was
8.97669876e-15
This is the function that I used in python is the following:
def dmvt(x,mu,Sigma,df,d):
'''
Multivariate t-student density:
output:
the density of the given element
input:
x = parameter (d dimensional numpy array or scalar)
mu = mean (d dimensional numpy array or scalar)
Sigma = scale matrix (dxd numpy array)
df = degrees of freedom
d: dimension
'''
Num = math.lgamma( 1. *(d+df)/2 ) - math.lgamma( 1.*df/2 )
(sign, logdet) = np.linalg.slogdet(Sigma)
Denom =1/2*logdet + d/2*( np.log(pi)+np.log(df) ) + 1.*( (d+df)/2 )*np.log(1 + (1./df)*np.dot(np.dot((x - mu),np.linalg.inv(Sigma)), (x - mu)))
d = 1. * (Num - Denom)
return np.exp(d)
Any ideas why this functions does not produce the same results as the R equivalent?
Using as x = (0,0) produces similar results (up to a point, die to rounding) but with x = (1,1)1 I get a significant difference!
I finally managed to 'translate' the code from the mvtnorm package in R and the following script works without numerical underflows.
import numpy as np
import scipy.stats
import math
from math import lgamma
from numpy import matrix
from numpy import linalg
from numpy.linalg import slogdet
import scipy.special
from scipy.special import gammaln
mu = np.array([3,3])
x = np.array([1, 1])
Sigma = np.array([[1, 0], [0, 1]])
p=2
df=1
def dmvt(x, mu, Sigma, df, log):
'''
Multivariate t-student density. Returns the density
of the function at points specified by x.
input:
x = parameter (n x d numpy array)
mu = mean (d dimensional numpy array)
Sigma = scale matrix (d x d numpy array)
df = degrees of freedom
log = log scale or not
'''
p = Sigma.shape[0] # Dimensionality
dec = np.linalg.cholesky(Sigma)
R_x_m = np.linalg.solve(dec,np.matrix.transpose(x)-mu)
rss = np.power(R_x_m,2).sum(axis=0)
logretval = lgamma(1.0*(p + df)/2) - (lgamma(1.0*df/2) + np.sum(np.log(dec.diagonal())) \
+ p/2 * np.log(math.pi * df)) - 0.5 * (df + p) * math.log1p((rss/df) )
if log == False:
return(np.exp(logretval))
else:
return(logretval)
print(dmvt(x,mu,Sigma,df,True))
print(dmvt(x,mu,Sigma,df,False))