Solving a 9x9 matrix with Gaussian elimination with pivoting in Python - python

I need to solve a whole range of 8x8 and 9x9 matrices, so I thought I could build a Python program to make the whole thing easier.
So far I have managed to create:
from __future__ import division
import numpy as np

def solveEqns(A, v):
    def lu(A):
        # Factor A into LU by Gaussian elimination with scaled partial pivoting
        n, m = np.shape(A)
        if n != m:
            print "Error: input matrix is not square"
            return None
        # Generate initial index vector
        p = range(n)
        # Determine the largest (in magnitude) element in each row. These
        # factors are used to scale the pivot elements for comparison purposes
        # when deciding which row to use as a pivot row.
        s = [0] * n
        for i in xrange(n):
            smax = 0.0
            for j in xrange(n):
                smax = max(smax, abs(A[i][j]))
            s[i] = smax
        # Begin Gaussian elimination.
        for k in xrange(n - 1):
            # Find the remaining row with the largest scaled pivot.
            rmax = 0.0
            for i in xrange(k, n):
                r = abs(A[p[i][k]] / s[p[i]])
                if r > rmax:
                    rmax = r
                    j = i
            # Row j has the largest scaled pivot, so "swap" that row with the
            # current row (row k). The swap is not actually done by copying rows,
            # but by swapping two entries in an index vector.
            p[j], p[k] = (p[k], p[j])
            # Now carry out the next elimination step as usual, except for the
            # added complication of the index vector.
            for i in xrange(k + 1, n):
                xmult = A[p[i], k] / A[p[k], k]
                A[p[i], k] = xmult
                for j in xrange(k + 1, n):
                    A[p[i], j] = A[p[i], j] - xmult * A[p[k], j]
        # All done, return factored matrix A and permutation vector p
        return (A, p)

    def solve(A, p, b):
        # Solves Ax = b given an LU factored matrix A and permutation vector p
        n, m = np.shape(A)
        if n != m:
            print "Error: input matrix is not square"
            return None
        # Forward solve
        x = np.zeros(n)
        for k in xrange(n - 1):
            for i in xrange(k + 1, n):
                b[p[i]] = b[p[i]] - A[p[i], k] * b[p[k]]
        # Backward solve
        for i in xrange(n - 1, -1, -1):
            sum = b[p[i]]
            for j in xrange(i + 1, n):
                sum = sum - A[p[i], j] * x[j]
            x[i] = sum / A[p[i], i]
        # All done, return solution vector
        return x

    lu(A)
    return solve(A, p, v)

def circuit():
    A = np.array([[1, 0, 0, 0, 0, 8, 0, 0, 0],
                  [0, 1, 0, 0, 5, 0, 0, 0, 0],
                  [0, 1, 0, 0, 5, 0, 0, 0, 0],
                  [0, 0, 0, 1, -1, 1, 0, 0, 0],
                  [0, 0, 1, 0, 0, 0, 1, -1, 0],
                  [0, 0, 1, 0, 0, 0, 1, 0, -1],
                  [0, 1, 0, 0, -1, 0, 0, 0, 1],
                  [1, 0, 0, 0, 0, -1, 0, 1, 0],
                  [1, -1, 0, 1, 0, 0, 0, 0, 0]])
    v = np.array([9, -12, -0.5, 0, 0, 0, 0, 0, 0])
    I = solveEqns(A, v)
    return I
to solve the 9x9 matrix A at the end. This is one of the easier ones I need to solve, so I can solve it outside of Python to check whether the results coming through are accurate.
I'm getting a traceback error on line 26:
Traceback (most recent call last):
File "<ipython-input-110-6daf773db1e3>", line 1, in <module>
solveEqns(A,b)
File "C:/Users/SamMc/Documents/Python Scripts/q6u1510416 v4.py", line 65, in solveEqns
lu(A)
File "C:/Users/SamMc/Documents/Python Scripts/q6u1510416 v4.py", line 26, in lu
r = abs( A[p[i][k]] / s[p[i]] )
TypeError: 'int' object has no attribute '__getitem__'
I can't figure out why it's not pulling a number through from the matrix.
Any help would be greatly appreciated.
Thanks
Sam

You might use Gauss elimination with scaled pivoting. The code is shown below.
import numpy as np

def gauss_pivot(a, b, tol=1.0e-12):
    """
    x = gaussPivot(a,b,tol=1.0e-12).
    Solves [a]{x} = {b} by Gauss elimination with
    scaled row pivoting
    """
    a = np.copy(a)
    b = np.copy(b)
    n = len(b)
    assert np.all(np.shape(a) == (n, n))  # check that a is a square matrix
    # Set up scale factors
    s = np.zeros(n)
    for i in range(n):
        s[i] = max(np.abs(a[i, :]))  # find the max of each row
    for k in range(0, n - 1):  # pivot row
        # Row interchange, if needed
        p = np.argmax(np.abs(a[k:n, k]) / s[k:n])  # offset (from row k) of the row with the largest scaled pivot in column k
        if abs(a[p + k, k]) < tol:
            raise Exception("Matrix is singular")
        if p != 0:  # swap the current row with the pivot row if they differ
            a[[k, p + k], :] = a[[p + k, k], :]
            b[k], b[p + k] = b[p + k], b[k]
            s[k], s[p + k] = s[p + k], s[k]
        # Elimination phase of matrix a
        for i in range(k + 1, n):
            if a[i, k] != 0.0:  # skip if a(i,k) is already zero
                lam = a[i, k] / a[k, k]
                a[i, k:n] = a[i, k:n] - lam * a[k, k:n]
                b[i] = b[i] - lam * b[k]
    if abs(a[n - 1, n - 1]) < tol:
        raise Exception("Matrix is singular")
    # Back substitution phase
    x = np.zeros_like(b)
    x[n - 1] = b[n - 1] / a[n - 1, n - 1]
    for k in range(n - 2, -1, -1):
        x[k] = (b[k] - np.dot(a[k, k+1:n], x[k+1:n])) / a[k, k]
    return x

a = np.random.randn(100, 100) * 10
b = np.random.randn(100) * 10
x = gauss_pivot(a, b)

if np.allclose(np.dot(a, x), b):
    print("x is the correct solution")
If you want the code to run faster, you could replace x with b, so that on return b contains the solution.
You might also slightly modify the elimination phase so that the elements of matrix a below the diagonal are not zeroed, since they are irrelevant during the back substitution phase. The code then becomes:
import numpy as np

def gauss_pivot(a, b, tol=1.0e-12):
    """
    x = gaussPivot(a,b,tol=1.0e-12).
    Solves [a]{x} = {b} by Gauss elimination with
    scaled row pivoting
    """
    a = np.copy(a)
    b = np.copy(b)
    n = len(b)
    assert np.all(np.shape(a) == (n, n))  # check that a is a square matrix
    # Set up scale factors
    s = np.zeros(n)
    for i in range(n):
        s[i] = max(np.abs(a[i, :]))  # find the max of each row
    for k in range(0, n - 1):  # pivot row
        # Row interchange, if needed
        p = np.argmax(np.abs(a[k:n, k]) / s[k:n])  # offset (from row k) of the row with the largest scaled pivot in column k
        if abs(a[p + k, k]) < tol:
            raise Exception("Matrix is singular")
        if p != 0:  # swap the current row with the pivot row if they differ
            a[[k, p + k], :] = a[[p + k, k], :]
            b[k], b[p + k] = b[p + k], b[k]
            s[k], s[p + k] = s[p + k], s[k]
        # Elimination phase of matrix a (entries below the diagonal are left untouched)
        for i in range(k + 1, n):
            if a[i, k] != 0.0:  # skip if a(i,k) is already zero
                lam = a[i, k] / a[k, k]
                a[i, k+1:n] = a[i, k+1:n] - lam * a[k, k+1:n]
                b[i] = b[i] - lam * b[k]
    if abs(a[n - 1, n - 1]) < tol:
        raise Exception("Matrix is singular")
    # Back substitution phase, the solution overwrites b
    b[n - 1] = b[n - 1] / a[n - 1, n - 1]
    for k in range(n - 2, -1, -1):
        b[k] = (b[k] - np.dot(a[k, k+1:n], b[k+1:n])) / a[k, k]
    return b
To use LU decomposition instead, which is better suited to a b containing more than one column, the LU code is shown below.
import numpy as np

def lu_decomp(a, tol=1.0e-9):
    a = np.copy(a)
    n = len(a)
    assert np.all(np.shape(a) == (n, n))  # check that a is a square matrix
    seq = np.arange(n, dtype=int)
    s = np.zeros(n)
    for i in range(n):
        s[i] = max(abs(a[i, :]))
    for k in range(0, n - 1):
        p = np.argmax(np.abs(a[k:n, k]) / s[k:n])  # offset (from row k) of the row with the largest scaled pivot
        if abs(a[p + k, k]) < tol:
            raise Exception("Matrix is singular")
        if p != 0:
            a[[k, p + k], :] = a[[p + k, k], :]
            s[k], s[p + k] = s[p + k], s[k]
            seq[k], seq[p + k] = seq[p + k], seq[k]
        # Elimination
        for i in range(k + 1, n):
            if a[i, k] != 0.0:
                lam = a[i, k] / a[k, k]
                a[i, k+1:n] = a[i, k+1:n] - lam * a[k, k+1:n]
                a[i, k] = lam
    return a, seq

def lu_solve(a, b, seq):
    n = len(a)
    x = b.copy()
    for i in range(n):
        x[i] = b[seq[i]]
    # Solution
    for k in range(1, n):
        x[k] = x[k] - np.dot(a[k, 0:k], x[0:k])
    x[n - 1] = x[n - 1] / a[n - 1, n - 1]
    for k in range(n - 2, -1, -1):
        x[k] = (x[k] - np.dot(a[k, k+1:n], x[k+1:n])) / a[k, k]
    return x

a2 = np.random.randn(500, 500) * 100
b2 = np.random.randn(500, 20) * 100

a_decomposed, seq = lu_decomp(a2)
x2 = np.zeros_like(b2)
for col in range(b2.shape[1]):
    x2[:, col] = lu_solve(a_decomposed, b2[:, col], seq)

if np.allclose(np.dot(a2, x2), b2):
    print("x2 is the correct solution")
Both methods give the output:
Gauss Elimination
x is the correct solution
LU method
x2 is the correct solution
I recommend you use the scipy.linalg package: from scipy.linalg import solve, lu_factor, lu_solve. These routines perform much faster for large matrices. Alternatively, you can use the same code above but annotate the functions with numba's jit, so that the performance is much better for large matrices:
from numba import jit

@jit
def gauss_pivot(a, b):
    ...
    ...
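For reference, here is a minimal sketch of the scipy.linalg route mentioned above (my illustration, not part of the original answer; it assumes a square, non-singular system a x = b):

import numpy as np
from scipy.linalg import lu_factor, lu_solve

a = np.random.randn(9, 9)
b = np.random.randn(9)

lu, piv = lu_factor(a)        # LU factorisation with partial pivoting
x = lu_solve((lu, piv), b)    # reuse the factorisation for any right-hand side

print(np.allclose(a @ x, b))  # expected to print True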
Acknowledgement: the code is inspired by the book Numerical Methods in Engineering with Python 3 by Prof. Jaan Kiusalaas:
https://www.amazon.co.uk/Numerical-Methods-Engineering-Python-3/dp/1107033853/ref=sr_1_1?ie=UTF8&qid=1517845946&sr=8-1&keywords=numerical+method+in+science+and+engineering+with+python

Related

Can I specify a function in python this way or will my results be corrupted

I am trying to bring something from an R package by Lotze & Loecher into Python, since the rest of the project is in Python.
Here is the Python code:
def dbeta(x, shape1, shape2):
    from scipy.stats import beta
    result = beta.pdf(x=x, a=shape1, b=shape2, loc=0, scale=1)
    return result

def pbeta(q, shape1, shape2):
    from scipy.stats import beta
    result = beta.cdf(x=q, a=shape1, b=shape2, loc=0, scale=1)
    return result

def combinations(array, tuple_length, prev_array=[]):
    if len(prev_array) == tuple_length:
        return [prev_array]
    combs = []
    for f, val in enumerate(array):
        prev_array_extended = prev_array.copy()
        prev_array_extended.append(val)
        combs += combinations(array[f+1:], tuple_length, prev_array_extended)
    return combs
After defining these functions, I think the problem is too many iterations in the following function:
from scipy.integrate import quad

def best_binominal_bandit(x, n, alpha=1, beta=1):
    ans = []
    # x = vector of number of successes
    # n = vector of number of tries
    k = len(x)
    l = list(range(0, k))
    b = combinations(l, k-1)
    for i in l:
        indx = b[i]
        def f(z):
            r = dbeta(z, x[i] + alpha, n[i] - x[i] + beta)
            for j in indx:
                r = r * pbeta(z, x[j] + alpha, n[j] - x[j] + beta)
            return r
        a = quad(f, 0, 1)[0]
        ans.append(a)
    return ans
So when calling
x = [10,20,30,50]
n = [100,102,120,130]
best_binominal_bandit(x, n)
I do not get results similar to those in their specification. I have the feeling that there are simply too many iterations in the f function. After all, the results should add up to 1.
I came up with a solution; I will post it here and then close the question. It seems I misread something and made an easy mistake (which kept vexing me, however).
The combinations function is not necessary for what I intended; rather, the ith element of the list simply needs to be excluded.
def dbeta(x, shape1, shape2):
    from scipy.stats import beta
    result = beta.pdf(x=x, a=shape1, b=shape2, loc=0, scale=1)
    return result

def pbeta(q, shape1, shape2):
    from scipy.stats import beta
    result = beta.cdf(x=q, a=shape1, b=shape2, loc=0, scale=1)
    return result

from scipy.integrate import quad

def best_binominal_bandit(x, n, alpha=1, beta=1):
    ans = []
    k = len(x)
    l = list(range(0, k))
    for i in l:
        excluded_index = i
        indx = l[:excluded_index] + l[excluded_index+1:]
        def f(z):
            r = dbeta(z, x[i] + alpha, n[i] - x[i] + beta)
            print(z)
            for j in indx:
                r = r * pbeta(z, x[j] + alpha, n[j] - x[j] + beta)
            return r
        a = quad(f, 0, 1)[0]
        ans.append(a)
    return ans
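As a quick sanity check (my addition, not part of the original post): the returned values should behave like probabilities of each arm being the best, so they should sum to roughly 1.

x = [10, 20, 30, 50]
n = [100, 102, 120, 130]
probs = best_binominal_bandit(x, n)
print(probs, sum(probs))  # the total should be close to 1.0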
All in all, this seems to work as a Python translation of p. 648 in:
Scott, S. L. (2010). A modern Bayesian look at the multi-armed bandit. Applied Stochastic Models in Business and Industry, 26(6), 639–658. doi:10.1002/asmb.874

Matrix determinant

I have this function to get the determinant of a matrix:
def determinant(self) -> int:
    """
    Calculates the Determinant of matrix objects.

    Parameters
    ----------
    self

    Returns
    -------
    int

    Example
    -------
    >>> _matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    >>> _matrix = Matrix(_matrix)
    >>> _matrix.determinant()
    0
    """
    if self.row != self.column:
        raise ValueError('Cannot get determinant of this matrix! Must be a square Matrix')
    else:
        def det(matrix):
            row = len(matrix)
            col = len(matrix[0])
            if (row, col) == (1, 1):
                return matrix[0][0]
            # hard coding for 2x2
            elif (row, col) == (2, 2):
                return matrix[0][0] * matrix[1][1] - matrix[0][1] * matrix[1][0]
            # using the Sarrus method to solve 3x3, it's a little faster
            elif (row, col) == (3, 3):
                matrix1 = matrix[:]
                # Extending matrix to use Sarrus Rule.
                for i in range(row - 1):
                    _col = []
                    for j in range(col):
                        _col.append(matrix1[i][j])
                    matrix1.append(_col)
                # Calculating Determinant
                # Adding part
                add_pointers = [(i, i) for i in range(row)]
                result = 0
                for pointer in range(row):
                    temp = 1
                    for tup in add_pointers:
                        i, j = tup
                        temp *= matrix1[i + pointer][j]
                    result += temp
                # Subtracting part
                sub_pointers = [((row - 1) - i, 0 + i) for i in range(row)]
                for pointers in range(row):
                    temp = 1
                    for tup in sub_pointers:
                        i, j = tup
                        temp *= matrix1[i + pointers][j]
                    result -= temp
                return result
            else:
                sign = -1
                result = 0
                row1 = [matrix[0][i] * (sign ** i) for i in range(col)]
                for x, y in enumerate(row1):
                    mat = matrix[:][1:]
                    sub_matrix = [[mat[i][j] for j in range(col) if j != x] for i in range(row - 1)]
                    result += y * det(sub_matrix)
                return result
        return det(self.matrix)
I have hard-coded the determinant for 2x2 and 3x3 matrices, and then I'm recursing through the rest.
As you can see, it uses recursion on nxn matrices... I'm sure there is a faster way; this is extremely slow.
A Python implementation of the method would be appreciated, thank you.
The most common approaches would be either list comprehensions or the numpy module.
Reason: the for loops will almost certainly be slower than numpy operations, simply because of the contiguous and homogeneous nature of a numpy array. In simple terms, numpy is basically one memory block all of the same type, whereas a list points to different memory blocks and can contain any type.
Here is the numpy example (for 2d):
import numpy as np
a = np.array([[1, 2], [3, 4]])
result = np.linalg.det(a)
print(result)
One of the comments already (correctly) points to this:
https://numpy.org/doc/stable/reference/generated/numpy.linalg.det.html
For more general, larger matrices, the advantages would be significant.
Find determinant for 3x3 matrix using the first row:
"""
M:
M11 M12 M13
M21 M22 M23
M31 M32 M33
detM:
M11 * det2D([ [M22, M23], [M32, M33] ]) -
M12 * det2D([ [M21, M23], [M31, M33] ]) +
M13 * det2D([ [M21, M22], [M31, M32] ])
"""
import numpy as np

def det3D(M):
    a = M[0][0] * det2D(np.array([[M[1][1], M[1][2]], [M[2][1], M[2][2]]]))
    b = M[0][1] * det2D(np.array([[M[1][0], M[1][2]], [M[2][0], M[2][2]]]))
    c = M[0][2] * det2D(np.array([[M[1][0], M[1][1]], [M[2][0], M[2][1]]]))
    return a - b + c

def det2D(M):
    return M[0][0] * M[1][1] - M[0][1] * M[1][0]

M = [[1, 0, 0], [0, 2, 2], [0, 2, 4]]
A = det3D(M)
B = round(np.linalg.det(M))
print(A)
print(B)
print(A == B)
Output:
4
4
True
Find the determinant of an NxN matrix using recursion:
Note: there are two methods below for finding determinants; smartDetNxN runs >35X faster than detNxN in the best case on a large matrix.
import numpy as np

# compute partial determinant terms
def terms(M, col=1, row=1):
    return [x[:col-1] + x[col:] for x in M[0:row-1] + M[row:]]

# compute determinant using the first row
def detNxN(M):
    N = len(M[0])
    # Recursion base: 2x2 determinant
    if N == 2:
        M = np.array(M)
        return M[0][0] * M[1][1] - M[0][1] * M[1][0]
    # Recursion loop
    else:
        rowValues = M[:1][0]
        colsSigns = [1 if (col % 2 == 0) else -1 for col in range(N)]
        colsDets = [detNxN(terms(M, col + 1)) for col in range(N)]
        return sum([rowValues[col] * colsSigns[col] * colsDets[col] for col in range(N)])

# compute determinant using the optimum row while skipping zero-valued columns
def smartDetNxN(M):
    N = len(M[0])
    # Recursion base: 2x2 determinant
    if N == 2:
        M = np.array(M)
        return M[0][0] * M[1][1] - M[0][1] * M[1][0]
    # Recursion loop
    else:
        # find the optimum row
        flatM = [len(np.flatnonzero(x)) for x in M]
        row = flatM.index(min(flatM))
        rowSign = 1 if (row % 2 == 0) else -1
        rowValues = M[row]
        # compute partial determinants
        colsSigns = [1 if (col % 2 == 0) else -1 for col in range(N)]
        colsDets = [smartDetNxN(terms(M, col + 1, row + 1)) if (rowValues[col] != 0) else 0 for col in range(N)]
        return sum([rowValues[col] * rowSign * colsSigns[col] * colsDets[col] for col in range(N)])

# test case for a matrix
def testCase(M):
    print()
    N1 = len(M[0])
    N2 = len(M[0])
    A = smartDetNxN(M)
    B = round(np.linalg.det(M))
    print("Matrix %ix%i:" % (N1, N2))
    print("Actual detM = %d, Expected detM = %d " % (A, B))
    print("Test Pass:", A == B)

# main
def main():
    # Matrix 2 x 2
    M1 = [[1, 2], [0, 1]]
    testCase(M1)
    # Matrix 3 x 3
    M2 = [[1, 2, 3], [2, 1, 2], [3, 2, 1]]
    testCase(M2)
    # Matrix 4 x 4
    M3 = [[1, 2, 3, 4], [2, 1, 0, 3], [3, 0, 1, 2], [4, 0, 0, 1]]
    testCase(M3)
    # Matrix 10 x 10
    M4 = [
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [1, 1, 0, 0, 0, 0, 0, 0, 0, 8],
        [2, 0, 1, 0, 0, 0, 0, 0, 0, 7],
        [3, 0, 0, 1, 0, 0, 0, 0, 0, 6],
        [4, 0, 0, 0, 1, 0, 0, 0, 0, 5],
        [5, 0, 0, 0, 0, 1, 0, 0, 0, 4],
        [6, 0, 0, 0, 0, 0, 1, 0, 0, 3],
        [7, 0, 0, 0, 0, 0, 0, 1, 0, 2],
        [8, 0, 0, 0, 0, 0, 0, 0, 1, 1],
        [9, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    ]
    testCase(M4)

main()
Output:
Matrix 2x2:
Actual detM = 1, Expected detM = 1
Test Pass: True
Matrix 3x3:
Actual detM = 8, Expected detM = 8
Test Pass: True
Matrix 4x4:
Actual detM = 20, Expected detM = 20
Test Pass: True
Matrix 10x10:
Actual detM = 999, Expected detM = 999
Test Pass: True

Efficient implementation of pairwise distances computation between observations for mixed numeric and categorical data

I am working on a data science project in which I have to compute the Euclidean distance between every pair of observations in a dataset.
Since I am working with very large datasets, I have to use an efficient implementation of pairwise distances computation (both in terms of memory usage and computation time).
One solution is to use the pdist function from Scipy, which returns the result in a 1D array, without duplicate instances.
However, this function is not able to deal with categorical variables. For these, I want to set the distance to 0 when the values are the same and 1 otherwise.
I have tried to implement this variant in Python with Numba. The function takes as input the 2D Numpy array containing all the observations and a 1D array containing the types of the variables (either float64 or category).
Here is the code:
import numpy as np
from numba.decorators import autojit

def pairwise(X, types):
    m = X.shape[0]
    n = X.shape[1]
    D = np.empty((int(m * (m - 1) / 2), 1), dtype=np.float)
    ind = 0
    for i in range(m):
        for j in range(i+1, m):
            d = 0.0
            for k in range(n):
                if types[k] == 'float64':
                    tmp = X[i, k] - X[j, k]
                    d += tmp * tmp
                else:
                    if X[i, k] != X[j, k]:
                        d += 1.
            D[ind] = np.sqrt(d)
            ind += 1
    return D.reshape(1, -1)[0]

pairwise_numba = autojit(pairwise)

vectors = np.random.rand(20000, 100)
types = np.array(['float64'] * 100)
dists = pairwise_numba(vectors, types)
This implementation is very slow despite the use of Numba. Is it possible to improve my code to make it faster?
In case you really want numba to perform fast, you need to jit the function in nopython mode, otherwise numba may fall back to object mode, which is slower (and can be quite slow).
However, your function cannot be compiled in nopython mode (as of numba version 0.43.1), because of:
the dtype argument to np.empty. np.float is simply Python's float and will be translated by NumPy (but not by numba) to np.float_. If you use numba you have to use np.float64 there.
string support in numba, which is lacking. So the types[k] == 'float64' line will not compile.
The first issue is trivially fixed. Regarding the second issue: instead of trying to make the string comparisons work, just provide a boolean array. Using a boolean array and evaluating one boolean for truthiness will also be significantly faster than comparing up to 7 characters, especially in the innermost loop!
So it might look like this:
import numpy as np
import numba as nb

@nb.njit
def pairwise_numba(X, is_float_type):
    m = X.shape[0]
    n = X.shape[1]
    D = np.empty((int(m * (m - 1) / 2), 1), dtype=np.float64)  # corrected dtype
    ind = 0
    for i in range(m):
        for j in range(i+1, m):
            d = 0.0
            for k in range(n):
                if is_float_type[k]:
                    tmp = X[i, k] - X[j, k]
                    d += tmp * tmp
                else:
                    if X[i, k] != X[j, k]:
                        d += 1.
            D[ind] = np.sqrt(d)
            ind += 1
    return D.reshape(1, -1)[0]
dists = pairwise_numba(vectors, types == 'float64') # pass in the boolean array
However, you can simplify the logic if you combine scipy.spatial.distance.pdist on the float types with numba logic that counts the unequal categoricals:
from scipy.spatial.distance import pdist

@nb.njit
def categorial_sum(X):
    m = X.shape[0]
    n = X.shape[1]
    D = np.zeros(int(m * (m - 1) / 2), dtype=np.float64)  # corrected dtype
    ind = 0
    for i in range(m):
        for j in range(i+1, m):
            d = 0.0
            for k in range(n):
                if X[i, k] != X[j, k]:
                    d += 1.
            D[ind] = d
            ind += 1
    return D

def pdist_with_categorial(vectors, types):
    where_float_type = types == 'float64'
    # calculate the squared distance of the float values
    distances_squared = pdist(vectors[:, where_float_type], metric='sqeuclidean')
    # sum the number of mismatched categoricals, add that to the distances,
    # and then take the square root
    return np.sqrt(distances_squared + categorial_sum(vectors[:, ~where_float_type]))
It won't be significantly faster, but it drastically simplifies the logic in the numba function.
You can then also avoid the additional array creations by passing the squared distances into the numba function:
@nb.njit
def add_categorial_sum_and_sqrt(X, D):
    m = X.shape[0]
    n = X.shape[1]
    ind = 0
    for i in range(m):
        for j in range(i+1, m):
            d = 0.0
            for k in range(n):
                if X[i, k] != X[j, k]:
                    d += 1.
            D[ind] = np.sqrt(D[ind] + d)
            ind += 1
    return D

def pdist_with_categorial(vectors, types):
    where_float_type = types == 'float64'
    distances_squared = pdist(vectors[:, where_float_type], metric='sqeuclidean')
    return add_categorial_sum_and_sqrt(vectors[:, ~where_float_type], distances_squared)
autojit is deprecated; it is recommended to use jit instead. And you should almost always be using jit(nopython=True), which will make numba fail if something can't be lowered out of Python.
Using nopython on your code reveals two problems. One is an easy fix: this line needs to refer to a specific numpy type instead of float
- D = np.empty((int(m * (m - 1) / 2), 1), dtype=np.float)
+ D = np.empty((int(m * (m - 1) / 2), 1), dtype=np.float64)
The second is your use of strings to hold type information; numba has limited support for working with strings. You could instead encode the type information in a numeric array, e.g. 0 for numeric, 1 for categorical. So an implementation could be:
import numpy as np
from numba import jit

@jit(nopython=True)
def pairwise_nopython(X, types):
    m = X.shape[0]
    n = X.shape[1]
    D = np.empty((int(m * (m - 1) / 2), 1), dtype=np.float64)
    ind = 0
    for i in range(m):
        for j in range(i+1, m):
            d = 0.0
            for k in range(n):
                if types[k] == 0:  # numeric
                    tmp = X[i, k] - X[j, k]
                    d += tmp * tmp
                else:
                    if X[i, k] != X[j, k]:
                        d += 1.
            D[ind] = np.sqrt(d)
            ind += 1
    return D.reshape(1, -1)[0]

cvxpy+ecos: problem INFEASIBLE, how to scale correctly

I have the following code:
import numpy as np
import cvxpy as cp
import math
import sys

def solve05(p, a):
    m, n, ids, inv, k = 0, len(p), {}, {}, 0
    for i in range(n):
        for j in range(n):
            ids[(i, j)] = k
            inv[k] = (i, j)
            k = k + 1
    # Problem data
    A = np.zeros((2*n, n*n + n))
    D = np.zeros((2*n, n*n + n))
    b = np.zeros(2*n)
    B = np.zeros(2*n)
    c = np.zeros(2*n)
    for j in range(n):
        for i in range(n):
            idx = ids[(i, j)]
            A[j, idx] = 1
        b[j] = 1
    for i in range(n):
        for j in range(n):
            idx = ids[(i, j)]
            A[i+n, idx] = p[j]
        A[i+n, n*n+i] = -1
        b[i+n] = p[i]
    # Construct the problem
    x = cp.Variable(n*n + n)
    print("M = ", A)
    print("b = ", b)
    CF = 1e3
    print("Now scaling M by ", CF)
    A = A * CF
    print(A)
    b = b * CF
    constraints = [0 <= x, A*x == b]
    pex = x[n*n] + x[n*n+1] + x[n*n+2] + 1
    constraints.append(x[n*n] <= a[0]*CF)
    constraints.append(x[n*n+1] <= a[1]*CF)
    constraints.append(x[n*n+2] <= a[2]*CF)
    constraints.append(x[n*n] >= 0.01)
    constraints.append(x[n*n+1] >= 0.01)
    constraints.append(x[n*n+2] >= 0.01)
    ex = pex.__pow__(-1)
    print("Dummy variables: ", x[n*n], x[n*n+1], x[n*n+2])
    print("Objective function: ", ex)
    print("[should be convex] Curvature: ", ex.curvature)
    objective = cp.Minimize(ex)
    prob = cp.Problem(objective, constraints)
    result = prob.solve(verbose=True)
    print('problem state: ', prob.status)
    alpha = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            alpha[i, j] = x.value[ids[(i, j)]]
    dummy = [x.value[j] for j in range(n*n, n*n+n)]
    return (x, alpha)

if __name__ == '__main__':
    p = [0.0005, 0.0001, 0.0007]
    a = [900, 500, 700]
    n = len(a)
    (sl, alpha) = solve05(p, a)
    for row in alpha:
        for x in row:
            print("%.4f " % (x), end=" ")
        print("")
It fails with a "problem INFEASIBLE" verdict, and I am eager to know why.
Is there any way to find out more? I am not a convex programming expert, so any comments on why this is a bad model are appreciated. I have also tried scaling the problem, because I thought some numerical instability might be causing the trouble, but alas.
The answer ecos+cvxpy was giving is correct. The problem is infeasible, which can be shown by summing up all the equations and observing that the LHS is some quantity F, whereas the RHS is F + e, for some e > 0.
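One generic way to probe this kind of infeasibility numerically (a sketch of my own, not the original poster's derivation; it assumes the equality system A x = b built inside solve05) is to look for a combination y of the equations with y^T A = 0 but y^T b != 0, which certifies that the equalities alone are inconsistent. Summing all the equations, as described above, corresponds to one particular choice of y (all ones):

import numpy as np
from scipy.linalg import null_space

def equalities_are_infeasible(A, b, tol=1e-8):
    # columns of Y span the left null space of A, i.e. Y.T @ A == 0
    Y = null_space(A.T)
    # if some combination of the equations has a zero left-hand side but a
    # non-zero right-hand side, A x = b has no solution at all
    return bool(np.any(np.abs(Y.T @ b) > tol))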

Select rows until rank is N

I have a matrix A (shape (P,Q)) and I need to select a set of rows I such that A[I, :] is square and invertible. Is there an algorithm I don't know of that computes this?
More generally, I need to split A into as many non-singular matrices with shape (*, Q) as possible. Is there an algorithm to compute this?
A (pointer to a) numpy implementation would be appreciated.
My current (greedy and brute-force) implementation is the following:
import numpy as np

def independent_columns_index(A, tol=1e-05):
    """
    Found here: http://stackoverflow.com/questions/13312498/how-to-find-degenerate-rows-columns-in-a-covariance-matrix
    """
    A = np.array(A)
    Q, R = np.linalg.qr(A)
    independent = np.where(np.abs(R.diagonal()) > tol)[0]
    return independent

def independent_rows_index(A, tol=1e-05):
    return independent_columns_index(A.T)

def n_independent_rows_indices(A, n, tol=1e-05):
    if n > A.shape[1]:
        return
    independent = np.empty(0, dtype=int)
    next_unchecked_row = 0
    need_more = n
    while A.shape[0] - next_unchecked_row > need_more:
        need_more = n - len(independent)
        first_row = next_unchecked_row
        last_row = next_unchecked_row + need_more
        next_unchecked_row = last_row
        indices = np.append(independent, np.arange(first_row, last_row))
        subA = A[indices, :]
        ret = independent_rows_index(subA)
        independent = indices[ret]
        if len(independent) == n:
            yield independent
            independent = np.empty(0, dtype=int)

def test():
    A = np.random.randint(0, 100, [43, 5])
    k = 20
    A[-k:, :] = A[:k, :]
    np.random.shuffle(A)
    A[1, :] = A[0, :]
    for ind in n_independent_rows_indices(A, 5):
        R = A[ind, :]
        print(ind)
        print(np.linalg.matrix_rank(R))

if __name__ == '__main__':
    test()
EDIT: question corrected after Amit's comment. Added my naive algorithm after John's comment.
EDIT: improved solution:
import numpy as np
import scipy as sp
import scipy.linalg  # make sp.linalg available

def n_independent_rows_indices(A, n):
    if n > A.shape[1]:
        return
    not_used = np.arange(A.shape[0])
    while True:
        try:
            A_ = np.array(A[not_used, :][:3*n, :]).T
            _, _, P = sp.linalg.qr(A_, pivoting=True)
            now_used = not_used[P[:n]]
            if len(now_used) < n:
                return
            not_used = np.delete(not_used, P[:n], axis=0)
            yield now_used
        except:
            return
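For completeness, here is a minimal usage sketch of the improved generator (my addition, mirroring the earlier test function; each yielded index set should select a full-rank block):

A = np.random.randint(0, 100, (43, 5)).astype(float)
A[-20:, :] = A[:20, :]   # duplicate some rows on purpose
np.random.shuffle(A)

for ind in n_independent_rows_indices(A, 5):
    block = A[ind, :]
    print(ind, np.linalg.matrix_rank(block))  # expect rank 5 for each yielded block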
