Calculate matrix determinants of minors - Python

I want to calculate matrix determinants of minors in Python, maybe using SciPy or some other package.
Any suggestions?

NumPy/SciPy will do all of this.
Form sub-matrices by removing rows and columns.
Calculate determinants with np.linalg.det().

To create the minor matrix you could use the function

import numpy as np

def minor(M, i, j):
    # delete row i and column j to form the (i, j) minor matrix
    M = np.delete(M, i, 0)
    M = np.delete(M, j, 1)
    return M

and then take its determinant with

np.linalg.det(minor(M, i, j))
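Putting the pieces together, here is a minimal sketch (the matrix_of_minors name is mine) that uses the minor() function above to build the full matrix of minor determinants:

import numpy as np

def matrix_of_minors(M):
    # determinant of the sub-matrix obtained by deleting row i and
    # column j, for every position (i, j); uses minor() from above
    n = M.shape[0]
    return np.array([[np.linalg.det(minor(M, i, j)) for j in range(n)]
                     for i in range(n)])

A = np.array([[1., 2.],
              [3., 4.]])
print(matrix_of_minors(A))
# [[4. 3.]
#  [2. 1.]]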

To create the leading principal minors of a matrix and compute the determinant of each one, you could do something like this:
import numpy as np

# procedure for creating principal minor matrices
def minor(M, size):
    # size can be 2x2, 3x3, 4x4 etc.
    theMinor = []
    for i in range(size):
        clearList = []
        for j in range(size):
            clearList.append(M[i][j])
        theMinor.append(clearList)
    return theMinor

# procedure to handle the principal minors
def handleMinorPrincipals(A, n):
    # A is a square matrix
    # n is the number of rows and cols of A
    if n == 0:
        return None
    if n == 1:
        return A[0][0]
    # size 1x1 is handled above;
    # we now look at the larger minors
    for i in range(1, n):
        # get the principal minor matrix
        subMatrix = minor(A, i + 1)
        # check whether its determinant is greater than 0
        if np.linalg.det(subMatrix) > 0:
            pass  # do something
        else:
            pass  # something else
    return
Example:
[[8. 8. 0. 0. 0.]
[6. 6. 3. 0. 0.]
[0. 4. 4. 4. 0.]
[0. 0. 2. 2. 2.]
[0. 0. 0. 2. 2.]]
size = 1 -> Minor is
[8]
size = 2 -> Minor is
[[8. 8.]
[6. 6.]]
size = 3 -> Minor is
[[8. 8. 0.]
[6. 6. 3.]
[0. 4. 4.]]
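Note that NumPy slicing gives the leading principal submatrices directly, which avoids the manual copy loop. A small sketch (mine) using the example matrix above:

import numpy as np

A = np.array([[8., 8., 0., 0., 0.],
              [6., 6., 3., 0., 0.],
              [0., 4., 4., 4., 0.],
              [0., 0., 2., 2., 2.],
              [0., 0., 0., 2., 2.]])

for size in range(1, A.shape[0] + 1):
    sub = A[:size, :size]              # leading principal submatrix
    print(size, np.linalg.det(sub))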


What does "wrap" in np.pad actually do?

When I pad a NumPy 2D array with mode='wrap', the return value confuses me:
import numpy as np
a = np.arange(9, dtype=float).reshape(3, 3)
print(np.pad(a, [(2, 4), (1, 1)], mode='wrap'))
it returns:
[[5. 3. 4. 5. 3.]
[8. 6. 7. 8. 6.]
[2. 0. 1. 2. 0.]
[5. 3. 4. 5. 3.]
[8. 6. 7. 8. 6.]
[2. 0. 1. 2. 0.]
[5. 3. 4. 5. 3.]
[8. 6. 7. 8. 6.]
[5. 3. 4. 5. 3.]]
The last row is [5. 3. 4. 5. 3.], but I think (expect) it should be [2. 0. 1. 2. 0.], that is, the given array repeated again and again.
The following code shows what I expected wrap to do:
import numpy as np

def pad(array, pad_width, mode='wrap'):
    """pad_width definition is the same as for np.pad()"""
    size = (array.shape[0] + pad_width[0][0] + pad_width[0][1],
            array.shape[1] + pad_width[1][0] + pad_width[1][1])
    if mode == 'wrap':
        arr = np.empty(size, dtype=array.dtype)
        for i in range(size[0]):
            for j in range(size[1]):
                # index the source array periodically (note: the column
                # index must be taken modulo shape[1], not shape[0])
                arr[i, j] = array[(i - pad_width[0][0]) % array.shape[0],
                                  (j - pad_width[1][0]) % array.shape[1]]
    else:
        print('Not defined')
        return
    return arr

a = np.arange(9, dtype=float).reshape(3, 3)
print(pad(a, [(2, 4), (1, 1)], mode='wrap'))
This returns:
[[5. 3. 4. 5. 3.]
[8. 6. 7. 8. 6.]
[2. 0. 1. 2. 0.]
[5. 3. 4. 5. 3.]
[8. 6. 7. 8. 6.]
[2. 0. 1. 2. 0.]
[5. 3. 4. 5. 3.]
[8. 6. 7. 8. 6.]
[2. 0. 1. 2. 0.]]
Can someone explain what "wrap" is actually doing? Thank you!
Edit (2nd):
Now I have some feeling for what wrap does: it applies the definition of "wrap" repeatedly to pad the array. But I still wonder what the usefulness of wrap padding is (I can't make sense of this strange algorithm).
Here's the definition of the wrap mode:
'wrap'
Pads with the wrap of the vector along the axis.
The first values are used to pad the end and the
end values are used to pad the beginning.
And here's the relevant code of the pad function:
@array_function_dispatch(_pad_dispatcher, module='numpy')
def pad(array, pad_width, mode='constant', **kwargs):
    array = np.asarray(array)
    pad_width = np.asarray(pad_width)

    if not pad_width.dtype.kind == 'i':
        raise TypeError('`pad_width` must be of integral type.')

    # Broadcast to shape (array.ndim, 2)
    pad_width = _as_pairs(pad_width, array.ndim, as_index=True)

    if callable(mode):
        # Old behavior: Use user-supplied function with np.apply_along_axis
        function = mode
        # Create a new zero padded array
        padded, _ = _pad_simple(array, pad_width, fill_value=0)
        # And apply along each axis
        for axis in range(padded.ndim):
            # Iterate using ndindex as in apply_along_axis, but assuming that
            # function operates inplace on the padded array.

            # view with the iteration axis at the end
            view = np.moveaxis(padded, axis, -1)

            # compute indices for the iteration axes, and append a trailing
            # ellipsis to prevent 0d arrays decaying to scalars (gh-8642)
            inds = ndindex(view.shape[:-1])
            inds = (ind + (Ellipsis,) for ind in inds)
            for ind in inds:
                function(view[ind], pad_width[axis], axis, kwargs)
        return padded

    # Make sure that no unsupported keywords were passed for the current mode
    allowed_kwargs = {
        'empty': [], 'edge': [], 'wrap': [],
        'constant': ['constant_values'],
        'linear_ramp': ['end_values'],
        'maximum': ['stat_length'],
        'mean': ['stat_length'],
        'median': ['stat_length'],
        'minimum': ['stat_length'],
        'reflect': ['reflect_type'],
        'symmetric': ['reflect_type'],
    }
    try:
        unsupported_kwargs = set(kwargs) - set(allowed_kwargs[mode])
    except KeyError:
        raise ValueError("mode '{}' is not supported".format(mode)) from None
    if unsupported_kwargs:
        raise ValueError("unsupported keyword arguments for mode '{}': {}"
                         .format(mode, unsupported_kwargs))

    stat_functions = {"maximum": np.amax, "minimum": np.amin,
                      "mean": np.mean, "median": np.median}

    # Create array with final shape and original values
    # (padded area is undefined)
    padded, original_area_slice = _pad_simple(array, pad_width)
    # And prepare iteration over all dimensions
    # (zipping may be more readable than using enumerate)
    axes = range(padded.ndim)

    # ... all the other modes ...

    elif mode == "wrap":
        for axis, (left_index, right_index) in zip(axes, pad_width):
            roi = _view_roi(padded, original_area_slice, axis)
            while left_index > 0 or right_index > 0:
                # Iteratively pad until dimension is filled with wrapped
                # values. This is necessary if the pad area is larger than
                # the length of the original values in the current dimension.
                left_index, right_index = _set_wrap_both(
                    roi, axis, (left_index, right_index))

    return padded
Now you can see the difference between what you assumed it was doing and what it is really doing.
You can find this information in the NumPy GitHub repository. The code for pad is in the file arraypad.py, located at numpy/numpy/lib/arraypad.py.
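If what you want is true periodic tiling, the loop from the question can be vectorized with modular indexing. A minimal sketch (periodic_pad is my own name, not a NumPy function):

import numpy as np

def periodic_pad(array, pad_width):
    # wrapped row/column indices, gathered in a single fancy-indexing step
    rows = (np.arange(-pad_width[0][0], array.shape[0] + pad_width[0][1])
            % array.shape[0])
    cols = (np.arange(-pad_width[1][0], array.shape[1] + pad_width[1][1])
            % array.shape[1])
    return array[np.ix_(rows, cols)]

a = np.arange(9, dtype=float).reshape(3, 3)
print(periodic_pad(a, [(2, 4), (1, 1)]))   # matches the expected output above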

Replace column by 0 based on probability

How do I replace columns of a NumPy array with a certain number based on probability, when the array has shape (1, X, X)?
I found code that replaces rows, but I cannot figure out how to modify it so that it replaces columns instead.
grid_example = np.random.rand(1,5,5)
probs = np.random.random((1,5))
grid_example[probs < 0.25] = 0
grid_example
Thanks!
Use:
import numpy as np
rng = np.random.default_rng(42)
grid_example = rng.random((1, 5, 5))
probs = rng.random((1, 5))
grid_example[..., (probs < 0.25).flatten()] = 0
print(grid_example)
Output
[[[0. 0.43887844 0. 0. 0.09417735]
[0. 0.7611397 0. 0. 0.45038594]
[0. 0.92676499 0. 0. 0.4434142 ]
[0. 0.55458479 0. 0. 0.6316644 ]
[0. 0.35452597 0. 0. 0.7783835 ]]]
The notation [..., (probs < 0.25).flatten()] applies the boolean index to the last axis. See the NumPy documentation on indexing for more details.
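An equivalent broadcasting approach (a sketch of mine, not from the answer) inserts a row axis into the mask instead of flattening it:

import numpy as np

rng = np.random.default_rng(42)
grid_example = rng.random((1, 5, 5))
probs = rng.random((1, 5))

# (1, 5) -> (1, 1, 5): the mask broadcasts over the row axis, so a True
# entry zeroes the whole corresponding column
mask = (probs < 0.25)[:, None, :]
grid_example = np.where(mask, 0, grid_example)
print(grid_example)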

Create a matrix of neighbours

I have a sparse matrix and I need to create a new matrix of the neighbours of each index.
Below is a representation of the data in an NxM matrix. For each element of the matrix I need to obtain its neighbours in a KxK window. From this information I would generate an NMxKK matrix that contains, in each row, the indices of the KK neighbours of one element.
I asked a similar question a while ago, but the difference is that now the data is structured, so I can do without a KdTree.
This new matrix is used to calculate the distances of the non-zero neighbours, associate a weight to each neighbour based on these distances, and finally estimate the desired value as a weighted average of the neighbours.
Thanks in advance!
UPDATE
I have data like that in the image (generated with the function generate_data below) and I need to perform the following operation.
Given an NxN filter/kernel matrix, with N being a kernel size I define, calculate for the nonzero values the distances with respect to the central pixel. Take as an example the value 20 at position (1, 8) of the image. Using a 5x5 window, the nonzero values of interest are 40 (at (0, 6)), 37 (at (1, 6)) and 25 (at (3, 10)), with distances 2.23606798, 2 and 2.82842712 respectively (obtained by taking the Euclidean norm of the index differences).
What I need to get in this step is the matrix res:
[[0. 2.23606798 2. 0. 0. ]
[0. 0. 0. 0. 0. ]
[0. 0. 1. 0. 0. ]
[0. 0. 0. 0. 0. ]
[0. 0. 0. 0. 2.82842712]]
I also need the 1. in the center of the matrix, to take into account the value where I am standing (whose distance to itself is 0.).
With these values, I build the mask of non-zero entries and calculate the weights from a Gaussian density:
import scipy.stats as st
mask = 0 < res
gauss = st.norm.pdf(res) # or st.norm.pdf(mask * kernel(5))
[[0.        , 0.03274718, 0.05399097, 0.        , 0.        ],
 [0.        , 0.        , 0.        , 0.        , 0.        ],
 [0.        , 0.        , 0.39894228, 0.        , 0.        ],
 [0.        , 0.        , 0.        , 0.        , 0.        ],
 [0.        , 0.        , 0.        , 0.        , 0.00730688]]
total = gauss.sum() # 0.4929873057962355
Finally, with these weights, I compute the final value of the pixel as a weighted average of the values:
val[1, 8] = 0.03274718 * 40 / total + 0.05399097 * 37 / total + 0.39894228 * 20 / total + 0.00730688 * 25 / total
I must do the same for each pixel (I guess I have to add a padding of kernel_size // 2 to be able to use the whole array).
Here is my script
import matplotlib.pylab as plt
import numpy as np
import scipy.stats as st
from scipy import sparse

def generate_data(m, n, density):
    s = 64 * sparse.random(m, n, density=density).A
    return s.astype(np.int8)

def plot_matrix(matrix):
    # annotate every cell with its value
    for (j, i), label in np.ndenumerate(matrix):
        plt.text(i, j, label, ha='center', va='center')
    plt.imshow(matrix)
    plt.show()

def kernel(n):
    # force an odd kernel size and build the grid of distances to the centre
    n = n if n % 2 != 0 else n + 1
    mid = n // 2
    m = np.ndarray((n, n, 2))
    for i in range(n):
        for j in range(n):
            m[i, j] = np.array([i, j])
    return np.linalg.norm(m - [mid, mid], axis=2)

s = generate_data(10, 14, 0.25)
plot_matrix(s)
This was really simple, although maybe not very efficient. All I had to do was two convolutions:
In the first, convolve the Gaussian kernel with the masked data matrix:
conv_1 = convolve2d(m * mask_clean, k_gauss)
In the second, convolve the Gaussian kernel with the mask itself:
conv_2 = convolve2d(mask_clean, k_gauss)
At each position, conv_1 holds the sum of the neighbouring values weighted by the corresponding factors of the Gaussian kernel, and conv_2 holds the sum of those Gaussian weights over the nonzero neighbours. The only thing left to do is divide one by the other to get the final result:
import scipy.stats as st
from scipy.signal import convolve2d

# m holds the data; m_mean and m_std are its mean and standard deviation
mask_clean = (0 < m) & (m_mean - 3*m_std < m) & (m < m_mean + 3*m_std)

# custom function to create a Gaussian kernel (see the sketch below)
k = gkern(kernlen=5, std=5//2)
k_gauss = st.norm.pdf(k)

conv_1 = convolve2d(m * mask_clean, k_gauss)   # weighted sum of values
conv_2 = convolve2d(mask_clean, k_gauss)       # sum of the weights used
final = conv_1 / conv_2
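The answer doesn't show gkern. A plausible reconstruction (my guess, not the author's code) is the distance-to-centre kernel from the question's kernel(n), with the distances scaled by std so that st.norm.pdf(k) yields a Gaussian of roughly that width:

import numpy as np

def gkern(kernlen=5, std=2):
    # hypothetical helper: grid of distances from each cell to the centre,
    # scaled by std (an assumption), to be passed through st.norm.pdf()
    n = kernlen if kernlen % 2 != 0 else kernlen + 1
    mid = n // 2
    idx = np.moveaxis(np.indices((n, n)), 0, -1).astype(float)  # (n, n, 2)
    return np.linalg.norm(idx - [mid, mid], axis=2) / std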

From list of indices to one-hot matrix

What is the best (elegant and efficient) way in Theano to convert a vector of indices to a matrix of zeros and ones, in which every row is the one-of-N representation of an index?
v = t.ivector() # the vector of indices
n = t.scalar() # the width of the matrix
convert = <your code here>
f = theano.function(inputs=[v, n], outputs=convert)
Example:
n_val = 4
v_val = [1,0,3]
f(v_val, n_val) = [[0,1,0,0],[1,0,0,0],[0,0,0,1]]
I didn't compare the different options, but you can also do it like this. It doesn't require extra memory.
import numpy as np
import theano

n_val = 4
v_val = np.asarray([1, 0, 3])

idx = theano.tensor.lvector()
z = theano.tensor.zeros((idx.shape[0], n_val))
one_hot = theano.tensor.set_subtensor(z[theano.tensor.arange(idx.shape[0]), idx], 1)
f = theano.function([idx], one_hot)
print f(v_val)
# [[ 0.  1.  0.  0.]
#  [ 1.  0.  0.  0.]
#  [ 0.  0.  0.  1.]]
It's as simple as:
convert = t.eye(n,n)[v]
There still might be a more efficient solution that doesn't require building the whole identity matrix. This might be problematic for large n and short v's.
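For intuition, here is the same identity-matrix trick in plain NumPy (a sketch of mine, not part of the answer):

import numpy as np

n_val = 4
v_val = np.array([1, 0, 3])
# row v of the identity matrix is exactly the one-hot encoding of v
print(np.eye(n_val)[v_val])
# [[0. 1. 0. 0.]
#  [1. 0. 0. 0.]
#  [0. 0. 0. 1.]]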
There's now a built-in function for this: theano.tensor.extra_ops.to_one_hot.
y = tensor.as_tensor([3,2,1])
fn = theano.function([], tensor.extra_ops.to_one_hot(y, 4))
print fn()
# [[ 0. 0. 0. 1.]
# [ 0. 0. 1. 0.]
# [ 0. 1. 0. 0.]]

Autocorrelation of a multidimensional array in numpy

I have a two-dimensional array, i.e. an array of sequences which are themselves arrays. For each sequence I would like to calculate the autocorrelation, so that for a (5, 4) array I would get 5 results, i.e. an array of dimension (5, 7).
I know I could just loop over the first dimension, but that's slow and a last resort. Is there another way?
Thanks!
EDIT:
Based on the chosen answer plus the comment from mtrw, I have the following function:
def xcorr(x):
    """FFT-based autocorrelation function, which is faster than numpy.correlate"""
    # x is supposed to be an array of sequences, of shape (totalelements, length)
    fftx = fft(x, n=(length * 2 - 1), axis=1)
    ret = ifft(fftx * np.conjugate(fftx), axis=1)
    ret = fftshift(ret, axes=1)
    return ret

Note that length is a global variable in my code, and fft, ifft and fftshift come from numpy.fft, so be sure to declare and import them. I also didn't restrict the result to real numbers, since I need to take complex numbers into account as well.
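A quick usage sketch (mine, not the asker's; it assumes length has been set to x.shape[1], the numpy.fft names are imported, and xcorr above is defined):

import numpy as np
from numpy.fft import fft, ifft, fftshift

length = 4                                   # assumption: must equal x.shape[1]
x = np.arange(5 * 4, dtype=float).reshape(5, 4)
print(xcorr(x).real)                         # one length-7 autocorrelation per row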
Using FFT-based autocorrelation:
import numpy
from numpy.fft import fft, ifft
data = numpy.arange(5*4).reshape(5, 4)
print data
##[[ 0 1 2 3]
## [ 4 5 6 7]
## [ 8 9 10 11]
## [12 13 14 15]
## [16 17 18 19]]
dataFT = fft(data, axis=1)
dataAC = ifft(dataFT * numpy.conjugate(dataFT), axis=1).real
print dataAC
##[[ 14. 8. 6. 8.]
## [ 126. 120. 118. 120.]
## [ 366. 360. 358. 360.]
## [ 734. 728. 726. 728.]
## [ 1230. 1224. 1222. 1224.]]
I'm a little confused by your statement about the answer having dimension (5, 7), so maybe there's something important I'm not understanding.
EDIT: At the suggestion of mtrw, a padded version that doesn't wrap around:
import numpy
from numpy.fft import fft, ifft
data = numpy.arange(5*4).reshape(5, 4)
padding = numpy.zeros((5, 3))
dataPadded = numpy.concatenate((data, padding), axis=1)
print dataPadded
##[[  0.   1.   2.   3.   0.   0.   0.]
## [  4.   5.   6.   7.   0.   0.   0.]
## [  8.   9.  10.  11.   0.   0.   0.]
## [ 12.  13.  14.  15.   0.   0.   0.]
## [ 16.  17.  18.  19.   0.   0.   0.]]
dataFT = fft(dataPadded, axis=1)
dataAC = ifft(dataFT * numpy.conjugate(dataFT), axis=1).real
print numpy.round(dataAC, 10)
##[[   14.     8.     3.     0.     0.     3.     8.]
## [  126.    92.    59.    28.    28.    59.    92.]
## [  366.   272.   179.    88.    88.   179.   272.]
## [  734.   548.   363.   180.   180.   363.   548.]
## [ 1230.   920.   611.   304.   304.   611.   920.]]
There must be a more efficient way to do this, especially because autocorrelation is symmetric and I don't take advantage of that.
For really large arrays it becomes important to have n = 2 ** p, where p is an integer. This will save you huge amounts of time. For example:

def xcorr(x):
    # round the FFT length down to the nearest power of two
    l = 2 ** int(np.log2(x.shape[1] * 2 - 1))
    fftx = fft(x, n=l, axis=1)
    ret = ifft(fftx * np.conjugate(fftx), axis=1)
    ret = fftshift(ret, axes=1)
    return ret

This might give you wrap-around errors. For large arrays the autocorrelation should be insignificant near the edges, though.
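If the wrap-around matters, a variant (a sketch of mine, not from the answer) rounds the length up instead of down, so that n >= 2*N - 1 and nothing wraps:

import numpy as np
from numpy.fft import fft, ifft, fftshift

def xcorr_pow2(x):
    # next power of two at or above 2*N - 1: no circular overlap
    n = 2 ** int(np.ceil(np.log2(x.shape[1] * 2 - 1)))
    fftx = fft(x, n=n, axis=1)
    ret = ifft(fftx * np.conjugate(fftx), axis=1)
    return fftshift(ret, axes=1)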
Maybe it's just a preference, but I wanted to follow from the definition. I personally find it a bit easier to follow that way. This is my implementation for an arbitrary nd array.
from itertools import product
from numpy import asarray, empty, roll

def autocorrelate(x):
    """
    Compute the multidimensional autocorrelation of an nd array.
    input: an nd array of floats
    output: an nd array of autocorrelations
    """

    # used for transposes
    t = roll(range(x.ndim), 1)

    # pairs of indexes
    # the first is for the autocorrelation array
    # the second is the shift
    ii = [list(enumerate(range(1, s - 1))) for s in x.shape]

    # initialize the resulting autocorrelation array
    acor = empty(shape=[len(s0) for s0 in ii])

    # iterate over all combinations of directional shifts
    for i in product(*ii):
        # extract the indexes for
        # the autocorrelation array
        # and original array respectively
        i1, i2 = asarray(i).T

        x1 = x.copy()
        x2 = x.copy()

        for i0 in i2:
            # clip the unshifted array at the end
            x1 = x1[:-i0]
            # and the shifted array at the beginning
            x2 = x2[i0:]

            # prepare to do the same for
            # the next axis
            x1 = x1.transpose(t)
            x2 = x2.transpose(t)

        # normalize shifted and unshifted arrays
        x1 -= x1.mean()
        x1 /= x1.std()
        x2 -= x2.mean()
        x2 /= x2.std()

        # compute the autocorrelation directly
        # from the definition
        acor[tuple(i1)] = (x1 * x2).mean()

    return acor
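A quick usage sketch (mine), assuming autocorrelate above is defined:

import numpy as np

x = np.random.rand(10, 12)
acor = autocorrelate(x)
# one entry per combination of shifts (1..8 along axis 0, 1..10 along axis 1)
print(acor.shape)   # (8, 10)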
