I tried to implement strided convolution of a 2D array using a for loop, i.e.
import numpy as np

arr = np.array([[2,3,7,4,6,2,9],
                [6,6,9,8,7,4,3],
                [3,4,8,3,8,9,7],
                [7,8,3,6,6,3,4],
                [4,2,1,8,3,4,6],
                [3,2,4,1,9,8,3],
                [0,1,3,9,2,1,4]])
arr2 = np.array([[3,4,4],
                 [1,0,2],
                 [-1,0,3]])
def stride_conv(arr1, arr2, s, p):
    beg = 0
    end = arr2.shape[0]
    final = []
    for i in range(0, arr1.shape[0]-1, s):
        k = []
        for j in range(0, arr1.shape[0]-1, s):
            k.append(np.sum(arr1[beg+i : end+i, beg+j : end+j] * arr2))
        final.append(k)
    return np.array(final)
stride_conv(arr,arr2,2,0)
This results in a 3x3 array:
array([[ 91, 100, 88],
[ 69, 91, 117],
[ 44, 72, 74]])
Is there a numpy function or scipy function to do the same? My approach is not that good. How can I vectorize this?
Ignoring the padding argument and trailing windows that won't have enough length for convolution against the second array, here's one way with np.lib.stride_tricks.as_strided -
def strided4D(arr, arr2, s):
    strided = np.lib.stride_tricks.as_strided
    s0, s1 = arr.strides
    m1, n1 = arr.shape
    m2, n2 = arr2.shape
    # windows laid out as (out_rows, out_cols, kernel_rows, kernel_cols),
    # matching the layout returned by skimage's view_as_windows
    out_shp = (1+(m1-m2)//s, 1+(n1-n2)//s, m2, n2)
    return strided(arr, shape=out_shp, strides=(s*s0, s*s1, s0, s1))

def stride_conv_strided(arr, arr2, s):
    arr4D = strided4D(arr, arr2, s=s)
    return np.tensordot(arr4D, arr2, axes=((2, 3), (0, 1)))
Alternatively, we can use the scikit-image built-in view_as_windows to get those windows elegantly, like so -
from skimage.util.shape import view_as_windows
def strided4D_v2(arr, arr2, s):
    return view_as_windows(arr, arr2.shape, step=s)
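To complete the strided convolution from those windows, the same tensordot reduction works on the view_as_windows output too; a quick sketch (my addition), using the arr/arr2 defined in the question:
# The windows view has shape (out_h, out_w, 3, 3) here; contract the two
# trailing (window) axes against the kernel. Like the question's loop, this
# is a correlation, i.e. no kernel flip.
win = strided4D_v2(arr, arr2, s=2)
out = np.tensordot(win, arr2, axes=((2, 3), (0, 1)))
print(out)   # should match the 3x3 result from the question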
How about using signal.convolve2d from scipy?
My approach is similar to Jason's, but uses indexing.
from scipy import signal

def strideConv(arr, arr2, s):
    return signal.convolve2d(arr, arr2[::-1, ::-1], mode='valid')[::s, ::s]
Note that the kernel has to be reversed. For details, please see the discussions here and here. Otherwise, use signal.correlate2d.
Examples:
>>> strideConv(arr, arr2, 1)
array([[ 91, 80, 100, 84, 88],
[ 99, 106, 126, 92, 77],
[ 69, 98, 91, 93, 117],
[ 80, 79, 87, 93, 61],
[ 44, 72, 72, 63, 74]])
>>> strideConv(arr, arr2, 2)
array([[ 91, 100, 88],
[ 69, 91, 117],
[ 44, 72, 74]])
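If you would rather not flip the kernel yourself, signal.correlate2d should give the same strided result directly; a small sketch under that assumption:
from scipy import signal

def strideCorr(arr, arr2, s):
    # correlate2d slides the kernel without flipping, so no [::-1, ::-1] needed
    return signal.correlate2d(arr, arr2, mode='valid')[::s, ::s]

# strideCorr(arr, arr2, 2) should reproduce the same 3x3 array as above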
I think we can do a "valid" fft convolution and pick out only those results at strided locations, like this:
import scipy.signal

def strideConv(arr, arr2, s):
    cc = scipy.signal.fftconvolve(arr, arr2[::-1, ::-1], mode='valid')
    idx = (np.arange(0, cc.shape[1], s), np.arange(0, cc.shape[0], s))
    xidx, yidx = np.meshgrid(*idx)
    return cc[yidx, xidx]
This gives the same results as the other answers.
But I guess this only works if the kernel size is odd-numbered.
Also, I've flipped the kernel in arr2[::-1,::-1] just to stay consistent with the others; you may want to omit it depending on context.
UPDATE:
We currently have a few different ways of doing 2D or 3D convolution using numpy and scipy alone, and I thought about doing some comparisons to give some idea of which one is faster on data of different sizes. I hope this won't be regarded as off-topic.
Method 1: FFT convolution (using scipy.signal.fftconvolve):
import numpy
from scipy.signal import fftconvolve

def padArray(var, pad, method=1):
    if method == 1:
        var_pad = numpy.zeros(tuple(2*pad+numpy.array(var.shape[:2])) + var.shape[2:])
        var_pad[pad:-pad, pad:-pad] = var
    else:
        var_pad = numpy.pad(var, ([pad, pad], [pad, pad]) + ([0, 0],)*(numpy.ndim(var)-2),
                            mode='constant', constant_values=0)
    return var_pad
def conv3D(var, kernel, stride=1, pad=0, pad_method=1):
    '''3D convolution using scipy.signal.fftconvolve.
    '''
    var_ndim = numpy.ndim(var)
    kernel_ndim = numpy.ndim(kernel)
    stride = int(stride)
    if var_ndim < 2 or var_ndim > 3 or kernel_ndim < 2 or kernel_ndim > 3:
        raise Exception("<var> and <kernel> dimension should be in 2 or 3.")
    if var_ndim == 2 and kernel_ndim == 3:
        raise Exception("<kernel> dimension > <var>.")
    if var_ndim == 3 and kernel_ndim == 2:
        kernel = numpy.repeat(kernel[:, :, None], var.shape[2], axis=2)
    if pad > 0:
        var_pad = padArray(var, pad, pad_method)
    else:
        var_pad = var
    conv = fftconvolve(var_pad, kernel, mode='valid')
    if stride > 1:
        conv = conv[::stride, ::stride, ...]
    return conv
Method 2: Special conv (see this answer):
def conv3D2(var, kernel, stride=1, pad=0):
    '''3D convolution by sub-matrix summing.
    '''
    var_ndim = numpy.ndim(var)
    ny, nx = var.shape[:2]
    ky, kx = kernel.shape[:2]
    result = 0
    if pad > 0:
        var_pad = padArray(var, pad, 1)
    else:
        var_pad = var
    for ii in range(ky*kx):
        yi, xi = divmod(ii, kx)
        slabii = var_pad[yi:2*pad+ny-ky+yi+1:1, xi:2*pad+nx-kx+xi+1:1, ...]*kernel[yi, xi]
        if var_ndim == 3:
            slabii = slabii.sum(axis=-1)
        result += slabii
    if stride > 1:
        result = result[::stride, ::stride, ...]
    return result
Method 3: Strided-view conv, as suggested by Divakar:
def asStride(arr, sub_shape, stride):
    '''Get a strided sub-matrices view of an ndarray.
    <arr>: ndarray of rank 2.
    <sub_shape>: tuple of length 2, window size: (ny, nx).
    <stride>: int, stride of windows.
    Return <subs>: strided window view.
    See also skimage.util.shape.view_as_windows()
    '''
    s0, s1 = arr.strides[:2]
    m1, n1 = arr.shape[:2]
    m2, n2 = sub_shape[:2]
    view_shape = (1+(m1-m2)//stride, 1+(n1-n2)//stride, m2, n2) + arr.shape[2:]
    strides = (stride*s0, stride*s1, s0, s1) + arr.strides[2:]
    subs = numpy.lib.stride_tricks.as_strided(arr, view_shape, strides=strides)
    return subs
def conv3D3(var, kernel, stride=1, pad=0):
    '''3D convolution by strided view.
    '''
    var_ndim = numpy.ndim(var)
    kernel_ndim = numpy.ndim(kernel)
    if var_ndim < 2 or var_ndim > 3 or kernel_ndim < 2 or kernel_ndim > 3:
        raise Exception("<var> and <kernel> dimension should be in 2 or 3.")
    if var_ndim == 2 and kernel_ndim == 3:
        raise Exception("<kernel> dimension > <var>.")
    if var_ndim == 3 and kernel_ndim == 2:
        kernel = numpy.repeat(kernel[:, :, None], var.shape[2], axis=2)
    if pad > 0:
        var_pad = padArray(var, pad, 1)
    else:
        var_pad = var
    view = asStride(var_pad, kernel.shape, stride)
    #return numpy.tensordot(view, kernel, axes=((2,3),(0,1)))
    if numpy.ndim(kernel) == 2:
        conv = numpy.sum(view*kernel, axis=(2, 3))
    else:
        conv = numpy.sum(view*kernel, axis=(2, 3, 4))
    return conv
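A small sanity check I added (not part of the original benchmark): conv3D goes through fftconvolve and therefore flips the kernel, while conv3D2 and conv3D3 compute a correlation, so the kernel is flipped manually below to compare like with like:
import numpy
# conv3D, conv3D2, conv3D3 (and their imports) are assumed to be defined as above.

var = numpy.random.rand(20, 20)
kernel = numpy.random.rand(5, 5)

r1 = conv3D(var, kernel[::-1, ::-1], stride=2, pad=0)   # true convolution: flip first
r2 = conv3D2(var, kernel, stride=2, pad=0)              # correlation-style
r3 = conv3D3(var, kernel, stride=2, pad=0)              # correlation-style

print(numpy.allclose(r1, r2), numpy.allclose(r1, r3))   # expect: True True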
I did 3 sets of comparisons:
Comparison 1: convolution on 2D data, with different input sizes and different kernel sizes, stride=1, pad=0. Results below (color indicates the time used for 10 repeated convolutions):
So "FFT conv" is in general the fastest. "Special conv" and "Stride-view conv" get slow as kernel size increases, but decreases again as it approaches the size of input data. The last subplot shows the fastest method, so the big triangle of purple indicates FFT being the winner, but note there is a thin green column on the left side (probably too small to see, but it's there), suggesting that "Special conv" has advantage for very small kernels (smaller than about 5x5). And when kernel size approaches input, "stride-view conv" is fastest (see the diagonal line).
Comparison 2: convolution on 3D data.
Setup: pad=0, stride=2, input dimension=nxnx5, kernel shape=fxfx5.
I skipped computations of "Special Conv" and "Stride-view conv" when the kernel size is in the middle of the input size range. Basically "Special Conv" shows no advantage now, and "Stride-view" is faster than FFT for both small and large kernels.
One additional note: when sizes go above 350, I noticed considerable memory usage peaks for the "Stride-view conv".
Comparison 3: convolution on 3D data with larger stride.
Setup: pad=0, stride=5, input dimension=nxnx10, kernel shape=fxfx10.
This time I omitted the "Special Conv". For a larger area "Stride-view conv" surpasses FFT, and the last subplot shows that the difference approaches 100%.
Probably because as the stride goes up, the FFT approach computes more values that are then discarded, so "Stride-view" gains more of an advantage for both small and large kernels.
Here is an O(N^d (log N)^d) fft-based approach. The idea is to chop up both operands into strides-spaced grids at all offsets modulo strides, do the conventional fft convolution between grids of corresponding offsets and then pointwise sum the results. It is a bit index-heavy but I'm afraid that can't be helped:
import numpy as np
from numpy.fft import fftn, ifftn
def strided_conv_2d(x, y, strides):
    s, t = strides
    # consensus dtype
    cdt = (x[0, 0, ...] + y[0, 0, ...]).dtype
    xi, xj = x.shape
    yi, yj = y.shape
    # round up modulo strides
    xk, xl, yk, yl = map(lambda a, b: -a//b * -b, (xi, xj, yi, yj), (s, t, s, t))
    # zero pad to avoid circular convolution
    xp, yp = (np.zeros((xk+yk, xl+yl), dtype=cdt) for i in range(2))
    xp[:xi, :xj] = x
    yp[:yi, :yj] = y
    # fold out strides
    xp = xp.reshape((xk+yk)//s, s, (xl+yl)//t, t)
    yp = yp.reshape((xk+yk)//s, s, (xl+yl)//t, t)
    # do conventional fft convolution
    xf = fftn(xp, axes=(0, 2))
    yf = fftn(yp, axes=(0, 2))
    result = ifftn(xf * yf.conj(), axes=(0, 2)).sum(axis=(1, 3))
    # restore dtype
    if cdt in (int, np.int_, np.int64, np.int32):
        result = result.real.round()
    return result.astype(cdt)
arr = np.array([[2,3,7,4,6,2,9],
                [6,6,9,8,7,4,3],
                [3,4,8,3,8,9,7],
                [7,8,3,6,6,3,4],
                [4,2,1,8,3,4,6],
                [3,2,4,1,9,8,3],
                [0,1,3,9,2,1,4]])
arr2 = np.array([[3,4,4],
                 [1,0,2],
                 [-1,0,3]])
print(strided_conv_2d(arr, arr2, (2, 2)))
Result:
[[ 91 100 88 23 0 29]
[ 69 91 117 19 0 38]
[ 44 72 74 17 0 22]
[ 16 53 26 12 0 0]
[ 0 0 0 0 0 0]
[ 19 11 21 -9 0 6]]
As far as I know, there is no direct implementation of a convolution filter in numpy or scipy that supports stride and padding, so I think it's better to use a DL package such as torch or tensorflow, then cast the final result to numpy. A torch implementation might be:
import torch
import torch.nn.functional as F
# conv2d expects float tensors of shape (batch, channels, H, W)
arr = torch.tensor(np.expand_dims(arr, axis=(0, 1)), dtype=torch.float32)
arr2 = torch.tensor(np.expand_dims(arr2, axis=(0, 1)), dtype=torch.float32)
output = F.conv2d(arr, arr2, stride=2, padding=0)
output = output.numpy().squeeze().astype(int)
output
array([[ 91, 100, 88],
[ 69, 91, 117],
[ 44, 72, 74]])
A convolution that supports strides and dilation, using numpy.lib.stride_tricks.as_strided.
import numpy as np
from numpy.lib.stride_tricks import as_strided
def conv_view(X, F_s, dr, std):
    X_s = np.array(X.shape)
    F_s = np.array(F_s)
    dr = np.array(dr)
    Fd_s = (F_s - 1) * dr + 1
    if np.any(Fd_s > X_s):
        raise ValueError('(Dilated) filter size must be smaller than X')
    std = np.array(std)
    X_ss = np.array(X.strides)
    Xn_s = (X_s - Fd_s) // std + 1
    Xv_s = np.append(Xn_s, F_s)
    Xv_ss = np.tile(X_ss, 2) * np.append(std, dr)
    return as_strided(X, Xv_s, Xv_ss, writeable=False)

def convolve_stride(X, F, dr=None, std=None):
    if dr is None:
        dr = np.ones(X.ndim, dtype=int)
    if std is None:
        std = np.ones(X.ndim, dtype=int)
    if not (X.ndim == F.ndim == len(dr) == len(std)):
        raise ValueError('X.ndim, F.ndim, len(dr), len(std) must be the same')
    Xv = conv_view(X, F.shape, dr, std)
    return np.tensordot(Xv, F, axes=X.ndim)
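A brief usage sketch (my addition), assuming the arr and arr2 from the question are in scope; like the question's loop this computes a correlation, so no kernel flip is applied:
# stride 2 in both directions, no dilation
out = convolve_stride(arr, arr2, std=[2, 2])
print(out)
# [[ 91 100  88]
#  [ 69  91 117]
#  [ 44  72  74]]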
I am studying image-processing using NumPy and facing a problem with filtering with convolution.
I would like to convolve a gray-scale image (convolve a 2D array with a smaller 2D array).
Does anyone have an idea to refine my method?
I know that SciPy supports convolve2d but I want to make a convolve2d only by using NumPy.
What I have done
First, I made a 2D array of the submatrices.
a = np.arange(25).reshape(5,5) # original matrix

submatrices = np.array([
    [a[:-2,:-2], a[:-2,1:-1], a[:-2,2:]],
    [a[1:-1,:-2], a[1:-1,1:-1], a[1:-1,2:]],
    [a[2:,:-2], a[2:,1:-1], a[2:,2:]]])
The submatrices seem complicated, but what I am doing is shown in the following drawing.
Next, I multiplied each of the submatrices by the filter.
conv_filter = np.array([[0,-1,0],[-1,4,-1],[0,-1,0]])
multiplied_subs = np.einsum('ij,ijkl->ijkl',conv_filter,submatrices)
and summed them.
np.sum(np.sum(multiplied_subs, axis = -3), axis = -3)
#array([[ 6, 7, 8],
# [11, 12, 13],
# [16, 17, 18]])
Thus this procedure can be called my convolve2d.
def my_convolve2d(a, conv_filter):
    submatrices = np.array([
        [a[:-2,:-2], a[:-2,1:-1], a[:-2,2:]],
        [a[1:-1,:-2], a[1:-1,1:-1], a[1:-1,2:]],
        [a[2:,:-2], a[2:,1:-1], a[2:,2:]]])
    multiplied_subs = np.einsum('ij,ijkl->ijkl', conv_filter, submatrices)
    return np.sum(np.sum(multiplied_subs, axis=-3), axis=-3)
However, I find this my_convolve2d troublesome for three reasons.
Generation of the submatrices is too awkward: it is difficult to read and can only be used when the filter is 3x3.
The size of the variable submatrices seems to be too big, since it is approximately 9 times bigger than the original matrix.
The summing seems a little non-intuitive. Simply said, ugly.
Thank you for reading this far.
Kind of an update: I wrote a conv3d for myself. I will leave this here as public domain.
def convolve3d(img, kernel):
    # calc the size of the array of submatrices
    sub_shape = tuple(np.subtract(img.shape, kernel.shape) + 1)
    # alias for the function
    strd = np.lib.stride_tricks.as_strided
    # make an array of submatrices
    submatrices = strd(img, kernel.shape + sub_shape, img.strides * 2)
    # sum the submatrices and kernel
    convolved_matrix = np.einsum('hij,hijklm->klm', kernel, submatrices)
    return convolved_matrix
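A short usage sketch (my addition): since the einsum does not flip the kernel, the result should match scipy's N-dimensional correlation in 'valid' mode:
from scipy import signal

img = np.random.rand(6, 7, 8)
kernel = np.random.rand(3, 3, 3)

out = convolve3d(img, kernel)
print(out.shape)  # (4, 5, 6)
print(np.allclose(out, signal.correlate(img, kernel, mode='valid')))  # True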
You could generate the subarrays using as_strided:
import numpy as np
a = np.array([[ 0,  1,  2,  3,  4],
              [ 5,  6,  7,  8,  9],
              [10, 11, 12, 13, 14],
              [15, 16, 17, 18, 19],
              [20, 21, 22, 23, 24]])
sub_shape = (3,3)
view_shape = tuple(np.subtract(a.shape, sub_shape) + 1) + sub_shape
strides = a.strides + a.strides
sub_matrices = np.lib.stride_tricks.as_strided(a,view_shape,strides)
To get rid of your second "ugly" sum, alter your einsum so that the output array only has k and l. This implies your second summation.
conv_filter = np.array([[0,-1,0],[-1,5,-1],[0,-1,0]])
m = np.einsum('ij,ijkl->kl',conv_filter,sub_matrices)
# [[ 6 7 8]
# [11 12 13]
# [16 17 18]]
Cleaned up using as_strided and @Crispin's einsum trick from above. Enforces the filter size into the expanded shape. Should even allow non-square inputs if the indices are compatible.
def conv2d(a, f):
    s = f.shape + tuple(np.subtract(a.shape, f.shape) + 1)
    strd = np.lib.stride_tricks.as_strided
    subM = strd(a, shape=s, strides=a.strides * 2)
    return np.einsum('ij,ijkl->kl', f, subM)
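For instance, with the 5x5 example and the centre-5 filter used above, a quick check (my addition):
a = np.arange(25).reshape(5, 5)
f = np.array([[ 0, -1,  0],
              [-1,  5, -1],
              [ 0, -1,  0]])
print(conv2d(a, f))
# [[ 6  7  8]
#  [11 12 13]
#  [16 17 18]]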
You can also use fft (one of the faster methods to perform convolutions)
from numpy.fft import fft2, ifft2
import numpy as np
def fft_convolve2d(x, y):
    """ 2D convolution, using FFT"""
    fr = fft2(x)
    fr2 = fft2(np.flipud(np.fliplr(y)))
    m, n = fr.shape
    cc = np.real(ifft2(fr*fr2))
    cc = np.roll(cc, -m//2+1, axis=0)   # integer shift (m/2 would be a float in Python 3)
    cc = np.roll(cc, -n//2+1, axis=1)
    return cc
https://gist.github.com/thearn/5424195
You must pad the filter to be the same size as the image (place it in the middle of a zeros_like matrix).
cheers,
Dan
https://laurentperrinet.github.io/sciblog/posts/2017-09-20-the-fastest-2d-convolution-in-the-world.html
Check out all the convolution methods and their respective performances here.
Also, I found the below code snippet to be simpler.
from numpy.fft import fft2, ifft2
def np_fftconvolve(A, B):
    return np.real(ifft2(fft2(A)*fft2(B, s=A.shape)))
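One caveat worth adding (my note): multiplying the raw FFTs gives a circular convolution, so values near the border wrap around; away from the border it agrees with a linear convolution. A minimal sketch illustrating that:
import numpy as np
from scipy.signal import convolve2d

A = np.random.rand(32, 32)   # "image"
B = np.random.rand(5, 5)     # kernel

C = np_fftconvolve(A, B)     # shape (32, 32), circular at the borders
full = convolve2d(A, B, mode='full')

# Interior region (indices >= kernel_size - 1) matches the linear convolution:
print(np.allclose(C[4:, 4:], full[4:32, 4:32]))  # True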
I have a stochastic process with Mmax trajectories. For each trajectory, I have to take the dot product of two matrices, A and B.
With a loop, it works great
A = np.zeros((2, Mmax), dtype=np.complex64)
B = np.zeros((2, 2, Mmax), dtype=np.complex64)
C = np.zeros((2, Mmax), dtype=np.complex64)

for m in range(Mmax):
    C[:, m] = B[:, :, m].dot(A[:, m])
(here they are just 2x2 matrices to keep it simple; in reality they are much larger)
However, this loop is slow for a large number of trajectories. I want to optimize it by vectorizing it, but I run into problems when I try to implement it:
B[:,:,:].dot(A[:,:])
It gives me the error 'shapes (2,2,10) and (2,10) not aligned: 10 (dim 2) != 2 (dim 0)', which makes sense. However, I would really need to vectorize this process, or at least optimize it as much as possible.
Is there any way to get this?
If speed is your concern, there is a way to have that multiplication non-vectorised and yet extremely fast - usually even significantly faster than the vectorised version. It needs numba though:
import numpy as np
import numba as nb
@nb.njit
def mat_mul(A, B):
    n, Mmax = A.shape
    C = np.zeros((n, Mmax))
    for m in range(Mmax):
        for j in range(n):
            for i in range(n):
                C[j, m] += B[j, i, m]*A[i, m]
    return C
Mmax = 100
A = np.ones((2, Mmax))
B = np.ones((2, 2, Mmax))
C = mat_mul(A, B)
Define sample arrays that aren't all zeros. We want to verify values as well as shapes.
In [82]: m = 5
In [83]: A = np.arange(2*m).reshape(2,m)
In [84]: B = np.arange(2*2*m).reshape(2,2,m)
Your iteration:
In [85]: C = np.zeros((2,m))
In [86]: for i in range(m):
...: C[:,i]=B[:,:,i].dot(A[:,i])
...:
In [87]: C
Out[87]:
array([[ 25., 37., 53., 73., 97.],
[ 75., 107., 143., 183., 227.]])
It's fairly easy to express that in einsum:
In [88]: np.einsum('ijk,jk->ik',B,A)
Out[88]:
array([[ 25, 37, 53, 73, 97],
[ 75, 107, 143, 183, 227]])
matmul/@ is a variation on np.dot that handles 'batches' nicely. But the batch dimension has to be first (of 3). Your batch dimension, m, is last, so we have to do some transposing to get the same result:
In [90]: (np.matmul(B.transpose(2,0,1),A.transpose(1,0)[...,None])[...,0]).T
Out[90]:
array([[ 25, 37, 53, 73, 97],
[ 75, 107, 143, 183, 227]])
I have an array with shape (100000,) over which I want to apply a sliding window of length 200 with a step size of 1. This means that the output array will have the shape (99800,200) - i.e., all unique chunks of length 200. I cannot find an efficient function in numpy that achieves this. I have tried:
for i in range(data.shape[0] - 200):
    windows = np.append(windows, data[i:i+200])
Which not only produces the wrong shape (1D), but it is also incredibly slow. Is there a fast function in Numpy to do this?
Try stride_tricks in numpy. It basically does not use up any extra space than the original array a, but creates a (virtual) strided array containing all the sliding windows.
def slide(a, size):
    stride = a.strides[0]
    n = a.size - size + 1
    return np.lib.stride_tricks.as_strided(a, shape=(n, size), strides=(stride, stride))
a = np.arange(100000)
slide(a, size = 200)
>>>array([[ 0, 1, 2, ..., 197, 198, 199],
[ 1, 2, 3, ..., 198, 199, 200],
[ 2, 3, 4, ..., 199, 200, 201],
...,
[99798, 99799, 99800, ..., 99995, 99996, 99997],
[99799, 99800, 99801, ..., 99996, 99997, 99998],
[99800, 99801, 99802, ..., 99997, 99998, 99999]])
Here's a numpy answer
window_size = 10
i = np.arange(data.size - window_size + 1)
indices = np.add(np.array([np.arange(window_size)] * (data.size - window_size + 1)), i.reshape(-1, 1))
windows = data[indices]
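The index matrix can also be built with broadcasting instead of a Python-level list, which is a bit lighter (a sketch, same data and window_size assumed):
# indices[i, j] = i + j, via broadcasting a column of starts against a row of offsets
starts = np.arange(data.size - window_size + 1)[:, None]
offsets = np.arange(window_size)
windows = data[starts + offsets]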
Best function I've seen for this (non-numpy) is skimage.util.view_as_windows()
from skimage.util import view_as_windows
windows = view_as_windows(data, 200)
If you want numpy-only, the recipe in the dupe target is the most general answer, although @swag2198 suggests a more lightweight version in another answer here.
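If your NumPy is recent enough (1.20+, an assumption on my part), there is also a built-in helper that produces exactly this view without calling as_strided by hand:
import numpy as np

data = np.arange(100000)
windows = np.lib.stride_tricks.sliding_window_view(data, 200)
print(windows.shape)  # (99801, 200) -- every window of length 200, as a view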
In short: I am looking for a simple numpy (maybe one-liner) implementation of Maxpool: the maximum over a window of a numpy.ndarray, for all locations of the window across dimensions.
In more detail: I am implementing a convolutional neural network ("CNN"); one of the typical layers in such a network is a MaxPool layer (look for example here). Writing
y = MaxPool(x, S), where x is an input ndarray and S is a parameter, the output of the MaxPool is given in pseudocode by:
y[b,h,w,c] = max(x[b, s*h + i, s*w + j, c]) over i = 0,..., S-1; j = 0,...,S-1.
That is, y is an ndarray where the value at indices b,h,w,c equals the maximum taken over a window of size S x S along the second and third dimensions of the input x; the window "corner" is placed at the indices b,h,w,c.
Some additional details: The network is implemented using numpy. The CNN has many "layers" where the output of one layer is the input to the next layer. The input to a layer is a numpy.ndarray called a "tensor". In my case the tensors are 4-dimensional numpy.ndarrays, x. That is, x.shape is a tuple (B,H,W,C). The sizes of the dimensions change after the tensor is processed by a layer; for example, the input to layer i = 4 can have size B = 10, H = 24, W = 24, C = 3, while the output, aka the input to layer i+1, has B = 10, H = 12, W = 12, C = 5. As indicated in the comments, the size after application of MaxPool is (B, H - S + 1, W - S + 1, C).
For concreteness: if I use
import numpy as np
y = np.amax(x, axis = (1,2))
where x.shape is, say, (2,3,3,4), this will give me what I want, but only for the degenerate case where the window I am maximizing over has size 3 x 3 (the full size of the second and third dimensions of x), which is not exactly what I want.
Here's a solution using np.lib.stride_tricks.as_strided to create sliding windows, resulting in a 6D array of shape (B,H-S+1,W-S+1,S,S,C), and then simply performing max along the fourth and fifth axes, resulting in an output array of shape (B,H-S+1,W-S+1,C). The intermediate 6D array would be a view into the input array and as such won't occupy any more memory. The subsequent max operation, being a reduction, would efficiently utilize the sliding views.
Thus, an implementation would be -
# Based on http://stackoverflow.com/a/41850409/3293881
def patchify(img, patch_shape):
    a, X, Y, b = img.shape
    x, y = patch_shape
    shape = (a, X - x + 1, Y - y + 1, x, y, b)
    a_str, X_str, Y_str, b_str = img.strides
    strides = (a_str, X_str, Y_str, X_str, Y_str, b_str)
    return np.lib.stride_tricks.as_strided(img, shape=shape, strides=strides)

out = patchify(x, (S,S)).max(axis=(3,4))
Sample run -
In [224]: x = np.random.randint(0,9,(10,24,24,3))
In [225]: S = 5
In [226]: np.may_share_memory(patchify(x, (S,S)), x)
Out[226]: True
In [227]: patchify(x, (S,S)).shape
Out[227]: (10, 20, 20, 5, 5, 3)
In [228]: patchify(x, (S,S)).max(axis=(3,4)).shape
Out[228]: (10, 20, 20, 3)
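If the pooling window should also move in steps of S (as in the y[b,h,w,c] formula in the question) rather than by 1, one option is to slice the patch view before reducing; a hedged sketch on top of patchify:
# Keep only every S-th window along the two spatial axes, then reduce over the
# window axes as before. Slicing a strided view stays a view (no copy).
out_strided = patchify(x, (S, S))[:, ::S, ::S].max(axis=(3, 4))
print(out_strided.shape)  # (10, 4, 4, 3) for the sample x above with S = 5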
I am trying to find a well-performing way to calculate the standard deviation from the center of mass/gravity along an axis of a Numpy array.
In formula this is (sorry for the misalignment):
The best I could come up with is this:
def weighted_com(A, axis, weights):
    average = np.average(A, axis=axis, weights=weights)
    return average * weights.sum() / A.sum(axis=axis).astype(float)

def weighted_std(A, axis):
    weights = np.arange(A.shape[axis])
    w1com2 = weighted_com(A, axis, weights)**2
    w2com1 = weighted_com(A, axis, weights**2)
    return np.sqrt(w2com1 - w1com2)
In weighted_com, I need to correct the normalization from sum of weights to sum of values (which is an ugly workaround, I guess). weighted_std is probably fine.
To avoid the XY problem, I still ask for what I actually want (a better weighted_std) instead of a better version of my weighted_com.
The .astype(float) is a safety measure as I'll apply this to histograms containing ints, which caused problems due to integer division when not in Python 3 or when from __future__ import division is not active.
You want to take the mean, variance and standard deviation of the vector [1, 2, 3, ..., n] — where n is the dimension of the input matrix A along the axis of interest —, with weights given by the matrix A itself.
For concreteness, say you want to consider these center-of-mass statistics along the vertical axis (axis=0) — this is what corresponds to the formulas you wrote. For a fixed column j, you would do
n = A.shape[0]
r = np.arange(1, n+1)
mu = np.average(r, weights=A[:,j])
var = np.average(r**2, weights=A[:,j]) - mu**2
std = np.sqrt(var)
In order to put all of the computations for the different columns together, you have to stack together a bunch of copies of r (one per column) to form a matrix (that I have called R in the code below). With a bit of care, you can make things work for both axis=0 and axis=1.
import numpy as np
def com_stats(A, axis=0):
    A = A.astype(float)  # if you are worried about int vs. float
    n = A.shape[axis]
    m = A.shape[(axis-1) % 2]
    r = np.arange(1, n+1)
    R = np.vstack([r] * m)
    if axis == 0:
        R = R.T
    mu = np.average(R, axis=axis, weights=A)
    var = np.average(R**2, axis=axis, weights=A) - mu**2
    std = np.sqrt(var)
    return mu, var, std
For example,
A = np.array([[1, 1, 0], [1, 2, 1], [1, 1, 1]])
print(A)
# [[1 1 0]
# [1 2 1]
# [1 1 1]]
print(com_stats(A))
# (array([ 2. , 2. , 2.5]), # centre-of-mass mean by column
# array([ 0.66666667, 0.5 , 0.25 ]), # centre-of-mass variance by column
# array([ 0.81649658, 0.70710678, 0.5 ])) # centre-of-mass std by column
EDIT:
One can avoid creating in-memory copies of r to build R by using numpy.lib.stride_tricks: swap the line
R = np.vstack([r] * m)
above with
from numpy.lib.stride_tricks import as_strided
R = as_strided(r, strides=(0, r.itemsize), shape=(m, n))
The resulting R is a (strided) ndarray whose underlying array is the same as r's — absolutely no copying of any values occurs.
from numpy.lib.stride_tricks import as_strided
FMT = '''\
Shape: {}
Strides: {}
Position in memory: {}
Size in memory (bytes): {}
'''
def find_base_nbytes(obj):
    if obj.base is not None:
        return find_base_nbytes(obj.base)
    return obj.nbytes

def stats(obj):
    return FMT.format(obj.shape,
                      obj.strides,
                      obj.__array_interface__['data'][0],
                      find_base_nbytes(obj))
n=10
m=1000
r = np.arange(1, n+1)
R = np.vstack([r] * m)
S = as_strided(r, strides=(0, r.itemsize), shape=(m, n))
print(stats(r))
print(stats(R))
print(stats(S))
Output:
Shape: (10,)
Strides: (8,)
Position in memory: 4299744576
Size in memory (bytes): 80
Shape: (1000, 10)
Strides: (80, 8)
Position in memory: 4304464384
Size in memory (bytes): 80000
Shape: (1000, 10)
Strides: (0, 8)
Position in memory: 4299744576
Size in memory (bytes): 80
Credit to this SO answer and this one for explanations on how to get the memory address and size of the underlying array of a strided ndarray.