Splitting numpy array into blocks - python

I've got a 900 x 650 2D numpy array which I'd like to split into 10 x 10 blocks, which will be checked for nonzero elements. Is there a Pythonic way that I can achieve this with numpy?
I'm looking for functionality similar to the following:
blocks_that_have_stuff = []
my_array = getArray()
my_array.cut_into_blocks((10, 10))
for block_no, block in enumerate(my_array):
if numpy.count_nonzero(block) > 5:
blocks_that_have_stuff.append(block_no)

I wrote a routine that cut your matrix in blocks. The example is very easy to understand. I wrote it in an easy form to display the result (only for checking purpose). If you are interested in it, you could include in the output the number of blocks or anything.
import matplotlib.pyplot as plt
import numpy as np
def cut_array2d(array, shape):
arr_shape = np.shape(array)
xcut = np.linspace(0,arr_shape[0],shape[0]+1).astype(np.int)
ycut = np.linspace(0,arr_shape[1],shape[1]+1).astype(np.int)
blocks = []; xextent = []; yextent = []
for i in range(shape[0]):
for j in range(shape[1]):
blocks.append(array[xcut[i]:xcut[i+1],ycut[j]:ycut[j+1]])
xextent.append([xcut[i],xcut[i+1]])
yextent.append([ycut[j],ycut[j+1]])
return xextent,yextent,blocks
nx = 900; ny = 650
X, Y = np.meshgrid(np.linspace(-5,5,nx), np.linspace(-5,5,ny))
arr = X**2+Y**2
x,y,blocks = cut_array2d(arr,(10,10))
n = 0
for x,y,block in zip(x,y,blocks):
n += 1
plt.imshow(block,extent=[y[0],y[1],x[0],x[1]],
interpolation='nearest',origin='lower',
vmin = arr.min(), vmax=arr.max(),
cmap=plt.cm.Blues_r)
plt.text(0.5*(y[0]+y[1]),0.5*(x[0]+x[1]),str(n),
horizontalalignment='center',
verticalalignment='center')
plt.xlim([0,900])
plt.ylim([0,650])
plt.savefig("blocks.png",dpi=72)
plt.show()
The output is:
Regards
Note: I think you could optimize this routine using np.meshgrid instead a lot of appends with the xextent & yextent.

Related

plot multiple curves on same plot inside function

I have a following function with takes 2 arguments psi,lam and returns 1 array y.
lam=np.arange(0,1,0.1)
psi=np.deg2rad(np.arange(0,361,1))
def test(psi,lam):
y=[]
for i in range(len(lam)):
sin_psi = np.sin(psi)
cos_psi = np.cos(psi)
sin_beta = lam*sin_psi
cos_beta = np.sqrt(1.0 - sin_beta**2)
ssin_pb = sin_psi*sin_beta
y.append((lam*(cos_psi/cos_beta)**2 - ssin_pb)/cos_beta + cos_psi)
plt.plot(psi,y[i])
return y
I would like the function to return range(len(lam))=10 plots of y on the vertical axis against psi on x axis.
However, it seems to be only plotting the same curve multiple times. Not sure what I am missing?
import matplotlib.pyplot as plt
import numpy as np
lam=np.arange(0,1,0.1)
psi=np.deg2rad(np.arange(0,361,1))
def test(angle,var):
sin_psi = np.sin(psi)
cos_psi = np.cos(psi)
sin_beta = var*sin_psi
cos_beta = np.sqrt(1.0 - sin_beta**2)
ssin_pb = sin_psi*sin_beta
return ((var*(cos_psi/cos_beta)**2 - ssin_pb)/cos_beta + cos_psi)
for i in lam:
plt.plot(psi,test(psi,i))
plt.show()
I moved the variable outside of the function, this way you may also use it for other cases. The only other thing is that you should call plt.show() after you're done drawing.
Your code has several problems the main being that the return function was inside the loop interrupting it after the first iteration. Imitating your code structure as closely as possible, we can rewrite the code as:
import numpy as np
import matplotlib.pyplot as plt
def test(psi,lam):
y=[]
for curr_lam in lam:
sin_psi = np.sin(psi)
cos_psi = np.cos(psi)
sin_beta = curr_lam*sin_psi
cos_beta = np.sqrt(1.0 - sin_beta**2)
ssin_pb = sin_psi*sin_beta
val = (curr_lam * (cos_psi/cos_beta)**2 - ssin_pb)/cos_beta + cos_psi
y.append(val)
plt.plot(psi, val)
plt.show()
return y
lam=np.arange(0, 1, 0.1)
psi=np.deg2rad(np.arange(0,361,1))
y = test(psi, lam)
print(y)
Sample output:
As Johan mentioned in the comments, you should also directly iterate over list/arrays. If you need to combine arrays, use
for x1, x2 in zip(arr1, arr2):
If you absolutely need the index value, use
for i, x in enumerate(arr):

Numpy only computation of mathematical expression involving a nested sum of functions over the same array

I need help to compute a mathematical expression using only numpy operations. The expression I want to compute is the following :
Where : x is an (N, S) array and f is a numpy function (that can work with broadcastable arrays e.g np.maximum, np.sum, np.prod, ...). If that is of importance, in my case f is a symetric function.
So far my code looks like this:
s = 0
for xp in x: # Loop over N...
s += np.sum(np.prod(f(xp, x), axis=1))
And still has loop that I'd like to get rid of.
Typically N is "large" (around 30k) but S is small (less than 20) so if anyone can find a trick to only loop over S this would still be a major improvement.
I belive the problem is easy by N-plicating the array but one of size (32768, 32768, 20) requires 150Go of RAM that I don't have. However, (32768, 32768) fits in memory though I would appreciate a solution that does not allocate such array.
Maybe a use of np.einsum with well-chosen arrays is possible?
Thanks for your replies. If any information is missing let me know!
Have a nice day !
Edit 1 :
Form of f I'm interested in includes (for now) : f(x, y) = |x - y|, f(x, y) = |x - y|^2, f(x, y) = 2 - max(x, y).
Your loop is very efficient. Some possible ways are
Method-1 (looping over S)
import numpy as np
def f(x,y):
return np.abs(x-y)
N = 200
S = 20
x_data = random.rand(N,S) #(i,s)
y_data = random.rand(N,S) #(i',s)
product = f(broadcast_to(x_data[:,0][...,None],(N,N)) ,broadcast_to(y_data[:,0][...,None],(N,N)).T)
for i in range(1,S):
product *= f(broadcast_to(x_data[:,i][...,None],(N,N)) ,broadcast_to(y_data[:,i][...,None],(N,N)).T)
sum = np.sum(product)
Method-2 (dispatching S number of blocks)
import numpy as np
def f(x,y):
x1 = np.broadcast_to(x[:,None,...],(x.shape[0],y.shape[0],x.shape[1]))
y1 = np.broadcast_to(y[None,...],(x.shape[0],y.shape[0],x.shape[1]))
return np.abs(x1-y1)
def f1(x1,y1):
return np.abs(x1-y1)
N = 5000
S = 20
x_data = np.random.rand(N,S) #(i,s)
y_data = np.random.rand(N,S) #(i',s)
def fun_new(x_data1,y_data1):
s = 0
pp =np.split(x_data1,S,axis=0)
for xp in pp:
s += np.sum(np.prod(f(xp, y_data1), axis=2))
return s
def fun_op(x_data1,y_data1):
s = 0
for xp in x_data1: # Loop over N...
s += np.sum(np.prod(f1(xp, y_data1), axis=1))
return s
fun_new(x_data,y_data)

Accessing only one key in a nested list of dictionaries and plotting it with matplotlib

I have a nested list of dictionaries, created like this:
N = 30
grid = []
for row in range(N):
rows = []
for column in range(N):
each_cell = {"check": 0, "type": -1}
rows.append(each_cell)
grid.append(rows)
Type is the one that I want to plot, a value of -1 means nothing in the cell, and 0,1,2,3 are different types (not gradient), which I want to be represented by different colours.
I am putting a random number of types into the grid like this:
import numpy.random as rnd
import matplotlib.pyplot as plt
for i in range (rnd.randint(0, N*N)):
x = rnd.randint(0, N)
y = rnd.randint(0, N)
grid[x][y]['check'] = 1
if grid[x][y]['check'] == 1:
grid[x][y]['type'] = rnd.randint(0,4)
I am attempting to plot it using this:
plt.imshow(grid['type'], interpolation = 'nearest', cmap = 'gist_ncar_r')
plt.show()
But obviously the grid['type'] isn't picking out only the types like I want it to, anybody know how to fix this?
Since imshow requires an 'array-like', you can change the structure of your data to make it easier to work with. Instead of using an array of dicts, use a dict of arrays.
import numpy.random as rnd
import matplotlib.pyplot as plt
N = 30
grid = {'check': [], 'type': []}
for row in range(N):
check_rows = []
type_rows = []
for column in range(N):
check_rows.append(0)
type_rows.append(1)
grid['check'].append(check_rows)
grid['type'].append(type_rows)
for i in range (rnd.randint(0, N*N)):
x = rnd.randint(0, N)
y = rnd.randint(0, N)
grid['check'][x][y] = 1
if grid['check'][x][y] == 1:
grid['type'][x][y] = rnd.randint(0,4)
plt.imshow(grid['type'], interpolation = 'nearest', cmap = 'gist_ncar_r')
plt.show()
You can use a list comprehension to get the data you want into an array:
from numpy import *
...
data = array([[grid[i][j]['type'] for j in range(N)] for i in range(N)])
To use array you will need to do do the numpy import.
Then you can plot it like you're trying to:
matplotlib.pyplot.imshow(data, interpolation = 'nearest', cmap = 'gist_ncar_r')
matplotlib.pyplot.show()

How to index List/ numpy array in order to plot the data with matplotlib

I have a function f(x,t) = cos(t)*t + x and i want to display the change of the result over the width x and time t at discretised time steps t_i and discretised width steps x_j.
Now I am a while here on SX and feel really embarrassed to only can post such little code or in other words nothing (since nothing worked I have done...):
Nevertheless if someone has the time to help, I`d appreciate it.
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import matplotlib.pyplot as pyplot
from astropy.io.ascii.latex import AASTex
def func(xi, ti):
res = np.cos(ti)*ti + xi
return res
timeSpacing = 100
timeStart = 0
timeEnd = 1
time = np.linspace(timeStart, timeEnd, timeSpacing)
widthSpacing = 300
widthStart = 0
widthEnd = 3
width = np.linspace(widthStart, widthEnd, widthSpacing)
resultList = [None]*timeSpacing
resultListInner = [None]*widthSpacing
for i, ithTime in enumerate(time):
for j, jthWidth in enumerate(width):
aas = np.zeros_like(width)
aas.fill(ithTime)
resultListInner[j] = ithTime, jthWidth, func(jthWidth, aas)
resultList[i] = resultListInner
So how do I correctly index the list and array and plot my data using matplotlib?
My plot should look like this:
where in my case the aperature should be the width x, the sky annulus is my time t and the RMS is my func(x,t).
A couple of points:
Numpy provides a very nice function for doing differences of array elements: diff
Matplotlib uses plot_wireframe for creating a plot that you would want (also using Numpy's meshgrid)
Now, combining these into what you may want would look something like this.
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import matplotlib.pyplot as plt
def func(xi, ti):
res = np.cos(ti)*np.sin(xi)
return res
timeSpacing = 20
timeStart = 0
timeEnd = 1
time = np.linspace(timeStart, timeEnd, timeSpacing)
widthSpacing = 50
widthStart = 0
widthEnd = 3
width = np.linspace(widthStart, widthEnd, widthSpacing)
X,T = np.meshgrid(width,time)
F = func(X,T)
DF = np.diff(np.diff(F,axis=0),axis=1)
fig = plt.figure()
ax = fig.add_subplot(111,projection='3d')
ax.plot_wireframe(X[:-1,:-1],T[:-1,:-1],DF)
plt.show()
Note that diff is applied twice: once in each dimension axis= . I have also changed the toy function you provided to something that actually looks decent in this case.
For your more general use, it seems that you would want to just collect all of your F data into a 2D array, then proceed from the DF = line.

Shifting data in 2d array through shifted indices

I need to shift a 2D array field, i.e. I have a "previous_data" array which I access through shifted indices to create my "new_data" array.
I can do this in a nonpythonic (and slow) loop, but would very much appreciate some help in finding a pythonic (and faster) solution!
Any help and hints are very much appreciated!
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import mpl
def nonpythonic():
#this works, but is slow (for large arrays)
new_data = np.zeros((ny,nx))
for j in xrange(ny):
for i in xrange(nx):
#go through each item, check if it is within the bounds
#and assign the data to the new_data array
i_new = ix[j,i]
j_new = iy[j,i]
if ((i_new>=0) and (i_new<nx) and (j_new>=0) and (j_new<ny)):
new_data[j,i]=previous_data[j_new,i_new]
ef, axar = plt.subplots(1,2)
im = axar[0].pcolor(previous_data, vmin=0,vmax=2)
ef.colorbar(im, ax=axar[0], shrink=0.9)
im = axar[1].pcolor(new_data, vmin=0,vmax=2)
ef.colorbar(im, ax=axar[1], shrink=0.9)
plt.show()
def pythonic():
#tried a few things here, but none are working
#-tried assigning NaNs to indices (ix,iy) which are out of bounds, but NaN's don't work for indices
#-tried masked arrays, but they also don't work as indices
#-tried boolean arrays, but ended in shape mismatches
#just as in the nonworking code below
ind_y_good = np.where(iy>=0) and np.where(iy<ny)
ind_x_good = np.where(ix>=0) and np.where(ix<nx)
new_data = np.zeros((ny,nx))
new_data[ind_y_good,ind_x_good] = previous_data[iy[ind_y_good],ix[ind_x_good]]
#some 2D array:
nx = 20
ny = 30
#array indices:
iy, ix = np.indices((ny,nx))
#modify indices (shift):
iy = iy + 1
ix = ix - 4
#create some out of range indices (which might happen in my real scenario)
iy[0,2:7] = -9999
ix[0:3,-1] = 6666
#some previous data which is the basis for the new_data:
previous_data = np.ones((ny,nx))
previous_data[2:8,10:20] = 2
nonpythonic()
pythonic()
This is the result of the working (nonpythonic) code above:
I implemented a version of pythonic that replicates nonpythonic with some masking and index fiddling - see below. By the way I think the "new" indices should be the ones corresponding to the new array, rather than the old ones, but I've left it as in your existing function.
The main thing to realise is that in your attempt in the question, your conditions
ind_y_good = np.where(iy>=0) and np.where(iy<ny)
ind_x_good = np.where(ix>=0) and np.where(ix<nx)
must be combined, since we must always have pairs of x and y indices. i.e. if the x index is invalid, then so is the y.
Finally, if the indices are really all shifted by a constant factor, you can make this even simpler by using NumPy's roll function and taking a slice of the indices corresponding to the valid area.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import mpl
def nonpythonic(previous_data, ix, iy, nx, ny):
#this works, but is slow (for large arrays)
new_data = np.zeros((ny,nx))
for j in xrange(ny):
for i in xrange(nx):
#go through each item, check if it is within the bounds
#and assign the data to the new_data array
i_new = ix[j,i]
j_new = iy[j,i]
if ((i_new>=0) and (i_new<nx) and (j_new>=0) and (j_new<ny)):
new_data[j,i]=previous_data[j_new,i_new]
return new_data
def pythonic(previous_data, ix, iy):
ny, nx = previous_data.shape
iy_old, ix_old = np.indices(previous_data.shape)
# note you must apply the same condition to both
# index arrays
valid = (iy >= 0) & (iy < ny) & (ix >= 0) & (ix < nx)
new_data = np.zeros((ny,nx))
new_data[iy_old[valid], ix_old[valid]] = previous_data[iy[valid], ix[valid]]
return new_data
def main():
#some 2D array:
nx = 20
ny = 30
#array indices:
iy, ix = np.indices((ny,nx))
#modify indices (shift):
iy = iy + 1
ix = ix - 4
#create some out of range indices (which might happen in my real scenario)
iy[0,2:7] = -9999
ix[0:3,-1] = 6666
#some previous data which is the basis for the new_data:
previous_data = np.ones((ny,nx))
previous_data[2:8,10:20] = 2
data_nonpythonic = nonpythonic(previous_data, ix, iy, nx, ny)
data_pythonic = pythonic(previous_data, ix, iy)
new_data = data_nonpythonic
ef, axar = plt.subplots(1,2)
im = axar[0].pcolor(previous_data, vmin=0,vmax=2)
ef.colorbar(im, ax=axar[0], shrink=0.9)
im = axar[1].pcolor(new_data, vmin=0,vmax=2)
ef.colorbar(im, ax=axar[1], shrink=0.9)
plt.show()
print(np.allclose(data_nonpythonic, data_pythonic))
if __name__ == "__main__":
main()

Categories

Resources