Using nditer to sort values in arrays - python

I have two text files, each with the pixel intensities from an image. The first file I converted to a binary image by manually establishing a threshold:
import numpy as np
import matplotlib.pyplot as p
# Load the pixel intensities of the first image from a text file.
icp4 = np.loadtxt(icp4_img)
# Binarize in place: the 'readwrite' flag lets each visited element be overwritten.
with np.nditer(icp4, op_flags=['readwrite']) as it:
    for x in it:
        # Values above the manually chosen threshold (800) become 1, the rest 0.
        if x[...] > 800:
            x[...] = 1
        else:
            x[...] = 0
# Show the thresholded array as a grayscale image and report its shape.
p.imshow(icp4, interpolation='nearest', cmap='gray')
p.show()
print(icp4.shape)
>>>(45, 52)
With the second file, I want to sort the pixel values into two lists, which I will use to plot a histogram of pixel values, i.e. if the pixel is above threshold in the first array, then I want to sort it to the inside list.
#sorting pixels for PTM channel
ptm = np.loadtxt(ptm_img)
inside = [] #list for pixel values that colocalize with icp4 signal
outside = [] #list for pixel values that do not colocalize with icp4 signal
with np.nditer(ptm, op_flags=['readonly']) as it:
    # i counts how many elements of ptm have been visited so far.
    i=0
    for x in it:
        # NOTE(review): icp4 was printed above with shape (45, 52), so icp4[i]
        # selects a whole row rather than the element that corresponds to x.
        # This is the problem the question asks about.
        if icp4[i] > 0:
            inside.append(x[...])
        else:
            outside.append(x[...])
        i+=1
# NOTE(review): sys is not imported anywhere in the code shown.
sys.exit()
I cannot figure out how to reference the same position in array icp4 when iterating through the array ptm. I apologize if this is a duplicated question.

import sys

import numpy as np
import matplotlib.pyplot as p

# Load both channels. They are assumed to have the same shape so that one
# iterator can walk them element by element in lockstep.
icp4 = np.loadtxt(icp4_img)
ptm = np.loadtxt(ptm_img)

# PTM pixel values that do / do not coincide with above-threshold ICP4 pixels.
inside, outside = [], []

# A single nditer over both operands yields matching elements together;
# icp4 is writable so it can be binarized in place, ptm is only read.
with np.nditer([icp4, ptm],
               op_flags=[['readwrite'], ['readonly']]) as it:
    for icp_px, ptm_px in it:
        if icp_px > 800:
            icp_px[...] = 1
            # Store plain floats rather than 0-d array views into ptm.
            inside.append(float(ptm_px))
        else:
            icp_px[...] = 0
            outside.append(float(ptm_px))

# Show the binarized ICP4 image and report its shape.
p.imshow(icp4, interpolation='nearest', cmap='gray')
p.show()
print(icp4.shape)
sys.exit()

Related

NumPy 2D image array: how to apply a formula only to pixels that satisfy a condition?

import numpy as np
from PIL import Image
def isblack(rgb):
    """Return a truthy value when the first three channels of *rgb* are all 0.

    Channels are tested in order and evaluation stops at the first
    non-zero one.
    """
    red_is_zero = rgb[0] == 0
    return red_is_zero and rgb[1] == 0 and rgb[2] == 0
# Load the image and convert it to an array indexed as [row, column, channel].
a = Image.open('image1.jpg')
a = np.array(a) # RGB image
[h,w,chan] = np.shape(a)
# Side length of the square patches being compared.
filtsz = 9
# comparing subimage with a[50:59,60:69] and a[100:119,120:129], for example
srcTop = 50
srcLeft = 60
dstTop = 100
dstLeft = 120
ssd = 0 # sumOfSquareDifference
for i in range(filtsz):
    for j in range(filtsz):
        # Only destination pixels that are not pure black contribute.
        if not isblack(a[dstTop+i,dstLeft+j,:]):
            # Adds the per-channel squared differences of this pixel pair.
            # NOTE(review): if `a` has an unsigned 8-bit dtype the subtraction
            # and the square may wrap around -- confirm the dtype.
            ssd += sum((a[dstTop+i, dstLeft+j] - a[srcTop+i, srcLeft+j])**2)
print(ssd)
The naive implementation is to loop over all pixels that satisfy the condition and then compute.
However, this is very slow.
How can I make it faster? I'm looking for a way that uses indexing. For example, something like the following pseudo-code:
selected = [not isblack(pixel) for pixel in image] # 2D array contains 0 if black, 1 if not black
diff = [(a[pixel] - b[pixel])**2 for pixel in a] # 2D array contains the square difference at each pixel
ssd = sum(diff * selected) # sum only positions that satisfy the condition
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

img = Image.open("image/image1.jpg")
filtsz = 100  # increase from 9 to 100 for display purpose
srcTop = 50
srcLeft = 60
dstTop = 100
dstLeft = 120
npimg = np.array(img)

# indexing: cut the two square patches out of the image in one step each
subimg_src = npimg[srcTop:srcTop + filtsz, srcLeft:srcLeft + filtsz, :]
subimg_dst = npimg[dstTop:dstTop + filtsz, dstLeft:dstLeft + filtsz, :]

fig, ax = plt.subplots(1, 2)
ax[0].imshow(subimg_src)
ax[1].imshow(subimg_dst)

# channel axis - 2
# A destination pixel is "not black" when any of its channels is non-zero;
# only those pixels should contribute, so the mask is used without negation.
# keepdims: set True so the (h, w, 1) mask broadcasts over the channels
selected = np.any(subimg_dst, axis=2, keepdims=True)

# Widen to a signed 64-bit type before subtracting: image arrays are usually
# unsigned 8-bit, where the difference and its square would wrap around.
diff = subimg_src.astype(np.int64) - subimg_dst.astype(np.int64)
ssd = np.sum(diff ** 2 * selected)
print(ssd)
Example image:

Python: Find mean of points within radius of an element in 2D array

I am looking for an efficient way to find the mean of values within a certain radius of an element in a 2D NumPy array, excluding the center point and values < 0.
My current method is to create a disc shaped mask (using the method here) and find the mean of points within this mask. This is taking too long however...over 10 minutes to calculate ~18000 points within my 300x300 array.
The array I want to find means within is here titled "arr"
def radMask(index, radius, array, insert):
    """Write *insert* into every cell of *array* within *radius* of *index*.

    *index* is a (row, column) pair. The 2-D *array* is modified in place
    and also returned. Cells whose squared distance from *index* is at most
    ``radius**2`` are overwritten.
    """
    row0, col0 = index
    n_rows, n_cols = array.shape
    # Open grids of row/column offsets relative to the centre cell.
    row_off, col_off = np.ogrid[-row0:n_rows - row0, -col0:n_cols - col0]
    within_disc = col_off * col_off + row_off * row_off <= radius * radius
    array[within_disc] = insert
    return array
# Build the disc as a boolean mask. Indexing with an integer 0/1 array would
# pick rows 0 and 1 of arr (integer indexing) instead of the masked cells.
arr_mask = np.zeros_like(arr, dtype=bool)
arr_mask = radMask(center, radius, arr_mask, True)
arr_mask[arr < 0] = False  # Exclude points with no echo
arr_mask[ind] = False  # Exclude center point
arr_mean = 0
if np.any(dbz_bg):
    # np.mean replaces sp.mean, the SciPy-namespace alias of the same NumPy
    # function, which newer SciPy releases no longer provide.
    arr_mean = np.mean(arr[arr_mask])
Is there any more efficient way to do this? I've looked into some of the image processing filters/tools but can't quite wrap my head around it.
is this helpful? This takes only a couple of seconds on my laptop for ~ 18000 points:
import numpy as np
#generate a random 300x300 matrix for testing
inputMat = np.random.random((300,300))
# Radius of the disc-shaped neighbourhood passed to radMask below.
radius=50
def radMask(index, radius, array):
    """Return a boolean disc mask with the shape of *array*.

    The mask is True for every cell whose squared distance from the
    (row, column) pair *index* is at most ``radius**2``. *array* is only
    consulted for its shape.
    """
    row0, col0 = index
    n_rows, n_cols = array.shape
    # Open grids of row/column offsets relative to the centre cell.
    row_off, col_off = np.ogrid[-row0:n_rows - row0, -col0:n_cols - col0]
    return col_off * col_off + row_off * row_off <= radius * radius
#meanAll is going to store ~18000 points
meanAll = np.zeros((130, 130))
# Visit every (x, y) centre in row-major order.
for x, y in np.ndindex(*meanAll.shape):
    centerMask = (x, y)
    mask = radMask(centerMask, radius, inputMat)
    #un-mask values below 0 and the center itself
    mask[inputMat < 0] = False
    mask[centerMask] = False
    #get the mean of the remaining cells inside the disc
    meanAll[x, y] = np.mean(inputMat[mask])

border/edge operations on numpy arrays

Suppose I have a 3D numpy array of nonzero values and "background" = 0. As an example I will take a sphere of random values:
# Random values 1..4 everywhere, then zero everything outside a sphere of
# radius 20 centred in the 100x100x100 volume.
array = np.random.randint(1, 5, size=(100, 100, 100))
z, y, x = np.ogrid[-50:50, -50:50, -50:50]
mask = x * x + y * y + z * z <= 20 * 20
array[~mask] = 0
First, I would like to find the "border voxels" (all nonzero values that have a zero within their 3x3x3 neighbourhood). Second, I would like to replace all border voxels with the mean of their nonzero neighbours. So far I tried to use scipy's generic filter in the following way:
Function to apply at each element:
def borderCheck(values):
    """Filter callback for a flattened 27-value (3x3x3) neighbourhood.

    Index 13 is the centre of the footprint. A zero centre yields 0. A
    non-zero centre with no zero in the neighbourhood is returned
    unchanged. Otherwise the result is the sum of the neighbourhood
    divided by its number of non-zero entries.
    """
    centre = values[13]
    #the footprint center is on background
    if centre == 0:
        return 0
    #interior voxel: nothing to replace
    if 0 not in values:
        return centre
    #border voxel: mean over the nonzero entries of the neighbourhood
    return np.sum(values) / np.count_nonzero(values)
Generic filter:
from scipy import ndimage
# Call borderCheck on the full 3x3x3 neighbourhood (27 values) of every voxel.
result = ndimage.generic_filter(array, borderCheck, footprint = np.ones((3,3,3)))
Is this a proper way to handle this problem? I feel that I am trying to reinvent the wheel here and that there must be a shorter, nicer way to achieve the result. Are there any other suitable (numpy, scipy ) functions that I can use?
EDIT
I messed one thing up: I would like to replace all border voxels with the mean of their nonzero AND non-border neighbours. For this, I tried to clean up the neighbours from ali_m's code (2D case):
#for each neighbour voxel, check whether it also appears in the border/edges
#for each neighbour voxel, check whether it also appears in the border/edges
non_border_neighbours = []
for each in neighbours:
    # NOTE(review): nonzero_idx[i] is a coordinate row and edge_idx a 2-D array
    # of rows. `in` on a NumPy array presumably compares element-wise and
    # succeeds on any single matching coordinate instead of testing whole-row
    # membership, which would explain the empty result -- confirm.
    non_border_neighbours.append([i for i in each if nonzero_idx[i] not in edge_idx])
Now I can't figure out why non_border_neighbours comes back empty?
Furthermore, correct me if I am wrong, but doesn't tree.query_ball_point with radius 1 address only the 6 nearest neighbours (Euclidean distance 1)? Should I set sqrt(3) (3D case) as the radius to get the 26-neighbourhood?
I think it's best to start out with the 2D case first, since it can be visualized much more easily:
import numpy as np
from matplotlib import pyplot as plt
# Random values 1..4 stored as doubles, then zero everything outside a disc
# of radius 30 centred in the 100x100 image.
A = np.random.randint(1, 5, size=(100, 100)).astype(np.double)
y, x = np.ogrid[-50:50, -50:50]
mask = x * x + y * y <= 30 * 30
A[np.logical_not(mask)] = 0
To find the edge pixels you could perform binary erosion on your mask, then XOR the result with your mask
# rank 2 structure with full connectivity
# rank 2 structure with full connectivity
struct = ndimage.generate_binary_structure(2, 2)
# Eroding the mask with this structure shrinks it at its boundary.
erode = ndimage.binary_erosion(mask, struct)
# XOR keeps the pixels that are set in the mask but not in the eroded mask.
edges = mask ^ erode
One approach to find the nearest non-zero neighbours of each edge pixel would be to use a scipy.spatial.cKDTree:
from scipy.spatial import cKDTree

# the indices of the non-zero locations and their corresponding values
nonzero_idx = np.vstack(np.where(mask)).T
nonzero_vals = A[mask]

# build a k-D tree
tree = cKDTree(nonzero_idx)

# use it to find the indices of all non-zero values that are at most 1 pixel
# away from each edge pixel (p=np.inf: Chebyshev distance, so diagonal
# neighbours count as distance 1)
edge_idx = np.vstack(np.where(edges)).T
neighbours = tree.query_ball_point(edge_idx, r=1, p=np.inf)

# take the average value for each set of neighbours; a list is built
# explicitly because np.hstack does not accept a generator
new_vals = np.array([np.mean(nonzero_vals[n]) for n in neighbours])

# use these to replace the values of the edge pixels
A_new = A.astype(np.double, copy=True)
A_new[edges] = new_vals
Some visualisation:
# Three panels side by side with shared axes: original, edge mask, result.
fig, ax = plt.subplots(1, 3, figsize=(10, 4), sharex=True, sharey=True)
# Shared colour scale so the original and averaged panels are comparable.
norm = plt.Normalize(0, A.max())
panels = (
    (A, 'Original', {'norm': norm}),
    (edges, 'Edges', {}),
    (A_new, 'Averaged', {'norm': norm}),
)
for aa, (image, title, extra) in zip(ax, panels):
    aa.imshow(image, **extra)
    aa.set_title(title, fontsize='x-large')
    aa.set_axis_off()
# Zoom in on part of the disc boundary (axes are shared, so all panels follow).
ax[0].set_xlim(20, 50)
ax[0].set_ylim(50, 80)
fig.tight_layout()
plt.show()
This approach will also generalize to the 3D case:
# Random values 1..4 inside a sphere of radius 20, zero outside.
B = np.random.randint(1, 5, size=(100, 100, 100)).astype(np.double)
z, y, x = np.ogrid[-50:50, -50:50, -50:50]
mask = x**2 + y**2 + z**2 <= 20**2
B[~mask] = 0

# Edge voxels: in the mask, but removed by one erosion with the fully
# connected rank-3 structure.
struct = ndimage.generate_binary_structure(3, 3)
erode = ndimage.binary_erosion(mask, struct)
edges = mask ^ erode

# Coordinates and values of all non-zero voxels, indexed by a k-D tree.
nonzero_idx = np.vstack(np.where(mask)).T
nonzero_vals = B[mask]
tree = cKDTree(nonzero_idx)

# For each edge voxel, the non-zero voxels at Chebyshev distance <= 1.
edge_idx = np.vstack(np.where(edges)).T
neighbours = tree.query_ball_point(edge_idx, r=1, p=np.inf)

# A list is built explicitly because np.hstack does not accept a generator.
new_vals = np.array([np.mean(nonzero_vals[n]) for n in neighbours])

# Replace the edge voxels in a copy of the volume.
B_new = B.astype(np.double, copy=True)
B_new[edges] = new_vals
Test against your version:
def borderCheck(values):
    """Filter callback for a flattened 27-value (3x3x3) neighbourhood.

    Index 13 is the centre of the footprint. A zero centre yields 0. A
    non-zero centre with no zero in the neighbourhood is returned
    unchanged. Otherwise the result is the sum of the neighbourhood
    divided by its number of non-zero entries.
    """
    centre = values[13]
    #the footprint center is on background
    if centre == 0:
        return 0
    #interior voxel: nothing to replace
    if 0 not in values:
        return centre
    #border voxel: mean over the nonzero entries of the neighbourhood
    return np.sum(values) / np.count_nonzero(values)
# Run the generic_filter version on the same volume and compare both results.
result = ndimage.generic_filter(B, borderCheck, footprint=np.ones((3, 3, 3)))
print(np.allclose(B_new, result))
# True
I'm sure this isn't the most efficient way to do it, but it will still be significantly faster than using generic_filter.
Update
The performance could be further improved by reducing the number of points that are considered as candidate neighbours of the edge pixels/voxels:
# ...
# the edge pixels/voxels plus their immediate non-zero neighbours
# Eroding a second time and XOR-ing with the mask keeps only what the two
# erosions removed, so far fewer points go into the k-D tree.
erode2 = ndimage.binary_erosion(erode, struct)
candidate_neighbours = mask ^ erode2
nonzero_idx = np.vstack(np.where(candidate_neighbours)).T
nonzero_vals = B[candidate_neighbours]
# ...

Accessing only one key in a nested list of dictionaries and plotting it with matplotlib

I have a nested list of dictionaries, created like this:
N = 30
# N x N nested list; every cell gets its own dict, initially unchecked
# ("check": 0) and empty ("type": -1).
grid = [[{"check": 0, "type": -1} for _ in range(N)] for _ in range(N)]
Type is the one that I want to plot, a value of -1 means nothing in the cell, and 0,1,2,3 are different types (not gradient), which I want to be represented by different colours.
I am putting a random number of types into the grid like this:
import numpy.random as rnd
import matplotlib.pyplot as plt
# Pick a random number of (possibly repeated) cells, mark each as checked
# and give it a random type in 0..3.
for _ in range(rnd.randint(0, N*N)):
    x, y = rnd.randint(0, N), rnd.randint(0, N)
    cell = grid[x][y]
    cell['check'] = 1
    # 'check' was set to 1 on the line above, so the type is always assigned.
    cell['type'] = rnd.randint(0,4)
I am attempting to plot it using this:
# NOTE(review): grid is built above as a list of lists of dicts, so
# grid['type'] indexes a list with a string. This is the line the question
# is asking how to fix.
plt.imshow(grid['type'], interpolation = 'nearest', cmap = 'gist_ncar_r')
plt.show()
But obviously the grid['type'] isn't picking out only the types like I want it to, anybody know how to fix this?
Since imshow requires an 'array-like', you can change the structure of your data to make it easier to work with. Instead of using an array of dicts, use a dict of arrays.
import numpy.random as rnd
import matplotlib.pyplot as plt
N = 30
# Dict of N x N nested lists instead of a nested list of dicts, so that
# grid['type'] is directly array-like. Every row is a separate list.
# 'check' starts at 0; 'type' starts at -1, the "nothing in the cell" value,
# so empty cells cannot be confused with the real types 0..3.
grid = {
    'check': [[0] * N for _ in range(N)],
    'type': [[-1] * N for _ in range(N)],
}
# Pick a random number of (possibly repeated) cells, mark each as checked
# and give it a random type in 0..3.
for _ in range(rnd.randint(0, N*N)):
    x, y = rnd.randint(0, N), rnd.randint(0, N)
    grid['check'][x][y] = 1
    # 'check' was set to 1 on the line above, so the type is always assigned.
    grid['type'][x][y] = rnd.randint(0,4)
# grid['type'] is a plain nested list, which imshow accepts as array-like.
plt.imshow(grid['type'], interpolation = 'nearest', cmap = 'gist_ncar_r')
plt.show()
You can use a list comprehension to get the data you want into an array:
# Explicit import instead of a wildcard, which would pull every NumPy name
# into the namespace and shadow builtins such as sum, min, max, any and all.
import numpy as np
...
# Pull only the 'type' value out of every cell into an N x N array.
data = np.array([[grid[i][j]['type'] for j in range(N)] for i in range(N)])
To use array you will need to do the numpy import.
Then you can plot it like you're trying to:
# NOTE(review): the fully qualified name needs `import matplotlib.pyplot`;
# the other snippets in this thread import it only under the alias plt.
matplotlib.pyplot.imshow(data, interpolation = 'nearest', cmap = 'gist_ncar_r')
matplotlib.pyplot.show()

Shifting data in 2d array through shifted indices

I need to shift a 2D array field, i.e. I have a "previous_data" array which I access through shifted indices to create my "new_data" array.
I can do this in a nonpythonic (and slow) loop, but would very much appreciate some help in finding a pythonic (and faster) solution!
Any help and hints are very much appreciated!
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import mpl
def nonpythonic():
    """Build new_data by looking up previous_data through shifted indices.

    Reads the module-level nx, ny, ix, iy and previous_data. For every
    (j, i) the value previous_data[iy[j, i], ix[j, i]] is copied when both
    indices are in bounds; other cells stay 0. The old and new fields are
    then plotted side by side.
    """
    #this works, but is slow (for large arrays)
    new_data = np.zeros((ny,nx))
    # range instead of xrange: xrange does not exist on Python 3.
    for j in range(ny):
        for i in range(nx):
            #go through each item, check if it is within the bounds
            #and assign the data to the new_data array
            i_new = ix[j,i]
            j_new = iy[j,i]
            if ((i_new>=0) and (i_new<nx) and (j_new>=0) and (j_new<ny)):
                new_data[j,i]=previous_data[j_new,i_new]
    # Left panel: previous data; right panel: shifted data, same colour range.
    ef, axar = plt.subplots(1,2)
    im = axar[0].pcolor(previous_data, vmin=0,vmax=2)
    ef.colorbar(im, ax=axar[0], shrink=0.9)
    im = axar[1].pcolor(new_data, vmin=0,vmax=2)
    ef.colorbar(im, ax=axar[1], shrink=0.9)
    plt.show()
def pythonic():
    #tried a few things here, but none are working
    #-tried assigning NaNs to indices (ix,iy) which are out of bounds, but NaN's don't work for indices
    #-tried masked arrays, but they also don't work as indices
    #-tried boolean arrays, but ended in shape mismatches
    #just as in the nonworking code below
    # NOTE(review): `and` between two np.where(...) results is Python's boolean
    # `and` on tuples, not an element-wise combination, so each line keeps only
    # one of its two conditions. The y and x conditions are also never combined
    # into one per-cell validity test.
    ind_y_good = np.where(iy>=0) and np.where(iy<ny)
    ind_x_good = np.where(ix>=0) and np.where(ix<nx)
    new_data = np.zeros((ny,nx))
    new_data[ind_y_good,ind_x_good] = previous_data[iy[ind_y_good],ix[ind_x_good]]
#some 2D array:
# Field dimensions: nx columns, ny rows.
nx = 20
ny = 30
#array indices:
iy, ix = np.indices((ny,nx))
#modify indices (shift):
iy = iy + 1
ix = ix - 4
#create some out of range indices (which might happen in my real scenario)
iy[0,2:7] = -9999
ix[0:3,-1] = 6666
#some previous data which is the basis for the new_data:
previous_data = np.ones((ny,nx))
previous_data[2:8,10:20] = 2
# Both functions read the module-level names defined above.
nonpythonic()
pythonic()
This is the result of the working (nonpythonic) code above:
I implemented a version of pythonic that replicates nonpythonic with some masking and index fiddling - see below. By the way I think the "new" indices should be the ones corresponding to the new array, rather than the old ones, but I've left it as in your existing function.
The main thing to realise is that in your attempt in the question, your conditions
ind_y_good = np.where(iy>=0) and np.where(iy<ny)
ind_x_good = np.where(ix>=0) and np.where(ix<nx)
must be combined, since we must always have pairs of x and y indices. i.e. if the x index is invalid, then so is the y.
Finally, if the indices are really all shifted by a constant factor, you can make this even simpler by using NumPy's roll function and taking a slice of the indices corresponding to the valid area.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import mpl
def nonpythonic(previous_data, ix, iy, nx, ny):
    """Return a (ny, nx) array filled from previous_data via shifted indices.

    For every (j, i) the value previous_data[iy[j, i], ix[j, i]] is copied
    when 0 <= ix[j, i] < nx and 0 <= iy[j, i] < ny; all other cells stay 0.
    """
    #this works, but is slow (for large arrays)
    new_data = np.zeros((ny,nx))
    # range instead of xrange: xrange does not exist on Python 3.
    for j in range(ny):
        for i in range(nx):
            #go through each item, check if it is within the bounds
            #and assign the data to the new_data array
            i_new = ix[j,i]
            j_new = iy[j,i]
            if ((i_new>=0) and (i_new<nx) and (j_new>=0) and (j_new<ny)):
                new_data[j,i]=previous_data[j_new,i_new]
    return new_data
def pythonic(previous_data, ix, iy):
    """Vectorised lookup of previous_data through the index arrays ix, iy.

    Returns an array shaped like previous_data whose cell (j, i) holds
    previous_data[iy[j, i], ix[j, i]] wherever both indices are in bounds,
    and 0 elsewhere.
    """
    n_rows, n_cols = previous_data.shape
    # A cell is usable only when its row AND column index are both valid,
    # so the four bounds checks are combined into a single boolean mask.
    rows_ok = (iy >= 0) & (iy < n_rows)
    cols_ok = (ix >= 0) & (ix < n_cols)
    in_bounds = rows_ok & cols_ok
    result = np.zeros((n_rows, n_cols))
    # The mask selects target cells and source indices in the same order.
    result[in_bounds] = previous_data[iy[in_bounds], ix[in_bounds]]
    return result
def main():
    """Build the example data, run both implementations, plot and compare."""
    #some 2D array:
    nx, ny = 20, 30
    #array indices, shifted by +1 row and -4 columns:
    iy, ix = np.indices((ny, nx))
    iy = iy + 1
    ix = ix - 4
    #create some out of range indices (which might happen in my real scenario)
    iy[0, 2:7] = -9999
    ix[0:3, -1] = 6666
    #some previous data which is the basis for the new_data:
    previous_data = np.ones((ny, nx))
    previous_data[2:8, 10:20] = 2
    data_nonpythonic = nonpythonic(previous_data, ix, iy, nx, ny)
    data_pythonic = pythonic(previous_data, ix, iy)
    # Left panel: previous data; right panel: loop result, same colour range.
    ef, axar = plt.subplots(1, 2)
    for axis, field in zip(axar, (previous_data, data_nonpythonic)):
        im = axis.pcolor(field, vmin=0, vmax=2)
        ef.colorbar(im, ax=axis, shrink=0.9)
    plt.show()
    # Prints True when the vectorised result matches the loop result.
    print(np.allclose(data_nonpythonic, data_pythonic))
if __name__ == "__main__":
main()

Categories

Resources