Python vectorize nested for loop with conditionals

Python vectorize nested for loop with conditionals - python

How can I vectorize a nested for loop containing some conditionals? I'm trying to get a list of row/column windows within a very large array. What I have below is quick for a nested loop going through all the rows and columns with a given window size but I'm wondering if there is any way to make this faster.
def get_windows(width, height, win_size):
    """Tile a ``width`` x ``height`` extent into windows of at most ``win_size``.

    Returns a list of ``[i, j, num_cols, num_rows]`` entries, where ``i`` steps
    along ``width`` and ``j`` along ``height``. All ``j`` offsets are emitted
    for one ``i`` before moving to the next. Windows touching the far edges
    are clipped to the remaining extent.
    """
    windows = []
    for i in range(0, width, win_size):
        # min() clips the final window in each direction to what is left.
        num_cols = min(win_size, width - i)
        for j in range(0, height, win_size):
            num_rows = min(win_size, height - j)
            windows.append([i, j, num_cols, num_rows])
    return windows
def sliding_window(arr, windows):
    """Yield every window of ``arr`` with the channel axis moved last.

    Each entry of ``windows`` is ``[start_1, start_2, extent_1, extent_2]``,
    giving the offset and size of the window along axes 1 and 2 of ``arr``.
    Only the first three entries of axis 0 are used.
    """
    for start_1, start_2, extent_1, extent_2 in windows:
        # Axis 2 must be sliced with its own extent; reusing extent_1 there
        # gives the wrong shape for any window that is not square.
        win_arr = arr[0:3, start_1:start_1 + extent_1, start_2:start_2 + extent_2]
        yield np.transpose(win_arr, [1, 2, 0])

I think you can use repeated calls to numpy.array_split to accomplish your goals.
import numpy as np
def sliding_window(arr, win_size):
    """Yield tiles of ``arr`` that are at most ``win_size`` long on each side.

    ``arr`` must be 3-D; the last two axes are tiled and each tile is yielded
    with axis 0 moved to the end. Tiles on the far edges are smaller when the
    axis length is not a multiple of ``win_size``.
    """
    # An int passed to array_split is a *number of sections*, not a section
    # length, so hand it explicit split offsets instead.
    cuts_a = list(range(win_size, arr.shape[-2], win_size))
    cuts_b = list(range(win_size, arr.shape[-1], win_size))
    for win_arr_x in np.array_split(arr, cuts_a, axis=-2):
        for win_arr in np.array_split(win_arr_x, cuts_b, axis=-1):
            yield np.transpose(win_arr, [1, 2, 0])

Related

Python numpy: Simplify operation on multiple matrices

I have 3 numpy matrices:
One contains pixels positions in X (x_pos), another pixel positions in Y (y_pos) and a last one containing pixel values (p_value)
I would like to use these 3 matrices to build a results image
With loops I have this result:
#Resulting image
res = np.zeros((128,128,3), dtype = np.uint8)
# Visit every (i, j) cell and copy its value to the target pixel (y, x).
for i in range(x_pos.shape[0]):
    for j in range(x_pos.shape[1]):
        # Get coordinates
        x = x_pos[i][j]
        y = y_pos[i][j]
        res[y,x] = p_value[i][j]
With large matrices (2048*2048) this code already takes a lot of time. Is it possible to optimize this code without using a nested loop?
I specify that the positions in the pos_x and pos_y matrices do not necessarily follow each other, there may be holes or duplicate values

It should be possible using np.meshgrid
# Index grids covering every (i, j) cell of the input matrices.
i = np.arange(0, x_pos.shape[0])
j = np.arange(0, x_pos.shape[1])
i_1, j_1 = np.meshgrid(i, j, indexing='ij')
# Flattening the coordinate matrices keeps them aligned with the flattened
# (i, j) grid, so each value lands on its own target pixel.
res[y_pos.ravel(),x_pos.ravel()] = p_value[i_1.ravel(),j_1.ravel()]

First use consistent numpy 2d array indexing:
x = x_pos[i,j]
y = y_pos[i,j]
res[y,x] = p_value[i,j]
Now instead of scalar i,j use arrays
i = np.arange(n); j = np.arange(m)
You didn't provide a [mcve], so I won't try to demonstrate that th…

Thanks to @hpaulj's and @ai2ys's answers, the problem is solved.
Here is a comparison of the results in terms of execution speed:
import numpy as np
import cv2
import time
m_size = 4096
m_x = np.random.randint(0,m_size,(m_size,m_size), dtype = np.uint16)
m_y = np.random.randint(0,m_size,(m_size,m_size), dtype = np.uint16)
p_value = np.ones((m_size,m_size), dtype = np.uint8)
#Meshgrid method:
out = np.zeros((m_size,m_size),dtype=np.uint8)
start = time.time()
i = np.arange(0, m_x.shape[0])
j = np.arange(0, m_x.shape[1])
i_1, j_1 = np.meshgrid(i, j, indexing='ij')
out[m_x.ravel(),m_y.ravel()] = p_value[i_1.ravel(),j_1.ravel()]
end = time.time()
print("Meshgrid: {} s".format(end - start))
#No for loop method:
out = np.zeros((m_size,m_size),dtype=np.uint8)
start = time.time()
# i has to be a column vector so that (i, j) broadcasts to the full
# rows x cols grid; two flat aranges would only select the diagonal.
i = np.arange(m_x.shape[0])[:, None]
j = np.arange(m_y.shape[1])
x = m_x[i,j]
y = m_y[i,j]
out[x,y] = p_value[i,j]
end = time.time()
print("No loop: {} s".format(end - start))
#For loop method:
out = np.zeros((m_size,m_size),dtype=np.uint8)
start = time.time()
# Reference implementation: one scalar assignment per (i, j) cell.
for i in range(m_x.shape[0]):
    for j in range(m_y.shape[1]):
        x = m_x[i,j]
        y = m_y[i,j]
        out[x,y] = p_value[i,j]
end = time.time()
print("Nested loop: {} s".format(end - start))
#Output:
Meshgrid: 0.4837045669555664 s
No loop: 0.3600656986236572 s
Nested loop: 13.10097336769104 s

Faster way to threshold a 4-D numpy array

I have a 4D numpy array of size (98,359,256,269) that I want to threshold.
Right now, I have two separate lists that keep the coordinates of the first 2 dimension and the last 2 dimensions. (mag_ang for the first 2 dimensions and indices for the last 2).
size of indices : (61821,2)
size of mag_ang : (35182,2)
Currently, my code looks like this:
inner_points = []
# For every (x, y) coordinate, test it against every (mag, ang) pair and
# record the point once per pair that exceeds the threshold.
for x, y in indices:
    for mag, ang in mag_ang:
        if X[mag][ang][x][y] > 10:
            inner_points.append((y,x))
This code works, but it's pretty slow and I wonder if there's a more pythonic/faster way to do this?

(EDIT: added a second alternate method)
Use numpy multi-array indexing:
import time
import numpy as np

# Small synthetic problem; the trailing comments give the real sizes.
n_mag, n_ang, n_x, n_y = 10, 12, 5, 6
shape = n_mag, n_ang, n_x, n_y
X = np.random.random_sample(shape) * 20
nb_indices = 100 # 61821
indices = np.c_[np.random.randint(0, n_x, nb_indices), np.random.randint(0, n_y, nb_indices)]
nb_mag_ang = 50 # 35182
mag_ang = np.c_[np.random.randint(0, n_mag, nb_mag_ang), np.random.randint(0, n_ang, nb_mag_ang)]

# original method
inner_points = []
start = time.time()
for x, y in indices:
    for mag, ang in mag_ang:
        if X[mag][ang][x][y] > 10:
            inner_points.append((y, x))
end = time.time()
print(end - start)

# faster method 1: vectorize over the (mag, ang) pairs for each (x, y)
inner_points_faster1 = []
start = time.time()
for x, y in indices:
    if np.any(X[mag_ang[:, 0], mag_ang[:, 1], x, y] > 10):
        inner_points_faster1.append((y, x))
end = time.time()
print(end - start)

# faster method 2: vectorize over both coordinate lists
start = time.time()
# note: depending on the real size of mag_ang and indices, you may wish to do this the other way round ?
found = X[:, :, indices[:, 0], indices[:, 1]][mag_ang[:, 0], mag_ang[:, 1], :] > 10
# 'found' shape is (nb_mag_ang x nb_indices)
assert found.shape == (nb_mag_ang, nb_indices)
matching_indices_mask = found.any(axis=0)
# Rows of `indices` are (x, y); flip the columns so the result holds (y, x)
# like the two loop-based methods.
inner_points_faster2 = indices[matching_indices_mask][:, ::-1]
end = time.time()
print(end - start)

# finally assert equality of findings
# axis=0 de-duplicates whole (y, x) rows; without it np.unique flattens the
# pairs and only the set of scalar coordinate values would be compared.
# reshape(-1, 2) keeps the arrays 2-D even when nothing was found.
inner_points = np.unique(np.array(inner_points).reshape(-1, 2), axis=0)
inner_points_faster1 = np.unique(np.array(inner_points_faster1).reshape(-1, 2), axis=0)
inner_points_faster2 = np.unique(inner_points_faster2, axis=0)
assert np.array_equal(inner_points, inner_points_faster1)
assert np.array_equal(inner_points, inner_points_faster2)
yields
0.04685807228088379
0.0
0.0
(of course if you increase the shape the time will not be zero for the second and third)
Final note: here I use "unique" at the end, but it would maybe be wise to do it upfront for the indices and mag_ang arrays (except if you are sure that they are unique already)

Use numpy directly. If indices and mag_ang are numpy arrays of two columns each for the appropriate coordinate:
(x, y), (mag, ang) = indices.T, mag_ang.T
# Column vectors for (mag, ang) broadcast against the flat (x, y) vectors,
# giving one boolean per (mag_ang row, indices row) combination.
above = X[mag[:, None], ang[:, None], x, y] > 10
pair_idx, point_idx = np.where(above)
inner_mag, inner_ang = mag[pair_idx], ang[pair_idx]
inner_x, inner_y = x[point_idx], y[point_idx]
Now the inner_* variables hold arrays for each coordinate. To get a single list of pairs you can zip inner_y and inner_x.

Here are a few vectorized ways leveraging broadcasting -
thresh = 10
mask = X[mag_ang[:,0],mag_ang[:,1],indices[:,0,None],indices[:,1,None]]>thresh
r = np.where(mask)[0]
inner_points_out = indices[r][:,::-1]
For larger arrays, we can compare first and then index to get the mask -
mask = (X>thresh)[mag_ang[:,0],mag_ang[:,1],indices[:,0,None],indices[:,1,None]]
If you are only interested in the unique coordinates off indices, use the mask directly -
inner_points_out = indices[mask.any(1)][:,::-1]
For large arrays, we can also leverage multi-cores with numexpr module.
Thus, first off import the module -
import numexpr as ne
Then, replace (X>thresh) with ne.evaluate('X>thresh') in the computation(s) listed earlier.

Use np.where
inner = np.where(X > 10)
# np.where already returns one index array per axis, so unpack it directly.
a, b, x, y = inner
inner_points = np.vstack([y, x]).T

Fast Circular buffer in python than the one using deque?

I am implementing circular buffer in python using collections.deque to use it for some calculations. This is my original code:
clip=moviepy.editor.VideoFileClip('file.mp4')
clip_size= clip.size[::-1]
Depth=30
dc=5
TempKern = # some array of size Depth
RingBuffer=deque(np.zeros(clip_size, dtype=float),maxlen=NewDepth)
modified_clip = clip.fl_image(new_filtered_output)
modified_clip.write_videofile('output.mp4'))
def new_filtered_output(image):
global RingBuffer
inter_frame=somefunction(image)# inter_frame and image shape is same as clip_size
RingBuffer.append(inter_frame)
# Apply kernel
Output = dc + np.sum([np.asarray(RingBuffer)[j]*TempKern[j] for j in range(Depth)],axis=0)
return Output
Is this the fastest way possible? I have heard that numpy roll is an option. But I don't know how to make it behave like the above code?

I noticed you changed the code above, but your original code was:
def one():
    """Deque-based baseline: weight the last five frames and sum them.

    Ten 2x3 frames are pushed through a ``deque`` of length 5, then the
    surviving frames are multiplied by ``TempKern`` and summed. The total is
    printed and returned as a 2x3 array.
    """
    TempKern=np.array([1,2,3,4,5])
    depth=len(TempKern)
    buf=deque(np.zeros((2,3)),maxlen=5)
    for i in range(10):
        buf.append([[i,i+1,i+2],[i+3,i+4,i+5]])
    total = np.sum([np.asarray(buf)[j]*TempKern[j] for j in range(depth)],axis=0)
    print('total')
    print(total)
    return total
You can simplify things greatly and make it run quite a bit faster if you first flatten the arrays for the computation.
def two():
    """Flattened variant: weight five length-6 rows and sum them.

    Row ``idx`` holds the six consecutive integers starting at ``5 + idx``.
    Returns the weighted column sums reshaped to 2x3.
    """
    buf = np.zeros((5,6), dtype=np.int32)
    for idx, i in enumerate(range(5, 10)):
        buf[idx] = np.arange(i, i + 6, dtype=np.int32)
    return (buf.T * np.array([1, 2, 3, 4, 5])).sum(axis=1).reshape((2,3))
The second implementation returns the same values and runs about 4x faster on my machine
one()
>> [[115 130 145]
[160 175 190]] ~ 100µs / loop
two()
>> array([[115, 130, 145],
[160, 175, 190]]) ~~ 26µs / loop
You can further simplify and parameterize this as such:
def three(n, array_shape):
    """Weighted sum of ``n`` flattened frames, reshaped to ``array_shape``.

    Frame ``idx`` holds consecutive integers starting at ``n + idx`` and is
    weighted by ``idx + 1``. ``array_shape`` is a 2-tuple (rows, cols).
    """
    width = array_shape[0] * array_shape[1]
    buf = np.zeros((n, width), dtype=np.int32)
    addit = np.arange(1, n+1, dtype=np.int32)
    for idx, i in enumerate(range(n, 2*n)):
        # Each row has to span the full frame width; a fixed n+1 only fits
        # when rows*cols happens to equal n+1.
        buf[idx] = np.arange(i, i + width)
    return (buf.T * addit).sum(axis=1).reshape(array_shape)
three(5, (2,3))
>> array([[115, 130, 145],
[160, 175, 190]]) ~ 17µs / loop
Note that the second and third version returns a numpy array. You can cast it to a list by using .tolist() if need be.
Based on your feedback - edit below:
def four(array_shape):
    """Weighted frame sum with the frame count derived from ``array_shape``.

    Uses ``n = rows * cols - 1`` frames; frame ``k`` holds ``n + 1``
    consecutive integers starting at ``n + k`` and is weighted by ``k + 1``.
    Returns the weighted sum reshaped to ``array_shape``.
    """
    n = array_shape[0] * array_shape[1] - 1
    addit = np.arange(1, n+1, dtype=np.int32)
    buf = np.asarray([np.arange(i, i+n+1) for i in range(n, 2*n)])
    summed = (buf.T * addit).sum(axis=1)
    return summed.reshape(array_shape)

You can have the ring buffer as a numpy array, by doubling the size and slicing:
clipsize = clip.size[::-1]
depth = 30
ringbuffer = np.zeros((2*depth,) + clipsize)
framecounter = 0
def new_filtered_output(image):
    """Filter one frame through the ring buffer and return the result.

    Stores the processed frame in the module-level ``ringbuffer`` and
    advances ``framecounter``, then returns ``dc`` plus the kernel-weighted
    sum of the ``depth`` frames in the current window.
    """
    global ringbuffer, framecounter
    inter_frame = somefunction(image)
    idx = framecounter % depth
    # The frame is written twice, `depth` slots apart, so a window of `depth`
    # consecutive slots ending at the newest frame is always one plain slice.
    ringbuffer[idx] = ringbuffer[idx + depth] = inter_frame
    buffer = ringbuffer[idx + 1 : idx + 1 + depth]
    framecounter += 1
    # Apply kernel
    output = dc + np.sum([buffer[j]*kernel[j] for j in range(depth)], axis=0)
    return output
Now you don't have to convert the deque into a numpy array every frame (and every loop iteration..).
As mentioned in the comments, you can apply the kernel more efficiently:
output = dc + np.einsum('ijk,i->jk', buffer, kernel)
Or:
output = dc + np.tensordot(kernel, buffer, axes=1)

Making a collage in PIL

I. Am. Stuck.
I have been working on this for over a week now, and I cannot seem to get my code to run correctly. I am fairly new to PIL and Python as a whole. I am trying to make a 2x3 collage of some pictures. I have my code listed below. I am trying to get my photos to fit without any excess black space in the newly created collage; however, when I run my code I can only get 2 pictures to be placed into the collage, instead of the 6 I want. Any suggestions would be helpful.
*CODE EDITED
from PIL import Image
im= Image.open('Tulips.jpg')
out=im.convert("RGB", (
0.412453, 0.357580, 0.180423, 0,
0.212671, 0.715160, 0.072169, 0,
0.019334, 0.119193, 0.950227, 0 ))
out.save("Image2" + ".jpg")
out2=im.convert("RGB", (
0.9756324, 0.154789, 0.180423, 0,
0.212671, 0.715160, 0.254783, 0,
0.123456, 0.119193, 0.950227, 0 ))
out2.save("Image3" + ".jpg")
out3= im.convert("1")
out3.save("Image4"+".jpg")
out4=im.convert("RGB", (
0.986542, 0.154789, 0.756231, 0,
0.212671, 0.715160, 0.254783, 0,
0.123456, 0.119193, 0.112348, 0 ))
out4.save("Image5" + ".jpg")
out5=Image.blend(im, out4, 0.5)
out5.save("Image6" + ".jpg")
listofimages=['Tulips.jpg', 'Image2.jpg', 'Image3.jpg', 'Image4.jpg', 'Image5.jpg', 'Image6.jpg']
def create_collage(width, height, listofimages):
Picturewidth=width//3
Pictureheight=height//2
size=Picturewidth, Pictureheight
new_im=Image.new('RGB', (450, 300))
for p in listofimages:
Image.open(p)
for col in range(0,width):
for row in range(0, height):
image=Image.eval(p, lambda x: x+(col+row)/30)
new_im.paste(p, (col,row))
new_im.save("Collage"+".jpg")
create_collage(450,300,listofimages)

Here's some working code.
When you call Image.open(p), that returns an Image object, so you need to store that in a variable: im = Image.open(p).
I'm not sure what image=Image.eval(p, lambda x: x+(col+row)/30) is meant to do so I removed it.
size is the size of the thumbnails, but you're not using that variable. After opening the image, it should be resized to size.
I renamed Picturewidth and Pictureheight to thumbnail_width and thumbnail_height to make it clear what they are and follow Python naming conventions.
I also moved the number of cols and rows to variables so they can be reused without magic numbers.
The first loop opens each image into an im, thumbnails it and puts it in a list of ims.
Before the next loops we initialise the i, x, and y variables to keep track of which image we're looking at, and the x and y coordinates to paste the thumbnails into the larger canvas. They'll be updated in the next loops.
The first loop is for columns (cols), not pixels (width). (Also range(0, thing) does the same as range(thing).)
Similarly the second loop is for rows instead of pixels. Inside this loop we paste the current image at ims[i] into the big new_im at x, y. These are pixel positions, not row/cols positions.
At the end of the inner loop, increment the i counter, and add thumbnail_height to y.
Similarly, at the end of the outer loop, add thumbnail_width to x and reset y to zero.
You only need to save new_im once, after these loops have finished.
There's no need for concatenating "Image2" + ".jpg" etc., just do "Image2.jpg".
This results in something like this:
This code could be improved. For example, if you don't need them for anything else, there's no need to save the intermediate ImageX.jpg files, and rather than putting those filenames in listofimages, put the images directly there: listofimages = [im, out1, out2, etc...], and then replace for p in listofimages: with for im in listofimages: and remove im = Image.open(p).
You could also calculate some padding for the images so the blackspace is even.
from PIL import Image
im= Image.open('Tulips.jpg')
out=im.convert("RGB", (
0.412453, 0.357580, 0.180423, 0,
0.212671, 0.715160, 0.072169, 0,
0.019334, 0.119193, 0.950227, 0 ))
out.save("Image2.jpg")
out2=im.convert("RGB", (
0.9756324, 0.154789, 0.180423, 0,
0.212671, 0.715160, 0.254783, 0,
0.123456, 0.119193, 0.950227, 0 ))
out2.save("Image3.jpg")
out3= im.convert("1")
out3.save("Image4.jpg")
out4=im.convert("RGB", (
0.986542, 0.154789, 0.756231, 0,
0.212671, 0.715160, 0.254783, 0,
0.123456, 0.119193, 0.112348, 0 ))
out4.save("Image5.jpg")
out5=Image.blend(im, out4, 0.5)
out5.save("Image6.jpg")
listofimages=['Tulips.jpg', 'Image2.jpg', 'Image3.jpg', 'Image4.jpg', 'Image5.jpg', 'Image6.jpg']
def create_collage(width, height, listofimages):
    """Paste the images into a 3-column, 2-row collage and save it.

    Each file in ``listofimages`` is opened and shrunk to fit one grid cell,
    then pasted column by column onto a ``width`` x ``height`` RGB canvas
    that is written to ``Collage.jpg``.
    """
    cols = 3
    rows = 2
    thumbnail_width = width//cols
    thumbnail_height = height//rows
    size = thumbnail_width, thumbnail_height
    new_im = Image.new('RGB', (width, height))
    ims = []
    for p in listofimages:
        im = Image.open(p)
        im.thumbnail(size)
        ims.append(im)
    i = 0
    x = 0
    y = 0
    for col in range(cols):
        for row in range(rows):
            # Fewer images than cells: leave the remaining cells empty
            # instead of indexing past the end of the list.
            if i >= len(ims):
                break
            print(i, x, y)
            new_im.paste(ims[i], (x, y))
            i += 1
            y += thumbnail_height
        x += thumbnail_width
        y = 0
    # Save once, after every thumbnail has been pasted.
    new_im.save("Collage.jpg")
create_collage(450, 300, listofimages)

I made a solution inspired by @Hugo's answer which only requires the input list of images. The function automatically creates a grid based on the number of images input.
from typing import Tuple


def find_multiples(number : int):
    """Return the factor pairs of ``number`` as a list of 2-tuples.

    Each pair ``(a, b)`` satisfies ``a * b == number`` with both factors
    greater than 1; a pair and its mirror image are reported only once, in
    no particular order. When no such pair exists, a single fallback pair
    ``(2, ceil(number / 2))`` is returned so a grid can still be built.
    """
    multiples = set()
    for i in range(number - 1, 1, -1):
        if number % i == 0:
            tup = (i, number // i)
            if tup not in multiples and (tup[1], tup[0]) not in multiples:
                multiples.add(tup)
    if not multiples:
        # Computed directly here: for number < 3 the loop above never runs,
        # so nothing from it can be reused.
        multiples.add((2, number // 2 + number % 2))
    return list(multiples)


def get_smallest_multiples(number : int, smallest_first = True) -> Tuple[int, int]:
    """Return the factor pair of ``number`` whose two factors add up to the least.

    With ``smallest_first`` the pair is returned in ascending order;
    otherwise it keeps the order produced by ``find_multiples``.
    """
    multiples = find_multiples(number)
    # Select the pair with the minimal sum, not whichever was visited last.
    result = list(min(multiples, key=sum))
    if smallest_first:
        result.sort()
    return result[0], result[1]
import os
from typing import List


def create_collage(listofimages : List[str], n_cols : int = 0, n_rows: int = 0,
                   thumbnail_scale : float = 1.0, thumbnail_width : int = 0, thumbnail_height : int = 0):
    """Build a grid collage from image files and save it beside the first one.

    ``n_cols`` / ``n_rows`` force the grid; if only one is given the other is
    derived from the image count, and if neither is given the grid comes from
    ``get_smallest_multiples``. ``thumbnail_width`` / ``thumbnail_height``
    are divided by the column / row count to get a per-cell size, and
    ``thumbnail_scale`` scales the image sizes. The result is written as
    ``Collage<ext>`` in the directory of the first image, using that image's
    extension (``.jpg`` when it has none).
    """
    n_cols = abs(n_cols)
    n_rows = abs(n_rows)
    n_images = len(listofimages)
    # Ceiling division: a floor here would create too few cells and silently
    # drop images whenever the count is not an exact multiple.
    if n_cols == 0 and n_rows != 0:
        n_cols = -(-n_images // n_rows)
    if n_rows == 0 and n_cols != 0:
        n_rows = -(-n_images // n_cols)
    if n_rows == 0 and n_cols == 0:
        n_cols, n_rows = get_smallest_multiples(n_images)
    thumbnail_width = 0 if thumbnail_width == 0 or n_cols == 0 else round(thumbnail_width / n_cols)
    thumbnail_height = 0 if thumbnail_height == 0 or n_rows == 0 else round(thumbnail_height/n_rows)
    all_thumbnails : List[Image.Image] = []
    for p in listofimages:
        thumbnail = Image.open(p)
        # Grow the cell size to the largest (scaled) image seen so far.
        if thumbnail_width * thumbnail_scale < thumbnail.width:
            thumbnail_width = round(thumbnail.width * thumbnail_scale)
        if thumbnail_height * thumbnail_scale < thumbnail.height:
            thumbnail_height = round(thumbnail.height * thumbnail_scale)
        thumbnail.thumbnail((thumbnail_width, thumbnail_height))
        all_thumbnails.append(thumbnail)
    new_im = Image.new('RGB', (thumbnail_width * n_cols, thumbnail_height * n_rows), 'white')
    i, x, y = 0, 0, 0
    for col in range(n_cols):
        for row in range(n_rows):
            # The grid may have more cells than images; leave the rest white.
            if i > len(all_thumbnails) - 1:
                continue
            print(i, x, y)
            new_im.paste(all_thumbnails[i], (x, y))
            i += 1
            y += thumbnail_height
        x += thumbnail_width
        y = 0
    extension = os.path.splitext(listofimages[0])[1]
    if extension == "":
        extension = ".jpg"
    destination_file = os.path.join(os.path.dirname(listofimages[0]), f"Collage{extension}")
    new_im.save(destination_file)
Example usage:
listofimages=['Tulips.jpg', 'Image2.jpg', 'Image3.jpg', 'Image4.jpg', 'Image5.jpg', 'Image6.jpg']
create_collage(listofimages)
In this case, because the input images are 6, the function returns a 3x2 (3 rows, 2 columns) collage of the images.
To do so, the function finds the two smallest integer multiples of the length of the input list of graphs (e.g. for 12, it returns 3 and 4 rather than 2 and 6) and creates a grid, where the first number is always the smaller of the two and is taken to be the number of columns (i.e. by default the grid gets fewer columns than rows; for 12 images, you get a 4x3 matrix: 4 rows, 3 columns). This can be customized via the smallest_first argument (only exposed in get_smallest_multiples()).
Optional arguments also allow to force a number of rows/columns.
The final image size is the sum of the sizes of the single images, but an optional thumbnail_scale argument allows to specify a percentage of scaling for all the thumbnails (defaults to 1.0, i.e. 100%, no scaling).
This function works well when the size of the images are all roughly the same. I have not covered more complex scenarios.

Fancy indexing with tuples

Say I have a 100x100 array in numpy, from this array I want to select 10 random blocks of (x*x)
pixels and change the values of these blocks simultaneously. What is the best way to index the slices for each block? An ideal solution would be something along the lines of the following, where the slices are taken between the pairs of tuples.
# np.ones takes the shape as a single tuple argument.
A = np.ones((100, 100))
blockSize = 10
numBlocks = 15
blockCenter_Row = tuple(np.random.randint(blockSize,high=(100-blockSize),size=numBlocks))
blockCenter_Col = tuple(np.random.randint(blockSize,high=(100-blockSize),size=numBlocks))
# Floor division keeps the bounds integral so they can be used as slice limits.
rowLeft_Boundary = tuple((i-blockSize//2) for i in blockCenter_Row)
rowRight_Boundary = tuple((i+blockSize//2) for i in blockCenter_Row)
# Column bounds come from the column centres, not the row centres.
colLower_Boundary = tuple((i-blockSize//2) for i in blockCenter_Col)
colUpper_Boundary = tuple((i+blockSize//2) for i in blockCenter_Col)
for value in range(10):
A[rowLeft_Boundary:rowRight_Boundary,colLower_Boundary:colUpper_Boundary] = value

I think you can use as_strided() to do the trick, if the blocks are allowed to overlap.
import pylab as pl
from numpy.lib.stride_tricks import as_strided
blockSize, numBlocks, n = 10, 15, 100
a = np.zeros((n, n))
itemsize = a.dtype.itemsize
# One candidate block per top-left corner that still fits inside `a`.
n_positions = n - blockSize + 1
new_shape = (n_positions, n_positions, blockSize, blockSize)
# Stepping the block position and stepping inside a block both move through
# the same rows and columns of `a`, hence the repeated (row, column) strides.
row_step = itemsize * n
new_stride = (row_step, itemsize, row_step, itemsize)
b = as_strided(a, shape=new_shape, strides=new_stride)
idx0 = np.random.randint(0, b.shape[0], numBlocks)
idx1 = np.random.randint(0, b.shape[1], numBlocks)
# Block k gets random values in [k, k + 3), written through the view into `a`.
block_offsets = np.arange(numBlocks).reshape(-1, 1, 1)
b[idx0, idx1, :, :] = np.random.rand(numBlocks, blockSize, blockSize) * 3 + block_offsets
pl.imshow(a, cmap="gray", interpolation="nearest")
here is the output:

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python vectorize nested for loop with conditionals - python

I think you can use repeated calls to numpy.array_split to accomplish your goals. import numpy as np def sliding_window(arr, win_size): for win_arr_x in np.array_split(arr, win_size, axis=-2): for win_arr in np.array_split(win_arr_x, win_size, axis=-1): win_arr = np.transpose(win_arr, [1, 2, 0])

Related

Python numpy: Simplify operation on multiple matrices

Faster way to threshold a 4-D numpy array

Fast Circular buffer in python than the one using deque?

Making a collage in PIL

Fancy indexing with tuples

Categories

Resources