Efficiently calculating grid-based point density in 3d point cloud - python

I have a 3d point cloud matrix, and I am trying to calculate the largest point density within a smaller volume inside the matrix. I am currently using a 3D grid-histogram system where I loop through every point in the matrix and increase the value of the corresponding grid square. Then, I can simply find the max value of the grid matrix.
I have already written code that works, but it is horribly slow for what I am trying to do
import numpy as np
def densityPointCloud(points, gridCount, gridSize):
hist = np.zeros((gridCount, gridCount, gridCount), np.uint16)
rndPoints = np.rint(points/gridSize) + int(gridCount/2)
rndPoints = rndPoints.astype(int)
for point in rndPoints:
if np.amax(point) < gridCount and np.amin(point) >= 0:
hist[point[0]][point[1]][point[2]] += 1
return hist
cloud = (np.random.rand(100000, 3)*10)-5
histogram = densityPointCloud(cloud , 50, 0.2)
print(np.amax(histogram))
Are there any shortcuts I can take to do this more efficiently?

Here's a start:
import numpy as np
import time
from collections import Counter
# if you need the whole histogram object
def dpc2(points, gridCount, gridSize):
hist = np.zeros((gridCount, gridCount, gridCount), np.uint16)
rndPoints = np.rint(points/gridSize) + int(gridCount/2)
rndPoints = rndPoints.astype(int)
inbounds = np.logical_and(np.amax(rndPoints,axis = 1) < gridCount, np.amin(rndPoints,axis = 1) >= 0)
for point in rndPoints[inbounds,:]:
hist[point[0]][point[1]][point[2]] += 1
return hist
# just care about a max point
def dpc3(points, gridCount, gridSize):
rndPoints = np.rint(points/gridSize) + int(gridCount/2)
rndPoints = rndPoints.astype(int)
inbounds = np.logical_and(np.amax(rndPoints,axis = 1) < gridCount,
np.amin(rndPoints,axis = 1) >= 0)
# cheap hashing
phashes = gridCount*gridCount*rndPoints[inbounds,0] + gridCount*rndPoints[inbounds,1] + rndPoints[inbounds,2]
max_h, max_v = Counter(phashes).most_common(1)[0]
max_coord = [(max_h // (gridCount*gridCount)) % gridCount,(max_h // gridCount) % gridCount,max_h % gridCount]
return (max_coord, max_v)
# TESTING
cloud = (np.random.rand(200000, 3)*10)-5
t1 = time.perf_counter()
hist1 = densityPointCloud(cloud , 50, 0.2)
t2 = time.perf_counter()
hist2 = dpc2(cloud,50,0.2)
t3 = time.perf_counter()
hist3 = dpc3(cloud,50,0.2)
t4 = time.perf_counter()
print(f"task 1: {round(1000*(t2-t1))}ms\ntask 2: {round(1000*(t3-t2))}ms\ntask 3: {round(1000*(t4-t3))}ms")
print(f"max value is {hist3[1]}, achieved at {hist3[0]}")
np.all(np.equal(hist1,hist2)) # check that results are identical
# check for equal max - histogram may be multi-modal so the point won't
# necessarily match
np.unravel_index(np.argmax(hist2, axis=None), hist2.shape)
The idea is to do all the if/and comparisons once: let numpy do them (effectively in C) rather then doing them 'manually' inside a Python loop. This also lets us only iterate over the points that will lead to hist being incremented.
You can also consider using a sparse data structure for hist if you think your cloud will have lots of empty space - memory allocation can become a bottleneck for very large data.
Did not scientifically benchmark this but appears to run ~2-3x faster (v2) and 6-8x faster (v3)! If you'd like all the points which are tied for the max. density, it would be easy to extract those from the Counter object.

Related

Interpolate values and replace with NaNs within a long gap?

I am trying to interpolate data with gaps. Sometimes the gap can be very large, and I do not want the interpolation to "succeed" within the gap; the result should be NaNs inside a large gap. For example, consider this example data set:
orig_x = [26219, 26225, 26232, 28521, 28538]
orig_y = [39, 40, 41, 72, 71]
which has clear gap between x-values 26232 and 28521. Now, I would like to have the orig_y interpolated to x-values like this:
import numpy as np
x_target = np.array(range(min(orig_x) // 10 * 10 + 10, max(orig_x) // 10 * 10 + 10, 10))
# array([26220, 26230, 26240, 26250, 26260, 26270, 26280, 26290,
# ...
# 28460, 28470, 28480, 28490, 28500, 28510, 28520, 28530])
and the output y_target should be np.nan everywhere else than at 26220, 26230 and 28520. Let's say that the condition for this would be that if there is a gap larger than 40 in the data, the interpolation should result to np.nan inside this data gap.
Goal shown as a picture
Instead of this
Get something like this
i.e. the "gap" in the data should result to np.nan instead of garbage data.
Question
What would be the best way (fastest interpolation) to achieve this kind of interpolation? The interpolation can be linear or more sophisticated (e.g. cubic spline). One possibility I have in mind would be to use the scipy.interpolate.interp1d as starting point like this
from scipy.interpolate import interp1d
f = interp1d(orig_x, orig_y, bounds_error=False)
y_target = f(x_target)
and then search for gaps in the data and replace the interpolated data with np.nan inside the gaps. Since I will be using this on fairly large dataset (~10M rows, few columns, handled in parts), performance is a key.
After some trial and error, a think I got a "fast enough" implementation using basic linear interpolation and numba for speedups. Forgive for writing everything in the same loop and same function, but it seems that is the numba way of making your code fast. (numba loves loops, and does not seem to accept nested functions)
Test data used
I added some mode data to x_target to test the algorithm performance.
orig_x = np.array([26219, 26225, 26232, 28521, 28538])
orig_y = np.array([39, 40, 41, 72, 71])
x_target = np.array(
np.arange(min(orig_x) // 10 * 10,
max(orig_x) // 10 * 10 + 10, 0.1))
Test code
from matplotlib import pyplot as plt
y_target = interpolate_with_max_gap(orig_x, orig_y, x_target, max_gap=40)
plt.scatter(x_target, y_target, label='interpolated', s=10)
plt.scatter(orig_x, orig_y, label='orig', s=10)
plt.legend()
plt.show()
Test results
The data is interpolated in regions with gap less than max_gap (40):
closeup:
Speed:
I first tried a pure python + numpy implementation, which took 49.6 ms with the same test data (using timeit). This implementation with numba takes 480µs (100x speedup!). When using target_x_is_sorted=True, the speed is 80.1µs!
The orig_x_sorted=True did not give speedup, probably since the orig_x is so short that sorting it does not make any difference in timing in this example.
Implementation
import numba
import numpy as np
#numba.njit()
def interpolate_with_max_gap(orig_x,
orig_y,
target_x,
max_gap=np.inf,
orig_x_is_sorted=False,
target_x_is_sorted=False):
"""
Interpolate data linearly with maximum gap. If there is
larger gap in data than `max_gap`, the gap will be filled
with np.nan.
The input values should not contain NaNs.
Parameters
---------
orig_x: np.array
The input x-data
orig_y: np.array
The input y-data
target_x: np.array
The output x-data; the data points in x-axis that
you want the interpolation results from.
max_gap: float
The maximum allowable gap in `orig_x` inside which
interpolation is still performed. Gaps larger than
this will be filled with np.nan in the output `target_y`.
orig_x_is_sorted: boolean, default: False
If True, the input data `orig_x` is assumed to be monotonically
increasing. Some performance gain if you supply sorted input data.
target_x_is_sorted: boolean, default: False
If True, the input data `target_x` is assumed to be
monotonically increasing. Some performance gain if you supply
sorted input data.
Returns
------
target_y: np.array
The interpolation results.
"""
if not orig_x_is_sorted:
# Sort to be monotonous wrt. input x-variable.
idx = orig_x.argsort()
orig_x = orig_x[idx]
orig_y = orig_y[idx]
if not target_x_is_sorted:
target_idx = target_x.argsort()
# Needed for sorting back the data.
target_idx_for_reverse = target_idx.argsort()
target_x = target_x[target_idx]
target_y = np.empty(target_x.size)
idx_orig = 0
orig_gone_through = False
for idx_target, x_new in enumerate(target_x):
# Grow idx_orig if needed.
while not orig_gone_through:
if idx_orig + 1 >= len(orig_x):
# Already consumed the orig_x; no more data
# so we would need to extrapolate
orig_gone_through = True
elif x_new > orig_x[idx_orig + 1]:
idx_orig += 1
else:
# x_new <= x2
break
if orig_gone_through:
target_y[idx_target] = np.nan
continue
x1 = orig_x[idx_orig]
y1 = orig_y[idx_orig]
x2 = orig_x[idx_orig + 1]
y2 = orig_y[idx_orig + 1]
if x_new < x1:
# would need to extrapolate to left
target_y[idx_target] = np.nan
continue
delta_x = x2 - x1
if delta_x > max_gap:
target_y[idx_target] = np.nan
continue
delta_y = y2 - y1
if delta_x == 0:
target_y[idx_target] = np.nan
continue
k = delta_y / delta_x
delta_x_new = x_new - x1
delta_y_new = k * delta_x_new
y_new = y1 + delta_y_new
target_y[idx_target] = y_new
if not target_x_is_sorted:
return target_y[target_idx_for_reverse]
return target_y

Is it possible to convert this numpy function to tensorflow?

I have a function that takes a [32, 32, 3] tensor, and outputs a [256,256,3] tensor.
Specifically, the function interprets the smaller array as if it was a .svg file, and 'renders' it to a 256x256 array as a canvas using this algorithm
For an explanation of WHY I would want to do this, see This question
The function behaves exactly as intended, until I try to include it in the training loop of a GAN. The current error I'm seeing is:
NotImplementedError: Cannot convert a symbolic Tensor (mul:0) to a numpy array.
A lot of other answers to similar errors seem to boil down to "You need to re-write the function using tensorflow, not numpy"
Here's the working code using numpy - is it possible to re-write it to exclusively use tensorflow functions?
def convert_to_bitmap(input_tensor, target, j):
#implied conversion to nparray - the tensorflow docs seem to indicate this is okay, but the error is thrown here when training
array = input_tensor
outputArray = target
output = target
for i in range(32):
col = float(array[i,0,j])
if ((float(array[i,0,0]))+(float(array[i,0,1]))+(float(array[i,0,2]))/3)< 0:
continue
#slice only the red channel from the i line, multiply by 255
red_array = array[i,:,0]*255
#slice only the green channel, multiply by 255
green_array = array[i,:,1]*255
#combine and flatten them
combined_array = np.dstack((red_array, green_array)).flatten()
#remove the first two and last two indices of the combined array
index = [0,1,62,63]
clipped_array = np.delete(combined_array,index)
#filter array to remove values less than 0
filtered = clipped_array > 0
filtered_array = clipped_array[filtered]
#check array has an even number of values, delete the last index if it doesn't
if len(filtered_array) % 2 == 0:
pass
else:
filtered_array = np.delete(filtered_array,-1)
#convert into a set of tuples
l = filtered_array.tolist()
t = list(zip(l, l[1:] + l[:1]))
if not t:
continue
output = fill_polygon(t, outputArray, col)
return(output)
The 'fill polygon' function is copied from the 'mahotas' library:
def fill_polygon(polygon, canvas, color):
if not len(polygon):
return
min_y = min(int(y) for y,x in polygon)
max_y = max(int(y) for y,x in polygon)
polygon = [(float(y),float(x)) for y,x in polygon]
if max_y < canvas.shape[0]:
max_y += 1
for y in range(min_y, max_y):
nodes = []
j = -1
for i,p in enumerate(polygon):
pj = polygon[j]
if p[0] < y and pj[0] >= y or pj[0] < y and p[0] >= y:
dy = pj[0] - p[0]
if dy:
nodes.append( (p[1] + (y-p[0])/(pj[0]-p[0])*(pj[1]-p[1])) )
elif p[0] == y:
nodes.append(p[1])
j = i
nodes.sort()
for n,nn in zip(nodes[::2],nodes[1::2]):
nn += 1
canvas[y, int(n):int(nn)] = color
return(canvas)
NOTE: I'm not trying to get someone to convert the whole thing for me! There are some functions that are pretty obvious (tf.stack instead of np.dstack), but others that I don't even know how to start, like the last few lines of the fill_polygon function above.
Yes you can actually do this, you can use a python function in sth called tf.pyfunc. Its a python wrapper but its extremely slow in comparison to plain tensorflow. However, tensorflow and Cuda for example are so damn fast because they use stuff like vectorization, meaning you can rewrite a lot , really many of the loops in terms of mathematical tensor operations which are very fast.
In general:
If you want to use custom code as a custom layer, i would recommend you to rethink the algebra behind those loops and try to express them somehow different. If its just preprocessing before the training is going to start, you can use tensorflow but doing the same with numpy and other libraries is easier.
To your main question: Yes its possible, but better dont use loops. Tensorflow has a build-in loop optimizer but then you have to use tf.while() and thats anyoing (maybe just for me). I just blinked over your code, but it looks like you should be able to vectorize it quite good using the standard tensorflow vocabulary. If you want it fast, i mean really fast with GPU support write all in tensorflow, but nothing like 50/50 with tf.convert_to_tensor(), because than its going to be slow again. because than you switch between GPU and CPU and plain Python interpreter and the tensorflow low level API. Hope i could help you at least a bit
This code 'works', in that it only uses tensorflow functions, and does allow the model to train when used in a training loop:
def convert_image (x):
#split off the first column of the generator output, and store it for later (remove the 'colours' column)
colours_column = tf.slice(img_to_convert, tf.constant([0,0,0], dtype=tf.int32), tf.constant([32,1,3], dtype=tf.int32))
#split off the rest of the data, only keeping R + G, and discarding B
image_data_red = tf.slice(img_to_convert, tf.constant([0,1,0], dtype=tf.int32), tf.constant([32,31,1], dtype=tf.int32))
image_data_green = tf.slice(img_to_convert, tf.constant([0,1,1], dtype=tf.int32), tf.constant([32, 31,1], dtype=tf.int32))
#roll each row by 1 position, and make two more 2D tensors
rolled_red = tf.roll(image_data_red, shift=-1, axis=0)
rolled_green = tf.roll(image_data_green, shift=-1, axis=0)
#remove all values where either the red OR green channels are 0
zeroes = tf.constant(0, dtype=tf.float32)
#this is for the 'count_nonzero' command
boolean_red_data = tf.not_equal(image_data_red, zeroes)
boolean_green_data = tf.not_equal(image_data_green, zeroes)
initial_data_mask = tf.logical_and(boolean_red_data, boolean_green_data)
#count non-zero values per row and flatten it
count = tf.math.count_nonzero(initial_data_mask, 1)
count_flat = tf.reshape(count, [-1])
flat_red = tf.reshape(image_data_red, [-1])
flat_green = tf.reshape(image_data_green, [-1])
boolean_red = tf.math.logical_not(tf.equal(flat_red, tf.zeros_like(flat_red)))
boolean_green = tf.math.logical_not(tf.equal(flat_green, tf.zeros_like(flat_red)))
mask = tf.logical_and(boolean_red, boolean_green)
flat_red_without_zero = tf.boolean_mask(flat_red, mask)
flat_green_without_zero = tf.boolean_mask(flat_green, mask)
# create a ragged tensor
X0_ragged = tf.RaggedTensor.from_row_lengths(values=flat_red_without_zero, row_lengths=count_flat)
Y0_ragged = tf.RaggedTensor.from_row_lengths(values=flat_green_without_zero, row_lengths=count_flat)
#do the same for the rolled version
rolled_data_mask = tf.roll(initial_data_mask, shift=-1, axis=1)
flat_rolled_red = tf.reshape(rolled_red, [-1])
flat_rolled_green = tf.reshape(rolled_green, [-1])
#from SO "shift zeros to the end"
boolean_rolled_red = tf.math.logical_not(tf.equal(flat_rolled_red, tf.zeros_like(flat_rolled_red)))
boolean_rolled_green = tf.math.logical_not(tf.equal(flat_rolled_green, tf.zeros_like(flat_rolled_red)))
rolled_mask = tf.logical_and(boolean_rolled_red, boolean_rolled_green)
flat_rolled_red_without_zero = tf.boolean_mask(flat_rolled_red, rolled_mask)
flat_rolled_green_without_zero = tf.boolean_mask(flat_rolled_green, rolled_mask)
# create a ragged tensor
X1_ragged = tf.RaggedTensor.from_row_lengths(values=flat_rolled_red_without_zero, row_lengths=count_flat)
Y1_ragged = tf.RaggedTensor.from_row_lengths(values=flat_rolled_green_without_zero, row_lengths=count_flat)
#available outputs for future use are:
X0 = X0_ragged.to_tensor(default_value=0.)
Y0 = Y0_ragged.to_tensor(default_value=0.)
X1 = X1_ragged.to_tensor(default_value=0.)
Y1 = Y1_ragged.to_tensor(default_value=0.)
#Example tensor cel (replace with (x))
P = tf.cast(x, dtype=tf.float32)
#split out P.x and P.y, and fill a ragged tensor to the same shape as Rx
Px_value = tf.cast(x, dtype=tf.float32) - tf.cast((tf.math.floor(x/255)*255), dtype=tf.float32)
Py_value = tf.cast(tf.math.floor(x/255), dtype=tf.float32)
Px = tf.squeeze(tf.ones_like(X0)*Px_value)
Py = tf.squeeze(tf.ones_like(Y0)*Py_value)
#for each pair of values (Y0, Y1, make a vector, and check to see if it crosses the y-value (Py) either up or down
a = tf.math.less(Y0, Py)
b = tf.math.greater_equal(Y1, Py)
c = tf.logical_and(a, b)
d = tf.math.greater_equal(Y0, Py)
e = tf.math.less(Y1, Py)
f = tf.logical_and(d, e)
g = tf.logical_or(c, f)
#Makes boolean bitwise mask
#calculate the intersection of the line with the y-value, assuming it intersects
#P.x <= (G.x - R.x) * (P.y - R.y) / (G.y - R.y + R.x) - use tf.divide_no_nan for safe divide
h = tf.math.less(Px,(tf.math.divide_no_nan(((X1-X0)*(Py-Y0)),(Y1-Y0+X0))))
#combine using AND with the mask above
i = tf.logical_and(g,h)
#tf.count_nonzero
#reshape to make a column tensor with the same dimensions as the colours
#divide by 2 using tf.floor_mod (returns remainder of division - any remainder means the value is odd, and hence the point is IN the polygon)
final_count = tf.cast((tf.math.count_nonzero(i, 1)), dtype=tf.int32)
twos = tf.ones_like(final_count, dtype=tf.int32)*tf.constant([2], dtype=tf.int32)
divide = tf.cast(tf.math.floormod(final_count, twos), dtype=tf.int32)
index = tf.cast(tf.range(0,32, delta=1), dtype=tf.int32)
clipped_index = divide*index
sort = tf.sort(clipped_index)
reverse = tf.reverse(sort, [-1])
value = tf.slice(reverse, [0], [1])
pair = tf.constant([0], dtype=tf.int32)
slice_tensor = tf.reshape(tf.stack([value, pair, pair], axis=0),[-1])
output_colour = tf.slice(colours_column, slice_tensor, [1,1,3])
return output_colour
This is where the 'convert image' function is applied using tf.vectorize_map:
def convert_images(image_to_convert):
global img_to_convert
img_to_convert = image_to_convert
process_list = tf.reshape((tf.range(0,65536, delta=1, dtype=tf.int32)), [65536, 1])
output_line = tf.vectorized_map(convert_image, process_list)
output_line_squeezed = tf.squeeze(output_line)
output_reshape = (tf.reshape(output_line_squeezed, [256,256,3])/127.5)-1
output = tf.expand_dims(output_reshape, axis=0)
return output
It is PAINFULLY slow, though - It does not appear to be using the GPU, and looks to be single threaded as well.
I'm adding it as an answer to my own question because is clearly IS possible to do this numpy function entirely in tensorflow - it just probably shouldn't be done like this.

Converting `for` loop that can't be vectorized to sparse matrix

There are 2 boxes and a small gap that allows 1 particle per second from one box to enter the other box. Whether a particle will go from A to B, or B to A depends on the ratio Pa/Ptot (Pa: number of particles in box A, Ptot: total particles in both boxes).
To make it faster, I need to get rid of the for loops, however I can't find a way to either vectorize them or turn them into a sparse matrix that represents my for loop:
What about for loops you can't vectorize? The ones where the result at iteration n depends on what you calculated in iteration n-1, n-2, etc. You can define a sparse matrix that represents your for loop and then do a sparse matrix solve.
But I can't figure out how to define a sparse matrix out of this. The simulation boils down to calculating:
where
is the piece that gives me trouble when trying to express my problem as described here. (Note: the contents in the parenthesis are a bool operation)
Questions:
Can I vectorize the for loop?
If not, how can I define a sparse matrix?
(bonus question) Why is the execution time x27 faster in Python (0.027s) than Octave (0.75s)?
Note: I implemented the simulation in both Python and Octave and will soon do it on Matlab, therefor the tags are correct.
Octave code
1; % starting with `function` causes errors
function arr = Px_simulation (Pa_init, Ptot, t_arr)
t_size = size(t_arr);
arr = zeros(t_size); % fixed size array is better than arr = []
rand_arr = rand(t_size); % create all rand values at once
_Pa = Pa_init;
for _j=t_arr()
if (rand_arr(_j) * Ptot > _Pa)
_Pa += 1;
else
_Pa -= 1;
endif
arr(_j) = _Pa;
endfor
endfunction
t = 1:10^5;
for _i=1:3
Ptot = 100*10^_i;
tic()
Pa_simulation = Px_simulation(Ptot, Ptot, t);
toc()
subplot(2,2,_i);
plot(t, Pa_simulation, "-2;simulation;")
title(strcat("{P}_{a0}=", num2str(Ptot), ',P=', num2str(Ptot)))
endfor
Python
import numpy
import matplotlib.pyplot as plt
import timeit
import cpuinfo
from random import random
print('\nCPU: {}'.format(cpuinfo.get_cpu_info()['brand']))
PARTICLES_COUNT_LST = [1000, 10000, 100000]
DURATION = 10**5
t_vals = numpy.linspace(0, DURATION, DURATION)
def simulation(na_initial, ntotal, tvals):
shape = numpy.shape(tvals)
arr = numpy.zeros(shape)
na_current = na_initial
for i in range(len(tvals)):
if random() > (na_current/ntotal):
na_current += 1
else:
na_current -= 1
arr[i] = na_current
return arr
plot_lst = []
for i in PARTICLES_COUNT_LST:
start_t = timeit.default_timer()
n_a_simulation = simulation(na_initial=i, ntotal=i, tvals=t_vals)
execution_time = (timeit.default_timer() - start_t)
print('Execution time: {:.6}'.format(execution_time))
plot_lst.append(n_a_simulation)
for i in range(len(PARTICLES_COUNT_LST)):
plt.subplot('22{}'.format(i))
plt.plot(t_vals, plot_lst[i], 'r')
plt.grid(linestyle='dotted')
plt.xlabel("time [s]")
plt.ylabel("Particles in box A")
plt.show()
IIUC you can use cumsum() in both Octave and Numpy:
Octave:
>> p = rand(1, 5);
>> r = rand(1, 5);
>> p
p =
0.43804 0.37906 0.18445 0.88555 0.58913
>> r
r =
0.70735 0.41619 0.37457 0.72841 0.27605
>> cumsum (2*(p<(r+0.03)) - 1)
ans =
1 2 3 2 1
>> (2*(p<(r+0.03)) - 1)
ans =
1 1 1 -1 -1
Also note that the following function will return values ([-1, 1]):

Random walk series between start-end values and within minimum/maximum limits

How can i generate a random walk data between a start-end values
while not passing over the maximum value and not going under the minimum value?
Here is my attempt to do this but for some reason sometimes the series goes over the max or under the min values. It seems that the Start and the End value are respected but not the minimum and the maximum value. How can this be fixed? Also i would like to give the standard deviation for the fluctuations but don't know how. I use a randomPerc for fluctuation but this is wrong as i would like to specify the std instead.
import numpy as np
import matplotlib.pyplot as plt
def generateRandomData(length,randomPerc, min,max,start, end):
data_np = (np.random.random(length) - randomPerc).cumsum()
data_np *= (max - min) / (data_np.max() - data_np.min())
data_np += np.linspace(start - data_np[0], end - data_np[-1], len(data_np))
return data_np
randomData=generateRandomData(length = 1000, randomPerc = 0.5, min = 50, max = 100, start = 66, end = 80)
## print values
print("Max Value",randomData.max())
print("Min Value",randomData.min())
print("Start Value",randomData[0])
print("End Value",randomData[-1])
print("Standard deviation",np.std(randomData))
## plot values
plt.figure()
plt.plot(range(randomData.shape[0]), randomData)
plt.show()
plt.close()
Here is a simple loop which checks for series that go under the minimum or over the maximum value. This is exactly what i am trying to avoid. The series should be distributed between the given limits for min and max values.
## generate 1000 series and check if there are any values over the maximum limit or under the minimum limit
for i in range(1000):
randomData = generateRandomData(length = 1000, randomPerc = 0.5, min = 50, max = 100, start = 66, end = 80)
if(randomData.min() < 50):
print(i, "Value Lower than Min limit")
if(randomData.max() > 100):
print(i, "Value Higher than Max limit")
As you impose conditions on your walk, it can not be considered purely random. Anyway, one way is to generate the walk iteratively, and check the boundaries on each iteration. But if you wanted a vectorized solution, here it is:
def bounded_random_walk(length, lower_bound, upper_bound, start, end, std):
assert (lower_bound <= start and lower_bound <= end)
assert (start <= upper_bound and end <= upper_bound)
bounds = upper_bound - lower_bound
rand = (std * (np.random.random(length) - 0.5)).cumsum()
rand_trend = np.linspace(rand[0], rand[-1], length)
rand_deltas = (rand - rand_trend)
rand_deltas /= np.max([1, (rand_deltas.max()-rand_deltas.min())/bounds])
trend_line = np.linspace(start, end, length)
upper_bound_delta = upper_bound - trend_line
lower_bound_delta = lower_bound - trend_line
upper_slips_mask = (rand_deltas-upper_bound_delta) >= 0
upper_deltas = rand_deltas - upper_bound_delta
rand_deltas[upper_slips_mask] = (upper_bound_delta - upper_deltas)[upper_slips_mask]
lower_slips_mask = (lower_bound_delta-rand_deltas) >= 0
lower_deltas = lower_bound_delta - rand_deltas
rand_deltas[lower_slips_mask] = (lower_bound_delta + lower_deltas)[lower_slips_mask]
return trend_line + rand_deltas
randomData = bounded_random_walk(1000, lower_bound=50, upper_bound =100, start=50, end=100, std=10)
You can see it as a solution of geometric problem. The trend_line is connecting your start and end points, and have margins defined by lower_bound and upper_bound. rand is your random walk, rand_trend it's trend line and rand_deltas is it's deviation from the rand trend line. We collocate the trend lines, and want to make sure that deltas don't exceed margins. When rand_deltas exceeds the allowed margin, we "fold" the excess back to the bounds.
At the end you add the resulting random deltas to the start=>end trend line, thus receiving the desired bounded random walk.
The std parameter corresponds to the amount of variance of the random walk.
update : fixed assertions
In this version "std" is not promised to be the "interval".
I noticed you used built in functions as arguments (min and max) which is not reccomended (I changed these to max_1 and min_1). Other than this your code should work as expected:
def generateRandomData(length,randomPerc, min_1,max_1,start, end):
data_np = (np.random.random(length) - randomPerc).cumsum()
data_np *= (max_1 - min_1) / (data_np.max() - data_np.min())
data_np += np.linspace(start - data_np[0], end - data_np[-1],len(data_np))
return data_np
randomData=generateRandomData(1000, 0.5, 50, 100, 66, 80)
If you are willing to modify your code this will work:
import random
for_fill=[]
# generate 1000 samples within the specified range and save them in for_fill
for x in range(1000):
generate_rnd_df=random.uniform(50,100)
for_fill.append(generate_rnd_df)
#set starting and end point manually
for_fill[0]=60
for_fill[999]=80
Here is one way, very crudely expressed in code.
>>> import random
>>> steps = 1000
>>> start = 66
>>> end = 80
>>> step_size = (50,100)
Generate 1,000 steps assured to be within the required range.
>>> crude_walk_steps = [random.uniform(*step_size) for _ in range(steps)]
>>> import numpy as np
Turn these steps into a walk but notice that they fail to meet the requirements.
>>> crude_walk = np.cumsum(crude_walk_steps)
>>> min(crude_walk)
57.099056617839288
>>> max(crude_walk)
75048.948693623403
Calculate a simple linear transformation to scale the steps.
>>> from sympy import *
>>> var('a b')
(a, b)
>>> solve([57.099056617839288*a+b-66,75048.948693623403*a+b-80])
{b: 65.9893403510312, a: 0.000186686954219243}
Scales the steps.
>>> walk = [0.000186686954219243*_+65.9893403510312 for _ in crude_walk]
Verify that the walk now starts and stops where intended.
>>> min(walk)
65.999999999999986
>>> max(walk)
79.999999999999986
You can also generate a stream of random walks and filter out those that do not meet your constraints. Just be aware that by filtering they are not really 'random' anymore.
The code below creates an infinite stream of 'valid' random walks. Be careful with
very tight constraints, the 'next' call might take a while ;).
import itertools
import numpy as np
def make_random_walk(first, last, min_val, max_val, size):
# Generate a sequence of random steps of lenght `size-2`
# that will be taken bewteen the start and stop values.
steps = np.random.normal(size=size-2)
# The walk is the cumsum of those steps
walk = steps.cumsum()
# Performing the walk from the start value gives you your series.
series = walk + first
# Compare the target min and max values with the observed ones.
target_min_max = np.array([min_val, max_val])
observed_min_max = np.array([series.min(), series.max()])
# Calculate the absolute 'overshoot' for min and max values
f = np.array([-1, 1])
overshoot = (observed_min_max*f - target_min_max*f)
# Calculate the scale factor to constrain the walk within the
# target min/max values.
# Don't upscale.
correction_base = [walk.min(), walk.max()][np.argmax(overshoot)]
scale = min(1, (correction_base - overshoot.max()) / correction_base)
# Generate the scaled series
new_steps = steps * scale
new_walk = new_steps.cumsum()
new_series = new_walk + first
# Check the size of the final step necessary to reach the target endpoint.
last_step_size = abs(last - new_series[-1]) # step needed to reach desired end
# Is it larger than the largest previously observed step?
if last_step_size > np.abs(new_steps).max():
# If so, consider this series invalid.
return None
else:
# Else, we found a valid series that meets the constraints.
return np.concatenate((np.array([first]), new_series, np.array([last])))
start = 66
stop = 80
max_val = 100
min_val = 50
size = 1000
# Create an infinite stream of candidate series
candidate_walks = (
(i, make_random_walk(first=start, last=stop, min_val=min_val, max_val=max_val, size=size))
for i in itertools.count()
)
# Filter out the invalid ones.
valid_walks = ((i, w) for i, w in candidate_walks if w is not None)
idx, walk = next(valid_walks) # Get the next valid series
print(
"Walk #{}: min/max({:.2f}/{:.2f})"
.format(idx, walk.min(), walk.max())
)

Vectorization of python numpy code is making it slower instead of faster

I am trying to perform image correlation to find which frame out of a set of 20 frames (the set is stored in a 3D array, x) matches best with a given frame (stored as a 2D array, y). This step has to be performed 1000 times.
I tried to vectorize the code to make it run faster. But somehow the vectorization is making the code take twice as long. I am probably doing something wrong in the vectorization process which is making it slower.
Here is the code
import numpy as np
import time
def corr2(a,b):
#Getting shapes and prealocating the auxillairy variables
k = np.shape(a)
#Calculating mean values
AM=np.mean(a)
BM=np.mean(b)
#calculate vectors
c_vect = (a-AM)*(b-BM)
d_vect = (a-AM)**2
e_vect = (b-BM)**2
#Formula itself
r_out = np.sum(c_vect)/float(np.sqrt(np.sum(d_vect)*np.sum(e_vect)))
return r_out
def ZZ_1X_v1_MCC(R,RefImage):
from img_proccessing import corr2
Cor = np.zeros(R.shape[2])
for t in range(R.shape[2]):
Cor[t]=corr2(RefImage,R[:,:,t]) #Correlation
#report
max_correlationvalue_intermediate = np.amax(Cor)
max_correlatedframe_intermediate = np.argmax(Cor)
max_correlatedframeandvalue = [max_correlatedframe_intermediate,max_correlationvalue_intermediate];
return max_correlatedframeandvalue
def ZZ_1X_v1_MCC_vectorized(R,RefImage):
R_shape = np.asarray(np.shape(R))
R_flattened = R.swapaxes(0,2).reshape(R_shape[2],R_shape[0]*R_shape[1])
AA = np.transpose(R_flattened)
RefImageflattened = RefImage.transpose().ravel()
#Calculating mean subtracted values
AAM = AA - np.mean(AA,axis=0)
BM = RefImageflattened - np.mean(RefImageflattened)
#calculate vectors
DD_vect = AAM**2
E_vect = BM**2
EE_vect = np.transpose(np.tile(np.transpose(E_vect),(R_shape[2],1)))
CC_vect = AAM*np.transpose(np.tile(BM,(R_shape[2],1)))
#Formula itself
Cor = np.sum(CC_vect,axis=0)/np.sqrt((np.sum(DD_vect,axis=0)*np.sum(EE_vect,axis=0)).astype(float))
#report
max_correlationvalue_intermediate = np.amax(Cor)
max_correlatedframe_intermediate = np.argmax(Cor)
max_correlatedframeandvalue = [max_correlatedframe_intermediate,max_correlationvalue_intermediate];
return max_correlatedframeandvalue
x = np.arange(400000).reshape((20,200,100)).swapaxes(0,2) #3D array with 20 frames
y = np.transpose(np.arange(20000).reshape((200,100))) #2D array with 1 frame
# using for loop
tic = time.time()
for i in range(500):
[a,b] = ZZ_1X_v1_MCC(x,y)
print(time.time() - tic)
# using vectorization
tic = time.time()
for i in range(500):
[a,b] = ZZ_1X_v1_MCC_vectorized(x,y)
print(time.time() - tic)

Categories

Resources