Local reduce with specified slices over a single axis in tensorflow

Local reduce with specified slices over a single axis in tensorflow - python

I am trying to perform a local reduce with specified slices over a single axis on a 2D array.
I achieved this using numpy's numpy.ufunc.reduceat or numpy.add.reduceat but I would like do the same in tensorflow as the input to this reduce operation is an output from tensorflow convolution.
I came across tf.math.reduce_sum but I am not sure how this can be used in my case.
It will be great if I can do the reduceat operation in tensorflow as I can take advantage of a GPU.

You can do almost the same using tf.math.segment_sum:
import tensorflow as tf
import numpy as np
def add_reduceat_tf(a, indices, axis=0):
a = tf.convert_to_tensor(a)
indices = tf.convert_to_tensor(indices)
# Transpose if necessary
transpose = not (isinstance(axis, int) and axis == 0)
if transpose:
axis = tf.convert_to_tensor(axis)
ndims = tf.cast(tf.rank(a), axis.dtype)
a = tf.transpose(a, tf.concat([[axis], tf.range(axis),
tf.range(axis + 1, ndims)], axis=0))
# Make segment ids
r = tf.range(tf.shape(a, out_type=indices.dtype)[0])
segments = tf.searchsorted(indices, r, side='right')
# Compute segmented sum and discard first unused segment
out = tf.math.segment_sum(a, segments)[1:]
# Transpose back if necessary
if transpose:
out = tf.transpose(out, tf.concat([tf.range(1, axis + 1), [0],
tf.range(axis + 1, ndims)], axis=0))
return out
# Test
np.random.seed(0)
a = np.random.rand(5, 10).astype(np.float32)
indices = [2, 4, 7]
axis = 1
# NumPy computation
out_np = np.add.reduceat(a, indices, axis=axis)
# TF computation
with tf.Graph().as_default(), tf.Session() as sess:
out = add_reduceat_tf(a, indices, axis=axis)
out_tf = sess.run(out)
# Check result
print(np.allclose(out_np, out_tf))
# True
You can replace tf.math.segment_sum above with the reduction function you want to use. The only difference between this and the actual np.ufunc.reduceat is the special case where indices[i] >= indices[i + 1]. The posted function requires indices to be sorted, and if there were a case where indices[i] == indices[i + 1] the corresponding i position in the output would be zero, not a[indices[i]].

Related

Multidimensional Tensor slicing

First things first: I'm relatively new to TensorFlow.
I'm trying to implement a custom layer in tensorflow.keras and I'm having relatively hard time when I try to achieve the following:
I've got 3 Tensors (x,y,z) of shape (?,49,3,3,32) [where ? is the batch size]
On each Tensor I compute the sum over the 3rd and 4th axes [thus I end up with 3 Tensors of shape (?,49,32)]
By doing an argmax (A)on the above 3 Tensors (?,49,32) I get a single (?,49,32) Tensor
Now I want to use this tensor to select slices from the initial x,y,z Tensors in the following form:
Each element in the last dimension of A corresponds to the selected Tensor.
(aka: 0 = X, 1 = Y, 2 = Z)
The index of the last dimension of A corresponds to the slice that I would like to extract from the Tensor last dimension.
I've tried to achieve the above using tf.gather but I had no luck. Then I tried using a series of tf.map_fn, which is ugly and computationally costly.
To simplify the above:
let's say we've got an A array of shape (3,3,3,32). Then the numpy equivalent of what I try to achieve is this:
import numpy as np
x = np.random.rand(3,3,32)
y = np.random.rand(3,3,32)
z = np.random.rand(3,3,32)
x_sums = np.sum(np.sum(x,axis=0),0);
y_sums = np.sum(np.sum(y,axis=0),0);
z_sums = np.sum(np.sum(z,axis=0),0);
max_sums = np.argmax([x_sums,y_sums,z_sums],0)
A = np.array([x,y,z])
tmp = []
for i in range(0,len(max_sums)):
tmp.append(A[max_sums[i],:,:,i)
output = np.transpose(np.stack(tmp))
Any suggestions?
ps: I tried tf.gather_nd but I had no luck

This is how you can do something like that with tf.gather_nd:
import tensorflow as tf
# Make example data
tf.random.set_seed(0)
b = 10 # Batch size
x = tf.random.uniform((b, 49, 3, 3, 32))
y = tf.random.uniform((b, 49, 3, 3, 32))
z = tf.random.uniform((b, 49, 3, 3, 32))
# Stack tensors together
data = tf.stack([x, y, z], axis=2)
# Put reduction axes last
data_t = tf.transpose(data, (0, 1, 5, 2, 3, 4))
# Reduce
s = tf.reduce_sum(data_t, axis=(4, 5))
# Find largest sums
idx = tf.argmax(s, 3)
# Make gather indices
data_shape = tf.shape(data_t, idx.dtype)
bb, ii, jj = tf.meshgrid(*(tf.range(data_shape[i]) for i in range(3)), indexing='ij')
# Gather result
output_t = tf.gather_nd(data_t, tf.stack([bb, ii, jj, idx], axis=-1))
# Reorder axes
output = tf.transpose(output_t, (0, 1, 3, 4, 2))
print(output.shape)
# TensorShape([10, 49, 3, 3, 32])

Softmax function of a numpy array by row

I am trying to apply a softmax function to a numpy array. But I am not getting the desired results. This is the code I have tried:
import numpy as np
x = np.array([[1001,1002],[3,4]])
softmax = np.exp(x - np.max(x))/(np.sum(np.exp(x - np.max(x)))
print softmax
I think the x - np.max(x) code is not subtracting the max of each row. The max needs to be subtracted from x to prevent very large numbers.
This is supposed to output
np.array([
[0.26894142, 0.73105858],
[0.26894142, 0.73105858]])
But I am getting:
np.array([
[0.26894142, 0.73105858],
[0, 0]])

A convenient way to keep the axes that are consumed by "reduce" operations such as max or sum is the keepdims keyword:
mx = np.max(x, axis=-1, keepdims=True)
mx
# array([[1002],
# [ 4]])
x - mx
# array([[-1, 0],
# [-1, 0]])
numerator = np.exp(x - mx)
denominator = np.sum(numerator, axis=-1, keepdims=True)
denominator
# array([[ 1.36787944],
# [ 1.36787944]])
numerator/denominator
# array([[ 0.26894142, 0.73105858],
[ 0.26894142, 0.73105858]])

My 5-liner (which uses scipy logsumexp for the tricky bits):
def softmax(a, axis=None):
"""
Computes exp(a)/sumexp(a); relies on scipy logsumexp implementation.
:param a: ndarray/tensor
:param axis: axis to sum over; default (None) sums over everything
"""
from scipy.special import logsumexp
lse = logsumexp(a, axis=axis) # this reduces along axis
if axis is not None:
lse = np.expand_dims(lse, axis) # restore that axis for subtraction
return np.exp(a - lse)
You may have to use from scipy.misc import logsumexp if you have an older scipy version.

EDIT. As of version 1.2.0, scipy includes softmax as a special function:
https://scipy.github.io/devdocs/generated/scipy.special.softmax.html
I wrote a very general softmax function operating over an arbitrary axis, including the tricky max subtraction bit. The function is below, and I wrote a blog post about it here.
def softmax(X, theta = 1.0, axis = None):
"""
Compute the softmax of each element along an axis of X.
Parameters
----------
X: ND-Array. Probably should be floats.
theta (optional): float parameter, used as a multiplier
prior to exponentiation. Default = 1.0
axis (optional): axis to compute values along. Default is the
first non-singleton axis.
Returns an array the same size as X. The result will sum to 1
along the specified axis.
"""
# make X at least 2d
y = np.atleast_2d(X)
# find axis
if axis is None:
axis = next(j[0] for j in enumerate(y.shape) if j[1] > 1)
# multiply y against the theta parameter,
y = y * float(theta)
# subtract the max for numerical stability
y = y - np.expand_dims(np.max(y, axis = axis), axis)
# exponentiate y
y = np.exp(y)
# take the sum along the specified axis
ax_sum = np.expand_dims(np.sum(y, axis = axis), axis)
# finally: divide elementwise
p = y / ax_sum
# flatten if X was 1D
if len(X.shape) == 1: p = p.flatten()
return p

The x - np.max(x) code is not doing row-wise subtraction.
Let's do it step-wise. First we will make a 'maxes' array by tiling or making a copy of the column:
maxes = np.tile(np.max(x,1), (2,1)).T
This will create a 2X2 matrix which will correspond to the maxes for each row by making a duplicate column(tile). After this you can do:
x = np.exp(x - maxes)/(np.sum(np.exp(x - maxes), axis = 1))
You should get your result with this. The axis = 1 is for the row-wise softmax you mentioned in the heading of your answer. Hope this helps.

How about this?
For taking max along the rows just specify the argument as axis=1 and then convert the result as a column vector(but a 2D array actually) using np.newaxis/None.
In [40]: x
Out[40]:
array([[1001, 1002],
[ 3, 4]])
In [41]: z = x - np.max(x, axis=1)[:, np.newaxis]
In [42]: z
Out[42]:
array([[-1, 0],
[-1, 0]])
In [44]: softmax = np.exp(z) / np.sum(np.exp(z), axis=1)[:, np.newaxis]
In [45]: softmax
Out[45]:
array([[ 0.26894142, 0.73105858],
[ 0.26894142, 0.73105858]])
In the last step, again when you take sum just specify the argument axis=1 to sum it along the rows.

alternative to numpy.gradient

I have been struggling to find a robust function to compute gradient for a 3D array. numpy.gradient supports up to 2nd order accuracy. Is there any alternative way to compute the gradient with a better accuracy? thanks.

Finally I found this: 4th order gradient.
Wish numpy would integrate this, too...
https://gist.github.com/deeplycloudy/1b9fa46d5290314d9be02a5156b48741
def gradientO4(f, *varargs):
"""Calculate the fourth-order-accurate gradient of an N-dimensional scalar function.
Uses central differences on the interior and first differences on boundaries
to give the same shape.
Inputs:
f -- An N-dimensional array giving samples of a scalar function
varargs -- 0, 1, or N scalars giving the sample distances in each direction
Outputs:
N arrays of the same shape as f giving the derivative of f with respect
to each dimension.
"""
N = len(f.shape) # number of dimensions
n = len(varargs)
if n == 0:
dx = [1.0]*N
elif n == 1:
dx = [varargs[0]]*N
elif n == N:
dx = list(varargs)
else:
raise SyntaxError, "invalid number of arguments"
# use central differences on interior and first differences on endpoints
#print dx
outvals = []
# create slice objects --- initially all are [:, :, ..., :]
slice0 = [slice(None)]*N
slice1 = [slice(None)]*N
slice2 = [slice(None)]*N
slice3 = [slice(None)]*N
slice4 = [slice(None)]*N
otype = f.dtype.char
if otype not in ['f', 'd', 'F', 'D']:
otype = 'd'
for axis in range(N):
# select out appropriate parts for this dimension
out = np.zeros(f.shape, f.dtype.char)
slice0[axis] = slice(2, -2)
slice1[axis] = slice(None, -4)
slice2[axis] = slice(1, -3)
slice3[axis] = slice(3, -1)
slice4[axis] = slice(4, None)
# 1D equivalent -- out[2:-2] = (f[:4] - 8*f[1:-3] + 8*f[3:-1] - f[4:])/12.0
out[slice0] = (f[slice1] - 8.0*f[slice2] + 8.0*f[slice3] - f[slice4])/12.0
slice0[axis] = slice(None, 2)
slice1[axis] = slice(1, 3)
slice2[axis] = slice(None, 2)
# 1D equivalent -- out[0:2] = (f[1:3] - f[0:2])
out[slice0] = (f[slice1] - f[slice2])
slice0[axis] = slice(-2, None)
slice1[axis] = slice(-2, None)
slice2[axis] = slice(-3, -1)
## 1D equivalent -- out[-2:] = (f[-2:] - f[-3:-1])
out[slice0] = (f[slice1] - f[slice2])
# divide by step size
outvals.append(out / dx[axis])
# reset the slice object in this dimension to ":"
slice0[axis] = slice(None)
slice1[axis] = slice(None)
slice2[axis] = slice(None)
slice3[axis] = slice(None)
slice4[axis] = slice(None)
if N == 1:
return outvals[0]
else:
return outvals

I would suggest using the symbolic library called Theano (http://deeplearning.net/software/theano/). It is primarily designed for neural networks and deep learning stuff, yet quite nicely fits what you want.
After installing theano, here is a simple code for computing the gradient of a 1-d vector. You can extend it to 3-d yourself.
import numpy as np
import theano
import theano.tensor as T
x = T.dvector('x')
J, updates = theano.scan(lambda i, x : (x[i+1] - x[i])/2, sequences=T.arange(x.shape[0] - 1), non_sequences=[x])
f = theano.function([x], J, updates=updates)
f(np.array([1, 2, 4, 7, 11, 16], dtype='float32'))
f(np.array([1, 2, 4, 7.12345, 11, 16], dtype='float32'))

I found this: FINDIFF -- A Python package for finite difference numerical derivatives and partial differential equations in any number of dimensions.
https://github.com/maroba/findiff
Cheers!

Compute pairwise distance in a batch without replicating tensor in Tensorflow?

I want to compute the pairwise square distance of a batch of feature in Tensorflow. I have a simple implementation using + and * operations by
tiling the original tensor :
def pairwise_l2_norm2(x, y, scope=None):
with tf.op_scope([x, y], scope, 'pairwise_l2_norm2'):
size_x = tf.shape(x)[0]
size_y = tf.shape(y)[0]
xx = tf.expand_dims(x, -1)
xx = tf.tile(xx, tf.pack([1, 1, size_y]))
yy = tf.expand_dims(y, -1)
yy = tf.tile(yy, tf.pack([1, 1, size_x]))
yy = tf.transpose(yy, perm=[2, 1, 0])
diff = tf.sub(xx, yy)
square_diff = tf.square(diff)
square_dist = tf.reduce_sum(square_diff, 1)
return square_dist
This function takes as input two matrices of size (m,d) and (n,d) and compute the squared distance between each row vector. The output is a matrix of size (m,n) with element 'd_ij = dist(x_i, y_j)'.
The problem is that I have a large batch and high dim features 'm, n, d' replicating the tensor consume a lot of memory.
I'm looking for another way to implement this without increasing the memory usage and just only store the final distance tensor. Kind of double looping the original tensor.

You can use some linear algebra to turn it into matrix ops. Note that what you need matrix D where a[i] is the ith row of your original matrix and
D[i,j] = (a[i]-a[j])(a[i]-a[j])'
You can rewrite that into
D[i,j] = r[i] - 2 a[i]a[j]' + r[j]
Where r[i] is squared norm of ith row of the original matrix.
In a system that supports standard broadcasting rules you can treat r as a column vector and write D as
D = r - 2 A A' + r'
In TensorFlow you could write this as
A = tf.constant([[1, 1], [2, 2], [3, 3]])
r = tf.reduce_sum(A*A, 1)
# turn r into column vector
r = tf.reshape(r, [-1, 1])
D = r - 2*tf.matmul(A, tf.transpose(A)) + tf.transpose(r)
sess = tf.Session()
sess.run(D)
result
array([[0, 2, 8],
[2, 0, 2],
[8, 2, 0]], dtype=int32)

Using squared_difference:
def squared_dist(A):
expanded_a = tf.expand_dims(A, 1)
expanded_b = tf.expand_dims(A, 0)
distances = tf.reduce_sum(tf.squared_difference(expanded_a, expanded_b), 2)
return distances
One thing I noticed is that this solution using tf.squared_difference gives me out of memory (OOM) for very large vectors, while the approach by #YaroslavBulatov doesn't. So, I think decomposing the operation yields a smaller memory footprint (which I thought squared_difference would handle better under the hood).

Here is a more general solution for two tensors of coordinates A and B:
def squared_dist(A, B):
assert A.shape.as_list() == B.shape.as_list()
row_norms_A = tf.reduce_sum(tf.square(A), axis=1)
row_norms_A = tf.reshape(row_norms_A, [-1, 1]) # Column vector.
row_norms_B = tf.reduce_sum(tf.square(B), axis=1)
row_norms_B = tf.reshape(row_norms_B, [1, -1]) # Row vector.
return row_norms_A - 2 * tf.matmul(A, tf.transpose(B)) + row_norms_B
Note that this is the square distance. If you want to change this to the Euclidean distance, perform a tf.sqrt on the result. If you want to do that, don't forget to add a small constant to compensate for the floating point instabilities: dist = tf.sqrt(squared_dist(A, B) + 1e-6).

If you want compute other method , then change the order of the tf modules.
def compute_euclidean_distance(x, y):
size_x = x.shape.dims[0]
size_y = y.shape.dims[0]
for i in range(size_x):
tile_one = tf.reshape(tf.tile(x[i], [size_y]), [size_y, -1])
eu_one = tf.expand_dims(tf.sqrt(tf.reduce_sum(tf.pow(tf.subtract(tile_one, y), 2), axis=1)), axis=0)
if i == 0:
d = eu_one
else:
d = tf.concat([d, eu_one], axis=0)
return d

Multi-dimensional gather in Tensorflow

The general solution to this question is being worked on in this github issue, but I was wondering if there are workarounds using tf.gather (or something else) to achieve array indexing using a multi-index. One solution I came up with was to broadcast multiply each index in the multi-idx with the cumulative product of the tensor shape, which produces indices suitable for indexing the flattened tensor:
import tensorflow as tf
import numpy as np
def __cumprod(l):
# Get the length and make a copy
ll = len(l)
l = [v for v in l]
# Reverse cumulative product
for i in range(ll-1):
l[ll-i-2] *= l[ll-i-1]
return l
def ravel_multi_index(tensor, multi_idx):
"""
Returns a tensor suitable for use as the index
on a gather operation on argument tensor.
"""
if not isinstance(tensor, (tf.Variable, tf.Tensor)):
raise TypeError('tensor should be a tf.Variable')
if not isinstance(multi_idx, list):
multi_idx = [multi_idx]
# Shape of the tensor in ints
shape = [i.value for i in tensor.get_shape()]
if len(shape) != len(multi_idx):
raise ValueError("Tensor rank is different "
"from the multi_idx length.")
# Work out the shape of each tensor in the multi_idx
idx_shape = [tuple(j.value for j in i.get_shape()) for i in multi_idx]
# Ensure that each multi_idx tensor is length 1
assert all(len(i) == 1 for i in idx_shape)
# Create a list of reshaped indices. New shape will be
# [1, 1, dim[0], 1] for the 3rd index in multi_idx
# for example.
reshaped_idx = [tf.reshape(idx, [1 if i !=j else dim[0]
for j in range(len(shape))])
for i, (idx, dim)
in enumerate(zip(multi_idx, idx_shape))]
# Figure out the base indices for each dimension
base = __cumprod(shape)
# Now multiply base indices by each reshaped index
# to produce the flat index
return (sum(b*s for b, s in zip(base[1:], reshaped_idx[:-1]))
+ reshaped_idx[-1])
# Shape and slice starts and sizes
shape = (Z, Y, X) = 4, 5, 6
Z0, Y0, X0 = 1, 1, 1
ZS, YS, XS = 3, 3, 4
# Numpy matrix and index
M = np.random.random(size=shape)
idx = [
np.arange(Z0, Z0+ZS).reshape(ZS,1,1),
np.arange(Y0, Y0+YS).reshape(1,YS,1),
np.arange(X0, X0+XS).reshape(1,1,XS),
]
# Tensorflow matrix and indices
TM = tf.Variable(M)
TF_flat_idx = ravel_multi_index(TM, [
tf.range(Z0, Z0+ZS),
tf.range(Y0, Y0+YS),
tf.range(X0, X0+XS)])
TF_data = tf.gather(tf.reshape(TM,[-1]), TF_flat_idx)
with tf.Session() as S:
S.run(tf.initialize_all_variables())
# Obtain data via flat indexing
data = S.run(TF_data)
# Check that it agrees with data obtained
# by numpy smart indexing
assert np.all(data == M[idx])
However, this only works on tensors of rank 3 due to this (current) limitation limiting broadcasts to tensors of rank 3.
At the moment I can only think of doing a chained gather, transpose, gather, transpose, gather, but this is unlikely to be efficient. e.g.
shape = (8, 9, 10)
A = tf.random_normal(shape)
data = tf.gather(tf.transpose(tf.gather(A, [1, 3]), [1,0,2]), ...)
Any ideas?

It sounds like you want gather_nd.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Local reduce with specified slices over a single axis in tensorflow - python

Related

Multidimensional Tensor slicing

Softmax function of a numpy array by row

alternative to numpy.gradient

Compute pairwise distance in a batch without replicating tensor in Tensorflow?

Multi-dimensional gather in Tensorflow

Categories

Resources