How to vectorize the sum? tensor[i,:,:,:] + tensor[i] - python

I want to vectorize the following code:
def style_noise(self, y, style):
n = torch.randn(y.shape)
for i in range(n.shape[0]):
n[i] = (n[i] - n.mean(dim=(1, 2, 3))[i]) * style.std(dim=(1, 2, 3))[i] / n.std(dim=(1, 2, 3))[i] + style.mean(dim=(1, 2, 3))[i]
noise = Variable(n, requires_grad=False).to(y.device)
return noise
I didn't find a way nice way of doing so.
y and style are 4d tensors, say style.shape = y.shape = [64, 3, 128, 128].
I want to return the noise tensor, noise.shape = [64, 3, 128, 128].
Please let me know in the comments if the question is not clear.

Your use case is exactly why the .mean and .std methods come with a keepdim parameter. You can make use of this to enable broadcasting semantics to vectorize things for you:
def style_noise(self, y, style):
n = torch.randn(y.shape)
n_mean = n.mean(dim=(1, 2, 3), keepdim=True)
n_std = n.std(dim=(1, 2, 3), keepdim=True)
style_mean = style.mean(dim=(1, 2, 3), keepdim=True)
style_std = style.std(dim=(1, 2, 3), keepdim=True)
n = (n - n_mean) * style_std / n_std + style_mean
noise = Variable(n, requires_grad=False).to(y.device)
return noise

To calculate mean and std for the whole tensor you set no arguments
m = t.mean(); print(m) # if you don't set the dim for the whole tensor
s = t.std(); print(s) # if you don't set the dim for the whole tensor
Then if your shape is 2,2,2 for instance, create tensors for broadcasting subtract and division.
ss = torch.empty(2,2,2).fill_(s)
print(ss)
mm = torch.empty(2,2,2).fill_(m)
print(mm)
At the moment keepdim is not working as expected when you don't set the dim.
m = t.mean(); print(m) # for the whole tensor
s = t.std(); print(s) # for the whole tensor
m = t.mean(dim=0); print(m) # 0 means columns mean
s = t.std(dim=0); print(s) # 0 means columns mean
m = t.mean(dim=1); print(m) # 1 means rows mean
s = t.std(dim=1); print(s) # 1 means rows mean
s = t.mean(keepdim=True);print(s) # will not work
m = t.std(keepdim=True);print(m) # will not work
If you set a dim as a tuple, then it will return mean for axes, you asked not for the whole.

Related

Multidimensional Tensor slicing

First things first: I'm relatively new to TensorFlow.
I'm trying to implement a custom layer in tensorflow.keras and I'm having relatively hard time when I try to achieve the following:
I've got 3 Tensors (x,y,z) of shape (?,49,3,3,32) [where ? is the batch size]
On each Tensor I compute the sum over the 3rd and 4th axes [thus I end up with 3 Tensors of shape (?,49,32)]
By doing an argmax (A)on the above 3 Tensors (?,49,32) I get a single (?,49,32) Tensor
Now I want to use this tensor to select slices from the initial x,y,z Tensors in the following form:
Each element in the last dimension of A corresponds to the selected Tensor.
(aka: 0 = X, 1 = Y, 2 = Z)
The index of the last dimension of A corresponds to the slice that I would like to extract from the Tensor last dimension.
I've tried to achieve the above using tf.gather but I had no luck. Then I tried using a series of tf.map_fn, which is ugly and computationally costly.
To simplify the above:
let's say we've got an A array of shape (3,3,3,32). Then the numpy equivalent of what I try to achieve is this:
import numpy as np
x = np.random.rand(3,3,32)
y = np.random.rand(3,3,32)
z = np.random.rand(3,3,32)
x_sums = np.sum(np.sum(x,axis=0),0);
y_sums = np.sum(np.sum(y,axis=0),0);
z_sums = np.sum(np.sum(z,axis=0),0);
max_sums = np.argmax([x_sums,y_sums,z_sums],0)
A = np.array([x,y,z])
tmp = []
for i in range(0,len(max_sums)):
tmp.append(A[max_sums[i],:,:,i)
output = np.transpose(np.stack(tmp))
Any suggestions?
ps: I tried tf.gather_nd but I had no luck
This is how you can do something like that with tf.gather_nd:
import tensorflow as tf
# Make example data
tf.random.set_seed(0)
b = 10 # Batch size
x = tf.random.uniform((b, 49, 3, 3, 32))
y = tf.random.uniform((b, 49, 3, 3, 32))
z = tf.random.uniform((b, 49, 3, 3, 32))
# Stack tensors together
data = tf.stack([x, y, z], axis=2)
# Put reduction axes last
data_t = tf.transpose(data, (0, 1, 5, 2, 3, 4))
# Reduce
s = tf.reduce_sum(data_t, axis=(4, 5))
# Find largest sums
idx = tf.argmax(s, 3)
# Make gather indices
data_shape = tf.shape(data_t, idx.dtype)
bb, ii, jj = tf.meshgrid(*(tf.range(data_shape[i]) for i in range(3)), indexing='ij')
# Gather result
output_t = tf.gather_nd(data_t, tf.stack([bb, ii, jj, idx], axis=-1))
# Reorder axes
output = tf.transpose(output_t, (0, 1, 3, 4, 2))
print(output.shape)
# TensorShape([10, 49, 3, 3, 32])

How to implement tf.nn.top_k with Numpy?

How can I implement the tensorflow function tf.nn.top_k with Numpy? Suppose the input is ndarray in format heigh x width x channel?
You can use the answer here with Numpy 1.8 and up.
I spent more time on this than I wanted, because the other answers treated the whole multidimensional array as a single search where top_k only looks at the last dimension. There's more information here, where the partition is used to specifically sort a given axis.
To summarize, based upon the tensorflow signature (without name):
def top_k(input, k=1, sorted=True):
"""Top k max pooling
Args:
input(ndarray): convolutional feature in heigh x width x channel format
k(int): if k==1, it is equal to normal max pooling
sorted(bool): whether to return the array sorted by channel value
Returns:
ndarray: k x (height x width)
ndarray: k
"""
ind = np.argpartition(input, -k)[..., -k:]
def get_entries(input, ind, sorted):
if len(ind.shape) == 1:
if sorted:
ind = ind[np.argsort(-input[ind])]
return input[ind], ind
output, ind = zip(*[get_entries(inp, id, sorted) for inp, id in zip(input, ind)])
return np.array(output), np.array(ind)
return get_entries(input, ind, sorted)
Keep in mind, for your answer, you tested with
arr = np.random.rand(3, 3, 3)
arr1, ind1 = top_k(arr)
arr2 = np.max(arr, axis=(0,1))
arr3, ind3 = tf.nn.top_k(arr)
print(arr1)
print(arr2)
print(arr3.numpy())
but arr2.shape is (3,) and arr3.numpy().shape is (3, 3, 1).
If you really want tf.nn.top_k like functionality, you should use np.array_equal(arr3, np.max(arr, axis=-1, keepdims=True)) as the test. I ran this with tf.enable_eager_execution() executed, hence the .numpy() instead of .eval().
import numpy as np
def top_k(input, k=1):
"""Top k max pooling
Args:
input(ndarray): convolutional feature in heigh x width x channel format
k(int): if k==1, it is equal to normal max pooling
Returns:
ndarray: k x (height x width)
"""
input = np.reshape(input, [-1, input.shape[-1]])
input = np.sort(input, axis=0)[::-1, :][:k, :]
return input
arr = np.random.rand(3, 3, 3)
arr1 = top_k(arr)
arr2 = np.max(arr, axis=(0,1))
print(arr1)
print(arr2)
assert np.array_equal(top_k(arr)[0], np.max(arr, axis=(0,1)))

alternative to numpy.gradient

I have been struggling to find a robust function to compute gradient for a 3D array. numpy.gradient supports up to 2nd order accuracy. Is there any alternative way to compute the gradient with a better accuracy? thanks.
Finally I found this: 4th order gradient.
Wish numpy would integrate this, too...
https://gist.github.com/deeplycloudy/1b9fa46d5290314d9be02a5156b48741
def gradientO4(f, *varargs):
"""Calculate the fourth-order-accurate gradient of an N-dimensional scalar function.
Uses central differences on the interior and first differences on boundaries
to give the same shape.
Inputs:
f -- An N-dimensional array giving samples of a scalar function
varargs -- 0, 1, or N scalars giving the sample distances in each direction
Outputs:
N arrays of the same shape as f giving the derivative of f with respect
to each dimension.
"""
N = len(f.shape) # number of dimensions
n = len(varargs)
if n == 0:
dx = [1.0]*N
elif n == 1:
dx = [varargs[0]]*N
elif n == N:
dx = list(varargs)
else:
raise SyntaxError, "invalid number of arguments"
# use central differences on interior and first differences on endpoints
#print dx
outvals = []
# create slice objects --- initially all are [:, :, ..., :]
slice0 = [slice(None)]*N
slice1 = [slice(None)]*N
slice2 = [slice(None)]*N
slice3 = [slice(None)]*N
slice4 = [slice(None)]*N
otype = f.dtype.char
if otype not in ['f', 'd', 'F', 'D']:
otype = 'd'
for axis in range(N):
# select out appropriate parts for this dimension
out = np.zeros(f.shape, f.dtype.char)
slice0[axis] = slice(2, -2)
slice1[axis] = slice(None, -4)
slice2[axis] = slice(1, -3)
slice3[axis] = slice(3, -1)
slice4[axis] = slice(4, None)
# 1D equivalent -- out[2:-2] = (f[:4] - 8*f[1:-3] + 8*f[3:-1] - f[4:])/12.0
out[slice0] = (f[slice1] - 8.0*f[slice2] + 8.0*f[slice3] - f[slice4])/12.0
slice0[axis] = slice(None, 2)
slice1[axis] = slice(1, 3)
slice2[axis] = slice(None, 2)
# 1D equivalent -- out[0:2] = (f[1:3] - f[0:2])
out[slice0] = (f[slice1] - f[slice2])
slice0[axis] = slice(-2, None)
slice1[axis] = slice(-2, None)
slice2[axis] = slice(-3, -1)
## 1D equivalent -- out[-2:] = (f[-2:] - f[-3:-1])
out[slice0] = (f[slice1] - f[slice2])
# divide by step size
outvals.append(out / dx[axis])
# reset the slice object in this dimension to ":"
slice0[axis] = slice(None)
slice1[axis] = slice(None)
slice2[axis] = slice(None)
slice3[axis] = slice(None)
slice4[axis] = slice(None)
if N == 1:
return outvals[0]
else:
return outvals
I would suggest using the symbolic library called Theano (http://deeplearning.net/software/theano/). It is primarily designed for neural networks and deep learning stuff, yet quite nicely fits what you want.
After installing theano, here is a simple code for computing the gradient of a 1-d vector. You can extend it to 3-d yourself.
import numpy as np
import theano
import theano.tensor as T
x = T.dvector('x')
J, updates = theano.scan(lambda i, x : (x[i+1] - x[i])/2, sequences=T.arange(x.shape[0] - 1), non_sequences=[x])
f = theano.function([x], J, updates=updates)
f(np.array([1, 2, 4, 7, 11, 16], dtype='float32'))
f(np.array([1, 2, 4, 7.12345, 11, 16], dtype='float32'))
I found this: FINDIFF -- A Python package for finite difference numerical derivatives and partial differential equations in any number of dimensions.
https://github.com/maroba/findiff
Cheers!

Compute pairwise distance in a batch without replicating tensor in Tensorflow?

I want to compute the pairwise square distance of a batch of feature in Tensorflow. I have a simple implementation using + and * operations by
tiling the original tensor :
def pairwise_l2_norm2(x, y, scope=None):
with tf.op_scope([x, y], scope, 'pairwise_l2_norm2'):
size_x = tf.shape(x)[0]
size_y = tf.shape(y)[0]
xx = tf.expand_dims(x, -1)
xx = tf.tile(xx, tf.pack([1, 1, size_y]))
yy = tf.expand_dims(y, -1)
yy = tf.tile(yy, tf.pack([1, 1, size_x]))
yy = tf.transpose(yy, perm=[2, 1, 0])
diff = tf.sub(xx, yy)
square_diff = tf.square(diff)
square_dist = tf.reduce_sum(square_diff, 1)
return square_dist
This function takes as input two matrices of size (m,d) and (n,d) and compute the squared distance between each row vector. The output is a matrix of size (m,n) with element 'd_ij = dist(x_i, y_j)'.
The problem is that I have a large batch and high dim features 'm, n, d' replicating the tensor consume a lot of memory.
I'm looking for another way to implement this without increasing the memory usage and just only store the final distance tensor. Kind of double looping the original tensor.
You can use some linear algebra to turn it into matrix ops. Note that what you need matrix D where a[i] is the ith row of your original matrix and
D[i,j] = (a[i]-a[j])(a[i]-a[j])'
You can rewrite that into
D[i,j] = r[i] - 2 a[i]a[j]' + r[j]
Where r[i] is squared norm of ith row of the original matrix.
In a system that supports standard broadcasting rules you can treat r as a column vector and write D as
D = r - 2 A A' + r'
In TensorFlow you could write this as
A = tf.constant([[1, 1], [2, 2], [3, 3]])
r = tf.reduce_sum(A*A, 1)
# turn r into column vector
r = tf.reshape(r, [-1, 1])
D = r - 2*tf.matmul(A, tf.transpose(A)) + tf.transpose(r)
sess = tf.Session()
sess.run(D)
result
array([[0, 2, 8],
[2, 0, 2],
[8, 2, 0]], dtype=int32)
Using squared_difference:
def squared_dist(A):
expanded_a = tf.expand_dims(A, 1)
expanded_b = tf.expand_dims(A, 0)
distances = tf.reduce_sum(tf.squared_difference(expanded_a, expanded_b), 2)
return distances
One thing I noticed is that this solution using tf.squared_difference gives me out of memory (OOM) for very large vectors, while the approach by #YaroslavBulatov doesn't. So, I think decomposing the operation yields a smaller memory footprint (which I thought squared_difference would handle better under the hood).
Here is a more general solution for two tensors of coordinates A and B:
def squared_dist(A, B):
assert A.shape.as_list() == B.shape.as_list()
row_norms_A = tf.reduce_sum(tf.square(A), axis=1)
row_norms_A = tf.reshape(row_norms_A, [-1, 1]) # Column vector.
row_norms_B = tf.reduce_sum(tf.square(B), axis=1)
row_norms_B = tf.reshape(row_norms_B, [1, -1]) # Row vector.
return row_norms_A - 2 * tf.matmul(A, tf.transpose(B)) + row_norms_B
Note that this is the square distance. If you want to change this to the Euclidean distance, perform a tf.sqrt on the result. If you want to do that, don't forget to add a small constant to compensate for the floating point instabilities: dist = tf.sqrt(squared_dist(A, B) + 1e-6).
If you want compute other method , then change the order of the tf modules.
def compute_euclidean_distance(x, y):
size_x = x.shape.dims[0]
size_y = y.shape.dims[0]
for i in range(size_x):
tile_one = tf.reshape(tf.tile(x[i], [size_y]), [size_y, -1])
eu_one = tf.expand_dims(tf.sqrt(tf.reduce_sum(tf.pow(tf.subtract(tile_one, y), 2), axis=1)), axis=0)
if i == 0:
d = eu_one
else:
d = tf.concat([d, eu_one], axis=0)
return d

Multi-dimensional gather in Tensorflow

The general solution to this question is being worked on in this github issue, but I was wondering if there are workarounds using tf.gather (or something else) to achieve array indexing using a multi-index. One solution I came up with was to broadcast multiply each index in the multi-idx with the cumulative product of the tensor shape, which produces indices suitable for indexing the flattened tensor:
import tensorflow as tf
import numpy as np
def __cumprod(l):
# Get the length and make a copy
ll = len(l)
l = [v for v in l]
# Reverse cumulative product
for i in range(ll-1):
l[ll-i-2] *= l[ll-i-1]
return l
def ravel_multi_index(tensor, multi_idx):
"""
Returns a tensor suitable for use as the index
on a gather operation on argument tensor.
"""
if not isinstance(tensor, (tf.Variable, tf.Tensor)):
raise TypeError('tensor should be a tf.Variable')
if not isinstance(multi_idx, list):
multi_idx = [multi_idx]
# Shape of the tensor in ints
shape = [i.value for i in tensor.get_shape()]
if len(shape) != len(multi_idx):
raise ValueError("Tensor rank is different "
"from the multi_idx length.")
# Work out the shape of each tensor in the multi_idx
idx_shape = [tuple(j.value for j in i.get_shape()) for i in multi_idx]
# Ensure that each multi_idx tensor is length 1
assert all(len(i) == 1 for i in idx_shape)
# Create a list of reshaped indices. New shape will be
# [1, 1, dim[0], 1] for the 3rd index in multi_idx
# for example.
reshaped_idx = [tf.reshape(idx, [1 if i !=j else dim[0]
for j in range(len(shape))])
for i, (idx, dim)
in enumerate(zip(multi_idx, idx_shape))]
# Figure out the base indices for each dimension
base = __cumprod(shape)
# Now multiply base indices by each reshaped index
# to produce the flat index
return (sum(b*s for b, s in zip(base[1:], reshaped_idx[:-1]))
+ reshaped_idx[-1])
# Shape and slice starts and sizes
shape = (Z, Y, X) = 4, 5, 6
Z0, Y0, X0 = 1, 1, 1
ZS, YS, XS = 3, 3, 4
# Numpy matrix and index
M = np.random.random(size=shape)
idx = [
np.arange(Z0, Z0+ZS).reshape(ZS,1,1),
np.arange(Y0, Y0+YS).reshape(1,YS,1),
np.arange(X0, X0+XS).reshape(1,1,XS),
]
# Tensorflow matrix and indices
TM = tf.Variable(M)
TF_flat_idx = ravel_multi_index(TM, [
tf.range(Z0, Z0+ZS),
tf.range(Y0, Y0+YS),
tf.range(X0, X0+XS)])
TF_data = tf.gather(tf.reshape(TM,[-1]), TF_flat_idx)
with tf.Session() as S:
S.run(tf.initialize_all_variables())
# Obtain data via flat indexing
data = S.run(TF_data)
# Check that it agrees with data obtained
# by numpy smart indexing
assert np.all(data == M[idx])
However, this only works on tensors of rank 3 due to this (current) limitation limiting broadcasts to tensors of rank 3.
At the moment I can only think of doing a chained gather, transpose, gather, transpose, gather, but this is unlikely to be efficient. e.g.
shape = (8, 9, 10)
A = tf.random_normal(shape)
data = tf.gather(tf.transpose(tf.gather(A, [1, 3]), [1,0,2]), ...)
Any ideas?
It sounds like you want gather_nd.

Categories

Resources