I am trying to smush two tensors together. scatter_nd is perfect for this occasion, and I have written the following function to accomplish my task. It basically just does two scatter_nds and adds the results together.
def tf_munge(t, i, r, j, axis=0):
    # Insert tensor t at indices i and tensor r at indices j on axis `axis`.
    # Requires: i.shape[0] == t.shape[axis] && j.shape[0] == r.shape[axis]
    #           && t.shape[k] == r.shape[k] ∀ k != axis
    i = tf.expand_dims(i, -1)
    j = tf.expand_dims(j, -1)
    rank_indices = tf.range(tf.rank(t))
    roller = tf.roll(rank_indices, -axis, 0)
    rolled_t = tf.transpose(t, roller)
    rolled_r = tf.transpose(r, roller)
    scatter_shape = tf.concat((tf.shape(i)[0:1] + tf.shape(j)[0:1],
                               tf.shape(rolled_t)[1:]), axis=0)
    scattered = tf.scatter_nd(i, rolled_t, scatter_shape) + tf.scatter_nd(j, rolled_r, scatter_shape)
    return tf.transpose(scattered, tf.roll(rank_indices, axis, 0))
It generally works as expected. However, it fails whenever both t and r are empty along some axis. I have two code "paths" depending on a boolean, wherein I split my tensor and perform different operations depending on whether that boolean is true or false. Sometimes that boolean is false for 0 rows, in which case I end up operating on an empty tensor, and one of those operations is this attempted scattering. The error actually references the output shape (scatter_shape in the above code), claiming that:
ValueError: Indices and updates specified for empty output shape for 'ScatterNd_4' (op: 'ScatterNd')
with input shapes: [3,1], [3,0,2], [3] and with input tensors computed as partial shapes: input[2] = [5,0,2].
Note that the axis that is empty is different from the axis along which I'm scattering. Here is a working example:
foo = tf.ones((3,1,2))
bar = tf.ones((2,1,2))*2
i = tf.constant([1,3,4])
j = tf.constant([0,2])
tf_munge(foo,i,bar,j,axis=0)
#Output: <tf.Tensor 'transpose_13:0' shape=(5, 1, 2) dtype=float32>
Here is a failing example:
foo = tf.ones((3,0,2))
bar = tf.ones((2,0,2))*2
tf_munge(foo,i,bar,j,axis=0)
#Output: The error above
The expected output here would obviously be an empty tensor of shape (5,0,2).
I thought about using a conditional on the shape of the input, but tf.cond executes both pathways. How can I handle this situation when I have an empty tensor with scatter_nd?
You can do that more simply with tf.gather, in a way that works for all cases: concatenate t and r along the axis, then gather with the argsort of the concatenated indices, which sends each row to its target position:
import tensorflow as tf
def tf_munge(t, i, r, j, axis=0):
    tr = tf.concat([t, r], axis=axis)
    idx = tf.argsort(tf.concat([i, j], axis=0))
    return tf.gather(tr, idx, axis=axis)
with tf.Graph().as_default(), tf.Session() as sess:
    foo = tf.ones((3, 1, 2))
    bar = tf.ones((2, 1, 2)) * 2
    i = tf.constant([1, 3, 4])
    j = tf.constant([0, 2])
    out = tf_munge(foo, i, bar, j, axis=0)
    print(sess.run(out))
    # [[[2. 2.]]
    #  [[1. 1.]]
    #  [[2. 2.]]
    #  [[1. 1.]]
    #  [[1. 1.]]]
    foo2 = tf.ones((3, 0, 2))
    bar2 = tf.ones((2, 0, 2)) * 2
    out2 = tf_munge(foo2, i, bar2, j, axis=0)
    print(sess.run(out2))
    # []
I have a sparse tensor of indeterminate size with some non-zero entries. I can use tf.where(tensor != 0) to obtain the indices of the non-zero entries. For my application, it would be useful to get each such index along with the indices on either side of it. Is there a clever way I can do this with indexing or with some sort of built-in function?
For example, if I have
x = tf.constant([0, 0, 1, 0, 0]), then tf.where(x != 0) returns tf.Tensor([[2]], shape=(1, 1), dtype=int64). However, the function I have in mind would instead return tf.Tensor([[1], [2], [3]], shape=(3, 1), dtype=int64).
The closest thing to a solution I could come up with was this for loop:
x = tf.constant([0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,1.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.])
index = tf.where(x != 0)
next_to = tf.zeros(shape=[0, 1], dtype=tf.int64)
for i in index[0]:
    index_minus_one = index - 1
    index_naught = index
    index_plus_one = index + 1
    next_to = tf.concat([next_to, index_minus_one, index_naught, index_plus_one], axis=0)
print(next_to)
which gives the output tf.Tensor([[13], [14], [15]], shape=(3, 1), dtype=int64). However, this ends up interleaving the returned indices when there is more than one non-zero element, which is something I would like to be able to handle. Surely there's a built-in function that I haven't heard of that I could use instead?
Try this:
def f(x):
    a = tf.tile(x, [3])
    b = tf.constant([1, 0, -1], shape=[3, 1], dtype=tf.int64)
    sliced = tf.slice((a - b), begin=[0, 0], size=[3, 1])
    return tf.reshape(sliced, shape=[3, 1])

output = tf.map_fn(f, index)
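If you'd rather avoid tf.map_fn, here is a vectorized sketch of the same idea (assuming the index tensor from tf.where above, and that no non-zero entry sits at the array boundary): broadcasting adds the offsets [-1, 0, 1] to every index at once, so the neighbours of each non-zero entry stay grouped together rather than interleaved.
offsets = tf.constant([[-1, 0, 1]], dtype=tf.int64)  # shape [1, 3]
next_to = tf.reshape(index + offsets, [-1, 1])       # shape [3N, 1], one group of 3 rows per non-zero entry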
I have two placeholders with the following dimensions
x_ph = tf.placeholder(tf.float32,[None, 4])
and
y_ph = tf.placeholder(tf.float32,[None, 4, 3])
I want to multiply each element of x_ph with the corresponding row of y_ph to get an output of shape (None, 4, 3).
Here is an example of the operation I am looking for:
x = np.random.uniform(-1, 1, (2, 2))
z = np.random.uniform(-1, 1, (2, 2, 3))
x, z
# (array([[ 0.27083503, -0.13795923],
#         [ 0.8436118 ,  0.00771057]]),
#  array([[[ 0.51905276,  0.01173655, -0.57335926],
#          [ 0.42347431, -0.05438272,  0.21042366]],
#         [[ 0.91347706, -0.28086164,  0.54952429],
#          [ 0.41551953, -0.6207727 ,  0.32066292]]]))
I want to do following operation:
result = np.zeros((2, 3))
for i in range(2):
    for j in range(2):
        result[i] += x[i, j] * z[i, j, :]
print(result)
[[ 0.08215548 0.01068127 -0.18431566]
[ 0.77382391 -0.2417247 0.46605767]]
Any way to do it in tensorflow?
Add one dimension at the end of x_ph so you can use broadcasting to multiply both tensors:
import tensorflow as tf
x_ph = tf.placeholder(tf.float32,[None, 4])
y_ph = tf.placeholder(tf.float32,[None, 4, 3])
result = tf.expand_dims(x_ph, -1) * y_ph
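Note that the NumPy loop in the question also sums over the second axis, producing shape (2, 3). If that summed result is what you need, a sketch reusing the same placeholders reduces the broadcast product over axis 1:
result_summed = tf.reduce_sum(tf.expand_dims(x_ph, -1) * y_ph, axis=1)  # shape (None, 3)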
In Tensorflow, I'm trying to create the following matrix:
A = [[a, 0], [0,b]]
Where a and b are the parameters I'm trying to solve for.
Here's what I have so far:
a = tf.Variable((1,), name="a", dtype = tf.float64)
b = tf.Variable((1,), name="b", dtype = tf.float64)
const = tf.constant(0,dtype = tf.float64, shape = (1,))
A0 = tf.transpose(tf.stack([a,const]))
A1 = tf.transpose(tf.stack([const,b]))
A = tf.stack([A0,A1])
However, the shape of A ends up being (2, 1, 2), which is wrong (since A0 and A1 both have shape (1, 2)).
Is there an easier way to create the matrix object A in Tensorflow, or does anyone know why the shape is getting messed up with what I'm doing?
Well, you can create a single variable vector params = tf.Variable([1., 1.], name="ab") and then multiply it elementwise with the identity matrix tf.eye(2); broadcasting puts the two parameters on the diagonal:
A = params * tf.eye(2)
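Alternatively, you can build the diagonal matrix directly from the parameter vector; a sketch assuming TF 1.x, where tf.diag is available (tf.linalg.diag in later versions):
A = tf.diag(params)  # [[a, 0], [0, b]]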
tf.stack increases the rank of the tensors (creating a new axis) and combines them along that new axis. If you want to combine tensors along an existing axis, you should use tf.concat.
a = tf.Variable((1,), name="a", dtype = tf.float64)
b = tf.Variable((1,), name="b", dtype = tf.float64)
const = tf.constant(0,dtype = tf.float64, shape = (1,))
A0 = tf.stack([a, const], axis=1)
A1 = tf.stack([const, b], axis=1) # more clear than tf.transpose
A = tf.concat((A0, A1), axis=0)
A is now shape (2, 2).
To explain, each object here is a rank-1 tensor with one element:
a = [1]
const = [0]
Stacking gives:
tf.stack((a, const), axis=0) = [[1], [0]]  # 2x1 matrix
Concatenating gives:
tf.concat((a, const), axis=0) = [1, 0]  # vector of length 2
I have a placeholder tensor with shape [batch_size, sentence_length, word_dim] and a list of indices with shape [batch_size, num_indices]. The indices are on the second axis and are indices of words in the sentence. batch_size and sentence_length are only known at runtime.
How do I extract a tensor with shape [batch_size, len(indices), word_dim]?
I was reading about tensorflow.gather, but it seems like gather only slices along the first axis. Am I correct?
Edit: I managed to get it to work with constants:
def tile_repeat(n, repTime):
    '''
    Create something like 111..122..2333..33 ..... n..nn, where each
    number appears repTime times consecutively.
    This is for flattening the indices.
    '''
    print n, repTime
    idx = tf.range(n)
    idx = tf.reshape(idx, [-1, 1])         # Convert to an n x 1 matrix.
    idx = tf.tile(idx, [1, int(repTime)])  # Create repTime columns, each column repeating one number.
    y = tf.reshape(idx, [-1])
    return y

def gather_along_second_axis(x, idx):
    '''
    x has shape [batch_size, sentence_length, word_dim].
    idx has shape [batch_size, num_indices].
    Basically, in each batch, get the words of the sentence at the indices
    given in idx. However, since tensorflow does not fully support indexing
    and gather only works on the first axis, we have to reshape the input,
    gather, then reshape again.
    '''
    reshapedIdx = tf.reshape(idx, [-1])  # [batch_size*num_indices]
    idx_flattened = tile_repeat(tf.shape(x)[0], tf.shape(x)[1]) * tf.shape(x)[1] + reshapedIdx
    y = tf.gather(tf.reshape(x, [-1, int(tf.shape(x)[2])]),  # flatten input
                  idx_flattened)
    y = tf.reshape(y, tf.shape(x))
    return y
x = tf.constant([
    [[1, 2, 3], [3, 5, 6]],
    [[7, 8, 9], [10, 11, 12]],
    [[13, 14, 15], [16, 17, 18]]
])
idx = tf.constant([[0, 1], [1, 0], [1, 1]])
y = gather_along_second_axis(x, idx)
with tf.Session(''):
    print y.eval()
    print tf.Tensor.get_shape(y)
And the output is:
[[[ 1  2  3]
  [ 3  5  6]]
 [[10 11 12]
  [ 7  8  9]]
 [[16 17 18]
  [16 17 18]]]
shape: (3, 2, 3)
However, when the inputs are placeholders it does not work, and it returns this error:
idx = tf.tile(idx, [1, int(repTime)])
TypeError: int() argument must be a string or a number, not 'Tensor'
Python 2.7, tensorflow 0.12
Thank you in advance.
Thanks to @AllenLavoie's comments, I could eventually come up with the solution:
def tile_repeat(n, repTime):
    '''
    Create something like 111..122..2333..33 ..... n..nn, where each
    number appears repTime times consecutively.
    This is for flattening the indices.
    '''
    print n, repTime
    idx = tf.range(n)
    idx = tf.reshape(idx, [-1, 1])    # Convert to an n x 1 matrix.
    idx = tf.tile(idx, [1, repTime])  # Create repTime columns, each column repeating one number.
    y = tf.reshape(idx, [-1])
    return y

def gather_along_second_axis(x, idx):
    '''
    x has shape [batch_size, sentence_length, word_dim].
    idx has shape [batch_size, num_indices].
    Basically, in each batch, get the words of the sentence at the indices
    given in idx. However, since tensorflow does not fully support indexing
    and gather only works on the first axis, we have to reshape the input,
    gather, then reshape again.
    '''
    reshapedIdx = tf.reshape(idx, [-1])  # [batch_size*num_indices]
    idx_flattened = tile_repeat(tf.shape(x)[0], tf.shape(x)[1]) * tf.shape(x)[1] + reshapedIdx
    y = tf.gather(tf.reshape(x, [-1, tf.shape(x)[2]]),  # flatten input
                  idx_flattened)
    y = tf.reshape(y, tf.shape(x))
    return y
x = tf.constant([
    [[1, 2, 3], [3, 5, 6]],
    [[7, 8, 9], [10, 11, 12]],
    [[13, 14, 15], [16, 17, 18]]
])
idx = tf.constant([[0, 1], [1, 0], [1, 1]])
y = gather_along_second_axis(x, idx)
with tf.Session(''):
    print y.eval()
    print tf.Tensor.get_shape(y)
@Hoa Vu's answer was very helpful. The code works with the example x and idx, where sentence_length == len(indices), but it gives an error when sentence_length != len(indices).
I slightly changed the code, and now it works when sentence_length >= len(indices).
I tested with new x and idx on Python 3.x.
def tile_repeat(n, repTime):
    '''
    Create something like 111..122..2333..33 ..... n..nn, where each
    number appears repTime times consecutively.
    This is for flattening the indices.
    '''
    idx = tf.range(n)
    idx = tf.reshape(idx, [-1, 1])    # Convert to an n x 1 matrix.
    idx = tf.tile(idx, [1, repTime])  # Create repTime columns, each column repeating one number.
    y = tf.reshape(idx, [-1])
    return y

def gather_along_second_axis(x, idx):
    '''
    x has shape [batch_size, sentence_length, word_dim].
    idx has shape [batch_size, num_indices].
    Basically, in each batch, get the words of the sentence at the indices
    given in idx. However, since tensorflow does not fully support indexing
    and gather only works on the first axis, we have to reshape the input,
    gather, then reshape again.
    '''
    reshapedIdx = tf.reshape(idx, [-1])  # [batch_size*num_indices]
    idx_flattened = tile_repeat(tf.shape(x)[0], tf.shape(idx)[1]) * tf.shape(x)[1] + reshapedIdx
    y = tf.gather(tf.reshape(x, [-1, tf.shape(x)[2]]),  # flatten input
                  idx_flattened)
    y = tf.reshape(y, [tf.shape(x)[0], tf.shape(idx)[1], tf.shape(x)[2]])
    return y
x = tf.constant([
    [[1, 2, 3], [1, 2, 3], [3, 5, 6], [3, 5, 6]],
    [[7, 8, 9], [7, 8, 9], [10, 11, 12], [10, 11, 12]],
    [[13, 14, 15], [13, 14, 15], [16, 17, 18], [16, 17, 18]]
])
idx = tf.constant([[0, 1], [1, 2], [0, 3]])
y = gather_along_second_axis(x, idx)
with tf.Session(''):
    print(y.eval())
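As a side note, the same batched lookup can be written without any manual index flattening using tf.gather_nd; a sketch assuming the x and idx defined above:
rows = tf.tile(tf.expand_dims(tf.range(tf.shape(idx)[0]), 1),
               [1, tf.shape(idx)[1]])                # [batch_size, num_indices] row ids
y = tf.gather_nd(x, tf.stack([rows, idx], axis=-1))  # [batch_size, num_indices, word_dim]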
The general solution to this question is being worked on in this github issue, but I was wondering if there are workarounds using tf.gather (or something else) to achieve array indexing with a multi-index. One solution I came up with was to broadcast-multiply each index in the multi-index by the cumulative product of the tensor shape, which produces indices suitable for indexing into the flattened tensor:
import tensorflow as tf
import numpy as np
def __cumprod(l):
    # Get the length and make a copy
    ll = len(l)
    l = [v for v in l]
    # Reverse cumulative product
    for i in range(ll - 1):
        l[ll - i - 2] *= l[ll - i - 1]
    return l
def ravel_multi_index(tensor, multi_idx):
    """
    Returns a tensor suitable for use as the index
    on a gather operation on argument tensor.
    """
    if not isinstance(tensor, (tf.Variable, tf.Tensor)):
        raise TypeError('tensor should be a tf.Variable')
    if not isinstance(multi_idx, list):
        multi_idx = [multi_idx]
    # Shape of the tensor in ints
    shape = [i.value for i in tensor.get_shape()]
    if len(shape) != len(multi_idx):
        raise ValueError("Tensor rank is different "
                         "from the multi_idx length.")
    # Work out the shape of each tensor in the multi_idx
    idx_shape = [tuple(j.value for j in i.get_shape()) for i in multi_idx]
    # Ensure that each multi_idx tensor is length 1
    assert all(len(i) == 1 for i in idx_shape)
    # Create a list of reshaped indices. New shape will be
    # [1, 1, dim[0], 1] for the 3rd index in multi_idx
    # for example.
    reshaped_idx = [tf.reshape(idx, [1 if i != j else dim[0]
                                     for j in range(len(shape))])
                    for i, (idx, dim)
                    in enumerate(zip(multi_idx, idx_shape))]
    # Figure out the base indices for each dimension
    base = __cumprod(shape)
    # Now multiply base indices by each reshaped index
    # to produce the flat index
    return (sum(b * s for b, s in zip(base[1:], reshaped_idx[:-1]))
            + reshaped_idx[-1])
# Shape and slice starts and sizes
shape = (Z, Y, X) = 4, 5, 6
Z0, Y0, X0 = 1, 1, 1
ZS, YS, XS = 3, 3, 4

# Numpy matrix and index
M = np.random.random(size=shape)
idx = [
    np.arange(Z0, Z0 + ZS).reshape(ZS, 1, 1),
    np.arange(Y0, Y0 + YS).reshape(1, YS, 1),
    np.arange(X0, X0 + XS).reshape(1, 1, XS),
]

# Tensorflow matrix and indices
TM = tf.Variable(M)
TF_flat_idx = ravel_multi_index(TM, [
    tf.range(Z0, Z0 + ZS),
    tf.range(Y0, Y0 + YS),
    tf.range(X0, X0 + XS)])
TF_data = tf.gather(tf.reshape(TM, [-1]), TF_flat_idx)

with tf.Session() as S:
    S.run(tf.initialize_all_variables())
    # Obtain data via flat indexing
    data = S.run(TF_data)
    # Check that it agrees with data obtained
    # by numpy smart indexing
    assert np.all(data == M[idx])
However, this only works on tensors of rank 3, due to a (current) limitation restricting broadcasts to tensors of rank 3.
At the moment I can only think of doing a chained gather, transpose, gather, transpose, gather, but this is unlikely to be efficient. e.g.
shape = (8, 9, 10)
A = tf.random_normal(shape)
data = tf.gather(tf.transpose(tf.gather(A, [1, 3]), [1,0,2]), ...)
Any ideas?
It sounds like you want gather_nd.
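For instance, here is a minimal sketch of the sliced block from the question, assuming tf.meshgrid and tf.gather_nd are available (TF 1.x) and reusing TM and the Z/Y/X bounds defined above: build a [ZS, YS, XS, 3] tensor of coordinate triples and let gather_nd do the lookup with no manual raveling.
zi, yi, xi = tf.meshgrid(tf.range(Z0, Z0 + ZS),
                         tf.range(Y0, Y0 + YS),
                         tf.range(X0, X0 + XS),
                         indexing='ij')
nd_idx = tf.stack([zi, yi, xi], axis=-1)  # [ZS, YS, XS, 3] coordinate triples
TF_data = tf.gather_nd(TM, nd_idx)        # the [ZS, YS, XS] block of TM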