I was testing this simple slicing operation in TensorFlow and PyTorch, which I expected to produce matching results in both:
import tensorflow as tf
import numpy as np
import torch
tf_x = tf.random.uniform((4, 64, 64, 3))
pt_x = torch.Tensor(tf_x.numpy())
pt_x = pt_x.permute(0, 3, 1, 2)
# slicing operation
print(np.any(pt_x[:, :, 1:].permute(0, 2, 3, 1).numpy() - tf_x[:, 1:].numpy()))
# > False
pt_x = torch.Tensor(tf_x.numpy())
b, h, w, c = pt_x.shape
pt_x = pt_x.reshape((b, c, h, w))
print(np.any(pt_x.view(b, h, w, c).numpy() - tf_x.numpy())) # False
print(np.any(pt_x[:, :, 1:].reshape(4, 63, 64, 3).numpy() - tf_x[:, 1:].numpy()))
# > True
The problem lies in the last line. Both PyTorch and TF should produce the same value, but they don't. Is this discrepancy caused by my reshaping of the tensor?
On one hand, pt_x is equal to tf_x, which you can verify with np.isclose:
>>> np.isclose(pt_x.view(b, h, w, c).numpy(), tf_x.numpy()).all()
True
On the other hand, you are slicing the two tensors differently: pt_x[:, :, 1:] removes the first element along axis=2, while tf_x[:, 1:] removes the first element along axis=1. Therefore you end up with two distinct tensors with only partially overlapping values, e.g. tf_x[:, 1:][0,-1,-1,-1] versus pt_x[0,-1,-1,-1].
Also keep in mind that tensor layouts differ between TensorFlow and PyTorch: the former uses a channels-last (NHWC) layout, while the latter uses channels-first (NCHW). The operation needed to convert between the two is a permutation, not a reshape.
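To make the distinction concrete, here is a minimal sketch (my own illustration, assuming a random NHWC input) showing that a permutation preserves element correspondence while a reshape scrambles it:
import numpy as np
import tensorflow as tf
import torch
tf_x = tf.random.uniform((4, 64, 64, 3))   # NHWC (channels-last)
pt_x = torch.from_numpy(tf_x.numpy())
# Correct: permute reorders the axes but keeps element correspondence
pt_nchw = pt_x.permute(0, 3, 1, 2)         # NCHW (channels-first)
print(np.allclose(pt_nchw.permute(0, 2, 3, 1).numpy(), tf_x.numpy()))  # True
# Incorrect: reshape reinterprets the flat buffer, scrambling the pixels
pt_bad = pt_x.reshape(4, 3, 64, 64)
print(np.allclose(pt_bad[0, 0].numpy(), tf_x[0, :, :, 0].numpy()))     # False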
Related
First things first: I'm relatively new to TensorFlow.
I'm trying to implement a custom layer in tensorflow.keras and I'm having a relatively hard time trying to achieve the following:
I've got 3 Tensors (x,y,z) of shape (?,49,3,3,32) [where ? is the batch size]
On each Tensor I compute the sum over the 3rd and 4th axes [thus I end up with 3 Tensors of shape (?,49,32)]
By taking an argmax (A) over the above 3 Tensors of shape (?,49,32), I get a single (?,49,32) Tensor
Now I want to use this tensor to select slices from the initial x,y,z Tensors in the following form:
The value of each element in the last dimension of A selects which Tensor to read from
(i.e.: 0 = x, 1 = y, 2 = z)
The position of that element along the last dimension of A is the index of the slice I would like to extract from that Tensor's last dimension.
I've tried to achieve the above using tf.gather but I had no luck. Then I tried using a series of tf.map_fn, which is ugly and computationally costly.
To simplify the above:
let's say we've got an array A of shape (3,3,3,32). Then the numpy equivalent of what I'm trying to achieve is this:
import numpy as np
x = np.random.rand(3,3,32)
y = np.random.rand(3,3,32)
z = np.random.rand(3,3,32)
x_sums = np.sum(x, axis=(0, 1))
y_sums = np.sum(y, axis=(0, 1))
z_sums = np.sum(z, axis=(0, 1))
max_sums = np.argmax([x_sums,y_sums,z_sums],0)
A = np.array([x,y,z])
tmp = []
for i in range(len(max_sums)):
    tmp.append(A[max_sums[i], :, :, i])
output = np.transpose(np.stack(tmp))
Any suggestions?
PS: I tried tf.gather_nd but had no luck
This is how you can do something like that with tf.gather_nd:
import tensorflow as tf
# Make example data
tf.random.set_seed(0)
b = 10 # Batch size
x = tf.random.uniform((b, 49, 3, 3, 32))
y = tf.random.uniform((b, 49, 3, 3, 32))
z = tf.random.uniform((b, 49, 3, 3, 32))
# Stack tensors together
data = tf.stack([x, y, z], axis=2)
# Put reduction axes last
data_t = tf.transpose(data, (0, 1, 5, 2, 3, 4))
# Reduce
s = tf.reduce_sum(data_t, axis=(4, 5))
# Find largest sums
idx = tf.argmax(s, 3)
# Make gather indices
data_shape = tf.shape(data_t, idx.dtype)
bb, ii, jj = tf.meshgrid(*(tf.range(data_shape[i]) for i in range(3)), indexing='ij')
# Gather result
output_t = tf.gather_nd(data_t, tf.stack([bb, ii, jj, idx], axis=-1))
# Reorder axes
output = tf.transpose(output_t, (0, 1, 3, 4, 2))
print(output.shape)
# TensorShape([10, 49, 3, 3, 32])
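As a quick sanity check (my addition, not part of the original answer; assumes eager execution), you can compare one element of the result against direct indexing:
import numpy as np
# data_t has shape (b, 49, 32, 3, 3, 3); idx has shape (b, 49, 32)
expected = data_t.numpy()[0, 0, 0, idx.numpy()[0, 0, 0]]  # (3, 3) slice
np.testing.assert_allclose(output[0, 0, :, :, 0].numpy(), expected)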
I am trying to perform a local reduce with specified slices over a single axis on a 2D array.
I achieved this using numpy's numpy.ufunc.reduceat or numpy.add.reduceat, but I would like to do the same in TensorFlow, since the input to this reduce operation is the output of a TensorFlow convolution.
I came across tf.math.reduce_sum but I am not sure how this can be used in my case.
It would be great if I could do the reduceat operation in TensorFlow so I can take advantage of a GPU.
You can do almost the same using tf.math.segment_sum:
import tensorflow as tf
import numpy as np
def add_reduceat_tf(a, indices, axis=0):
    a = tf.convert_to_tensor(a)
    indices = tf.convert_to_tensor(indices)
    # Transpose if necessary
    transpose = not (isinstance(axis, int) and axis == 0)
    if transpose:
        axis = tf.convert_to_tensor(axis)
        ndims = tf.cast(tf.rank(a), axis.dtype)
        a = tf.transpose(a, tf.concat([[axis], tf.range(axis),
                                       tf.range(axis + 1, ndims)], axis=0))
    # Make segment ids
    r = tf.range(tf.shape(a, out_type=indices.dtype)[0])
    segments = tf.searchsorted(indices, r, side='right')
    # Compute segmented sum and discard first unused segment
    out = tf.math.segment_sum(a, segments)[1:]
    # Transpose back if necessary
    if transpose:
        out = tf.transpose(out, tf.concat([tf.range(1, axis + 1), [0],
                                           tf.range(axis + 1, ndims)], axis=0))
    return out
# Test
np.random.seed(0)
a = np.random.rand(5, 10).astype(np.float32)
indices = [2, 4, 7]
axis = 1
# NumPy computation
out_np = np.add.reduceat(a, indices, axis=axis)
# TF computation
with tf.Graph().as_default(), tf.Session() as sess:
    out = add_reduceat_tf(a, indices, axis=axis)
    out_tf = sess.run(out)
# Check result
print(np.allclose(out_np, out_tf))
# True
You can replace tf.math.segment_sum above with whatever reduction function you want to use. The only difference between this and the actual np.ufunc.reduceat is the special case where indices[i] >= indices[i + 1]. The posted function requires indices to be sorted, and if there were a case where indices[i] == indices[i + 1], the corresponding position i in the output would be zero, not a[indices[i]].
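To make that caveat concrete, here is a small NumPy illustration (my own example) of the indices[i] == indices[i + 1] special case:
import numpy as np
a = np.arange(10, dtype=np.float32)
# np.add.reduceat returns a[indices[i]] when an index is repeated...
print(np.add.reduceat(a, [2, 2, 7]))
# [ 2. 20. 24.]
# ...whereas add_reduceat_tf above would produce 0. in the first position.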
I want to filter a tensor by keeping the 10% largest entries. Is there a TensorFlow function to do that? What would a possible implementation look like? I am looking for something that can handle tensors of shape [N,W,H,C] and [N,W*H*C].
By filter I mean that the shape of the tensor remains the same but only the largest 10% are kept. Thus all entries become zero except the 10% largest.
Is that possible?
The correct way of doing this would be to compute the 90th percentile, for example with tf.contrib.distributions.percentile:
import tensorflow as tf
images = ... # [N, W, H, C]
n = tf.shape(images)[0]
images_flat = tf.reshape(images, [n, -1])
p = tf.contrib.distributions.percentile(images_flat, 90, axis=1, interpolation='higher')
images_top10 = tf.where(images >= tf.reshape(p, [n, 1, 1, 1]),
                        images, tf.zeros_like(images))
If you want to be ready for TensorFlow 2.x, where tf.contrib will be removed, you can instead use TensorFlow Probability, which is where the percentile function will live permanently going forward.
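For reference, a sketch of the TensorFlow Probability equivalent (my addition, assuming the tensorflow_probability package is installed) could look like this:
import tensorflow as tf
import tensorflow_probability as tfp
images = tf.random.uniform((2, 8, 8, 3))  # stand-in for the [N, W, H, C] input
n = tf.shape(images)[0]
images_flat = tf.reshape(images, [n, -1])
p = tfp.stats.percentile(images_flat, 90, axis=1, interpolation='higher')
images_top10 = tf.where(images >= tf.reshape(p, [n, 1, 1, 1]),
                        images, tf.zeros_like(images))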
EDIT: If you want to do the filtering per channel, you can modify the code slightly like this:
import tensorflow as tf
images = ... # [N, W, H, C]
shape = tf.shape(images)
n, c = shape[0], shape[3]
images_flat = tf.reshape(images, [n, -1, c])
p = tf.contrib.distributions.percentile(images_flat, 90, axis=1, interpolation='higher')
images_top10 = tf.where(images >= tf.reshape(p, [n, 1, 1, c]),
                        images, tf.zeros_like(images))
I've not found any built-in method yet. Try this workaround:
import numpy as np
import tensorflow as tf
def filter(tensor, ratio):
    num_entries = tf.reduce_prod(tensor.shape)
    num_to_keep = tf.cast(tf.multiply(ratio, tf.cast(num_entries, tf.float32)), tf.int32)
    # Calculate threshold
    x = tf.contrib.framework.sort(tf.reshape(tensor, [num_entries]))
    threshold = x[-num_to_keep]
    # Filter the tensor
    mask = tf.cast(tf.greater_equal(tensor, threshold), tf.float32)
    return tf.multiply(tensor, mask)
tensor = tf.constant(np.arange(40).reshape(2, 4, 5), dtype=tf.float32)
filtered_tensor = filter(tensor, 0.1)
# Print result
tf.InteractiveSession()
print(tensor.eval())
print(filtered_tensor.eval())
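If you are on a newer TensorFlow where tf.contrib has been removed, a similar workaround (my sketch, not the original code) can use tf.sort from core instead:
import numpy as np
import tensorflow as tf
def filter_top(tensor, ratio):
    # Number of entries to keep
    num_entries = tf.size(tensor)
    num_to_keep = tf.cast(tf.cast(num_entries, tf.float32) * ratio, tf.int32)
    # Threshold is the smallest value among the kept entries
    x = tf.sort(tf.reshape(tensor, [-1]))
    threshold = x[-num_to_keep]
    # Zero out everything below the threshold
    mask = tf.cast(tensor >= threshold, tensor.dtype)
    return tensor * mask
tensor = tf.constant(np.arange(40).reshape(2, 4, 5), dtype=tf.float32)
print(filter_top(tensor, 0.1))  # keeps the 4 largest entries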
The code below works well but is very slow. How can I vectorize the color substitution to avoid the Python for loop?
processed_image = np.empty(initial_image.shape)
for i, j in np.ndindex(initial_image.shape[:2]):
    l_, a, b = initial_image[i, j, :]
    idx = mapping[a + 128, b + 128]
    a, b = new_colors[tuple(idx)]
    processed_image[i, j] = l_, a, b
I have an image initial_image in CIELAB space as a numpy array of shape (some height, some width, 3). I need to produce a corrected image by changing the a and b color components of the image using mapping. mapping is a numpy array of shape (255, 255, 2). It gives me indices that can be used to get the corrected a and b colors from new_colors. new_colors is of shape (table height, table width, 2).
Solutions that use scikit-image will also be helpful.
You can use advanced indexing:
# chain the two maps
chained = new_colors[(*np.moveaxis(mapping, 2, 0),)]
# split color channels
c1, *c23 = np.moveaxis(initial_image, 2, 0)
# add 128
c23 = *map(np.add, c23, (128, 128)),
# apply chained map
processed_image_2 = np.concatenate([c1[..., None], chained[c23]], axis=2)
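A quick equivalence check against the original loop (my own test harness with made-up shapes and integer data, since the indexing requires integer a and b channels):
import numpy as np
rng = np.random.default_rng(0)
initial_image = rng.integers(-128, 127, (8, 8, 3))   # small stand-in image
mapping = rng.integers(0, 4, (256, 256, 2))
new_colors = rng.integers(-128, 127, (4, 4, 2))
# Reference: the original loop
processed_image = np.empty(initial_image.shape)
for i, j in np.ndindex(initial_image.shape[:2]):
    l_, a, b = initial_image[i, j, :]
    idx = mapping[a + 128, b + 128]
    a, b = new_colors[tuple(idx)]
    processed_image[i, j] = l_, a, b
# Vectorized version from above
chained = new_colors[(*np.moveaxis(mapping, 2, 0),)]
c1, *c23 = np.moveaxis(initial_image, 2, 0)
c23 = *map(np.add, c23, (128, 128)),
processed_image_2 = np.concatenate([c1[..., None], chained[c23]], axis=2)
print(np.array_equal(processed_image, processed_image_2))  # True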
The general solution to this question is being worked on in this github issue, but I was wondering whether there are workarounds using tf.gather (or something else) to achieve array indexing with a multi-index. One solution I came up with was to broadcast-multiply each index in the multi-index by the cumulative product of the tensor shape, which produces indices suitable for indexing the flattened tensor:
import tensorflow as tf
import numpy as np
def __cumprod(l):
    # Get the length and make a copy
    ll = len(l)
    l = [v for v in l]
    # Reverse cumulative product
    for i in range(ll - 1):
        l[ll - i - 2] *= l[ll - i - 1]
    return l
def ravel_multi_index(tensor, multi_idx):
    """
    Returns a tensor suitable for use as the index
    on a gather operation on argument tensor.
    """
    if not isinstance(tensor, (tf.Variable, tf.Tensor)):
        raise TypeError('tensor should be a tf.Variable or tf.Tensor')
    if not isinstance(multi_idx, list):
        multi_idx = [multi_idx]
    # Shape of the tensor in ints
    shape = [i.value for i in tensor.get_shape()]
    if len(shape) != len(multi_idx):
        raise ValueError("Tensor rank is different "
                         "from the multi_idx length.")
    # Work out the shape of each tensor in the multi_idx
    idx_shape = [tuple(j.value for j in i.get_shape()) for i in multi_idx]
    # Ensure that each multi_idx tensor is rank 1
    assert all(len(i) == 1 for i in idx_shape)
    # Create a list of reshaped indices. New shape will be
    # [1, 1, dim[0], 1] for the 3rd index in multi_idx,
    # for example.
    reshaped_idx = [tf.reshape(idx, [1 if i != j else dim[0]
                                     for j in range(len(shape))])
                    for i, (idx, dim)
                    in enumerate(zip(multi_idx, idx_shape))]
    # Figure out the base indices for each dimension
    base = __cumprod(shape)
    # Now multiply base indices by each reshaped index
    # to produce the flat index
    return (sum(b * s for b, s in zip(base[1:], reshaped_idx[:-1]))
            + reshaped_idx[-1])
# Shape and slice starts and sizes
shape = (Z, Y, X) = 4, 5, 6
Z0, Y0, X0 = 1, 1, 1
ZS, YS, XS = 3, 3, 4
# Numpy matrix and index
M = np.random.random(size=shape)
idx = [
    np.arange(Z0, Z0 + ZS).reshape(ZS, 1, 1),
    np.arange(Y0, Y0 + YS).reshape(1, YS, 1),
    np.arange(X0, X0 + XS).reshape(1, 1, XS),
]
# Tensorflow matrix and indices
TM = tf.Variable(M)
TF_flat_idx = ravel_multi_index(TM, [
    tf.range(Z0, Z0 + ZS),
    tf.range(Y0, Y0 + YS),
    tf.range(X0, X0 + XS)])
TF_data = tf.gather(tf.reshape(TM,[-1]), TF_flat_idx)
with tf.Session() as S:
    S.run(tf.initialize_all_variables())
    # Obtain data via flat indexing
    data = S.run(TF_data)
    # Check that it agrees with data obtained
    # by numpy smart indexing
    assert np.all(data == M[tuple(idx)])
However, this only works on tensors of rank 3 due to this (current) limitation, which restricts broadcasting to tensors of rank 3.
At the moment I can only think of doing a chained gather, transpose, gather, transpose, gather, but this is unlikely to be efficient. For example:
shape = (8, 9, 10)
A = tf.random_normal(shape)
data = tf.gather(tf.transpose(tf.gather(A, [1, 3]), [1,0,2]), ...)
Any ideas?
It sounds like you want tf.gather_nd.
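For the slicing example in the question, a minimal tf.gather_nd sketch (my adaptation, using eager TF 2.x for brevity) builds the multi-index with tf.meshgrid instead of raveling:
import numpy as np
import tensorflow as tf
# Shape and slice starts and sizes, as in the question
(Z, Y, X) = 4, 5, 6
Z0, Y0, X0 = 1, 1, 1
ZS, YS, XS = 3, 3, 4
M = np.random.random(size=(Z, Y, X))
TM = tf.constant(M)
# Build a (ZS, YS, XS, 3) tensor of multi-indices
zz, yy, xx = tf.meshgrid(tf.range(Z0, Z0 + ZS),
                         tf.range(Y0, Y0 + YS),
                         tf.range(X0, X0 + XS), indexing='ij')
data = tf.gather_nd(TM, tf.stack([zz, yy, xx], axis=-1))
# Agrees with NumPy advanced slicing
assert np.all(data.numpy() == M[Z0:Z0+ZS, Y0:Y0+YS, X0:X0+XS])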