Keras Lambda layer, how to use multiple arguments - python

I have this function:
def sampling(x):
    zeros = x * 0
    samples = tf.random.categorical(tf.math.log(x), 1)
    samples = tf.squeeze(tf.one_hot(samples, depth=2), axis=1)
    return zeros + samples
That I call from this layer:
x = layers.Lambda(sampling, name="lambda")(x)
But I need to change the depth variable in the sampling function, so I would need something like this:
def sampling(x, depth):
But how can I make it work with the Lambda layer?
Thanks a lot

Use a lambda function inside the Lambda layer...
def sampling(x, depth):
    zeros = x * 0
    samples = tf.random.categorical(tf.math.log(x), 1)
    samples = tf.squeeze(tf.one_hot(samples, depth=depth), axis=1)
    return zeros + samples
usage:
Lambda(lambda t: sampling(t, depth=3), name="lambda")(x)
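Alternatively, the Lambda layer accepts an arguments dict of extra keyword arguments that are passed through to the wrapped function, which avoids the nested lambda. The same call would then read:

x = layers.Lambda(sampling, arguments={"depth": 3}, name="lambda")(x)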

Related

Store array of tensors in for loop

I have a @tf.function-decorated function. Inside the function, I would like to draw from a distribution and compute some value several times (let's call this procedure f(x)).
How can I do this in TensorFlow 2.0? I can't use numpy arrays as I would like to use the @tf.function decorator.
A numpy implementation would look like:
reps = 4
store = np.zeros((n, reps))
for i in range(reps):
    store[:, i] = f(x)  # f(x) is shape (n,)
The goal would then be to compute the row means of store.
This should be easy but I haven't been able to work out how to do it!
Something like this maybe:
import tensorflow as tf

def f():
    return tf.random.normal((10,))

@tf.function
def store_this():
    reps = 4
    n = 10
    store = tf.zeros((n, reps))
    values = [f() for _ in range(reps)]
    # Build (row, column) index pairs that fill store column by column
    indices = tf.stack([tf.tile(tf.range(n), multiples=[reps]),
                        tf.repeat(tf.range(reps), repeats=n)], axis=-1)
    return tf.tensor_scatter_nd_update(store, indices, tf.reshape(values, [-1]))

store_this()
If f takes a one-dimensional tensor as input, this is a shorter alternative:
@tf.function
def f(x):
    return tf.random.normal((10,))

x = tf.constant([1.0, 2.0])
reps = 4

def store_this(fp, x, reps):
    # Map f over reps copies of x, then transpose so the draws become columns
    return tf.transpose(tf.map_fn(fp, tf.tile(tf.expand_dims(x, 0), [reps, 1])))

store_this(f, x, reps)
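Since the stated goal is the row means of store, an even simpler sketch (assuming, as above, that f returns a shape-(n,) tensor) stacks the draws along a second axis and reduces over it, with no scatter needed:

import tensorflow as tf

def f():
    return tf.random.normal((10,))

@tf.function
def row_means(reps=4):
    store = tf.stack([f() for _ in range(reps)], axis=1)  # shape (n, reps)
    return tf.reduce_mean(store, axis=1)                  # shape (n,)

row_means()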

Python Tensorflow itertools groupby: using itertools.groupby() in tf.data.Dataset.filter()

I am trying to apply a filter to a tf.data.Dataset which removes any string in which a single character group makes up more than 50% of the string. Here is my Dataset:
import tensorflow as tf

strings = [
    ["ABCDEFGABCDEFG\tUseless\tLabel1"],
    ["AAAAAAAADEFGAB\tUseless\tLabel2"],
    ["HIJKLMNHIJKLMN\tUseless\tLabel3"],
    ["HIJKLMMMMMMMNH\tUseless\tLabel4"],
]
ds = tf.data.Dataset.from_tensor_slices(strings)

def _clean(x):
    x = tf.strings.split(x, "\t")
    return x[0], x[2]

def _filter(x):
    s = tf.strings.bytes_split(x)
    _, _, count = tf.unique_with_counts(s)
    percent = tf.reduce_max(count) / tf.shape(s)[0]
    return tf.less_equal(percent, 0.5)

ds = ds.map(_clean)
ds = ds.filter(lambda x, y: _filter(x))

for x, y in ds:
    tf.print(x, y)
This creates the following error:
TypeError: Failed to convert elements of tf.RaggedTensor(values=Tensor("StringsByteSplit/StringSplit:1", shape=(None,), dtype=string), row_splits=Tensor("StringsByteSplit/RaggedFromValueRowIds/RowPartitionFromValueRowIds/concat:0", shape=(None,), dtype=int64)) to Tensor. Consider casting elements to a supported type.
Any way to solve this problem in a tf.data.Dataset graph?
You can solve this using tf.strings:
import tensorflow as tf

def filter_data(x):
    # Insert a space between every character, then split into characters
    s = tf.strings.strip(tf.strings.regex_replace(x, '', ' '))
    s = tf.strings.split(s, sep=" ")
    _, _, count = tf.unique_with_counts(s)
    return tf.less_equal(tf.reduce_max(count) / tf.shape(s)[0], 0.25)

ds = tf.data.Dataset.from_tensor_slices([["AAAABBBCC", "Label1"],
                                         ["AAAAAABC", "Label2"],
                                         ["ABBAABCCCCAB", "Label3"],
                                         ["ABDC", "Label4"]])
ds = ds.map(lambda x: (x[0], x[1]))
ds = ds.filter(lambda x, y: filter_data(x))

for x, y in ds:
    tf.print(x, y)
"ABDC" "Label4"
However, I would reconsider the threshold of 25%, since all the samples in your original example dataset are above it and would therefore be dropped. I added a fourth example (ABDC) to your dataset to show that the method works with tf.less_equal.
Take AAAABBBCC, for example: A occurs most often (4 times), and dividing by the total length of the string (9) gives 4/9 ≈ 0.44, so the sample is excluded from the dataset. Maybe this behavior is desired; anyway, I just wanted to inform you about it.
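For completeness, the original bytes_split approach can also be made to work. A sketch (untested against your exact pipeline) flattens the possibly ragged result to a plain tensor before counting:

def _filter(x):
    s = tf.strings.bytes_split(x)
    # bytes_split returns a RaggedTensor for non-scalar inputs;
    # this check runs at trace time, so plain isinstance is fine
    if isinstance(s, tf.RaggedTensor):
        s = s.flat_values
    _, _, count = tf.unique_with_counts(s)
    percent = tf.reduce_max(count) / tf.size(s)
    return tf.less_equal(percent, 0.5)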

If-else in @tf.function

I want to define a custom LearningRateSchedule, but AutoGraph seems to have trouble converting it. The following code works fine without @tf.function, but raises an error with @tf.function:
import tensorflow as tf
from tensorflow.keras import layers

def linear_interpolation(l, r, alpha):
    return l + alpha * (r - l)

class TFPiecewiseSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    # This class currently cannot be used inside @tf.function;
    # see the error below for details.
    def __init__(self, endpoints, end_learning_rate=None, name=None):
        """Piecewise schedule.
        endpoints: [(int, int)]
            list of pairs `(time, value)` meaning that the schedule should output
            `value` when `t == time`. All the values for time must be sorted in
            increasing order. When t is between two times, e.g. `(time_a, value_a)`
            and `(time_b, value_b)` such that `time_a <= t < time_b`, the schedule outputs
            `interpolation(value_a, value_b, alpha)` where alpha is the fraction of
            time passed between `time_a` and `time_b` for time `t`.
        end_learning_rate: float
            if the value is requested outside of all the intervals specified in
            `endpoints`, this value is returned. If None, the last endpoint value
            is used.
        """
        super().__init__()
        idxes = [e[0] for e in endpoints]
        assert idxes == sorted(idxes)
        self.end_learning_rate = end_learning_rate or endpoints[-1][1]
        self.endpoints = endpoints
        self.name = name

    def __call__(self, step):
        if step < self.endpoints[0][0]:
            return self.endpoints[0][1]
        else:
            for (l_t, l), (r_t, r) in zip(self.endpoints[:-1], self.endpoints[1:]):
                if l_t <= step < r_t:
                    alpha = float(step - l_t) / (r_t - l_t)
                    return linear_interpolation(l, r, alpha)
            # t does not belong to any of the pieces, so fall back
            assert self.end_learning_rate is not None
            return self.end_learning_rate

    def get_config(self):
        return dict(
            endpoints=self.endpoints,
            end_learning_rate=self.end_learning_rate,
            name=self.name,
        )

lr = TFPiecewiseSchedule([[10, 1e-3], [20, 1e-4]])

@tf.function
def f(x):
    l = layers.Dense(10)
    with tf.GradientTape() as tape:
        y = l(x)
        loss = tf.reduce_mean(y**2)
    grads = tape.gradient(loss, l.trainable_variables)
    opt = tf.keras.optimizers.Adam(lr)
    opt.apply_gradients(zip(grads, l.trainable_variables))

f(tf.random.normal((2, 3)))
The error message says:
:10 f *
    opt.apply_gradients(zip(grads, l.trainable_variables))
/Users/aptx4869/anaconda3/envs/drl/lib/python3.7/site-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py:437 apply_gradients
    apply_state = self._prepare(var_list)
/Users/aptx4869/anaconda3/envs/drl/lib/python3.7/site-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py:614 _prepare
    self._prepare_local(var_device, var_dtype, apply_state)
/Users/aptx4869/anaconda3/envs/drl/lib/python3.7/site-packages/tensorflow_core/python/keras/optimizer_v2/adam.py:154 _prepare_local
    super(Adam, self)._prepare_local(var_device, var_dtype, apply_state)
/Users/aptx4869/anaconda3/envs/drl/lib/python3.7/site-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py:620 _prepare_local
    lr_t = array_ops.identity(self._decayed_lr(var_dtype))
/Users/aptx4869/anaconda3/envs/drl/lib/python3.7/site-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py:672 _decayed_lr
    lr_t = math_ops.cast(lr_t(local_step), var_dtype)
:32 __call__
    if step < self.endpoints[0][0]:
/Users/aptx4869/anaconda3/envs/drl/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:765 __bool__
    self._disallow_bool_casting()
/Users/aptx4869/anaconda3/envs/drl/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:531 _disallow_bool_casting
    "using a tf.Tensor as a Python bool")
/Users/aptx4869/anaconda3/envs/drl/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:518 _disallow_when_autograph_enabled
    " decorating it directly with @tf.function.".format(task))
OperatorNotAllowedInGraphError: using a tf.Tensor as a Python bool is not allowed: AutoGraph did not convert this function. Try decorating it directly with @tf.function.
I think the error arises because of the if statement, so I replaced the body of the __call__ function with the following code. But almost the same error arises.
def compute_lr(step):
    for (l_t, l), (r_t, r) in zip(self.endpoints[:-1], self.endpoints[1:]):
        if l_t <= step < r_t:
            alpha = float(step - l_t) / (r_t - l_t)
            return linear_interpolation(l, r, alpha)
    # t does not belong to any of the pieces, so fall back
    assert self.end_learning_rate is not None
    return self.end_learning_rate

return tf.cond(tf.less(step, self.endpoints[0][0]),
               lambda: self.endpoints[0][1],
               lambda: compute_lr(step))
What should I do to make the code work as I wish?
The error message is garbled by the markdown formatter, but it seems that the __call__ function itself was not processed by AutoGraph (in the error message, converted functions are marked with an asterisk). This is a bug in the Adam optimizer. Anyway, you can annotate __call__ directly with tf.function, and it will be picked up:
@tf.function
def __call__(self, step):
That said, there are a few things in the code that AutoGraph doesn't like: zip, returning from a loop, chained inequalities. It's safer to use basic constructs when possible; sadly, the errors you still get are quite confusing. Rewriting it like this should work:
@tf.function
def __call__(self, step):
    if step < self.endpoints[0][0]:
        return self.endpoints[0][1]
    else:
        # Can't return from a loop
        lr = self.end_learning_rate
        # Since it needs to break based on the value of a tensor, the loop
        # needs to be a tf.while_loop
        for pair in tf.stack([self.endpoints[:-1], self.endpoints[1:]], axis=1):
            left, right = tf.unstack(pair)
            l_t, l = tf.unstack(left)
            r_t, r = tf.unstack(right)
            # Chained inequalities are not supported yet
            if l_t <= step and step < r_t:
                alpha = float(step - l_t) / (r_t - l_t)
                lr = linear_interpolation(l, r, alpha)
                break
        return lr
There is one last issue: tf.function doesn't like it when the traced function creates variables, so you need to move the creation of the layer and the optimizer outside:
lr = TFPiecewiseSchedule([[10, 1e-3], [20, 1e-4]])
l = layers.Dense(10)
opt = tf.keras.optimizers.Adam(lr)

@tf.function
def f(x):
    ...
I hope this helps!
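Alternatively, the same piecewise-linear schedule can be computed without any Python loop, tf.cond, or break, which sidesteps these AutoGraph limitations entirely. A minimal sketch (the class name is hypothetical; note that outside the endpoint range it clamps to the boundary values rather than using a separate end_learning_rate):

import tensorflow as tf

class VectorizedPiecewiseSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, endpoints):
        super().__init__()
        self.times = tf.constant([float(t) for t, _ in endpoints])
        self.values = tf.constant([float(v) for _, v in endpoints])

    def __call__(self, step):
        # Clamp step into the schedule's range, then locate its interval
        t = tf.clip_by_value(tf.cast(step, tf.float32),
                             self.times[0], self.times[-1])
        i = tf.searchsorted(self.times, t[tf.newaxis], side='right')[0]
        i = tf.clip_by_value(i, 1, tf.size(self.times) - 1)
        l_t, r_t = tf.gather(self.times, i - 1), tf.gather(self.times, i)
        l, r = tf.gather(self.values, i - 1), tf.gather(self.values, i)
        alpha = (t - l_t) / (r_t - l_t)
        return l + alpha * (r - l)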

Implementing heaviside step function in TensorFlow

I want to create a Heaviside step function in TensorFlow. Since the Heaviside function is not differentiable, I also need to choose an approximation for the derivative and define a custom gradient, so the full implementation looks like this:
import tensorflow as tf

@tf.RegisterGradient("HeavisideGrad")
def _heaviside_grad(unused_op: tf.Operation, grad: tf.Tensor):
    x = unused_op.inputs[0]
    # During backpropagation, heaviside behaves like sigmoid
    return tf.sigmoid(x) * (1 - tf.sigmoid(x)) * grad

def heaviside(x: tf.Tensor, g: tf.Graph = tf.get_default_graph()):
    custom_grads = {
        "Sign": "HeavisideGrad"
    }
    with g.gradient_override_map(custom_grads):
        # TODO: heaviside(0) currently returns 0. We need heaviside(0) = 1
        sign = tf.sign(x)
        # tf.stop_gradient is needed to exclude tf.maximum from the derivative
        step_func = sign + tf.stop_gradient(tf.maximum(0.0, sign) - sign)
    return step_func
There is one caveat in my implementation: tf.sign(0) returns zero, so heaviside(0) also returns zero, and I want heaviside(0) to return 1. How can I achieve such behavior?
A very hacky way would be to use
1 - max(0.0, sign(-x))
as your step function instead of
max(0.0, sign(x))
Another option would be to use greater_equal and cast the result to your desired type, and override its gradient with the sigmoid override you already have.
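Spelled out with TensorFlow ops, the hacky variant above would read as follows (a sketch; it returns 1 at x == 0 because tf.sign(0) is 0):

step_func = 1.0 - tf.maximum(0.0, tf.sign(-x))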
Ok, I think I figured it out. Many thanks to etarion, who pointed out the correct approach to solving my issue.
So the basic idea is to use tf.greater_equal instead of the combination of tf.sign and maximum. The custom gradient is applied to the tf.identity operation.
Here is the updated implementation of the heaviside function:
import uuid

import tensorflow as tf

@tf.RegisterGradient("HeavisideGrad")
def _heaviside_grad(unused_op: tf.Operation, grad: tf.Tensor):
    return tf.maximum(0.0, 1.0 - tf.abs(unused_op.inputs[0])) * grad

def heaviside(x: tf.Tensor, g: tf.Graph = tf.get_default_graph()):
    custom_grads = {
        "Identity": "HeavisideGrad"
    }
    with g.gradient_override_map(custom_grads):
        i = tf.identity(x, name="identity_" + str(uuid.uuid1()))
        ge = tf.greater_equal(x, 0, name="ge_" + str(uuid.uuid1()))
        # tf.stop_gradient is needed to exclude tf.to_float from the derivative
        step_func = i + tf.stop_gradient(tf.to_float(ge) - i)
    return step_func
This would make the unit step function, using only TensorFlow APIs, so the result is still a tensor:
# in eager mode
def heaviside(v):
    return 1 - tf.reduce_max(tf.constant([0, -tf.sign(v).numpy()], tf.float32))
In TensorFlow 2, it is better to use the decorator @tf.custom_gradient:
@tf.custom_gradient
def heaviside(X):
    # This custom op is converted to graph; no 'if'/'else' allowed,
    # so use 'tf.cond'
    List = []
    for I in range(BSIZE):  # BSIZE (batch size) is assumed to be defined elsewhere
        Item = tf.cond(X[I] < 0,
                       lambda: tf.constant([0], tf.float32),
                       lambda: tf.constant([1], tf.float32))
        List.append(Item)
    U = tf.stack(List)

    # Heaviside half-maximum formula
    # U = (tf.sign(X) + 1) / 2

    # Div is the incoming gradient during differentiation
    def grad(Div):
        return Div * 1  # Heaviside has no gradient; use 1.

    return U, grad
The easiest fix for your code is to add a small number to the result of tf.sign() and take the sign again. This results in 1 for 0:
sign = tf.sign(tf.sign(x) + 0.1)
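For current TensorFlow 2 code, the greater_equal idea combines naturally with tf.custom_gradient. A minimal sketch, reusing the sigmoid surrogate gradient from the question:

import tensorflow as tf

@tf.custom_gradient
def heaviside(x):
    # Forward pass: 1 where x >= 0, else 0 (so heaviside(0) == 1)
    y = tf.cast(x >= 0, x.dtype)

    def grad(dy):
        # Backward pass: pretend the forward op was a sigmoid
        s = tf.sigmoid(x)
        return dy * s * (1 - s)

    return y, grad

x = tf.Variable([-1.0, 0.0, 2.0])
with tf.GradientTape() as tape:
    y = heaviside(x)
print(y.numpy())                    # [0. 1. 1.]
print(tape.gradient(y, x).numpy())  # sigmoid'(x) at each element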

Theano reshape – index out of bounds

I can't seem to get Theano to reshape my tensors as I want it to. The reshaping in the code below is supposed to keep the first keep_dims dimensions and flatten all remaining ones into a single axis.
The code fails with IndexError: index out of bounds on the reshape line if I run it with a test value. Otherwise, the function seems to compile, but fails upon the first real input with ValueError: total size of new array must be unchanged.
When I tried using just numpy for equivalent code, it worked normally. Is there anything I am doing wrong? Or is there any easy way to see the resulting dimensions that are used for the reshaping (ipdb does not help since everything is a Theano variable)?
import theano
import theano.tensor as T
import numpy as np
from numpy import random as rnd

theano.config.compute_test_value = 'warn'
theano.config.optimizer = 'None'

class Layer(object):
    def __init__(self, name):
        self.name = name
        self.inputs = []
        self.outputs = []

    def get_init_weights(self, shape):
        rows, cols = shape
        w_init = np.reshape(np.asarray([rnd.uniform(-0.05, 0.05)
                                        for _ in xrange(rows * cols)]),
                            newshape=(rows, cols))
        return w_init

class Embedding(Layer):
    def __init__(self, name, dict_size, width, init='uniform_005'):
        super(Embedding, self).__init__(name)
        self.width = width
        self.dict_size = dict_size
        e_init = self.get_init_weights((dict_size, width))
        self.e = theano.shared(value=e_init, name=self.name)

    def connect(self, inputs):
        output = self.e[inputs]
        self.inputs.append(inputs)
        self.outputs.append(output)
        return output

class Flatten(Layer):
    def __init__(self, name, keep_dims=1):
        super(Flatten, self).__init__(name)
        self.params = []
        self.keep_dims = keep_dims

    def connect(self, inputs):
        keep_dims = self.keep_dims
        # this line fails
        output = inputs.reshape(inputs.shape[0:keep_dims] +
                                (T.prod(inputs.shape[keep_dims:]),),
                                ndim=(keep_dims + 1))
        return output

if __name__ == '__main__':
    x = T.itensor3('x')  # batch size * embedding size * number of different embeddings
    x.tag.test_value = np.random.randint(0, 50, (5, 20, 3)).astype('int32')
    emb_layer = Embedding('e', dict_size=50, width=10)
    y = emb_layer.connect(x)
    flat_layer = Flatten('f')
    y = flat_layer.connect(y)
    func = theano.function([x], y, allow_input_downcast=True)
The problem relates to how you're combining the two components of the new shape. The reshape command requires an lvector for the new shape.
Since you're using the test values mechanism, you can debug this problem by simply printing the test-value bits and pieces. For example, I used:
print inputs.shape.tag.test_value
print inputs.shape[0:keep_dims].tag.test_value
print inputs.shape[keep_dims:].tag.test_value
print T.prod(inputs.shape[keep_dims:]).tag.test_value
print (inputs.shape[0:keep_dims] + (T.prod(inputs.shape[keep_dims:]),)).tag.test_value
print T.concatenate([inputs.shape[0:keep_dims], [T.prod(inputs.shape[keep_dims:])]]).tag.test_value
This shows a fix to the problem: using T.concatenate to combine the keep_dims and the product of the remaining dims.
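Applied to the Flatten layer above, the fixed connect method might look like this (a sketch following the T.concatenate line from the debugging output):

def connect(self, inputs):
    keep_dims = self.keep_dims
    # Build the new shape as a single lvector, as reshape expects
    new_shape = T.concatenate([inputs.shape[0:keep_dims],
                               [T.prod(inputs.shape[keep_dims:])]])
    return inputs.reshape(new_shape, ndim=keep_dims + 1)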
