If-else in @tf.function - python

I want to define a custom LearningRateSchedule, but AutoGraph seems to have trouble converting it. The following code works fine without @tf.function, but it raises an error when used with @tf.function.
def linear_interpolation(l, r, alpha):
    """Blend linearly between two values.

    Returns `l` when `alpha` is 0 and `r` when `alpha` is 1; any other
    `alpha` moves proportionally along (or past) the segment from `l` to `r`.
    """
    span = r - l
    return l + alpha * span
class TFPiecewiseSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Piecewise-linear learning-rate schedule built from `(time, value)` pairs.

    NOTE: `__call__` branches on `step` with plain Python `if`/`for`
    statements, so it currently cannot be used inside `@tf.function`:
    there the comparisons produce tensors, and using a tensor as a Python
    bool raises `OperatorNotAllowedInGraphError`.
    """

    def __init__(self, endpoints, end_learning_rate=None, name=None):
        """Piecewise schedule.

        endpoints: [(int, float)]
            list of pairs `(time, value)` meaning that the schedule should
            output `value` when `t == time`. All the values for time must be
            sorted in increasing order. When t is between two times, e.g.
            `(time_a, value_a)` and `(time_b, value_b)`, such that
            `time_a <= t < time_b`, the schedule outputs
            `linear_interpolation(value_a, value_b, alpha)` where alpha is
            the fraction of time passed between `time_a` and `time_b` at `t`.
        end_learning_rate: float
            value returned when `t` is not covered by any interval after the
            first endpoint (i.e. at or past the last time). If None, the
            value of the last endpoint is used.
        name: str
            optional name, stored on the instance and echoed by `get_config`.

        Raises AssertionError if the endpoint times are not sorted.
        """
        super().__init__()
        idxes = [e[0] for e in endpoints]
        assert idxes == sorted(idxes)
        # Compare against None explicitly: `end_learning_rate or ...` would
        # silently replace a legitimate 0.0 with the last endpoint's value.
        if end_learning_rate is None:
            end_learning_rate = endpoints[-1][1]
        self.end_learning_rate = end_learning_rate
        self.endpoints = endpoints
        self.name = name

    def __call__(self, step):
        """Return the learning rate for `step` (eager execution only)."""
        if step < self.endpoints[0][0]:
            # Before the first endpoint: hold the first value.
            return self.endpoints[0][1]
        else:
            for (l_t, l), (r_t, r) in zip(self.endpoints[:-1], self.endpoints[1:]):
                if l_t <= step < r_t:
                    # Fraction of the interval [l_t, r_t) elapsed at `step`.
                    alpha = float(step - l_t) / (r_t - l_t)
                    return linear_interpolation(l, r, alpha)
            # t does not belong to any of the pieces, so doom.
            assert self.end_learning_rate is not None
            return self.end_learning_rate

    def get_config(self):
        """Return the constructor arguments as a dict."""
        return dict(
            endpoints=self.endpoints,
            end_learning_rate=self.end_learning_rate,
            # __init__ stores the name as `self.name`; `self._name` is never set.
            name=self.name,
        )
lr = TFPiecewiseSchedule([[10, 1e-3], [20, 1e-4]])


# The decorator is what triggers graph tracing; without it the function runs
# eagerly and the error reported for this snippet does not occur.
@tf.function
def f(x):
    """Run one Adam step on a fresh Dense layer, minimising mean(y**2)."""
    # The layer and the optimizer are both created inside the traced function.
    l = layers.Dense(10)
    with tf.GradientTape() as tape:
        y = l(x)
        loss = tf.reduce_mean(y**2)
    grads = tape.gradient(loss, l.trainable_variables)
    opt = tf.keras.optimizers.Adam(lr)
    opt.apply_gradients(zip(grads, l.trainable_variables))


f(tf.random.normal((2, 3)))
The error message says:
:10 f *
opt.apply_gradients(zip(grads, l.trainable_variables))
/Users/aptx4869/anaconda3/envs/drl/lib/python3.7/site-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py:437 apply_gradients
apply_state = self._prepare(var_list)
/Users/aptx4869/anaconda3/envs/drl/lib/python3.7/site-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py:614 _prepare
self._prepare_local(var_device, var_dtype, apply_state)
/Users/aptx4869/anaconda3/envs/drl/lib/python3.7/site-packages/tensorflow_core/python/keras/optimizer_v2/adam.py:154 _prepare_local
super(Adam, self)._prepare_local(var_device, var_dtype, apply_state)
/Users/aptx4869/anaconda3/envs/drl/lib/python3.7/site-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py:620 _prepare_local
lr_t = array_ops.identity(self._decayed_lr(var_dtype))
/Users/aptx4869/anaconda3/envs/drl/lib/python3.7/site-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py:672 _decayed_lr
lr_t = math_ops.cast(lr_t(local_step), var_dtype)
:32 call
if step < self.endpoints[0][0]:
/Users/aptx4869/anaconda3/envs/drl/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:765 bool
self._disallow_bool_casting()
/Users/aptx4869/anaconda3/envs/drl/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:531 _disallow_bool_casting
"using a tf.Tensor as a Python bool")
/Users/aptx4869/anaconda3/envs/drl/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:518 _disallow_when_autograph_enabled
" decorating it directly with @tf.function.".format(task))
OperatorNotAllowedInGraphError: using a tf.Tensor as a Python bool is not allowed: AutoGraph did not convert this function. Try decorating it directly with @tf.function.
I think the error arises because of the if statement, so I replace the content of the __call__ function with the following code. But almost the same error arises.
def compute_lr(step):
    # Scan consecutive endpoint pairs for the interval that contains `step`.
    for (l_t, l), (r_t, r) in zip(self.endpoints[:-1], self.endpoints[1:]):
        if l_t <= step < r_t:
            # Fraction of the interval [l_t, r_t) elapsed at `step`.
            alpha = float(step - l_t) / (r_t - l_t)
            return linear_interpolation(l, r, alpha)
    # t does not belong to any of the pieces, so doom.
    assert self.end_learning_rate is not None
    return self.end_learning_rate
# Pick the first endpoint's value when `step` precedes the first endpoint
# time; otherwise defer to compute_lr.
return tf.cond(tf.less(step, self.endpoints[0][0]), lambda: self.endpoints[0][1], lambda: compute_lr(step))
What should I do to make the code work as I wish?

The error message is garbled by the markdown formatter, but it seems that the __call__ function itself was not processed by AutoGraph. In the error message, converted functions are marked with an asterisk. This is a bug in the Adam optimizer. Anyway, if you annotate it directly with tf.function, it will be picked up:
@tf.function
def __call__(self, step):
That said, there are a few things in the code that AutoGraph doesn't like: zip, returning from a loop, chained inequalities - it's safer to use basic constructs when possible. Sadly, the errors you get are still quite confusing. Rewriting it like this should work:
@tf.function
def __call__(self, step):
    """Return the learning rate for `step`, using AutoGraph-friendly constructs.

    Before the first endpoint the first value is returned; inside an
    interval the two surrounding values are linearly interpolated; otherwise
    `self.end_learning_rate` is returned.
    """
    # Work in float32 throughout so an integer `step` (or all-integer
    # endpoints) cannot cause a dtype mismatch in the comparisons below.
    step = tf.cast(step, tf.float32)
    if step < self.endpoints[0][0]:
        return self.endpoints[0][1]
    else:
        # Can't return from a loop
        lr = self.end_learning_rate
        pairs = tf.cast(
            tf.stack([self.endpoints[:-1], self.endpoints[1:]], axis=1),
            tf.float32)
        # Since it needs to break based on the value of a tensor, loop
        # needs to be a tf.while_loop
        for pair in pairs:
            left, right = tf.unstack(pair)
            l_t, l = tf.unstack(left)
            r_t, r = tf.unstack(right)
            # Chained inequalities not supported yet
            if l_t <= step and step < r_t:
                # Both operands are float32, so this is already true division.
                alpha = (step - l_t) / (r_t - l_t)
                lr = linear_interpolation(l, r, alpha)
                break
        return lr
There is one last issue - tf.function doesn't like it when things create variables, so you need to move the creation of the layer and the optimizer outside:
# Objects that create variables are built once, outside the traced function.
lr = TFPiecewiseSchedule([[10, 1e-3], [20, 1e-4]])
l = layers.Dense(10)
opt = tf.keras.optimizers.Adam(lr)


@tf.function
def f(x):
    ...
I hope this helps!

Related

How does tf.GradientTape record operations inside the with statement?

I don't understand how tf.GradientTape records operations like y=x**2 inside the "with" statement (see the operations below).
x = tf.Variable(3.0)
# The operation inside this `with` block is the one the question asks about.
with tf.GradientTape() as tape:
    y = x**2
What Python syntax can be used to achieve this behavior?
EDIT:
As per the GitHub source code, GradientTape,
At Line 897:
@tf_contextlib.contextmanager
def _ensure_recording(self):
    """Ensures that this tape is recording."""
    if not self._recording:
        # Not recording yet: push the tape for the duration of the block and
        # always pop it again, even if the body raises.
        try:
            self._push_tape()
            yield
        finally:
            self._pop_tape()
    else:
        # Already recording: nothing to set up or tear down.
        yield
If you don't know, contextmanager triggers whenever with keyword is used. It tells us that it starts keeping track of tape.
self._pop_tape() is in Line 891:
def _pop_tape(self):
    """Stop recording by popping this tape.

    Raises ValueError if the tape is not currently recording.
    """
    if not self._recording:
        raise ValueError("Tape is not recording.")
    tape.pop_tape(self._tape)
    self._recording = False
self._push_tape() is in Line 878:
def _push_tape(self):
    """Pushes a new tape onto the tape stack."""
    if self._recording:
        raise ValueError("Tape is still recording, This can happen if you try to "
                         "re-enter an already-active tape.")
    if self._tape is None:
        # First use: create the underlying tape with this object's settings.
        self._tape = tape.push_new_tape(
            persistent=self._persistent,
            watch_accessed_variables=self._watch_accessed_variables)
    else:
        # A tape already exists: push that same tape again.
        tape.push_tape(self._tape)
    self._recording = True
Here, you can notice tape.push_new_tape is being accessed here which can be found in this source code at Line 43:
def push_new_tape(persistent=False, watch_accessed_variables=True):
    """Pushes a new tape onto the tape stack."""
    # The tape is created by the pywrap_tfe binding and wrapped in a Tape object.
    tape = pywrap_tfe.TFE_Py_TapeSetNew(persistent, watch_accessed_variables)
    return Tape(tape)
In this you can see the Tape class just above at Line 31.
class Tape(object):
    """Represents a gradient propagation trace."""

    # Only the wrapped tape handle is stored on instances.
    __slots__ = ["_tape"]

    def __init__(self, tape):
        self._tape = tape

    def watched_variables(self):
        # Delegates to the pywrap_tfe binding, passing the wrapped tape.
        return pywrap_tfe.TFE_Py_TapeWatchedVariables(self._tape)
Also, I tried to track pywrap_tfe.TFE_Py_TapeSetNew but couldn't find it in this source code of the file.
Original Answer:
The documentation of GradientTape states:
By default GradientTape will automatically watch any trainable variables that are accessed inside the context. If you want fine grained control over which variables are watched you can disable automatic tracking by passing watch_accessed_variables=False to the tape constructor
With the following code:
x = tf.Variable(2.0)
w = tf.Variable(5.0)
# Automatic watching is switched off, so only what is passed to tape.watch()
# is tracked. The tape is persistent because gradient() is called twice.
with tf.GradientTape(
        watch_accessed_variables=False, persistent=True) as tape:
    tape.watch(x)
    y = x ** 2  # Gradients will be available for `x`.
    z = w ** 3  # No gradients will be available as `w` isn't being watched.
dy_dx = tape.gradient(y, x)
print(dy_dx)
# Output: tf.Tensor(4.0, shape=(), dtype=float32)
# No gradients will be available as `w` isn't being watched.
dz_dw = tape.gradient(z, w)
print(dz_dw)
# Output: None

Keras Lambda layer, how to use multiple arguments

I have this function:
def sampling(x):
    """Draw one categorical sample per row of `x` and return it one-hot encoded.

    `log(x)` is passed to `tf.random.categorical` as the logits. The one-hot
    depth is fixed at 2, and the result is added to `x * 0`.
    """
    drawn = tf.random.categorical(tf.math.log(x), 1)
    one_hot = tf.squeeze(tf.one_hot(drawn, depth=2), axis=1)
    return x * 0 + one_hot
That I call from this layer:
x = layers.Lambda(sampling, name="lambda")(x)
But I need to change the depth variable in the sampling function, so I would need something like this:
def sampling(x, depth):
But, how can I make it work with the Lambda layer ?
Thanks a lot
Use a lambda function inside the Lambda layer...
def sampling(x, depth):
    """Draw one categorical sample per row of `x`, one-hot encoded to `depth`.

    `log(x)` is passed to `tf.random.categorical` as the logits, and the
    one-hot result is added to `x * 0`.
    """
    drawn = tf.random.categorical(tf.math.log(x), 1)
    one_hot = tf.squeeze(tf.one_hot(drawn, depth=depth), axis=1)
    return x * 0 + one_hot
usage:
Lambda(lambda t: sampling(t, depth=3), name="lambda")(x)

tf.function in tensorflow with a loop

I am trying to train a model using TensorFlow. There is a bottleneck in my code which makes my TensorFlow optimization crash. I found out that this is most likely related to a part of my code where I have a loop:
here is a minimum working example:
import numpy as np
import tensorflow as tf
import scipy.optimize
# Scalar parameters read as globals by F() inside my_function.
kon = 0.01
mu = 1.5
fi = 0.5
kappa = 22
# NOTE(review): `theta` is not defined anywhere in this snippet, so this line
# raises NameError as written - confirm the intended value.
w = (1-theta)
n =100
# Random inputs, one value per sample.
xs = tf.random.normal(shape=(n,), stddev=0.2)
eps = tf.random.normal(shape=(n,), stddev=0.17)
z = tf.sigmoid(tf.random.normal(shape=(n,), stddev=0.22))
def my_function(z, eps, x0):
    """Solve F(h) = 0 once per (z, eps, x0) triple and return all roots.

    Each root is found with `scipy.optimize.newton_krylov`, starting from
    0.5; the roots are returned flattened into a single 1-D array in input
    order.
    """
    def F(hi):
        # Residual for the current sample; ze, ei and xs are rebound by the
        # loop below before every solver call.
        return (mu/fi)*np.log(hi) -(1-mu)*kappa*(hi)**(1+(1/fi))-mu*(np.log(w*ei*xs)-np.log(kon))-np.log(ze)

    # Start from an empty array so the result is an array even with no inputs.
    pieces = [np.empty((0,))]
    # leisure today
    for ze, log_ei, log_xs in zip(z, eps, x0):
        ei = np.exp(log_ei)
        xs = np.exp(log_xs)
        root = scipy.optimize.newton_krylov(F, 0.5)
        pieces.append(np.ravel(root))
    return np.concatenate(pieces)
if I use the tf.function to decorate my function I get this error.
# The decorator makes TensorFlow trace the function as a graph; under tracing
# z, eps and x0 are symbolic tensors, so the Python-level zip() below is what
# triggers the reported "iterating over tf.Tensor" error.
@tf.function
def my_function(z, eps, x0):
    """Solve F(h) = 0 once per (z, eps, x0) triple and collect the roots."""
    def F(hi):
        # Residual for the current sample; ze, ei and xs come from the loop.
        return (mu/fi)*np.log(hi) -(1-mu)*kappa*(hi)**(1+(1/fi))-mu*(np.log(w*ei*xs)-np.log(kon))-np.log(ze)
    hvec = np.empty((0,))
    # leisure today
    for ze,ei,xs in zip(z, eps, x0):
        ei=np.exp(ei)
        xs=np.exp(xs)
        htemp = scipy.optimize.newton_krylov(F, 0.5)
        hvec = np.append(hvec, htemp)
    return hvec
htest=my_function(z,eps,x0)
ERROR MESSAGE
OperatorNotAllowedInGraphError: in converted code:
.....
OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed: AutoGraph did not convert this function. Try decorating it directly with @tf.function.
I tried to follow this route:
# The signature declares exactly one float32 argument of unspecified shape.
@tf.function(input_signature=[tf.TensorSpec(None, tf.float32)])
def tf_function(input):
    """Run `my_function` through `tf.numpy_function` on a single input."""
    y = tf.numpy_function(my_function, [input], tf.float32)
    return y
but the error message that I get once I call the tf_function is:
htestTF= tf_function(z,eps,x0)
Error
TypeError: When input_signature is provided, only pass arguments covered by it. Received 3 argument(s).
Can someone with experience help me debug this?
I believe it's the zip function, which autograph doesn't currently transform: you need to use for i in range(len(z)) and then ze = z[i].
That said, your function uses only NumPy so it won't work as expected: tf.function requires TensorFlow code.
To train in TensorFlow, you would need to change all the np.* calls to their equivalent tf.*, replace hvec with hvec = tf.TensorArray(...), and replace scipy.optimize.newton_krylov with a TF-based implementation.

Implementing Perceptron with an array error

My question is if there was an issue in changing def step(self,x) function since the original was faulty.
I attempted to change def step(self,x) to use x.any(). It resulted in a prediction error where all predictions were 1. I attempted to implement an OR Perceptron neural network from a book by following the code given. However, I received the error: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
This is the code:
from nn import Perceptron
import numpy as np
# The four two-bit input patterns, one per row.
X = np.array([[0,0],[0,1],[1,0],[1,1]])
print(X[1])
# NOTE(review): the four lists are passed as separate positional arguments
# rather than as one nested list - confirm this is what was intended.
y = np.array([0],[1],[1],[0])
print("[INFO] training perceptron...")
p = Perceptron(X.shape[1],alpha = 0.1)
p.fit(X,y,epochs=20)
print("[INFO] testing perceptron...")
for (x,target) in zip(X,y):
    # NOTE(review): this passes the whole matrix `X`, not the loop variable
    # `x`, so step() receives one activation per row instead of a scalar.
    pred=p.predict(X)
    print("[INFO] data={}, ground-truth={}, pred={}". format(x, target[0], pred))
The package that I imported was:
import numpy as np
class Perceptron:
    """Single-layer perceptron trained one sample at a time."""

    def __init__(self, N, alpha = 0.1):
        """Draw N + 1 random weights (the extra one is the bias), scaled by 1/sqrt(N)."""
        raw_weights = np.random.randn(N+1)
        self.W = raw_weights/np.sqrt(N)
        self.alpha = alpha

    def step(self,x):
        """Threshold activation: 1 for a positive input, otherwise 0."""
        return 1 if x>0 else 0

    def fit(self, X, y, epochs = 10):
        """Apply the perceptron update rule to every sample, `epochs` times."""
        # Append a column of ones so the bias is learned as an ordinary weight.
        bias_column = np.ones((X.shape[0]))
        X = np.c_[X,bias_column]
        for _ in np.arange(0, epochs):
            for (sample,target) in zip(X,y):
                guess = self.step(np.dot(sample, self.W))
                if guess == target:
                    # Correct prediction: the weights stay as they are.
                    continue
                delta = guess-target
                self.W += -self.alpha * delta * sample

    def predict(self,X,addBias=True):
        """Return the thresholded activation for `X`.

        `X` is promoted to 2-D; a bias column of ones is appended unless
        `addBias` is False.
        """
        X = np.atleast_2d(X)
        if addBias:
            X=np.c_[X, np.ones((X.shape[0]))]
        activation = np.dot(X,self.W)
        return self.step(activation)
My apologies if it's a silly question, as I spent the whole day thinking about it to no avail.
Thanks in advance!
The error that you are facing is because step() is coded to evaluate 1 element of the array at a time but when you pass an array to it in the predict function it has to do something like this:
[0.266,1.272,-1.282,0.889] > 0
The interpreter doesn't know which value to evaluate since it's an array and hence gives the error. Using any or all would check for 'any' or 'all' value in the array and give you 0 or 1 correspondingly, which is why you get an array of 1s when you write x.any().
Another thing that bothered me about the code you imported was that the forward pass is done in a loop, which is not very efficient or pythonic. A vectorized implementation is way better. I have changed the step function and fit function in that imported code to be vectorized and it runs fine for me.
import numpy as np
class Perceptron:
    """Single-layer perceptron trained with vectorized full-batch updates."""

    def __init__(self, N, alpha = 0.1):
        """Draw N + 1 random weights (the extra one is the bias), scaled by 1/sqrt(N)."""
        self.W = np.random.randn(N+1)/np.sqrt(N)
        self.alpha = alpha

    def step(self,x):
        """Element-wise threshold: 1.0 where `x` is positive, otherwise 0.0."""
        return 1. * (x > 0)

    def fit(self, X, y, epochs = 10):
        """Run up to `epochs` full-batch perceptron updates.

        X: array of shape (n_samples, N).
        y: one target per sample; a column vector of shape (n_samples, 1) is
           accepted and flattened.
        """
        X = np.c_[X,np.ones((X.shape[0]))]
        # Flatten the targets: with a column vector, `p - y` would broadcast
        # to an (n, n) matrix and the weight update would fail.
        y = np.asarray(y).ravel()
        for _ in np.arange(0, epochs):
            Z = np.dot(X, self.W)
            p = self.step(Z)
            if not np.any(p != y):
                # Every sample is classified correctly; later epochs would
                # leave the weights unchanged, so stop here.
                break
            error = (p-y)
            self.W += -self.alpha * np.dot(X.T,error)

    def predict(self,X,addBias=True):
        """Return the thresholded activations for `X`.

        `X` is promoted to 2-D; a bias column of ones is appended unless
        `addBias` is False.
        """
        X = np.atleast_2d(X)
        if addBias:
            X=np.c_[X, np.ones((X.shape[0]))]
        return self.step(np.dot(X,self.W))
Now the step function is returning a binary array where the value is 1 when the input is greater than 0 else 0. For example if you had an array say:
X= [0.266,1.272,-1.282,0.889]
would be converted to:
[1,1,0,1]
I also changed the fit function so that it does everything vectorized.
One other thing that I did to my code was this :
Instead of
y = np.array([0],[1],[1],[0])
I did
y = np.array([0,1,1,0])
to get it working. I hope this helps. Be sure to ask anything if you don't understand.

Implementing heaviside step function in TensorFlow

I want to create heaviside step function in TensorFlow. Since Heaviside function is not differentiable I also need to choose derivative approximation and define custom gradient so full implementation looks like this:
import tensorflow as tf
@tf.RegisterGradient("HeavisideGrad")
def _heaviside_grad(unused_op: tf.Operation, grad: tf.Tensor):
    """Gradient registered as "HeavisideGrad": the sigmoid derivative times `grad`."""
    x = unused_op.inputs[0]
    # During backpropagation heaviside behaves like sigmoid
    s = tf.sigmoid(x)  # computed once and reused for s * (1 - s)
    return s * (1 - s) * grad
def heaviside(x: tf.Tensor, g: tf.Graph = None):
    """Step function built from tf.sign, with "Sign" remapped to "HeavisideGrad".

    x: input tensor.
    g: graph whose gradient map is overridden; defaults to the graph that is
       current when the function is called.
    """
    # Resolve the default at call time. A `tf.get_default_graph()` default in
    # the signature is evaluated once, when the function is defined, and would
    # keep pointing at that graph even after the default graph changes.
    if g is None:
        g = tf.get_default_graph()
    custom_grads = {
        "Sign": "HeavisideGrad"
    }
    with g.gradient_override_map(custom_grads):
        # TODO: heaviside(0) currently returns 0. We need heaviside(0) = 1
        sign = tf.sign(x)
        # tf.stop_gradient is needed to exclude tf.maximum from derivative
        step_func = sign + tf.stop_gradient(tf.maximum(0.0, sign) - sign)
        return step_func
There is one caveat in my implementation: tf.sign(0) returns zero value so heaviside(0) also returns zero and I want heaviside(0) to return 1. How can I achieve such behavior?
A very hacky way would be to use
1 - max(0.0, sign(-x))
as your step function instead of
max(0.0, sign(x))
Another option would be to use greater_equal and cast the result to your desired type, and override its gradient with the sigmoid override you already have.
Ok, I think I figured it out. Many thanks to etarion who pointed out the correct approach to solve my issue.
So the basic idea is to use tf.greater_equal instead of combination of tf.sign and maximum. The custom gradient is applied to tf.identity operation.
Here is updated implementation of heaviside function:
import tensorflow as tf
@tf.RegisterGradient("HeavisideGrad")
def _heaviside_grad(unused_op: tf.Operation, grad: tf.Tensor):
    """Gradient registered as "HeavisideGrad": max(0, 1 - |x|) times `grad`."""
    return tf.maximum(0.0, 1.0 - tf.abs(unused_op.inputs[0])) * grad
def heaviside(x: tf.Tensor, g: tf.Graph = None):
    """Step function that returns 1 for x >= 0 and 0 otherwise.

    The forward value comes from tf.greater_equal. The backward pass goes
    through a tf.identity op whose gradient is remapped to "HeavisideGrad".

    x: input tensor.
    g: graph whose gradient map is overridden; defaults to the graph that is
       current when the function is called.
    """
    # Local import: the snippet's import block only brings in tensorflow, so
    # `uuid` would otherwise be undefined here.
    import uuid

    # Resolve the default at call time rather than when the function is defined.
    if g is None:
        g = tf.get_default_graph()
    custom_grads = {
        "Identity": "HeavisideGrad"
    }
    with g.gradient_override_map(custom_grads):
        i = tf.identity(x, name="identity_" + str(uuid.uuid1()))
        ge = tf.greater_equal(x, 0, name="ge_" + str(uuid.uuid1()))
        # tf.stop_gradient is needed to exclude the cast from the derivative.
        # Casting to i.dtype keeps the arithmetic in the input's own dtype.
        step_func = i + tf.stop_gradient(tf.cast(ge, i.dtype) - i)
        return step_func
This would make the unit step function, using only TensorFlow APIs so the result is still a tensor:
#in Eager mode
def heaviside(v):
    """Return 1 - max(0, -sign(v)) as a float32 tensor."""
    negated_sign = -tf.sign(v).numpy()
    candidates = tf.constant([0, negated_sign], tf.float32)
    return 1 - tf.reduce_max(candidates)
In TensorFlow 2, it is better to use the @tf.custom_gradient decorator:
@tf.custom_gradient
def heaviside(X):
    """Heaviside step over the first BSIZE entries of `X`, with a pass-through gradient.

    Returns the stacked per-entry results together with the gradient function
    that `tf.custom_gradient` expects.
    """
    # This custom op is converted to graph, no 'if', 'else' allowed,
    # so use 'tf.cond'
    items = []
    for i in range(BSIZE):  # Batch size
        item = tf.cond(X[i] < 0, lambda: tf.constant([0], tf.float32),
                       lambda: tf.constant([1], tf.float32))
        items.append(item)
    U = tf.stack(items)

    # Heaviside half-maximum formula
    # U = (tf.sign(X)+1)/2

    # Div is differentiation intermediate value
    def grad(Div):
        return Div * 1  # Heaviside has no gradient, use 1.

    return U, grad
The easiest fix for your code is to add a small number to the result of tf.sign() and take the sign again. This will result in getting a 1 for 0:
sign = tf.sign ( tf.sign( x ) + 0.1 )

Categories

Resources