I've attempted to convert a Python-side training loop to TensorFlow to (hypothetically) make the code run faster, by not having to pass control back to the CPU constantly. However, I can't manage to use tf.while_loop.
Here's the code that works:
import numpy as np
import tensorflow as tf
from tqdm import tqdm
from sklearn.datasets import load_iris
from sklearn.preprocessing import RobustScaler
x, y = load_iris(return_X_y=True)
x = RobustScaler().fit_transform(x)
shape = (10, 10)
max_epochs = 1000
graph = tf.Graph()
sess = tf.Session(graph=graph)
x = x.astype(np.float64)
# Construct graph
with graph.as_default():
    weights = tf.get_variable(
        'weights', shape, initializer=tf.constant_initializer, dtype=tf.float64
    )
    curr_epoch = tf.placeholder(dtype=tf.int64, shape=())
    with tf.name_scope('data'):
        data = tf.data.Dataset.from_tensor_slices(x)
        data = data.shuffle(buffer_size=10000)
        data = data.repeat(max_epochs)
        data = data.batch(1)
        data = data.make_one_shot_iterator().get_next()
    with tf.name_scope('update'):
        update_op = make_update_op(weights)
    init = tf.global_variables_initializer()
sess.run(init)
for i in tqdm(range(max_epochs)):
    for _ in range(x.shape[0]):
        sess.run(update_op, feed_dict={
            curr_epoch: i
        })
np_weights = sess.run(weights)
print(np_weights) # Correctly prints an array of 150's.
Now, if I create an update function to pass to tf.while_loop, an error is thrown.
def make_update_op(w):
    return w.assign(
        w + 0.001
    )
# In the code above:
update_op = tf.while_loop(lambda _: True, make_update_op, (weights,), maximum_iterations=x.shape[0])
# No inner loop:
for i in tqdm(range(max_epochs)):
    sess.run(update_op, feed_dict={
        curr_epoch: i
    })
Line 22, in make_update_op
return w.assign(
AttributeError: 'Tensor' object has no attribute 'assign'
I don't quite understand what is happening even after reading the documentation. weights is a Variable, after all. What can be done to make the training loop work correctly?
The tensor that you're trying to assign a new value to inside the while loop is the result of a chain of operations and tensors (an operation is a node in the graph, while a tensor is a directed edge). In particular, the while loop will produce:
Variable/Read-->while/Enter-->while/Merge-->while/Switch-->while/Identity
What you're trying to assign to here is the tensor while/Identity.
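To see this concretely, here is a minimal check (a sketch assuming TF 1.x graph mode) that prints what the loop body actually receives:
import tensorflow as tf

v = tf.Variable(0.0, name='v')

def body(w):
    # Inside the loop body, w is a plain Tensor (named something like 'while/Identity:0'),
    # not the Variable itself, so it has no .assign() method.
    print(type(w), w.name)
    return w + 1.0

_ = tf.while_loop(lambda w: w < 3.0, body, (v,))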
tf.while_loop is usually used to iterate over the dimensions of a tensor (including over None, the unknown dimension). You're trying to iterate over variables that are fully defined, and you don't need a tf.while_loop for that. Just create the operations that update each variable and group them together:
update_ops = [w.assign(w + 0.001) for w in weights]
update_op = tf.group(update_ops)
Now, when you execute update_op through the tf.Session() interface, it will update all the variables.
Example:
import tensorflow as tf
v1 = tf.Variable(tf.ones((1, 2), dtype=tf.float32))
v2 = tf.Variable(2*tf.ones((1, 3), dtype=tf.float32))
update_ops = [w.assign(w + 0.001) for w in [v1, v2]]
update_op = tf.group(update_ops)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print('before update:')
    print(v1.eval(), v2.eval())
    print('after update:')
    sess.run(update_op)  # <-- update your variables
    print(v1.eval(), v2.eval())
# before update:
# [[1. 1.]] [[2. 2. 2.]]
# after update:
# [[1.001 1.001]] [[2.001 2.001 2.001]]
It turns out that all that was missing was the fact that one cannot assign to a variable inside the loop body, as Vlad pointed out. Instead, one can return the new value of the variable.
def make_update_op(w):
    return w + 0.001
new_w = tf.while_loop(lambda _: True, make_update_op, (weights,), maximum_iterations=x.shape[0])
update_op = weights.assign(new_w)
To use more variables one would need to return the same number of values from the function and unpack them in Python, but the principle is the same.
def make_update_op(w, d):
    return w + 0.001, d
new_w, _ = tf.while_loop(lambda *_: True, make_update_op, (weights, data), maximum_iterations=x.shape[0])
update_op = weights.assign(new_w)
I have created a function with TF operations that I invoke with tf.data.Dataset.map() to transform the input data for my model. Inside that function I create a tf.Variable and assign to it. When initializing the variables, TF complains that the variable's init operation is not an element of the graph, or that the variable does not belong to the same graph as the other variables. I would appreciate any help solving this issue.
Here you can see some toy code to reproduce the issue (TF 1.12):
import tensorflow as tf
def fun(x):
    f = tf.Variable(tf.ones((1,), dtype=tf.int64), name='test')
    op = f.assign(x, name='test_assign')
    with tf.control_dependencies([op]):
        f = tf.identity(f)
    return f
def generator():
    while True:
        yield [2]
ds = tf.data.Dataset.from_generator(
    generator, output_shapes=tf.TensorShape([1,]), output_types=tf.int64)
ds = ds.map(fun)
iterator = ds.make_one_shot_iterator()
y = iterator.get_next()
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for _ in range(5):
    print(sess.run(y))
I want to implement a function like this: if x == k, f(x) = 1, else f(x) = 0 (k is a parameter). So I used tf.equal and tf.cast, and my code was like this:
import tensorflow as tf
a = range(12)
a = tf.Variable(a)
b = 6
b = tf.Variable(b)
a = tf.reshape(a, [3, 4])
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
c = tf.equal(a, b)
d = tf.cast(c, tf.int32)
print(sess.run(c))
print(sess.run(d))
It seems fine, but the problem is that tf.gradients(d, a) and tf.gradients(d, b) are None. I tried tf.gradients(c, a) and got a TypeError. Is there any decent way to implement this function?
I'm not sure the gradient is even defined here.
The indicator function is f(a,b) = 1 if a=b, 0 otherwise. Away from a=b, this function is constant (zero) and so has zero derivative. At any point where a=b the function is discontinuous, so it doesn't have a derivative there.
More intuitively: derivatives don't exist where you have a 'jump' in your function.
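As a quick sanity check, here is a minimal sketch along the lines of the question's code showing that no gradient flows through the comparison, so tf.gradients simply returns None:
import tensorflow as tf

a = tf.Variable([1.0, 2.0, 3.0])
b = tf.constant(2.0)
d = tf.cast(tf.equal(a, b), tf.float32)

# The comparison blocks gradient flow, so this prints [None].
print(tf.gradients(d, a))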
It is possible to use the PDF of a normal distribution to approximate the indicator function. I am also new to TensorFlow, so feel free to point out any issues.
## I am using TensorFlow 2
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import tensorflow_probability as tfp
a = tf.range(12)
a = tf.Variable(a)
b = 6
b = tf.Variable(b)
a = tf.reshape(a, [3, 4])
## Define the PDF of a normal distribution to approximate the indicator function
dist = tfp.distributions.Normal(0., 0.1)
scalar = dist.prob(0.)  # a normalization constant, since the pdf at zero is not one
## Implement the approximated indicator function
a = tf.cast(a, dtype= tf.float32)
b = tf.cast(b, dtype= tf.float32)
c = dist.prob(a-b)/scalar
#d = tf.cast(c, tf.int32)
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
print(sess.run(c))
## calculate the gradient
c_a = tf.gradients(c, a)
print(sess.run(c_a))
How can I assign a value to a tf Variable inside a function?
Based on the link here, it says that you have to run a session on the tf tensor. I want to update the tf variable inside the function after a few calculations.
Example:
def update(weights):
    value_1 = 0
    value_2 = 2
    # ........... some code here ...........
    weights['layer_1'] = tf.multiply(weights['layer_1'], value_1)
    weights['layer_2'] = tf.multiply(weights['layer_2'], value_2)
    # ............some code here.............
I can't do the above code. But how do I use assign to make this code work?
You have to use assign, which takes a Tensor of exactly the same shape as the original Variable. If you want to assign something of a different shape, use validate_shape=False. But keep in mind that the change only takes effect when the assign op is actually run, so you are describing the behavior of your variable rather than assigning values eagerly.
Here an example that shows variable assignment with variable shapes:
import tensorflow as tf
var = tf.Variable(tf.zeros((1, 3)))
new_v = tf.assign(var, tf.ones((5, 7)), validate_shape=False)
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init_op)
    print(sess.run([var]))
    print(sess.run([new_v]))
For your particular example you could try:
def update(weights):
    value_1 = tf.constant(0)
    value_2 = tf.constant(2)
    # ........... some code here ...........
    weights['layer_1'] = tf.assign(weights['layer_1'], tf.multiply(weights['layer_1'], value_1))
    weights['layer_2'] = tf.assign(weights['layer_2'], tf.multiply(weights['layer_2'], value_2))
    # ............some code here.............
This works for me -
import tensorflow as tf
import numpy as np
# function to randomly initialize weights for a specific layer
def assign_var(layer_number):
    weight_value = np.random.rand(5, 3)  # or any calculations you need
    weight_var = tf.get_variable('weights_layer_' + str(layer_number), shape=(5, 3))
    return tf.assign(weight_var, weight_value)
with tf.Session() as sess:
    sess.run(assign_var(1))
    sess.run(assign_var(2))
EDIT: The problem with the above code is that it keeps adding to the graph every time you call the function.
Alternatively, I think this should be better.
import tensorflow as tf
import numpy as np
var_name = tf.placeholder(tf.string)
weight_value = tf.placeholder(tf.float32)
weight_var = tf.get_variable(var_name)
assign_weights = tf.assign(weight_var,weight_value)
sess = tf.Session()
# function to randomly initialize weights for a specific layer
def assign_var(layer_number):
    rand_weight_value = np.random.rand(5, 3)  # or any calculations you need
    sess.run(assign_weights, {var_name: 'weights_layer' + str(layer_number), weight_value: rand_weight_value})
assign_var(1) # assigns random weight values to layer 1
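For what it's worth, here is a minimal sketch (not taken from the answer above) of a common TF 1.x pattern: build one assign op per variable with a value placeholder once, and afterwards just feed new values, so the graph does not keep growing.
import numpy as np
import tensorflow as tf

# Build the variable and its assign op once, up front.
weight_var = tf.get_variable('weights_layer_1', shape=(5, 3))
weight_value = tf.placeholder(tf.float32, shape=(5, 3))
assign_weights = tf.assign(weight_var, weight_value)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Re-running the same op only feeds a new value; no new nodes are added.
    sess.run(assign_weights, {weight_value: np.random.rand(5, 3)})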
I've found that indexing is still an open issue in TensorFlow (#206), so I'm wondering what I could use as a workaround at the moment. I want to index/slice a row/column of a matrix based on a variable that changes for every training example.
What I've tried so far:
Slicing based on placeholder (doesn't work)
The following (working) code slices based on a fixed number.
import tensorflow as tf
import numpy as np
x = tf.placeholder("float")
y = tf.slice(x,[0],[1])
#initialize
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)
#run
result = sess.run(y, feed_dict={x:[1,2,3,4,5]})
print(result)
However, it seems that I can't simply replace one of these fixed numbers with a tf.placeholder. The following code gives me the error "TypeError: List of Tensors when single Tensor expected."
import tensorflow as tf
import numpy as np
x = tf.placeholder("float")
i = tf.placeholder("int32")
y = tf.slice(x,[i],[1])
#initialize
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)
#run
result = sess.run(y, feed_dict={x:[1,2,3,4,5],i:0})
print(result)
This sounds like the brackets around [i] are one too many, but removing them doesn't help either. How can I use a placeholder/variable as an index?
Slicing based on python variable (doesn't backprop/update properly)
I've also tried using a normal Python variable as the index. This does not lead to an error, but the network doesn't learn anything while training. I suppose that because the changing variable is not properly registered, the graph is malformed and updates don't work?
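To illustrate what I mean, here is a rough sketch: using a plain Python int as the index just bakes a constant into the graph, so it can't change per training example.
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None])
i = 0                        # ordinary Python variable
y = tf.slice(x, [i], [1])    # the 0 is frozen into the graph at build time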
Slicing via one-hot vector + multiplication (works, but is slow)
One workaround I found is using a one-hot vector: making a one-hot vector in numpy, passing it in via a placeholder, and then doing the slicing via matrix multiplication. This works, but is quite slow.
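Roughly, the one-hot workaround looks like this (a simplified sketch):
import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[5])        # the vector to index
onehot = tf.placeholder(tf.float32, shape=[5])   # one-hot selector built in numpy
y = tf.reduce_sum(x * onehot)                    # equivalent to x[i]

with tf.Session() as sess:
    sel = np.zeros(5, dtype=np.float32)
    sel[2] = 1.0  # select index 2
    print(sess.run(y, feed_dict={x: [1, 2, 3, 4, 5], onehot: sel}))  # prints 3.0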
Any ideas how to efficiently slice/index based on a variable?
Slicing based on a placeholder should work just fine. It looks like you are running into a type error, due to some subtle issues of shapes and types. Where you have the following:
x = tf.placeholder("float")
i = tf.placeholder("int32")
y = tf.slice(x,[i],[1])
...you should instead have:
x = tf.placeholder("float")
i = tf.placeholder("int32")
y = tf.slice(x,i,[1])
...and then you should feed i as [0] in the call to sess.run().
To make this a little clearer, I would recommend rewriting the code as follows:
import tensorflow as tf
import numpy as np
x = tf.placeholder(tf.float32, shape=[None]) # 1-D tensor
i = tf.placeholder(tf.int32, shape=[1])
y = tf.slice(x, i, [1])
#initialize
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)
#run
result = sess.run(y, feed_dict={x: [1, 2, 3, 4, 5], i: [0]})
print(result)
The additional shape arguments to the tf.placeholder op help to ensure that the values you feed have the appropriate shapes, and also that TensorFlow will raise an error if the shapes are not correct.
If you have an extra dimension, this works.
import tensorflow as tf
import numpy as np
def reorder0(e, i, length):
    '''
    e: a two dimensional tensor
    i: a one dimensional int32 tensor, of shape (e.shape[0])
    returns: a tensor of the same shape as e, where the jth entry is entry i[j] from e
    '''
    return tf.concat(
        [tf.expand_dims(e[i[j], :], axis=0) for j in range(length)],
        axis=0
    )
e = tf.placeholder(tf.float32, shape=(2,3,5), name='e' ) # sentences, words, embedding
i = tf.placeholder(tf.int32, shape=(2,3), name='i' ) # for each word, index of parent
p = tf.concat(
    [tf.expand_dims(reorder0(e[k, :, :], i[k, :], 3), axis=0) for k in range(2)],
    axis=0,
    name='p'
)
#initialize
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)
#run
result = sess.run(p, feed_dict={
    e: [
        ((1.0, 1.1, 1.2, 1.3, 1.4), (2.0, 2.1, 2.2, 2.3, 2.4), (3.0, 3.1, 3.2, 3.3, 3.4)),
        ((21.0, 21.1, 21.2, 21.3, 21.4), (22.0, 22.1, 22.2, 22.3, 22.4), (23.0, 23.1, 23.2, 23.3, 23.4)),
    ],
    i: [(1, 1, 1), (2, 0, 2)]
})
print(result)
If the sizes are not known when building the model, use TensorArray.
e = tf.placeholder(tf.float32, shape=(3,5) ) # words, embedding
i = tf.placeholder(tf.int32, shape=(3) ) # for each word, index of parent
#p = reorder0(e, i, 3)
a = tf.TensorArray(
    tf.float32,
    size=e.get_shape()[0],
    dynamic_size=True,
    infer_shape=True,
    element_shape=e.get_shape()[1],
    clear_after_read=False
)
#initialize
init = tf.initialize_all_variables()
sess = tf.Session()
sess.run(init)
#run
result = sess.run(
    a.unstack(e).gather(i),
    feed_dict={
        e: ((1.0, 1.1, 1.2, 1.3, 1.4), (2.0, 2.1, 2.2, 2.3, 2.4), (3.0, 3.1, 3.2, 3.3, 3.4)),
        # ((21.0, 21.1, 21.2, 21.3, 21.4), (22.0, 22.1, 22.2, 22.3, 22.4), (23.0, 23.1, 23.2, 23.3, 23.4)),
        i: (2, 0, 2)
    }
)
print(result)