I have a custom loss where I want to apply Gaussian filter to a predicted label to manipulate it a little. Using max or average pooling is simple as it is predefined in keras, but I had to make my own class for Gaussian pooling:
import numpy as np
from keras.layers import DepthwiseConv2D
from keras.layers import Input
from keras.models import Model
import tensorflow as tf
class Gaussian():
def __init__(self,shape, f = 3):
self.filt = f
self.g = self.gaussFilter(shape)
def doFilter(self, data):
return self.g.predict(data, steps=1) #steps are for predicting on const tensor, I change it when predicting on predictions
def gauss2D(self,shape=(3,3),sigma=0.5):
m,n = [(ss-1.)/2. for ss in shape]
y,x = np.ogrid[-m:m+1,-n:n+1]
h = np.exp( -(x*x + y*y) / (2.*sigma*sigma) )
h[ h < np.finfo(h.dtype).eps*h.max() ] = 0
sumh = h.sum()
if sumh != 0:
h /= sumh
return h
def gaussFilter(self, size=256):
kernel_weights = self.gauss2D(shape=(self.filt,self.filt))
in_channels = 1 # the number of input channels
kernel_weights = np.expand_dims(kernel_weights, axis=-1)
kernel_weights = np.repeat(kernel_weights, in_channels, axis=-1) # apply the same filter on all the input channels
kernel_weights = np.expand_dims(kernel_weights, axis=-1) # for shape compatibility reasons
inp = Input(shape=(size,size,1))
g_layer = DepthwiseConv2D(self.filt, use_bias=False, padding='same')(inp)
model_network = Model(input=inp, output=g_layer)
print(model_network.summary())
model_network.layers[1].set_weights([kernel_weights])
model_network.trainable= False
return model_network
This works as expected when feeding a constant tensor to the doFilter function, an example of simple data:
a = np.array([[[1, 2, 3], [4, 5, 6], [4, 5, 6]]])
filt = Gaussian(3)
print(filt.doFilter(tf.constant(a.reshape(1,3,3,1))))
However, if I try to use this in a custom loss :
def custom_loss_no_true(input_tensor, length):
def loss(y_true, y_pred):
gaus_pooler = Gaussian(256, length//8)
a = gaus_pooler.doFilter(y_pred)
...more stuff comes after
I get an error:
ValueError: When feeding symbolic tensors to a model, we expect the
tensors to have a static batch size. Got tensor with shape: (None,
256, 256, 1)
This is as I have found caused by the fact, that I am feeding a tensor that is an output of other model, a symbolic data, not actual values (source). Thus I need to change the logic of my approach, because evaluating the tensor to feed my class would break the graph and lead to no gradient propagation within the loss (or am I incorrect?). How can I apply such convolution operation on a tensor that is an output of other model? Is it even possible? Or maybe there is a way to use it without adding the layer to the model, such as MaxPooling?
You don't really need a complex keras Model nor a keras Layer if what you want to do is just convolve your input with a Gaussian kernel. Here is a port of your code with simple tensorflow ops :
import tensorflow as tf
def get_gaussian_kernel(shape=(3,3), sigma=0.5):
"""build the gaussain filter"""
m,n = [(ss-1.)/2. for ss in shape]
x = tf.expand_dims(tf.range(-n,n+1,dtype=tf.float32),1)
y = tf.expand_dims(tf.range(-m,m+1,dtype=tf.float32),0)
h = tf.exp(tf.math.divide_no_nan(-((x*x) + (y*y)), 2*sigma*sigma))
h = tf.math.divide_no_nan(h,tf.reduce_sum(h))
return h
def gaussian_blur(inp, shape=(3,3), sigma=0.5):
"""Convolve using tf.nn.depthwise_conv2d"""
in_channel = tf.shape(inp)[-1]
k = get_gaussian_kernel(shape,sigma)
k = tf.expand_dims(k,axis=-1)
k = tf.repeat(k,in_channel,axis=-1)
k = tf.reshape(k, (*shape, in_channel, 1))
# using padding same to preserve size (H,W) of the input
conv = tf.nn.depthwise_conv2d(inp, k, strides=[1,1,1,1],padding="SAME")
return conv
You can use it simply in your custom loss (assuming a 4D y_pred [batch, height width, channel]) :
a = gaussian_blur(y_pred)
Related
I am writing a custom layer in Tensorflow 2.0 and I ran to a problem as follow:
I want to transform a 1D weight array (5x1) to a 2D array (10x10). Suppose I have the index to transform from 1D to 2D as follow, weight_index_lst:
weight_id, row, col
1,5,6
2,6,7
3,7,8
4,8,9
5,9,10
The others location of the 2D array will just get a value of 0. Here's my script for the custom layers. My input is in (10x1) shape. For the w_mat, it receives 0 anywhere else that self.w is not assigned
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
class mylayer(layers.Layer):
def __init__(self, weight_index_lst, **kwargs):
super(mylayer, self).__init__(**kwargs)
self.weight_index_lst= weight_index_lst
def build(self):
self.w = self.add_weight(shape = (5,1),
initializer = 'he_normal',
trainable = True)
def call(self, inputs):
ct = 0
w_mat = tf.Variable(np.zeros((21, 21)),dtype='float32',trainable=False)
for i in range(20):
i1 = self.weight_index_lst[i,1] #row index
i2 = self.weight_index_lst[i,2] #column index
w_mat[i1,i2].assign(self.w[ct,0]) #problem with no gradient provided
#or w_mat[i1,i2] = self.w[ct,0] #resource variable cannot be assigned
ct = ct+1
y = tf.matmul(w_mat,inputs)
return y
I could have declared a (10x10) weight array but my deep learning wants the others weight to be 0 and cannot be trained.
If you want to specifically create a new layer with the weights and such then the resolution to your problem (no gradients propagating through assign) is to change all of your operations to be symbolic tensor operations - then TF will be able to propagate the gradients. One way to do so is to create 1d tensor of weights you want to train, append non-trainable const tensor with 0.0 value and then use tf.gather to select the needed weights and/or constant zero for each of n**2 elements of the matrix you want to use to multiply the layer's input by. Since all operations are symbolic tensor operations TF will be able to propagate gradients with no problems. Code of such approach below:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
class mylayer(layers.Layer):
def __init__(self, n, weight_index_lst, **kwargs):
super(mylayer, self).__init__(**kwargs)
self.weight_index_lst = weight_index_lst
self.n = n
def build(self, input_shape):
self.w = self.add_weight(shape = (len(self.weight_index_lst),),
initializer = 'he_normal',
trainable = True)
def call(self, inputs):
const_zero = tf.constant([0.], dtype=tf.float32)
const_zero_and_weights = tf.concat([const_zero, self.w], axis=0)
ct = 1 # start with 1 since 0 means take the non-trainable 0. from const_zero_and_weights
selector = np.zeros((self.n ** 2), dtype=np.int32) # indicies
for i, j in self.weight_index_lst:
selector[i * self.n + j] = ct
ct = ct+1
t_ind = tf.constant(selector, dtype=tf.int32)
w_flattened = tf.gather(const_zero_and_weights, t_ind)
w_matrix = tf.reshape(w_flattened, (self.n, self.n))
y = tf.matmul(w_matrix, inputs)
return y
m = tf.keras.Sequential([
layers.Dense(21**2, input_shape=(45,)),
layers.Reshape(target_shape=(21,21)),
mylayer(21, [(4,5), (5,6), (6,7), (7,8), (8,9)]),
])
m.summary()
You don't need to create a trainable layer for this. Consider just using non-trainable lambda layer:
def select_as_needed(x, wrc, n):
selector = np.zeros(n * n, dtype=np.int32) # tensor with the index of input element we want to select in each cell (0 otherwise)
mask = np.zeros(n * n, dtype=np.float32) # 0./1. tensor with ones only on the positions where we put some selected element
for w, r, c in wrc:
selector[r * n + c] = w
mask[r * n + c] = 1.0
t_ind = tf.constant(selector, dtype=tf.int32)
t_mask = tf.constant(mask, dtype=tf.float32)
return tf.gather(x, t_ind, axis=1) * mask # if we don't multiply by mask the 0-index value of input will go to all positions for which we didn't select anything
wrc = [(0,4,5), (1,5,6), (2,6,7), (3,7,8), (4,8,9)] # same as your table, but 0-based
n = 10
model = tf.keras.models.Sequential([
# ... your stuff
tf.keras.layers.Dense(5, 'linear'), # output of 5 neurons (or replace with whatever else you have which is producing 5 outputs per sample)
tf.keras.layers.Lambda(select_as_needed, arguments={'wrc': wrc, 'n':n}),
tf.keras.layers.Reshape(target_shape=(n, n)),
])
I'm getting a runtime error:
RuntimeError: Dimension out of range (expected to be in range of [-1, 0], but got 1)`
and can't figure out how to fix it.
The error appears to refer to the line:
i_enc = F.normalize(input =i_batch, p=2, dim=1, eps=1e-12) # (batch, K, feat_dim)
I'm trying to encode image features (batch x 36 x 2038) by applying a L2 norm. Below is the full code for the section.
def forward(self, q_batch, i_batch):
# batch size = 512
# q -> 512(batch)x14(length)
# i -> 512(batch)x36(K)x2048(f_dim)
# one-hot -> glove
emb = self.embed(q_batch)
output, hn = self.gru(emb.permute(1, 0, 2))
q_enc = hn.view(-1,self.h_dim)
# image encoding with l2 norm
i_enc = F.normalize(input =i_batch, p=2, dim=1, eps=1e-12) # (batch, K, feat_dim)
q_enc_copy = q_enc.repeat(1, self.K).view(-1, self.K, self.h_dim)
q_i_concat = torch.cat((i_enc, q_enc_copy), -1)
q_i_concat = self.non_linear(q_i_concat, self.td_W, self.td_W2 )#512 x 36 x 512
i_attention = self.att_w(q_i_concat) #512x36x1
i_attention = F.softmax(i_attention.squeeze(),1)
#weighted sum
i_enc = torch.bmm(i_attention.unsqueeze(1), i_enc).squeeze() # (batch, feat_dim)
# element-wise multiplication
q = self.non_linear(q_enc, self.q_W, self.q_W2)
i = self.non_linear(i_enc, self.i_W, self.i_W2)
h = torch.mul(q, i) # (batch, hid_dim)
# output classifier
# BCE with logitsloss
score = self.c_Wo(self.non_linear(h, self.c_W, self.c_W2))
return score
I would appreciate any help.
Thanks
I would suggest to check the shape of i_batch (e.g. print(i_batch.shape)), as I suspect i_batch has only 1 dimension (e.g. of shape [N]).
This would explain why PyTorch is complaining you can normalize only over the dimension #0; while you are asking for the operation to be done over a dimension #1 (c.f. dim=1).
I am implementing an OCR with Keras, Tensorflow backend.
I want to use keras.backend.ctc_decode implementation.
I have a model class :
import keras
def ctc_lambda_func(args):
y_pred, y_true, input_x_width, input_y_width = args
# the 2 is critical here since the first couple outputs of the RNN
# tend to be garbage:
# y_pred = y_pred[:, 2:, :]
return keras.backend.ctc_batch_cost(y_true, y_pred, input_x_width, input_y_width)
class ModelOcropy(keras.Model):
def __init__(self, alphabet: str):
self.img_height = 48
self.lstm_size = 100
self.alphabet_size = len(alphabet)
# check backend input shape (channel first/last)
if keras.backend.image_data_format() == "channels_first":
input_shape = (1, None, self.img_height)
else:
input_shape = (None, self.img_height, 1)
# data input
input_x = keras.layers.Input(input_shape, name='x')
# training inputs
input_y = keras.layers.Input((None,), name='y')
input_x_widths = keras.layers.Input([1], name='x_widths')
input_y_widths = keras.layers.Input([1], name='y_widths')
# network
flattened_input_x = keras.layers.Reshape((-1, self.img_height))(input_x)
bidirectional_lstm = keras.layers.Bidirectional(
keras.layers.LSTM(self.lstm_size, return_sequences=True, name='lstm'),
name='bidirectional_lstm'
)(flattened_input_x)
dense = keras.layers.Dense(self.alphabet_size, activation='relu')(bidirectional_lstm)
y_pred = keras.layers.Softmax(name='y_pred')(dense)
# ctc loss
ctc = keras.layers.Lambda(ctc_lambda_func, output_shape=[1], name='ctc')(
[dense, input_y, input_x_widths, input_y_widths]
)
# init keras model
super().__init__(inputs=[input_x, input_x_widths, input_y, input_y_widths], outputs=[y_pred, ctc])
# ctc decoder
top_k_decoded, _ = keras.backend.ctc_decode(y_pred, input_x_widths)
self.decoder = keras.backend.function([input_x, input_x_widths], [top_k_decoded[0]])
# decoded_sequences = self.decoder([test_input_data, test_input_lengths])
My use of ctc_decode comes from another post : Keras using Lambda layers error with K.ctc_decode
I get an error :
ValueError: Shape must be rank 1 but is rank 2 for 'CTCGreedyDecoder' (op: 'CTCGreedyDecoder') with input shapes: [?,?,7], [?,1].
I guess I have to squeeze my input_x_widths, but Keras does not seem to have such function (it always outputs something like (batch_size, 1))
Indeed, the function is expecting a 1D tensor, and you've got a 2D tensor.
Keras does have the keras.backend.squeeze(x, axis=-1) function.
And you can also use keras.backend.reshape(x, (-1,))
If you need to go back to the old shape after the operation, you can both:
keras.backend.expand_dims(x)
keras.backend.reshape(x,(-1,1))
Complete fix :
# ctc decoder
flattened_input_x_width = keras.backend.reshape(input_x_widths, (-1,))
top_k_decoded, _ = keras.backend.ctc_decode(y_pred, flattened_input_x_width)
self.decoder = keras.backend.function([input_x, flattened_input_x_width], [top_k_decoded[0]])
# decoded_sequences = self.decoder([input_x, flattened_input_x_width])
I am generally struggling with indexing tensors in tensorflow.
I have image data and additional scalar data. I can only use a single placeholder to input all the data to a Neural Network.
The images (img) are numpy arrays with shape (84,84,3) and I have data a with shape (2) and b with shape (1).
Now I create a single sample
sample = np.reshape(np.array([img,a,b]),(3,1)) #shape (3,1)
The placeholder is
input = tf.placeholder(dtype=tf.float32,shape=[None] + list(sample.shape))
Now when TF reads a batch of samples I would like to retrieve the batch of images, the batch of a, and the batch of b, because they need to be input in different locations in the Neural Network.
Here is a minimal example:
import tensorflow as tf
from tensorflow.contrib import layers
import numpy as np
#Numpy
img = np.random.rand(84,84,3)
a = np.random.rand(2)
b = np.random.rand(1)
sample = np.reshape(np.array([img,a,b]),(3,1)) #shape (3,1)
batch = np.repeat(np.expand_dims(sample,axis=0),32,axis=0) #shape (32,3,1)
#TF
input = tf.placeholder(dtype=tf.float32,shape=[None] + list(sample.shape))
#TODO:
tf_img = tf.#get image batch from input
tf_a = tf.#get a batch from input
tf_b = tf.#get b batch from input
out = layers.convolution2d(tf_img,num_outputs=64,kernel_size=8,stride=2,activation_fn=tf.nn.relu)
out = layers.flatten(out)
out = tf.concat([out,tf_a,tf_b])
out = layers.fully_connected(out,10,activation_fn=tf.nn.relu)
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
_ = sess.run(out,feed_dict={input:batch})
How can I extract the individual parts of the input from a tensor with shape (?,3,1), use the image data to create an embedding and concatenate the other two parts to that output enbedding.
Is there a better way to input the data? My only constraint is that it has to be a single placeholder.
Here's a complete example for my comment above:
import numpy as np
import tensorflow as tf
im_height = 84
im_width = 84
im_channels = 3
a_len = 2
b_len = 1
np_img = np.random.rand(im_height, im_width, im_channels)
np_a = np.random.rand(a_len)
np_b = np.random.rand(b_len)
# flatten the input and concatenate to a single 1D numpy array
np_sample = np.concatenate((np_img.reshape(-1), np_a.reshape(-1), np_b.reshape(-1)), axis=0)
# construct a pseudo batch
np_batch = np.repeat(np_sample[np.newaxis, :], 32, axis=0)
tf_batch = tf.placeholder(shape=(None, im_height*im_width*im_channels + a_len + b_len), dtype=tf.float32)
img_stop = im_height*im_width*im_channels
a_stop = img_stop+a_len
# you could also use tf.slice(...) here
tf_img = tf.reshape(tf_batch[:, 0:img_stop], (-1, im_height, im_width, im_channels))
tf_a = tf.reshape(tf_batch[:, img_stop:a_stop], (-1, a_len))
tf_b = tf.reshape(tf_batch[:, a_stop:], (-1, b_len))
with tf.Session() as sess:
fetch_dict = {'img': tf_img, 'a': tf_a, 'b': tf_b}
feed_dict = {tf_batch: np_batch}
res = sess.run(fetch_dict, feed_dict=feed_dict)
assert(np.isclose(res['img'][0, ...], np_img).all())
assert(np.isclose(res['a'][0, :], np_a).all())
assert(np.isclose(res['b'][0, :], np_b).all())
However, this is at least as invasive as adding appropriate placeholders to the code. Additionally, it's much less readable, in my opinion.
I am using a recurrent neural network in tensorflow with BasicLSTMCells. Basically, I have an input sequence of word ids, I convert each id to word embeddings, pass the word embeddings one at a time through the rnn, and then make a prediction for a single word after reading the whole sequence. My embedding matrix is of dimension V x H where V is the size of my vocabulary and H is the number of hidden units in my rnn. In order to make a prediction for the next word, I multiply my hidden vector by a weight matrix of size H x V and then compute a softmax. With the setup I described, everything seems to work as expected. I'm able to train on some examples and make reasonable predictions.
However, I've noticed that if I try to use the transpose of the embedding matrix, which will be a matrix of size H x V, instead of a separate matrix for the softmax layer, tensorflow raises a value error claiming that the dimensions of something it's not specifying don't have the same rank. I've verified that the dimensions of my embedding matrix (well, its transpose) are the same as those of the separate softmax matrix I'm creating. Changing just the one line of code from using my embedding matrix vs a separate softmax weight matrix causes the error.
I created a relatively small program to demonstrate what I'm trying to do and to show what causes the error. I was not able to make the error occur on a smaller network when I tried with just a single hidden layer network.
import sys
import time
import tensorflow as tf
from tensorflow.models.rnn import rnn
from tensorflow.models.rnn import rnn_cell
from tensorflow.models.rnn.rnn_cell import BasicLSTMCell
import numpy as np
INPUT_LENGTH = 17
BATCH_SIZE = 20
VOCAB_SIZE = 11
NUM_EPOCHS = 1000
HIDDEN_UNITS = 100
class Model(object):
def __init__(self, is_training):
initializer = tf.random_uniform_initializer(-1.0, 1.0)
self._target = tf.placeholder(tf.float32, [BATCH_SIZE, VOCAB_SIZE])
self._input_data=tf.placeholder(tf.int32,[BATCH_SIZE, INPUT_LENGTH])
self.embedding = tf.get_variable("embedding",
[VOCAB_SIZE, HIDDEN_UNITS],
initializer=initializer)
self.inputs = tf.split(1, INPUT_LENGTH,
tf.nn.embedding_lookup(self.embedding, self._input_data))
self.inputs2 = [tf.squeeze(input_, [1]) for input_ in self.inputs]
cell = rnn_cell.BasicLSTMCell(num_units=HIDDEN_UNITS)
initial_state = cell.zero_state(BATCH_SIZE, tf.float32)
outputs, states = rnn.rnn(cell, self.inputs2,
initial_state=initial_state)
self._outputs = outputs[-1]
self.soft_w = tf.get_variable("softmax_w",
[HIDDEN_UNITS, VOCAB_SIZE],
initializer=initializer)
prod = tf.matmul(self._outputs, self.soft_w)
#uncommenting out the following line causes the error
# prod = tf.matmul(self._outputs, self.embedding, False, True)
soft_b = tf.get_variable("softmax_b", [VOCAB_SIZE],
initializer=initializer)
self._logits = tf.nn.bias_add(prod,soft_b)
self._loss = tf.nn.softmax_cross_entropy_with_logits(self._logits,
self._target)
if not is_training:
return
learning_rate = .010001
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
self._train_op = optimizer.minimize(self._loss)
def train(self, sess, inputs, targets):
t = np.zeros((BATCH_SIZE, VOCAB_SIZE))
for i, target in enumerate(targets):
t[i,target] = 1.0
inputs = np.array(inputs)
inputs = inputs.reshape(BATCH_SIZE,INPUT_LENGTH)
fd = {self._target:t,
self._input_data:inputs}
o = sess.run([self._train_op, self._loss, self._outputs, self.embedding, self.soft_w], feed_dict = fd)
print o[2].shape
print o[3].shape
print o[4].shape
sys.exit()
return np.mean(o[1])
#this just generates dummy data
def read_data_rows(count):
ret = []
for i in range(count):
inputs = [4] * INPUT_LENGTH
output = 1
ret.append((inputs, output))
return ret
def main():
start = time.time()
tf.set_random_seed(1)
print "creating model",time.time()-start
m = Model(is_training=True)
with tf.Session() as sess:
print "initializing variables", time.time()-start
tf.initialize_all_variables().run()
for epoch in range(NUM_EPOCHS):
train_rows = read_data_rows(100)
for row_num in range(0, len(train_rows), BATCH_SIZE):
qs = []
ans = []
batch = train_rows[row_num:row_num+BATCH_SIZE]
for b in batch:
qs.append(b[0])
ans.append(b[1])
m.train(sess, qs, ans)
if __name__ == "__main__":
main()
The error I see is ValueError: Shapes TensorShape([Dimension(100)]) and TensorShape([Dimension(17), Dimension(100)]) must have the same rank
when uncommenting the line I mentioned above. What is the cause of the error I'm seeing? Why is the embedding matrix not treated the same way as the matrix self.soft_w?
The 0.6.0 (and earlier) release of TensorFlow had a bug in the implementation of gradients for tf.nn.embedding_lookup() and tf.gather() when the indices argument (self._input_data in your code) had more than one dimension.
Upgrading to the latest source release should fix this error. Otherwise, this commit has the relevant change (to array_grad.py) that will enable your program to work.