Converting Tensor to a SparseTensor for ctc_loss [duplicate] - python

This question already has answers here:
Sparse Tensor (matrix) from a dense Tensor Tensorflow
(5 answers)
Closed 5 years ago.
Is there a way to convert a dense tensor into a sparse tensor? Apparently, Tensorflow's Estimator.fit doesn't accept SparseTensors as labels. One reason I would like to pass SparseTensors into Tensorflow's Estimator.fit is to be able to use tensorflow ctc_loss. Here's the code:
import dataset_utils
import tensorflow as tf
import numpy as np
from tensorflow.contrib import grid_rnn, learn, layers, framework
def grid_rnn_fn(features, labels, mode):
input_layer = tf.reshape(features["x"], [-1, 48, 1596])
indices = tf.where(tf.not_equal(labels, tf.constant(0, dtype=tf.int32)))
values = tf.gather_nd(labels, indices)
sparse_labels = tf.SparseTensor(indices, values, dense_shape=tf.shape(labels, out_type=tf.int64))
cell_fw = grid_rnn.Grid2LSTMCell(num_units=128)
cell_bw = grid_rnn.Grid2LSTMCell(num_units=128)
bidirectional_grid_rnn = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, input_layer, dtype=tf.float32)
outputs = tf.reshape(bidirectional_grid_rnn[0], [-1, 256])
W = tf.Variable(tf.truncated_normal([256,
80],
stddev=0.1, dtype=tf.float32), name='W')
b = tf.Variable(tf.constant(0., dtype=tf.float32, shape=[80], name='b'))
logits = tf.matmul(outputs, W) + b
logits = tf.reshape(logits, [tf.shape(input_layer)[0], -1, 80])
logits = tf.transpose(logits, (1, 0, 2))
loss = None
train_op = None
if mode != learn.ModeKeys.INFER:
#Error occurs here
loss = tf.nn.ctc_loss(inputs=logits, labels=sparse_labels, sequence_length=320)
... # returning ModelFnOps
def main(_):
image_paths, labels = dataset_utils.read_dataset_list('../test/dummy_labels_file.txt')
data_dir = "../test/dummy_data/"
images = dataset_utils.read_images(data_dir=data_dir, image_paths=image_paths, image_extension='png')
print('Done reading images')
images = dataset_utils.resize(images, (1596, 48))
images = dataset_utils.transpose(images)
labels = dataset_utils.encode(labels)
x_train, x_test, y_train, y_test = dataset_utils.split(features=images, test_size=0.5, labels=labels)
train_input_fn = tf.estimator.inputs.numpy_input_fn(
x={"x": np.array(x_train)},
y=np.array(y_train),
num_epochs=1,
shuffle=True,
batch_size=1
)
classifier = learn.Estimator(model_fn=grid_rnn_fn, model_dir="/tmp/grid_rnn_ocr_model")
classifier.fit(input_fn=train_input_fn)
UPDATE:
It turns out, this solution from here converts the dense tensor into a sparse one:
indices = tf.where(tf.not_equal(labels, tf.constant(0, dtype=tf.int32)))
values = tf.gather_nd(labels, indices)
sparse_labels = tf.SparseTensor(indices, values, dense_shape=tf.shape(labels, out_type=tf.int64))
However, I encounter this error now raised by ctc_loss:
ValueError: Shape must be rank 1 but is rank 0 for 'CTCLoss' (op: 'CTCLoss') with input shapes: [?,?,80], [?,2], [?], [].
I have this code that converts dense labels to sparse:
def convert_to_sparse(labels, dtype=np.int32):
indices = []
values = []
for n, seq in enumerate(labels):
indices.extend(zip([n] * len(seq), range(len(seq))))
values.extend(seq)
indices = np.asarray(indices, dtype=dtype)
values = np.asarray(values, dtype=dtype)
shape = np.asarray([len(labels), np.asarray(indices).max(0)[1] + 1], dtype=dtype)
return indices, values, shape
I converted y_train to sparse labels, and place the values inside a SparseTensor:
sparse_y_train = convert_to_sparse(y_train)
print(tf.SparseTensor(
indices=sparse_y_train[0],
values=sparse_y_train[1],
dense_shape=sparse_y_train
))
And compared it to the SparseTensor created inside the grid_rnn_fn:
indices = tf.where(tf.not_equal(labels, tf.constant(0, dtype=tf.int32)))
values = tf.gather_nd(labels, indices)
sparse_labels = tf.SparseTensor(indices, values, dense_shape=tf.shape(labels, out_type=tf.int64))
Here's what I got:
For sparse_y_train:
SparseTensor(indices=Tensor("SparseTensor/indices:0", shape=(33, 2), dtype=int64), values=Tensor("SparseTensor/values:0", shape=(33,), dtype=int32), dense_shape=Tensor("SparseTensor/dense_shape:0", shape=(2,), dtype=int64))
For sparse_labels:
SparseTensor(indices=Tensor("Where:0", shape=(?, 2), dtype=int64), values=Tensor("GatherNd:0", shape=(?,), dtype=int32), dense_shape=Tensor("Shape:0", shape=(2,), dtype=int64))
Which leads me to think that ctc_loss can't seem to handle SparseTensors as labels with dynamic shapes.

Yes. It is possible to convert a tensor to a sparse tensor and back:
Let sparse be a sparse tensor and dense be a dense tensor.
From sparse to dense:
dense = tf.sparse_to_dense(sparse.indices, sparse.shape, sparse.values)
From dense to sparse:
zero = tf.constant(0, dtype=tf.float32)
where = tf.not_equal(dense, zero)
indices = tf.where(where)
values = tf.gather_nd(dense, indices)
sparse = tf.SparseTensor(indices, values, dense.shape)

Related

Broadcast SparseTensor in tensorflow

I want to elementwise multiply a dense tensor with shape [n, n, k] with a sparse tensor that has the shape [n, n, 1]. I want the values from the sparse tensor to repeat along the axis with the size s, like it would do if I used a dense tensor instead and relied on implicit broadcasting.
However the SparseTensor.__mul__ operation does not support broadcasting the sparse operand. I didn't find an operator to explicitly broadcast the sparse Tensor. How could I achieve this?
If you do not want to just convert the sparse tensor to dense, you can extract select the right values from the dense tensor to build a sparse result directly, something like this:
import tensorflow as tf
import numpy as np
with tf.Graph().as_default(), tf.Session() as sess:
# Input data
x = tf.placeholder(tf.float32, shape=[None, None, None])
y = tf.sparse.placeholder(tf.float32, shape=[None, None, 1])
# Indices of sparse tensor without third index coordinate
indices2 = y.indices[:, :-1]
# Values of dense tensor corresponding to sparse tensor values
x_sp = tf.gather_nd(x, indices2)
# Values of the resulting sparse tensor
res_vals = tf.reshape(x_sp * tf.expand_dims(y.values, 1), [-1])
# Shape of the resulting sparse tensor
res_shape = tf.shape(x, out_type=tf.int64)
# Make sparse tensor indices
k = res_shape[2]
v = tf.size(y.values)
# Add third coordinate to existing sparse tensor coordinates
idx1 = tf.tile(tf.expand_dims(indices2, 1), [1, k, 1])
idx2 = tf.tile(tf.range(k), [v])
res_idx = tf.concat([tf.reshape(idx1, [-1, 2]), tf.expand_dims(idx2, 1)], axis=1)
# Make sparse result
res = tf.SparseTensor(res_idx, res_vals, res_shape)
# Dense value for testing
res_dense = tf.sparse.to_dense(res)
# Dense operation for testing
res_dense2 = x * tf.sparse.to_dense(y)
# Test
x_val = np.arange(48).reshape(4, 4, 3)
y_val = tf.SparseTensorValue([[0, 0, 0], [2, 3, 0], [3, 1, 0]], [1, 2, 3], [4, 4, 1])
res_dense_val, res_dense2_val = sess.run((res_dense, res_dense2),
feed_dict={x: x_val, y: y_val})
print(np.allclose(res_dense_val, res_dense2_val))
# True

tf.reshape is not working in the cases where you are adding an extra dimension

According to the tensorflow website, tf.reshape takes a tensor of a certain shape and maps it to a tensor of another shape. I want to map a tensor of size [600, 64] to a tensor of size [-1, 8, 8, 1] (in which the dimension at the -1 position is 600). This doesn't seem to be working though.
I am running this on tensorflow on python 3.6 and although it reshapes to something like [-1, 8, 8], it doesn't reshape to [-1, 8, 8, 1]
import tensorflow as tf
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import LabelBinarizer
# preprocessing method needed
def flatten(array):
temp = []
for j in array:
temp.extend(j)
return temp
# preprocess the data
digits = datasets.load_digits()
images = digits.images
images = [flatten(i) for i in images]
labels = digits.target
labels = LabelBinarizer().fit_transform(labels)
# the stats needed
width = 8
height = 8
alpha = 0.1
num_labels = 10
kernel_length = 3
batch_size = 10
channels = 1
# the tensorflow placeholders and reshaping
X = tf.placeholder(tf.float32, shape = [None, width * height * channels])
# AND NOW HERE IS WHERE THE ERROR STARTS
y_true = tf.placeholder(tf.float32, shape = [None, num_labels])
X = tf.reshape(X, [-1, 8, 8, 1])
# the convolutional model
conv1 = tf.layers.conv2d(X, filters = 32, kernel_size = [kernel_length, kernel_length])
conv2 = tf.layers.conv2d(conv1, filters = 64, kernel_size = [2, 2])
flatten = tf.reshape(X, [-1, 1])
dense1 = tf.layers.dense(flatten, units=50, activation = tf.nn.relu)
y_pred = tf.layers.dense(dense1, units=num_labels, activation = tf.nn.softmax)
# the loss and training functions
loss = tf.losses.mean_squared_error(labels=y_true, predictions=y_pred)
train = tf.train.GradientDescentOptimizer(alpha).minimize(loss)
# initializing the variables and the tf.session
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
# running the session
for i in range(batch_size):
_, lossVal = sess.run((train, loss), feed_dict = {X:images[:600], y_true: labels[:600]})
print(lossVal)
I keep on getting this error:
ValueError: Cannot feed value of shape (600, 64) for Tensor 'Reshape:0', which has shape '(?, 8, 8, 1)'
And I feel like that should not be the case since 8 * 8 * 1 does equal 64.
images[:600]'s shape is (600, 64), which does not correspond to the placeholder expected shape, (None, 8, 8, 1).
Either reshape your data or change the shape of the placeholder.
Note that the fact that you originally defined the placeholder shape to be (None, 64) is inconsequential as you reshape it a few lines later.

Tensorflow RNN output tensor shapes

I'm quite new to tensorflow and I can't quite get how to shape tensors so that I would get the output as a single number. Basically, my recurrent network should guess the next number. Instead, with each prediction it returns me a list with five numbers? I guess either one or more of my tensors are misshaped.
My input data is formatted to be around 2000 lists with 5 features each like this:
[
np.array ([
[1],[2],[3],[4],[5]
])
]
This is the code for the RNN:
cell_units = 400
batch_size = 5
no_of_epochs = 500
data = tf.placeholder (tf.float32, [None, 5, 1])
target = tf.placeholder (tf.float32, [None, 1, 1])
weight = tf.Variable (tf.random_normal ([cell_units, 5, 1]))
bias = tf.Variable (tf.random_normal([1, 1]))
cell = tf.contrib.rnn.BasicRNNCell (num_units = cell_units)
output, states = tf.nn.dynamic_rnn (cell, data, dtype=tf.float32)
output = tf.transpose (output, [1, 0, 2])
activation = tf.matmul (output, weight) + bias
cost = tf.reduce_mean (
(
tf.log (tf.square (activation - target))
)
)
optimizer = tf.train.AdamOptimizer (learning_rate = 0.01).minimize(cost)
with tf.Session () as sess:
sess.run (tf.global_variables_initializer ())
no_of_batches = int (len (train_x) / batch_size)
for i in range(no_of_epochs):
start = 0
for j in range(no_of_batches):
inp = train_x [start:start+batch_size]
out = train_y [start:start+batch_size]
start += batch_size
sess.run (optimizer, {data: inp, target: out})
tf.nn.dynamic_rnn expects inputs of shape [batch_size, max_time, ...]. In your example batch_size is dynamic (i.e., unknown) and max_time is 5 (i.e., number of time steps.). Naturally RNN's output contains 5 entries, one per input step: [None, 5, cell_units].
As #Ishant Mrinal suggested you can select the last output step.
weight = tf.Variable (tf.random_normal ([cell_units, 1]))
bias = tf.Variable (tf.random_normal([1, 1]))
cell = tf.contrib.rnn.BasicRNNCell (num_units = cell_units)
output, states = tf.nn.dynamic_rnn (cell, data, dtype=tf.float32)
# Get the last step (4th index).
output = tf.squeeze(tf.transpose (output, [0, 2, 1])[:,:,4]) # Shape of [batch_size, cell_units].
activation = tf.matmul (output, weight) + bias
activation has shape of [batch_size, 1].

Data Structure Discrepancy in Tensorflow/TFLearn

I have two datasets, which is like:
input:
array([[[ 0.99309823],
...
[ 0. ]]])
shape : (1, 2501)
output:
array([[0, 0, 0, ..., 0, 0, 1],
...,
[0, 0, 0, ..., 0, 0, 0]])
shape : (2501, 9)
And I processed it with TFLearn; as
input_layer = tflearn.input_data(shape=[None,2501])
hidden1 = tflearn.fully_connected(input_layer,1205,activation='ReLU', regularizer='L2', weight_decay=0.001)
dropout1 = tflearn.dropout(hidden1,0.8)
hidden2 = tflearn.fully_connected(dropout1,1205,activation='ReLU', regularizer='L2', weight_decay=0.001)
dropout2 = tflearn.dropout(hidden2,0.8)
softmax = tflearn.fully_connected(dropout2,9,activation='softmax')
# Regression with SGD
sgd = tflearn.SGD(learning_rate=0.1,lr_decay=0.96, decay_step=1000)
top_k=tflearn.metrics.Top_k(3)
net = tflearn.regression(softmax,optimizer=sgd,metric=top_k,loss='categorical_crossentropy')
model = tflearn.DNN(net)
model.fit(input,output,n_epoch=10,show_metric=True, run_id='dense_model')
It works but not the way that I want. It's a DNN model. I want that when I enter 0.95, model must give me corresponding prediction for example [0,0,0,0,0,0,0,0,1]. However, when I want to enter 0.95, it says that,
ValueError: Cannot feed value of shape (1,) for Tensor 'InputData/X:0', which has shape '(?, 2501)'
When I tried to understand I realise that I need (1,2501) shaped data to predict for my wrong based model.
What i want is for every element in input, predict corresponding element in output. As you can see, in the instance dataset,
for [0.99309823], corresponding output is [0,0,0,0,0,0,0,0,1]. I want tflearn to train itself like this.
I may have wrong structured data, or model(probably dataset), I explained all the things, I need help I'm really out of my mind.
Your input data should be Nx1 (N = number of samples) dimensional to archive this transformation ([0.99309823] --> [0,0,0,0,0,0,0,0,1] ). According to your input data shape, it looks more likely including 1 sample with 2501 dimensions.
ValueError: Cannot feed value of shape (1,) for Tensor 'InputData/X:0', which has shape '(?, 2501)' This error means that tensorflow expecting you to provide a vector with shape (,2501), but you are feeding the network with a vector with shape (1,).
Example modified code with dummy data:
import numpy as np
import tflearn
#creating dummy data
input_data = np.random.rand(1, 2501)
input_data = np.transpose(input_data) # now shape is (2501,1)
output_data = np.random.randint(8, size=2501)
n_values = 9
output_data = np.eye(n_values)[output_data]
# checking the shapes
print input_data.shape #(2501,1)
print output_data.shape #(2501,9)
input_layer = tflearn.input_data(shape=[None,1]) # now network is expecting ( Nx1 )
hidden1 = tflearn.fully_connected(input_layer,1205,activation='ReLU', regularizer='L2', weight_decay=0.001)
dropout1 = tflearn.dropout(hidden1,0.8)
hidden2 = tflearn.fully_connected(dropout1,1205,activation='ReLU', regularizer='L2', weight_decay=0.001)
dropout2 = tflearn.dropout(hidden2,0.8)
softmax = tflearn.fully_connected(dropout2,9,activation='softmax')
# Regression with SGD
sgd = tflearn.SGD(learning_rate=0.1,lr_decay=0.96, decay_step=1000)
top_k=tflearn.metrics.Top_k(3)
net = tflearn.regression(softmax,optimizer=sgd,metric=top_k,loss='categorical_crossentropy')
model = tflearn.DNN(net)
model.fit(input_data, output_data, n_epoch=10,show_metric=True, run_id='dense_model')
Also my friend warned me about same thing as rcmalli. He says
reshape:
input = tf.reshape(input, (2501,1))
change
input_layer = tflearn.input_data(shape=[None,2501])
to
input_layer = tflearn.input_data(shape=[None, 1])
Variable dimension must be "None". In your wrong case, 2501 is the magnitude(or something else, I translated from another lang., but you got it) of your dataset. 1 is constant input magnitude.

How to multiply list of tensors by single tensor on TensorFlow?

I am implementing an RNN and contrarily to the examples I have found which minimize only the cost for the output in the last step
x = tf.placeholder ("float", [features_dimension, None, n_timesteps])
y = tf.placeholder ("float", [labels_dimension, None, n_timesteps])
# Define weights
weights = {'out': tf.Variable (tf.random_normal ([N_HIDDEN, labels_dimension]))}
biases = {'out': tf.Variable (tf.random_normal ([labels_dimension]))}
def RNN (x, weights, biases):
# Prepare data shape to match `rnn` function requirements
# Current data input shape: (features_dimension, BATCH_SIZE, n_timesteps)
# Required shape: `n_timesteps` tensors list of shape (BATCH_SIZE, features_dimension)
# We make a division of the data to split it in individual vectors that
# will be fed for each timestep
# Permuting features_dimension and n_timesteps
# Shape will be (n_timesteps, BATCH_SIZE, features_dimension)
x = tf.transpose (x, [2, 1, 0])
# Reshaping to (BATCH_SIZE*n_timesteps, features_dimension) (we are removing the depth dimension with this)
x = tf.reshape(x, [BATCH_SIZE*n_timesteps, features_dimension])
# Split the previous 2D tensor to get a list of `n_timesteps` tensors of
# shape (batch_size, features_dimension).
x = tf.split (x, n_timesteps, 0)
# Define a lstm cell with tensorflow
lstm_cell = rnn.BasicLSTMCell (N_HIDDEN, forget_bias=1.0)
# Get lstm cell output
outputs, states = rnn.static_rnn (lstm_cell, x, dtype=tf.float32)
# Linear activation; outputs contains the array of outputs for all the
# timesteps
pred = tf.matmul (outputs, weights['out']) + biases['out']
However, the object outputs is a list of Tensor with n_timesteps elements, so the pred = tf.matmul (outputs, weights['out']) + biases['out'] throws the error
ValueError: Shape must be rank 2 but is rank 3 for 'MatMul' (op:
'MatMul') with input shapes: [100,128,16], [16,1].
. How can I do this multiplication?
The solution is to tf.stack the list of tensors into a 3d tensor and then use tf.map_fn to apply the multiplication operation on each 2d tensor along dimension 0:
# Transform the list into a 3D tensor with dimensions (n_timesteps, batch_size, N_HIDDEN)
outputs = tf.stack(outputs)
def pred_fn(current_output):
return tf.matmul(current_output, weights['out']) + biases['out']
# Use tf.map_fn to apply pred_fn to each tensor in outputs, along dimension 0 (timestep dimension)
pred = tf.map_fn(pred_fn, outputs)

Categories

Resources