Keras ctc_decode shape must be rank 1 but is rank 2 - python

I am implementing an OCR with Keras and the TensorFlow backend.
I want to use the keras.backend.ctc_decode implementation.
I have a model class:
import keras

def ctc_lambda_func(args):
    y_pred, y_true, input_x_width, input_y_width = args
    # the 2 is critical here since the first couple outputs of the RNN
    # tend to be garbage:
    # y_pred = y_pred[:, 2:, :]
    return keras.backend.ctc_batch_cost(y_true, y_pred, input_x_width, input_y_width)

class ModelOcropy(keras.Model):
    def __init__(self, alphabet: str):
        self.img_height = 48
        self.lstm_size = 100
        self.alphabet_size = len(alphabet)
        # check backend input shape (channels first/last)
        if keras.backend.image_data_format() == "channels_first":
            input_shape = (1, None, self.img_height)
        else:
            input_shape = (None, self.img_height, 1)
        # data input
        input_x = keras.layers.Input(input_shape, name='x')
        # training inputs
        input_y = keras.layers.Input((None,), name='y')
        input_x_widths = keras.layers.Input([1], name='x_widths')
        input_y_widths = keras.layers.Input([1], name='y_widths')
        # network
        flattened_input_x = keras.layers.Reshape((-1, self.img_height))(input_x)
        bidirectional_lstm = keras.layers.Bidirectional(
            keras.layers.LSTM(self.lstm_size, return_sequences=True, name='lstm'),
            name='bidirectional_lstm'
        )(flattened_input_x)
        dense = keras.layers.Dense(self.alphabet_size, activation='relu')(bidirectional_lstm)
        y_pred = keras.layers.Softmax(name='y_pred')(dense)
        # ctc loss
        ctc = keras.layers.Lambda(ctc_lambda_func, output_shape=[1], name='ctc')(
            [dense, input_y, input_x_widths, input_y_widths]
        )
        # init keras model
        super().__init__(inputs=[input_x, input_x_widths, input_y, input_y_widths], outputs=[y_pred, ctc])
        # ctc decoder
        top_k_decoded, _ = keras.backend.ctc_decode(y_pred, input_x_widths)
        self.decoder = keras.backend.function([input_x, input_x_widths], [top_k_decoded[0]])
        # decoded_sequences = self.decoder([test_input_data, test_input_lengths])
My use of ctc_decode comes from another post: Keras using Lambda layers error with K.ctc_decode
I get an error:
ValueError: Shape must be rank 1 but is rank 2 for 'CTCGreedyDecoder' (op: 'CTCGreedyDecoder') with input shapes: [?,?,7], [?,1].
I guess I have to squeeze my input_x_widths, but Keras does not seem to have such a function (it always outputs something like (batch_size, 1)).

Indeed, the function is expecting a 1D tensor, and you've got a 2D tensor.
Keras does have the keras.backend.squeeze(x, axis=-1) function.
And you can also use keras.backend.reshape(x, (-1,))
If you need to go back to the old shape after the operation, you can use either:
keras.backend.expand_dims(x)
keras.backend.reshape(x, (-1, 1))
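A minimal sketch of that shape round trip (the tensor name here is just illustrative):

import keras
import keras.backend as K

widths = keras.layers.Input([1], name='widths')  # rank 2: (batch_size, 1)
flat = K.squeeze(widths, axis=-1)                # rank 1: (batch_size,)
# ... pass `flat` wherever a 1D tensor is expected, e.g. ctc_decode ...
restored = K.expand_dims(flat)                   # rank 2 again: (batch_size, 1)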

Complete fix:
# ctc decoder
flattened_input_x_width = keras.backend.reshape(input_x_widths, (-1,))
top_k_decoded, _ = keras.backend.ctc_decode(y_pred, flattened_input_x_width)
self.decoder = keras.backend.function([input_x, flattened_input_x_width], [top_k_decoded[0]])
# decoded_sequences = self.decoder([input_x, flattened_input_x_width])
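An untested usage sketch (the arrays and the alphabet are hypothetical; note that the widths are fed already flattened, with shape (batch_size,)):

import numpy as np

model = ModelOcropy("abcdefg")  # hypothetical alphabet

# hypothetical batch: 4 images, 100 frames wide, 48 px high, 1 channel
test_images = np.random.rand(4, 100, 48, 1).astype("float32")
test_widths = np.full((4,), 100, dtype="int32")  # already rank 1

decoded_sequences = model.decoder([test_images, test_widths])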

Related

How to save model properly to continue the training of VAE in keras

I have built a VAE in Keras using the functional API. The VAE has 3 models:
encoder
def _create_encoder(self):
    # create convolutional layers for encoder
    X = self.images
    for i in range(len(self.encoder_filters)):
        X = self._create_conv_layer(X,
                                    "Conv2D",
                                    self.encoder_filters[i],
                                    self.encoder_kernel_size[i],
                                    self.encoder_strides[i],
                                    self.encoder_padding[i],
                                    "encoder_conv" + str(i)
                                    )
    # keep track of tensor shape before flattening (we will need this to build the decoder)
    encoder_shape_before_flattening = K.int_shape(X)[1:]
    # flatten the tensor
    X = Flatten()(X)
    # create dense layers for mu and sigma
    self.encoder_mu = Dense(units=self.latent_space_size, name='encoder_mu')(X)
    self.encoder_log_var = Dense(units=self.latent_space_size, name='encoder_log_var')(X)
    self.encoder_parameters = Model(self.images, (self.encoder_mu, self.encoder_log_var))
    # create encoder output by sampling from normal distribution
    self.encoder_output = Lambda(self.sample_latent_space, name="encoder_output")([self.encoder_mu, self.encoder_log_var])
    self.encoder = Model(inputs=self.images, outputs=self.encoder_output)
    return encoder_shape_before_flattening
decoder
def _create_decoder(self, encoder_shape_before_flattening):
    X = Dense(np.prod(encoder_shape_before_flattening))(self.decoder_input)
    X = Reshape(encoder_shape_before_flattening)(X)
    # create convolutional layers for decoder
    for i in range(len(self.decoder_filters)):
        is_not_last_layer = i < len(self.decoder_filters) - 1
        X = self._create_conv_layer(X,
                                    "Conv2DTranspose",
                                    self.decoder_filters[i],
                                    self.decoder_kernel_size[i],
                                    self.decoder_strides[i],
                                    self.decoder_padding[i],
                                    "decoder_conv" + str(i),
                                    batch_norm=is_not_last_layer,
                                    dropout=is_not_last_layer,
                                    activation=is_not_last_layer
                                    )
    # output values should be between 0 and 1
    self.decoder_output = Activation("sigmoid")(X)
    self.decoder = Model(inputs=self.decoder_input, outputs=self.decoder_output)
the whole model
def _create_model(self):
    self.images = Input(shape=self.input_dims, name="images")
    # create encoder as separate model
    encoder_shape_before_flattening = self._create_encoder()
    # create decoder as separate model
    self.decoder_input = Input(shape=(self.latent_space_size,), name="decoder_input")
    self._create_decoder(encoder_shape_before_flattening)
    # create unique model
    self.model = Model(inputs=self.images, outputs=self.decoder(self.encoder_output))
I am using the ModelCheckpoint callback to save the whole model after every epoch.
checkpoint_model = ModelCheckpoint(os.path.join(save_path, "model.h5"), verbose=1)
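For context, here is roughly how I wire the callback into training (train_generator and EPOCHS stand in for my actual generator and epoch count):

self.model.fit_generator(train_generator,
                         epochs=EPOCHS,
                         callbacks=[checkpoint_model])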
But when I load the model with load_model
def load_trained_model(self, load_path, r_loss_factor):
    self.model = load_model(os.path.join(load_path, "model.h5"),
                            custom_objects={"loss": self.penalized_loss(r_loss_factor),
                                            "sample_latent_space": self.sample_latent_space})
and call fit_generator again to continue the training, I get the following error:
InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: You must feed a value for placeholder tensor 'images' with dtype float and shape [?,128,128,3]
[[{{node images}}]]
[[metrics_1/loss_1/Identity/_1971]]
(1) Invalid argument: You must feed a value for placeholder tensor 'images' with dtype float and shape [?,128,128,3]
[[{{node images}}]]
The code can be found here.

Keras custom layer to Conv2D input channels error, ValueError: number of input channels does not match corresponding dimension of filter, 50 != 3200

I am trying to create a model with a normalized cross-correlation custom layer, with the code taken from here:
from keras import backend as K
from keras.layers import Conv2D, MaxPooling2D, Dense, Input, Flatten
from keras.models import Model, Sequential
from keras.engine import InputSpec, Layer
from keras import regularizers
from keras.optimizers import SGD, Adam
from keras.utils.conv_utils import conv_output_length
from keras import activations
import numpy as np
class Normalized_Correlation_Layer(Layer):
    # a class inherited from keras.engine.Layer
    def __init__(self, patch_size=(5, 5),
                 dim_ordering='tf',
                 border_mode='same',
                 stride=(1, 1),
                 activation=None,
                 **kwargs):
        if border_mode != 'same':
            raise ValueError('Invalid border mode for Correlation Layer '
                             '(only "same" is supported as of now):', border_mode)
        self.kernel_size = patch_size
        self.subsample = stride
        self.dim_ordering = dim_ordering
        self.border_mode = border_mode
        self.activation = activations.get(activation)
        super(Normalized_Correlation_Layer, self).__init__(**kwargs)

    def compute_output_shape(self, input_shape):
        return (input_shape[0][0], input_shape[0][1], input_shape[0][2],
                self.kernel_size[0] * input_shape[0][2] * input_shape[0][-1])

    def get_config(self):
        config = {'patch_size': self.kernel_size,
                  'activation': self.activation.__name__,
                  'border_mode': self.border_mode,
                  'stride': self.subsample,
                  'dim_ordering': self.dim_ordering}
        base_config = super(Normalized_Correlation_Layer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def call(self, x, mask=None):
        input_1, input_2 = x
        stride_row, stride_col = self.subsample
        inp_shape = input_1._keras_shape
        output_shape = self.compute_output_shape([inp_shape, inp_shape])
        padding_row = (int(self.kernel_size[0] / 2), int(self.kernel_size[0] / 2))
        padding_col = (int(self.kernel_size[1] / 2), int(self.kernel_size[1] / 2))
        input_1 = K.spatial_2d_padding(input_1, padding=(padding_row, padding_col))
        input_2 = K.spatial_2d_padding(input_2, padding=((padding_row[0] * 2, padding_row[1] * 2), padding_col))
        output_row = output_shape[1]
        output_col = output_shape[2]
        output = []
        for k in range(inp_shape[-1]):
            xc_1 = []
            xc_2 = []
            for i in range(padding_row[0]):
                for j in range(output_col):
                    xc_2.append(K.reshape(input_2[:, i:i + self.kernel_size[0], j:j + self.kernel_size[1], k],
                                          (-1, 1, self.kernel_size[0] * self.kernel_size[1])))
            for i in range(output_row):
                slice_row = slice(i, i + self.kernel_size[0])
                slice_row2 = slice(i + padding_row[0], i + self.kernel_size[0] + padding_row[0])
                for j in range(output_col):
                    slice_col = slice(j, j + self.kernel_size[1])
                    xc_2.append(K.reshape(input_2[:, slice_row2, slice_col, k],
                                          (-1, 1, self.kernel_size[0] * self.kernel_size[1])))
                    xc_1.append(K.reshape(input_1[:, slice_row, slice_col, k],
                                          (-1, 1, self.kernel_size[0] * self.kernel_size[1])))
            for i in range(output_row, output_row + padding_row[1]):
                for j in range(output_col):
                    xc_2.append(K.reshape(input_2[:, i:i + self.kernel_size[0], j:j + self.kernel_size[1], k],
                                          (-1, 1, self.kernel_size[0] * self.kernel_size[1])))
            xc_1_aggregate = K.concatenate(xc_1, axis=1)
            xc_1_mean = K.mean(xc_1_aggregate, axis=-1, keepdims=True)
            xc_1_std = K.std(xc_1_aggregate, axis=-1, keepdims=True)
            xc_1_aggregate = (xc_1_aggregate - xc_1_mean) / xc_1_std
            xc_2_aggregate = K.concatenate(xc_2, axis=1)
            xc_2_mean = K.mean(xc_2_aggregate, axis=-1, keepdims=True)
            xc_2_std = K.std(xc_2_aggregate, axis=-1, keepdims=True)
            xc_2_aggregate = (xc_2_aggregate - xc_2_mean) / xc_2_std
            xc_1_aggregate = K.permute_dimensions(xc_1_aggregate, (0, 2, 1))
            block = []
            len_xc_1 = len(xc_1)
            for i in range(len_xc_1):
                # This loop computes the product of a given patch of feature map 1
                # and the patches of feature map 2 it is supposed to be correlated with.
                sl1 = slice(int(i / inp_shape[2]) * inp_shape[2],
                            int(i / inp_shape[2]) * inp_shape[2] + inp_shape[2] * self.kernel_size[0])
                # sl1 selects which patches of feature map 2 are considered
                # for a given patch of the first feature map.
                block.append(K.reshape(K.batch_dot(xc_2_aggregate[:, sl1, :],
                                                   xc_1_aggregate[:, :, i]),
                                       (-1, 1, 1, inp_shape[2] * self.kernel_size[0])))
            block = K.concatenate(block, axis=1)
            block = K.reshape(block, (-1, output_row, output_col, inp_shape[2] * self.kernel_size[0]))
            output.append(block)
        output = self.activation(output)
        print(output)
        return output
My model is a combination of cross-correlation and Conv2D layers:
import keras  # the model code below uses the keras.* namespace

dt = 'float32'

def create_model():
    ip = keras.layers.Input((50, 50, 1))
    ncx1_1 = Normalized_Correlation_Layer(patch_size=(1, 1))([ip, ip])
    ncn1_1 = keras.layers.Conv2D(64, (1, 1), activation='relu', dtype=dt)(ip)
    ncn2_1 = keras.layers.Conv2D(64, (1, 1), activation='relu', dtype=dt)(ncx1_1)
    ncx2_1 = Normalized_Correlation_Layer(patch_size=(1, 1), dtype=dt)([ncn1_1, ncn2_1])
    # ncx2_1 = keras.layers.Reshape((50, 50, 3200))(ncx2_1)
    # Problem occurs here
    ncn3 = keras.layers.Conv2D(filters=64, kernel_size=(1, 1), activation='relu', dtype=dt)(ncx2_1)
    ncn4 = keras.layers.Conv2D(12, (1, 1), activation='sigmoid', dtype=dt)(ncn3)
    model = keras.models.Model(ip, ncn4)
    return model
The model up to the last cross-correlation layer is created successfully, but I get a problem with the ncn3 layer:
ValueError: number of input channels does not match corresponding dimension of filter, 50 != 3200
The output shape of the ncx2_1 layer is printed as (?, 50, 50, 50) when I print ncx2_1.shape, and so are the outputs returned from the call function of the layer class ([<tf.Tensor 'normalized__correlation__layer_4/Reshape_10000:0' shape=(?, 50, 50, 50) dtype=float32>]).
But the model summary shows it as (?, 50, 50, 3200) when I create the model only up to that layer, i.e. model = keras.models.Model(ip, ncx2_1).
When I reshape the layer using ncx2_1 = keras.layers.Reshape((50, 50, 3200))(ncx2_1), I can create the model successfully, but when I try to fit the data on it, I get:
InvalidArgumentError: Input to reshape is a tensor with 6250000 values, but the requested shape has 400000000
[[node reshape_1/Reshape (defined at /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:1781) ]]
[[node loss/mul (defined at /usr/local/lib/python3.6/dist-packages/keras/engine/training.py:865) ]]
Here, my batch size is 50, so for a layer with (B, H, W, C) inputs of (50, 50, 50, 50) the size should be 6,250,000, but for (50, 50, 50, 3200) it should be 400,000,000, which means that the output of the cross-correlation layer actually has 50 channels.
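A quick check of that arithmetic:

>>> 50 * 50 * 50 * 50     # (B, H, W, C) = (50, 50, 50, 50)
6250000
>>> 50 * 50 * 50 * 3200   # (B, H, W, C) = (50, 50, 50, 3200)
400000000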
I am either interpreting this wrongly, or I have made a mistake somewhere, and I would like to know which.
I am using Keras 2.1.2 with TensorFlow 1.13.1 (that was the version the custom layer was written for; I was getting other problems with the latest version).
I am also using a custom generator, if that is needed info, and calling fit using md.fit_generator(train_gen, verbose=1). I can add any other necessary detail.

ValueError: `decode_predictions` expects a batch of predictions (i.e. a 2D array of shape (samples, 1000)). Found array with shape: (1, 7)

I am using VGG16 with Keras for transfer learning (I have 7 classes in my new model), and as such I want to use the built-in decode_predictions method to output the predictions of my model. However, using the following code:
preds = model.predict(img)
decode_predictions(preds, top=3)[0]
I receive the following error message:
ValueError: decode_predictions expects a batch of predictions (i.e. a 2D array of shape (samples, 1000)). Found array with shape: (1, 7)
Now I wonder why it expects 1000 classes when I only have 7 in my retrained model.
A similar question I found here on Stack Overflow (Keras: ValueError: decode_predictions expects a batch of predictions) suggests including include_top=True in the model definition to solve this problem:
model = VGG16(weights='imagenet', include_top=True)
I have tried this; however, it is still not working, giving me the same error as before. Any hint or suggestion on how to solve this issue is highly appreciated.
I suspect you are using some pre-trained model, say ResNet50, and importing decode_predictions like this:
from keras.applications.resnet50 import decode_predictions
decode_predictions transforms an array of (num_samples, 1000) probabilities into the class names of the original ImageNet classes.
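For contrast, on an unmodified 1000-class ImageNet model, decode_predictions works as intended (x here is assumed to be a preprocessed batch of shape (1, 224, 224, 3)):

from keras.applications.resnet50 import ResNet50, decode_predictions

model = ResNet50(weights='imagenet')        # include_top=True by default: 1000 classes
preds = model.predict(x)                    # preds.shape == (1, 1000)
print(decode_predictions(preds, top=3)[0])  # [(class_id, class_name, probability), ...]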
If you want to do transfer learning and classify between 7 different classes, you need to do it like this:
base_model = ResNet50(weights='imagenet', include_top=False)
# add a global spatial average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)
# add a fully-connected layer
x = Dense(1024, activation='relu')(x)
# and a logistic layer -- let's say we have 7 classes
predictions = Dense(7, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)
...
After fitting the model and computing predictions, you have to manually assign the class names to the output numbers, without using the imported decode_predictions.
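A minimal sketch of that manual mapping (class_names is a hypothetical list of your 7 labels, in training order):

import numpy as np

class_names = ['class_0', 'class_1', 'class_2', 'class_3',
               'class_4', 'class_5', 'class_6']  # hypothetical labels

preds = model.predict(img)               # shape (1, 7)
top3 = np.argsort(preds[0])[-3:][::-1]   # indices of the 3 highest probabilities
for i in top3:
    print(class_names[i], preds[0][i])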
Another option is overloading the decode_predictions function: comment out the 1000-class constraint of the original:
import json
# CLASS_INDEX_PATH and get_submodules_from_kwargs come from keras_applications
# (there, the @keras_modules_injection decorator supplies the backend/utils submodules)
from keras_applications import get_submodules_from_kwargs
from keras_applications.imagenet_utils import CLASS_INDEX_PATH

CLASS_INDEX = None

# @keras_modules_injection
def test_my_decode_predictions(*args, **kwargs):
    return my_decode_predictions(*args, **kwargs)

def my_decode_predictions(preds, top=5, **kwargs):
    global CLASS_INDEX
    backend, _, _, keras_utils = get_submodules_from_kwargs(kwargs)
    # if len(preds.shape) != 2 or preds.shape[1] != 1000:
    #     raise ValueError('`decode_predictions` expects '
    #                      'a batch of predictions '
    #                      '(i.e. a 2D array of shape (samples, 1000)). '
    #                      'Found array with shape: ' + str(preds.shape))
    if CLASS_INDEX is None:
        fpath = keras_utils.get_file(
            'imagenet_class_index.json',
            CLASS_INDEX_PATH,
            cache_subdir='models',
            file_hash='c2c37ea517e94d9795004a39431a14cb')
        with open(fpath) as f:
            CLASS_INDEX = json.load(f)
    results = []
    for pred in preds:
        top_indices = pred.argsort()[-top:][::-1]
        result = [tuple(CLASS_INDEX[str(i)]) + (pred[i],) for i in top_indices]
        result.sort(key=lambda x: x[2], reverse=True)
        results.append(result)
    return results

print('Predicted: ', test_my_decode_predictions(pred, top=10))

How to use TensorFlow Dataset API in combination with dense layers

I am trying out the Dataset API for my input pipeline, as shown in the TensorFlow documentation, and I use almost the same code:
tr_data = Dataset.from_tensor_slices((train_images, train_labels))
tr_data = tr_data.map(input_parser, NUM_CORES, output_buffer_size=2000)
tr_data = tr_data.batch(BATCH_SIZE)
tr_data = tr_data.repeat(EPOCHS)

iterator = tr_data.make_one_shot_iterator()
next_example, next_label = iterator.get_next()

# Script throws error here
loss = model_function(next_example, next_label)

with tf.Session(...) as sess:
    sess.run(tf.global_variables_initializer())
    while True:
        try:
            train_loss = sess.run(loss)
        except tf.errors.OutOfRangeError:
            print("End of training dataset.")
            break
This should be faster since it avoids using slow feed_dicts. But I can't make it work with my model, which is a simplified LeNet architecture. The problem is the tf.layers.dense in my model_function(), which expects a known input shape (I guess because it has to know the number of weights beforehand). But next_example and next_label only get their shape by running them in the session; before evaluating them, their shape is just undefined.
Declaring the model_function() throws this error:
ValueError: The last dimension of the inputs to Dense should be defined. Found None.
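To illustrate, a minimal standalone reproduction of that constraint (separate from my model):

import tensorflow as tf

x_bad = tf.placeholder(tf.float32, [None, None])  # last dimension unknown
# tf.layers.dense(x_bad, 10)   # ValueError: The last dimension of the inputs
#                              # to Dense should be defined. Found None.

x_ok = tf.placeholder(tf.float32, [None, 7])      # last dimension known
y = tf.layers.dense(x_ok, 10)                     # builds fine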
Right now, I don't know if I am using this Dataset API in the intended way or if there is a workaround.
Thanks in advance!
Edit 1:
Below is my model and it throws the error at the first dense layer
def conv_relu(input, kernel_shape):
    # Create variable named "weights".
    weights = tf.get_variable("weights", kernel_shape,
                              initializer=tf.random_normal_initializer())
    # Create variable named "biases".
    biases = tf.get_variable("biases", kernel_shape[3],
                             initializer=tf.constant_initializer(0.0))
    conv = tf.nn.conv2d(input, weights,
                        strides=[1, 1, 1, 1], padding='VALID')
    return tf.nn.relu(conv + biases)

def fully(input, output_dim):
    assert len(input.get_shape()) == 2, 'Wrong input shape, need flattened tensor as input'
    input_dim = input.get_shape()[1]
    weight = tf.get_variable("weight", [input_dim, output_dim],
                             initializer=tf.random_normal_initializer())
    bias = tf.get_variable('bias', [output_dim],
                           initializer=tf.random_normal_initializer())
    fully = tf.nn.bias_add(tf.matmul(input, weight), bias)
    return fully

def simple_model(x):
    with tf.variable_scope('conv1'):
        conv1 = conv_relu(x, [3, 3, 1, 10])
        conv1 = tf.nn.max_pool(conv1, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')
    with tf.variable_scope('conv2'):
        conv2 = conv_relu(conv1, [3, 3, 10, 10])
        conv2 = tf.nn.max_pool(conv2, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')
    with tf.variable_scope('conv3'):
        conv3 = conv_relu(conv2, [3, 3, 10, 10])
        conv3 = tf.nn.max_pool(conv3, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')
    flat = tf.contrib.layers.flatten(conv3)
    with tf.variable_scope('fully1'):
        fully1 = tf.layers.dense(flat, 1000)
        fully1 = tf.nn.relu(fully1)
    with tf.variable_scope('fully2'):
        fully2 = tf.layers.dense(fully1, 100)
        fully2 = tf.nn.relu(fully2)
    with tf.variable_scope('output'):
        output = tf.layers.dense(fully2, 4)
        fully1 = tf.nn.relu(output)
    return output
Edit 2:
Here you see the print of the tensors. Notice that next_example does not have a shape:
next_example: Tensor("IteratorGetNext:0", dtype=float32)
next_label: Tensor("IteratorGetNext:1", shape=(?, 4), dtype=float32)
I found the answer myself.
Following this thread, the easy fix is to just set the shape with tf.Tensor.set_shape if you know your image sizes beforehand. (Note that set_shape only attaches static shape information for graph construction; unlike tf.reshape, it does not change the data.)
def input_parser(img_path, label):
    # read the img from file
    img_file = tf.read_file(img_path)
    img_decoded = tf.image.decode_image(img_file, channels=1)
    img_decoded = tf.image.convert_image_dtype(img_decoded, dtype=tf.float32)
    img_decoded.set_shape([90, 160, 1])  # This line was missing
    return img_decoded, label
It would have been nice if the TensorFlow documentation included this line.

Keras deep variational autoencoder

I'm trying to adapt the Keras VAE example to a deep network by adding one more layer.
Original code: Original VAE code
CHANGES:
batch_size = 200
original_dim = 784
latent_dim = 2
intermediate_dim_deep = 384  # <<<<<<<
intermediate_dim = 256
nb_epoch = 20
#
x = Input(batch_shape=(batch_size, original_dim))
x = Dense(intermediate_dim_deep, activation='relu')(x)  # NEW LAYER <<<<<<
h = Dense(intermediate_dim, activation='relu')(x)
z_mean = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)
#
def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(batch_size, latent_dim), mean=0.)
    return z_mean + K.exp(z_log_var / 2) * epsilon

# note that "output_shape" isn't necessary with the TensorFlow backend
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])
#
# we instantiate these layers separately so as to reuse them later
decoder_h = Dense(intermediate_dim, activation='relu')
decoder_d = Dense(intermediate_dim_deep, activation='rely')  # NEW LAYER <<<<<<
decoder_mean = Dense(original_dim, activation='sigmoid')
h_decoded = decoder_h(z)
d_decoded = decoder_d(h_decoded)  # ADDED ONE MORE STEP HERE <<<<<<<
x_decoded_mean = decoder_mean(d_decoded)
#
def vae_loss(x, x_decoded_mean):
    xent_loss = original_dim * objectives.binary_crossentropy(x, x_decoded_mean)
    kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    return xent_loss + kl_loss

#
vae = Model(x, x_decoded_mean)
vae.compile(optimizer='rmsprop', loss=vae_loss)
#####
Compiling, I get this error:
/usr/local/lib/python2.7/dist-packages/keras/engine/topology.py:1615: UserWarning: Model inputs must come from a Keras Input layer, they cannot be the output of a previous non-Input layer. Here, a tensor specified as input to "model_1" was not an Input tensor, it was generated by layer dense_1.
Note that input tensors are instantiated via `tensor = Input(shape)`.
The tensor that caused the issue was: None
str(x.name))
---------------------------------------------------------------------------
Exception Traceback (most recent call last)
<ipython-input-8-c9010948cdee> in <module>()
----> 1 vae = Model(x, x_decoded_mean)
2 vae.compile(optimizer='rmsprop', loss=vae_loss)
/usr/local/lib/python2.7/dist-packages/keras/engine/topology.pyc in __init__(self, input, output, name)
1788 'The following previous layers '
1789 'were accessed without issue: ' +
-> 1790 str(layers_with_complete_input))
1791 for x in node.output_tensors:
1792 computable_tensors.append(x)
Exception: Graph disconnected: cannot obtain value for tensor input_1 at layer "input_1". The following previous layers were accessed without issue: []
I have seen the other examples in the repo and this seems a valid way to do it.
Am I missing something?
When adding the new hidden layer, you're overwriting the x variable, so you're left without an Input layer. Also, is 'rely' a valid activation option?
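A sketch of the corrected wiring, per that diagnosis (only the renamed variable and the fixed activation change):

x = Input(batch_shape=(batch_size, original_dim))
h_deep = Dense(intermediate_dim_deep, activation='relu')(x)  # no longer overwrites x
h = Dense(intermediate_dim, activation='relu')(h_deep)
# ...
decoder_d = Dense(intermediate_dim_deep, activation='relu')  # 'rely' -> 'relu'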
