I'm trying to write my own recurrent network using LSTM Cells and I'm having trouble passing the state to LSTMCell. In my FeedBack class (based on tensorflow tutorial) there are following lines to initialize final layers of a model:
class FeedBack(tf.keras.Model):
def __init__(self, units, in_steps, out_steps, graph):
...
self.lstm_cell = tf.keras.layers.LSTMCell(units) # units = 128 in this example
# Also wrap the LSTMCell in an RNN to simplify the `warmup` method.
self.lstm_rnn = tf.keras.layers.RNN(self.lstm_cell, return_state=True)
self.dense = tf.keras.layers.Dense(1)
I've also defined a warmup method inside which the first prediction is made like so:
...
# merged.shape = (None, 24, 34)
x, *state = self.lstm_rnn(merged)
# x.shape =(None, 128)
# [x.shape for x in state] = [TensorShape([None, 128]), TensorShape([None, 128])]
prediction = self.dense(x)
And finally, inside the call method, I'm trying to predict the values with:
...
# merged.shape = (None, 24, 34)
# state remains untouched
x, state = self.lstm_cell(merged, states=state)
prediction = self.dense(x)
But this results in the following error:
ValueError: Dimensions must be equal, but are 512 and 128 for '{{node feed_back/lstm_cell/mul}} = Mul[T=DT_FLOAT](feed_back/lstm_cell/Sigmoid_1, feed_back/rnn/while/Identity_5)' with input shapes: [?,6,512], [?,128].
Where does [?,6,512] come from? And how can I fix this?
Related
I'm constructing an LSTM that has a fairly verbose layer construction thanks to an ongoing hyperparameter, so I wanted to wrap the creation of each layer in a function. However, the output behavior is not what I'm expecting.
This is my complete function (there are some helper functions I've left undefined that basically return functions from strings). I'm putting the hyperparameter suggestions for the first trial in comments next to the inputs to help you understand how the function is being called:
def _get_model(
    self,
    encoder_width: int, # 513
    decoder_width: int, # 513
    input_seq_len: int, # 12
    output_seq_len: int, # 1
    n_features_input: int, # x_train.shape[1]
    n_features_output: int, #y_train.shape[1]
    num_encoder_layers: int, #6
    num_decoder_layers: int, # 6
    dropout: float, #.25
    recurrent_dropout: float, #.25
    recurrent_regularizer: str,
    kernel_regularizer: str,
    activation: str,
    recurrent_activation: str,
) -> tf.keras.models.Model:
    """Configures the regression model.

    Stacks `num_encoder_layers` LSTM layers, a `RepeatVector`, then
    `num_decoder_layers` LSTM layers followed by a time-distributed Dense
    layer, and wraps the result in a `tf.keras.models.Model`.

    Args:
        encoder_width: number of hidden units on the encoder layer
        decoder_width: number of hidden units on the decoder layer
        input_seq_len: number of past sequential observations as input
        output_seq_len: number of future outputs to predict, generally 1
        n_features_input: number of input features/dims
        n_features_output: number of output features/dims
        num_encoder_layers: number of layers to use in encoder
        num_decoder_layers: number of layers to use in the decoder
        dropout: the proportion of nodes to dropout during training
        recurrent_dropout: proportion of nodes to dropout for recurrent state
        recurrent_regularizer: regularizer to put on the recurrent_kernel weights matrix
        kernel_regularizer: regularizer function applied to kernel weights matrix
        activation: activation function to use
        recurrent_activation: activation function to use for the recurrent step

    Returns:
        a keras LSTM model
    """
    tf.keras.backend.clear_session()
    # Resolve the string hyperparameters into Keras objects via the class helpers.
    recurrent_regularizer = self._get_keras_regularizer_from_str(recurrent_regularizer)
    recurrent_activation = self._get_keras_activation_from_str(recurrent_activation)
    kernel_regularizer = self._get_keras_regularizer_from_str(kernel_regularizer)
    activation = self._get_keras_activation_from_str(activation)
    # we define a local LSTM layer to keep the code tidy
    def LSTM_layer(x: tf.Tensor, width:int,
                   return_sequences: bool, initial_state=None):
        # A brand-new LSTM layer is created on every call; the shared
        # hyperparameters come from the enclosing function's scope.
        encoder_layer = tf.keras.layers.LSTM(
            width, return_state=True, return_sequences=return_sequences,
            dropout=dropout, recurrent_dropout=recurrent_dropout,
            recurrent_regularizer=recurrent_regularizer,
            kernel_regularizer=kernel_regularizer,
            activation=activation, recurrent_activation=recurrent_activation)
        x, state_h, state_c = encoder_layer(x, initial_state=initial_state)
        encoder_states = [state_h, state_c]
        return x, encoder_states
    # Symbolic input tensor of shape (input_seq_len, n_features_input) per sample.
    x = tf.keras.layers.Input(
        shape=(input_seq_len, n_features_input))
    if num_encoder_layers==1:
        x, encoder_states_1 = LSTM_layer(x, encoder_width, return_sequences = False)
    else:
        temp_encoder_layers = num_encoder_layers
        # we need to pass return_sequences to every succeeding LSTM layer
        while temp_encoder_layers > 1:
            x, encoder_states = LSTM_layer(x, encoder_width, return_sequences=True)
            # we would like to keep the first encoder state to initialize all decoder states
            if temp_encoder_layers == num_encoder_layers:
                encoder_states_1 = encoder_states
            temp_encoder_layers -= 1
        # we want the final layer not to return sequences for the RepeatVector
        x, encoder_states = LSTM_layer(x, encoder_width, return_sequences=False)
    # we need the repeat layer to separate the encoder and decoder
    x = tf.keras.layers.RepeatVector(output_seq_len)(x)
    for i in range(num_decoder_layers):
        x, encoder_states = LSTM_layer(x, decoder_width,
                                       return_sequences=True)#, initial_state=encoder_states_1)
    decoder_outputs2 = tf.keras.layers.TimeDistributed(
        tf.keras.layers.Dense(n_features_output))(x)
    # NOTE(review): `x` has been rebound by every layer call above, so here it
    # is the output of the last decoder LSTM, not the `Input` tensor created
    # earlier. The model's first argument is therefore not the original input.
    return tf.keras.models.Model(x, decoder_outputs2)
As you can see, there's the LSTM_layer function which I believe might be causing trouble.
Then, when run with data, this throws the error:
ValueError: Input 0 of layer "model" is incompatible with the layer: expected shape=(None, 1, 513), found shape=(514, 12, 109)
The shape of x_train is:
X_train shape
(1824, 12, 109)
And the model architecture is:
Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_2 (InputLayer) [(None, 1, 513)] 0
time_distributed (TimeDistr (None, 1, 80) 41120
ibuted)
=================================================================
Total params: 41,120
Trainable params: 41,120
Non-trainable params: 0
_________________________________________________________________
So, I know that the model must be getting confused as the shape of x_train seems to be getting passed to the very first layer, and a full 12 layers are certainly not being constructed. Am I doing this the wrong way then? Is some Pythonic thing happening where it's assigning the same layer over and over again each time the function is called?
Any help or advice much appreciated. Thank you!
Answer: the problem wasn't in the wrapping of the function. The problem was in the final definition of the model, tf.keras.models.Model(x, decoder_outputs2), that calls x as the first parameter. Since x is redefined so many times, the model is simply pointed to the last occurrence of x. So the model becomes a link from the last LSTM decoder layer to the dense layer only. RIP.
I am trying to tidy up my code by moving from the Keras functional API to the subclassing API. The class I came up with so far is below:
class FeedForwardNN(Model):
    """Feed-forward network configured from a `params` dictionary.

    The constructor reads the keys 'layout', 'activation', 'initializer' and
    'dropout' from `params`; `call` additionally reads 'batch_norm'.
    """

    def __init__(self, params):
        super().__init__()
        self.params = params
        self.layout = params['layout']
        # Define layers
        # A single hidden Dense layer, sized from the second entry of the layout.
        self.dense = Dense(units=params['layout'][1],
                           activation=params['activation'],
                           kernel_initializer=params['initializer'])
        # Output layer sized from the last entry of the layout (no activation given).
        self.output_layer = Dense(units=params['layout'][-1],
                                  kernel_initializer=params['initializer'])
        self.dropout = Dropout(params['dropout'])
        self.batch_norm = BatchNormalization()

    def call(self, x):
        """Run `x` through the hidden stage once per hidden-layout entry, then the output layer."""
        # NOTE(review): the loop variable `layer` is never used; every
        # iteration applies the same `self.dense` / `self.dropout` /
        # `self.batch_norm` instances created in `__init__`.
        for layer in self.layout[1:-1]:
            x = self.dropout(self.dense(x))
            if self.params['batch_norm']:
                x = self.batch_norm(x)
        x = self.output_layer(x)
        return x
Where layout is a list of the neurons in each layer (including input and output layers).
However, when fitting the model, the following error is raised:
ValueError: Input 0 of layer "dense" is incompatible with the layer: expected axis -1 of input shape to have value 5, but received input with shape (None, 100)
Call arguments received:
• x=tf.Tensor(shape=(None, 5), dtype=float32)
which seems to occur on the line:
x = self.dropout(self.dense(x))
I checked the shape of the training data X that is passed to the fit() method, and it appears to have the right shape i.e. (number of observations, number of predictors).
Does anyone have an idea of where my mistake is?
The problem is that you are using the same self.dense layer over and over again in your for loop:
for layer in self.layout[1:-1]:
x = self.dropout(self.dense(x))
After the first loop, x has shape (batch, 100). Then in the second loop, instead of passing this x to the second Dense layer (which you don't seem to have created in the first place), you re-pass it to the first Dense layer, which expects shape (batch, 5), causing the error.
You can create a list of dense layers as follows in __init__:
# One Dense layer per hidden entry of the layout. `self.layout[1:-1]` yields
# the layer widths themselves, so each value is passed straight to `units`
# (using it as an index into `self.layout` would be out of range or wrong).
self.denses = [Dense(units=n_units,
                     activation=params['activation'],
                     kernel_initializer=params['initializer'])
               for n_units in self.layout[1:-1]]
and call them in sequence
for dense_layer in self.denses:
x = self.dropout(dense_layer(x))
I'm building a character-based LSTM-RNN text generator using this tutorial on Google Colab: https://colab.research.google.com/github/tensorflow/text/blob/master/docs/tutorials/text_generation.ipynb#scrollTo=d4tSNwymzf-q.
While their code runs and compiles on my Google Colab account with their Shakespeare dataset, it does not work when I input my own dataset. This error continuously comes up:
ValueError: This model has not yet been built.
The dataset they used was the Shakespeare text from Tensorflow (https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt). My dataset, on the other hand, is in the form of short characters. Here are the first five lines of my dataset (I'm experimenting with generating peptide sequences):
acssspskhcg
agcknffwktftsc
agilkrw
agyllgkinlkalaalakkil
aplepeypgdnatpeqmaqyaaelrryinmltrpry
cagalcy
I'm thinking this might be part of the problem.
Here is the code that I have so far:
import tensorflow as tf
from tensorflow.keras.layers.experimental import preprocessing
import numpy as np
import os
import time
# Read, then decode for py2 compat.
# The context manager closes the file handle as soon as the bytes are read.
with open("/content/generatorinput.txt", 'rb') as text_file:
    text = text_file.read().decode(encoding='utf-8')
# length of text is the number of characters in it
print(f'Length of text: {len(text)} characters')
# The unique characters in the file
vocab = sorted(set(text))
print(f'{len(vocab)} unique characters')

# Small demo input used only to show the lookup layers working.
example_texts = ['acdefgh', 'tvy']
# Split each example string into its individual characters.
chars = tf.strings.unicode_split(example_texts, input_encoding='UTF-8')
chars

# Forward lookup: character -> integer id, built from the file's vocabulary.
ids_from_chars = preprocessing.StringLookup(
    vocabulary=list(vocab), mask_token=None)
ids = ids_from_chars(chars)
ids

# Inverse lookup: integer id -> character, sharing the forward layer's vocabulary.
chars_from_ids = tf.keras.layers.experimental.preprocessing.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(), invert=True, mask_token=None)
chars = chars_from_ids(ids)
chars

# Join the recovered characters back into strings.
tf.strings.reduce_join(chars, axis=-1).numpy()
def text_from_ids(ids):
    """Look up the characters for `ids` and join them along the last axis."""
    looked_up_chars = chars_from_ids(ids)
    return tf.strings.reduce_join(looked_up_chars, axis=-1)
# Convert the whole text into a flat tensor of character ids.
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
all_ids
# One dataset element per character id.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)
# Sanity check: print the first ten characters.
for ids in ids_dataset.take(10):
    print(chars_from_ids(ids).numpy().decode('utf-8'))
seq_length = 100
examples_per_epoch = len(text)//(seq_length+1)
# Group ids into chunks of seq_length+1 (input plus one shifted target
# character). drop_remainder=True discards a final chunk that is shorter.
sequences = ids_dataset.batch(seq_length+1, drop_remainder=True)
# Show the first chunk as characters.
for seq in sequences.take(1):
    print(chars_from_ids(seq))
def split_input_target(sequence):
    """Return `(input, target)`: `sequence` without its last item, and without its first."""
    return sequence[:-1], sequence[1:]
# Turn every chunk into an (input, target) pair.
dataset = sequences.map(split_input_target)
# Show the first pair as text.
for input_example, target_example in dataset.take(1):
    print("Input :", text_from_ids(input_example).numpy())
    print("Target:", text_from_ids(target_example).numpy())
# Batch size
BATCH_SIZE = 64
# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 100
# Shuffle, group into batches of BATCH_SIZE (an incomplete final batch is
# dropped because drop_remainder=True) and prefetch.
dataset = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.experimental.AUTOTUNE))
dataset
# Length of the vocabulary in chars
vocab_size = len(vocab)
# The embedding dimension
embedding_dim = 256
# Number of RNN units
rnn_units = 1024
class MyModel(tf.keras.Model):
    """Character-level model: Embedding -> GRU -> Dense over the vocabulary."""

    def __init__(self, vocab_size, embedding_dim, rnn_units):
        """Create the three layers.

        Args:
            vocab_size: size passed to the Embedding layer and used as the
                number of units of the final Dense layer.
            embedding_dim: output dimension of the Embedding layer.
            rnn_units: number of units of the GRU layer.
        """
        # `super()` already binds the instance, so nothing is passed here.
        # Passing `self` again supplies an unexpected positional argument,
        # which Keras versions with a keyword-only `Layer.__init__` reject.
        super().__init__()
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(rnn_units,
                                       return_sequences=True,
                                       return_state=True)
        self.dense = tf.keras.layers.Dense(vocab_size)

    def call(self, inputs, states=None, return_state=False, training=False):
        """Run the model on `inputs`.

        Args:
            inputs: input passed to the Embedding layer.
            states: optional initial GRU state; when None, the GRU's own
                initial state is used.
            return_state: when True, also return the final GRU state.
            training: forwarded to every layer call.

        Returns:
            The Dense output, or `(output, states)` when `return_state` is True.
        """
        x = inputs
        x = self.embedding(x, training=training)
        if states is None:
            states = self.gru.get_initial_state(x)
        x, states = self.gru(x, initial_state=states, training=training)
        x = self.dense(x, training=training)
        if return_state:
            return x, states
        else:
            return x
model = MyModel(
    # Be sure the vocabulary size matches the `StringLookup` layers.
    vocab_size=len(ids_from_chars.get_vocabulary()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)
# Call the model on one batch. The loop body runs only if `dataset` yields at
# least one batch; otherwise `example_batch_predictions` is never assigned and
# the model is never called.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")
model.summary() # <-- This is where the code stops working
What I've tried: Doing a restart of my runtime, changing my buffer size and defining an input shape.
When I define the input shape and go on with the code, I get this:
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()
ERROR: example_batch_predictions is not defined
Either way, I get an error. How do I fix this problem? Any advice is deeply appreciated.
If you try to pass some data to your model as you are trying to do with this line: example_batch_predictions = model(input_example_batch) (in your for loop), your model's summary would work, but notice how nothing gets printed inside your loop. The problem is you are using example_texts, which contains two strings and you are still using a batch_size of 64 and a sequence_length of 100. If you change your batch_size to say 2 and your sequence_length to 5, you should see an output like this:
Length of text: 100 characters
20 unique characters
a
c
s
s
s
p
s
k
h
c
tf.Tensor([b'a' b'c' b's'], shape=(3,), dtype=string)
Input : b'ac'
Target: b'cs'
(1, 2, 21) # (batch_size, sequence_length, vocab_size)
Model: "my_model_13"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding_13 (Embedding) multiple 5376
_________________________________________________________________
gru_13 (GRU) multiple 3938304
_________________________________________________________________
dense_13 (Dense) multiple 21525
=================================================================
Total params: 3,965,205
Trainable params: 3,965,205
Non-trainable params: 0
_________________________________________________________________
I define my neural network
class Classifier(nn.Module):
    """Fully connected classifier: 784 -> 256 -> 128 -> 64 -> 10 with log-softmax output."""

    def __init__(self):
        super().__init__()
        # Layer sizes are fixed; the constructor takes no arguments.
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)

    def forward(self, x):
        """Flatten `x` per sample, apply three ReLU layers, return log-probabilities."""
        # Collapse everything after the batch dimension into one axis.
        hidden = x.view(x.shape[0], -1)
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))
        return F.log_softmax(self.fc4(hidden), dim=1)
model = Classifier()
I train the neural network
I save the trained neural net:
# Bundle the architecture description together with the trained weights.
checkpoint = {'input_size': 784,
              'output_size': 10,
              'hidden_layers': [256, 128, 64],
              'state_dict': model.state_dict()}
torch.save(checkpoint, 'checkpoint.pth')
# NOTE: this reloads the whole checkpoint dictionary saved above; the weights
# themselves are under its 'state_dict' key.
state_dict = torch.load('checkpoint.pth')
when I try to load the saved neural network, I get an error
def load_checkpoint(filepath):
    """Load the checkpoint at `filepath` and return a model with its weights restored."""
    checkpoint = torch.load(filepath)
    # I believe the error is in the line directly below
    # NOTE(review): `model` is looked up at module level here. If it is the
    # `Classifier()` instance rather than a class, this line calls its forward
    # pass with three arguments instead of constructing a new model.
    model_b = model(checkpoint['input_size'], checkpoint['output_size'], checkpoint['hidden_layers'])
    model_b.load_state_dict(checkpoint['state_dict'])
    return model_b
model_b = load_checkpoint('checkpoint.pth')
I get the following error:
TypeError: forward() takes 2 positional arguments but 4 were given
I think you're mixing up a few points:
Your class __init__ function takes no argument, your neural network has fixed parameters, so you can't use the other keys of your dict object to create a model that has the same parameters.
An nn.Module object has a method called __call__ that redirects to the forward method. This method is run whenever you call Object(something), where something stands for the arguments. In load_checkpoint, you ran model_b = model(checkpoint['input_size'], checkpoint['output_size'], checkpoint['hidden_layers']). That is, you tried to do a forward pass using some elements from your dictionary, hence the error (the 4 parameters are model, checkpoint['input_size'], checkpoint['output_size'], checkpoint['hidden_layers']).
To fix the problem of loading the model, simply remove this line model_b = model(checkpoint['input_size'], checkpoint['output_size'], checkpoint['hidden_layers']) and I think it should work.
If you wish to create a model using the checkpoint input_size, output_size and hidden layers, you should use these parameters on the constructor:
model = Classifier(checkpoint['input_size'], checkpoint['output_size'], checkpoint['hidden_layers']). Your code will need a few changes in order for this to work.
I am implementing an OCR with Keras, Tensorflow backend.
I want to use keras.backend.ctc_decode implementation.
I have a model class :
import keras
def ctc_lambda_func(args):
    """Compute the CTC batch cost from a 4-item `args` sequence.

    `args` is unpacked, in order, as: predictions, true labels, prediction
    lengths and label lengths.
    """
    predictions, labels, prediction_lengths, label_lengths = args
    # The first couple of RNN outputs tend to be garbage, so the first two
    # time steps can be trimmed here; that trim is currently disabled:
    # predictions = predictions[:, 2:, :]
    return keras.backend.ctc_batch_cost(
        labels, predictions, prediction_lengths, label_lengths)
class ModelOcropy(keras.Model):
    """OCR model: bidirectional LSTM over image columns with a CTC loss output and a CTC decoder."""

    def __init__(self, alphabet: str):
        """Build the graph, initialise the Keras model and create the decoder function.

        Args:
            alphabet: string whose length sets the size of the Dense/softmax output.
        """
        self.img_height = 48
        self.lstm_size = 100
        self.alphabet_size = len(alphabet)
        # check backend input shape (channel first/last)
        if keras.backend.image_data_format() == "channels_first":
            input_shape = (1, None, self.img_height)
        else:
            input_shape = (None, self.img_height, 1)
        # data input
        input_x = keras.layers.Input(input_shape, name='x')
        # training inputs
        input_y = keras.layers.Input((None,), name='y')
        # Both width inputs are declared with shape [1], i.e. (batch, 1) tensors.
        input_x_widths = keras.layers.Input([1], name='x_widths')
        input_y_widths = keras.layers.Input([1], name='y_widths')
        # network
        # Drop the channel axis so the LSTM sees (steps, img_height) per sample.
        flattened_input_x = keras.layers.Reshape((-1, self.img_height))(input_x)
        bidirectional_lstm = keras.layers.Bidirectional(
            keras.layers.LSTM(self.lstm_size, return_sequences=True, name='lstm'),
            name='bidirectional_lstm'
        )(flattened_input_x)
        dense = keras.layers.Dense(self.alphabet_size, activation='relu')(bidirectional_lstm)
        y_pred = keras.layers.Softmax(name='y_pred')(dense)
        # ctc loss
        # Note that the loss layer is fed `dense` (the ReLU output), not `y_pred`.
        ctc = keras.layers.Lambda(ctc_lambda_func, output_shape=[1], name='ctc')(
            [dense, input_y, input_x_widths, input_y_widths]
        )
        # init keras model
        super().__init__(inputs=[input_x, input_x_widths, input_y, input_y_widths], outputs=[y_pred, ctc])
        # ctc decoder
        # `input_x_widths` is passed as declared above, i.e. with shape (batch, 1).
        top_k_decoded, _ = keras.backend.ctc_decode(y_pred, input_x_widths)
        self.decoder = keras.backend.function([input_x, input_x_widths], [top_k_decoded[0]])
        # decoded_sequences = self.decoder([test_input_data, test_input_lengths])
My use of ctc_decode comes from another post : Keras using Lambda layers error with K.ctc_decode
I get an error :
ValueError: Shape must be rank 1 but is rank 2 for 'CTCGreedyDecoder' (op: 'CTCGreedyDecoder') with input shapes: [?,?,7], [?,1].
I guess I have to squeeze my input_x_widths, but Keras does not seem to have such function (it always outputs something like (batch_size, 1))
Indeed, the function is expecting a 1D tensor, and you've got a 2D tensor.
Keras does have the keras.backend.squeeze(x, axis=-1) function.
And you can also use keras.backend.reshape(x, (-1,))
If you need to go back to the old shape after the operation, you can use either:
keras.backend.expand_dims(x)
keras.backend.reshape(x,(-1,1))
Complete fix :
# ctc decoder
flattened_input_x_width = keras.backend.reshape(input_x_widths, (-1,))
top_k_decoded, _ = keras.backend.ctc_decode(y_pred, flattened_input_x_width)
self.decoder = keras.backend.function([input_x, flattened_input_x_width], [top_k_decoded[0]])
# decoded_sequences = self.decoder([input_x, flattened_input_x_width])