How to use Embedding Layer along with textvectorization in functional API - python

Just starting on tensorflow
Working on imdb dataset. Process: Text encoding using textvectorization layer and passing it to embedded layer:
# Create a custom standardization function to strip HTML break tags '<br />'.
def custom_standardization(input_data):
lowercase = tf.strings.lower(input_data)
stripped_html = tf.strings.regex_replace(lowercase, '<br />', ' ')
return tf.strings.regex_replace(stripped_html,
'[%s]' % re.escape(string.punctuation), '')
# Vocabulary size and number of words in a sequence.
vocab_size = 10000
sequence_length = 100
# Use the text vectorization layer to normalize, split, and map strings to
# integers. Note that the layer uses the custom standardization defined above.
# Set maximum_sequence length as all samples are not of the same length.
vectorize_layer = TextVectorization(
standardize=custom_standardization,
max_tokens=vocab_size,
output_mode='int',
output_sequence_length=sequence_length)
# Make a text-only dataset (no labels) and call adapt to build the vocabulary.
text_ds = train_ds.map(lambda x, y: x)
vectorize_layer.adapt(text_ds)
I then try to build a functional API:
embedding_dim=16
text_model_catprocess2 = vectorize_layer
text_model_embedd = tf.keras.layers.Embedding(vocab_size, embedding_dim, name = 'embedding')(text_model_catprocess2)
text_model_embed_proc = tf.keras.layers.Lambda(embedding_mean_standard)(text_model_embedd)
text_model_dense1 = tf.keras.layers.Dense(2, activation = 'relu')(text_model_embed_proc)
text_model_dense2 = tf.keras.layers.Dense(2, activation = 'relu')(text_model_dense1)
text_model_output = tf.keras.layers.Dense(1, activation = 'sigmoid')(text_model_dense2)
However, this is giving the following error:
~\anaconda3\lib\site-packages\keras\backend.py in dtype(x)
1496
1497 """
-> 1498 return x.dtype.base_dtype.name
1499
1500
AttributeError: Exception encountered when calling layer "embedding" (type Embedding).
'str' object has no attribute 'base_dtype'
Call arguments received:
• inputs=<keras.layers.preprocessing.text_vectorization.TextVectorization object at 0x0000029B483AADC0>
Upon making a sequential API like this, it is working fine:
embedding_dim=16
modelcheck = tf.keras.Sequential([
vectorize_layer,
tf.keras.layers.Embedding(vocab_size, embedding_dim, name="embedding"),
tf.keras.layers.GlobalAveragePooling1D(),
tf.keras.layers.Dense(16, activation='relu'),
tf.keras.layers.Dense(1)
])
I am not sure why this is happening. Is it necessary for the functional API to have an input? Please help!

You have two options. Either you use a Sequential model and it will work as you have confirmed because you do not have to define an Input layer, or you use the functional API where you have to define an Input layer:
embedding_dim = 16
text_model_input = tf.keras.layers.Input(dtype=tf.string, shape=(1,))
text_model_catprocess2 = vectorize_layer(text_model_input)
text_model_embedd = tf.keras.layers.Embedding(vocab_size, embedding_dim, name = 'embedding')(text_model_catprocess2)
text_model_embed_proc = tf.keras.layers.Lambda(embedding_mean_standard)(text_model_embedd)
text_model_dense1 = tf.keras.layers.Dense(2, activation = 'relu')(text_model_embed_proc)
text_model_dense2 = tf.keras.layers.Dense(2, activation = 'relu')(text_model_dense1)
text_model_output = tf.keras.layers.Dense(1, activation = 'sigmoid')(text_model_dense2)
model = tf.keras.Model(text_model_input, text_model_output)

Related

Preprocessing Layers and KerasTuner Cooperation

I'm having troubles making the Preprocessing layers and the Keras Tuner cooperate.
I am referring to this tutorial Load CSV data | TensorFlow Core for the Preprocessing part, and to the Getting started with KerasTuner documentation for the Keras Tuner part.
Briefly, here's the code.
He loads the data:
titanic = pd.read_csv("https://storage.googleapis.com/tf-datasets/titanic/train.csv")
titanic_features = titanic.copy()
titanic_labels = titanic_features.pop('survived')
He creates the symbolic tensors of the features in a dictionary
inputs = {}
for name, column in titanic_features.items():
dtype = column.dtype
if dtype == object:
dtype = tf.string
else:
dtype = tf.float32
inputs[name] = tf.keras.Input(shape=(1,), name=name, dtype=dtype)
inputs
Then he applies normalization on the numerical features:
numeric_inputs = {name:input for name,input in inputs.items()
if input.dtype==tf.float32}
x = layers.Concatenate()(list(numeric_inputs.values()))
norm = layers.Normalization()
norm.adapt(np.array(titanic[numeric_inputs.keys()]))
all_numeric_inputs = norm(x)
all_numeric_inputs
He creates a list
preprocessed_inputs = [all_numeric_inputs]
He one hot encodes the categorical features:
for name, input in inputs.items():
if input.dtype == tf.float32:
continue
lookup = layers.StringLookup(vocabulary=np.unique(titanic_features[name]))
one_hot = layers.CategoryEncoding(num_tokens=lookup.vocabulary_size())
x = lookup(input)
x = one_hot(x)
preprocessed_inputs.append(x)
and then He concatenates it:
preprocessed_inputs_cat = layers.Concatenate()(preprocessed_inputs)
titanic_preprocessing = tf.keras.Model(inputs, preprocessed_inputs_cat)
Now what I want to do is insert this preprocessing part in KerasTuner. I tried this:
def titanic_model(units, activation):
model_inputs = tf.keras.Input(shape=28)
dense_1 = layers.Dense(units=units, activation=activation)(model_inputs)
dense_output = layers.Dense(1)(dense_1)
body = tf.keras.Model(inputs = model_inputs, outputs = dense_output)
return body
def build_model(hp,preprocessing_head, inputs):
units = hp.Int("units", min_value=32, max_value=512, step=32)
activation = hp.Choice("activation", ["relu", "tanh"])
preprocessed_inputs = preprocessing_head(inputs)
result = titanic_model(units,
activation)(preprocessed_inputs)
model = tf.keras.Model(inputs, result)
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
optimizer=tf.keras.optimizers.Adam(),
metrics = ['accuracy'])
return model
titanic_model = build_model(keras_tuner.HyperParameters(),titanic_preprocessing, inputs)
but it gives me the following error:
Inputs to a layer should be tensors. Got: <keras_tuner.engine.hyperparameters.HyperParameters
object at 0x7ff52844da30>
I cannot understand if I am close to the solution, or this is not the right way to proceed.
However, the workaround i found was to insert directly in the build_model function the preprocessing layer (titanic_preprocessing) and the inputs dictionary, without passing it as an argument of the function.
Hence:
def titanic_model(units, activation):
model_inputs = tf.keras.Input(shape=28)
dense_1 = layers.Dense(units=units, activation=activation)(model_inputs)
dense_output = layers.Dense(1)(dense_1)
body = tf.keras.Model(inputs = model_inputs, outputs = dense_output)
return body
def build_model(hp):
units = hp.Int("units", min_value=32, max_value=512, step=32)
activation = hp.Choice("activation", ["relu", "tanh"])
preprocessed_inputs = titanic_preprocessing(inputs)
result = titanic_model(units,
activation)(preprocessed_inputs)
model = tf.keras.Model(inputs, result)
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
optimizer=tf.keras.optimizers.Adam(),
metrics = ['accuracy'])
return model
titanic_model = build_model()
In this case it seems to work, and by setting the tuner
tuner = keras_tuner.RandomSearch(
hypermodel = build_model,
objective=keras_tuner.Objective("accuracy", direction="max"),
max_trials = 1,
overwrite = True,
directory = "tuner_dir",
project_name = "regression_tuner")
and searching it works:
tuner.search(x=titanic_features_dict, y=titanic_labels, epochs=10)
However, I have doubts about this solution and would appreciate feedback on this.
Thank you!

How to merge multiple input and embeddings into single input layer

I have various inputs, some that need embedding. I have been able to create them all as seen below:
I can then concatenate them all, for the following:
However, my disconnect is where to go from here. I have built the following autoencoder, but I am not sure how to "stack" the previous embedding+input mix on top of this flow:
So, how do I make the input layer whats already been defined above? I tried setting the first "encoder" part to take in merge_models, but it fails:
Code is the following:
num_input = Input(shape=scaled_data.shape[1], name='input_number_features')
models.append(num_input)
inputs.append(num_input)
binary_input = Input(shape=binary_data.shape[1], name='input_binary_features')
models.append(binary_input)
inputs.append(binary_input)
for var in cols_to_embed :
model = Sequential()
no_of_unique_cat = data[var].nunique()
embedding_size = np.ceil(np.sqrt(no_of_unique_cat))
embedding_size = int(embedding_size)
print(var + " - " + str(no_of_unique_cat) + ' unique values to ' + str(embedding_size))
inpt = tf.keras.layers.Input(shape=(1,),\
name='input_' + '_'.join(\
var.split(' ')))
embed = tf.keras.layers.Embedding(no_of_unique_cat, embedding_size,trainable=True,\
embeddings_initializer=tf.initializers\
.random_normal)(inpt)
embed_rehsaped = tf.keras.layers.Reshape(target_shape=(embedding_size,))(embed)
models.append(embed_rehsaped)
inputs.append(inpt)
merge_models = tf.keras.layers.concatenate(models)
# Input Layer
input_dim = merge_models.shape[1]
input_layer = Input(shape = (input_dim, ), name = 'input_layer')
# Encoder
encoder = Dense(16, activation='relu')(input_layer)
encoder = Dense(8, activation='relu')(encoder)
encoder = Dense(4, activation='relu')(encoder)
# Bottleneck
z = Dense(2, activation='relu')(encoder)
# Decoder
decoder = Dense(4, activation='relu')(z)
decoder = Dense(8, activation='relu')(decoder)
decoder = Dense(16, activation='relu')(decoder)
decoder = Dense(input_dim, activation='elu')(decoder) # intentionally using 'elu' instead of 'reul'
# Autoencoder
from tensorflow.keras.models import Model
autoencoder = Model(inputs = input_layer,
outputs = decoder,
name = 'ae_toy_example')
You should pass merge_models into the first encoder layer in this way:
encoder = Dense(16, activation='relu')(merge_models)
then you should define your final model in this way:
Model(inputs = inputs, outputs = decoder, name = 'ae_toy_example')
and NOT as:
Model(inputs = input_layer, outputs = decoder, name = 'ae_toy_example')

Graph disconnected: cannot obtain value for tensor "x" Tensor at layer "x" . The following previous layers were accessed without issue: []

I am building a small network using some custom network boxes for each use case, It looks like this :
def top_block(dropout = None, training = None):
# scaled input
input_1 = tf.keras.Input(shape=(1,15), dtype='float32')
input_2 = tf.keras.Input(shape=(1,15), dtype='float32')
if dropout:
layer_one = tf.keras.layers.Dropout(rate = dropout)(input_1, training = training)
layer_two = tf.keras.layers.Dropout(rate = dropout)(input_2, training = training)
return [layer_one,layer_two]
return [input_1,input_2]
def bottom_layer(input_layers):
data = tf.reduce_mean(input_layers,0)
cls_layer = tf.keras.layers.Dense(1,
kernel_initializer = keras.initializers.glorot_uniform(seed=200),
activation = 'sigmoid')(data)
model = tf.keras.Model([input_layers[0], input_layers[1]], cls_layer , name = 'model_1')
model.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics=['accuracy'])
model.summary()
return model
If I am trying to access this network without dropout, it's working fine :
top_ = top_block()
model = bottom_layer(top_ )
But if I am accessing with dropout, it's giving error:
top_ = top_block(dropout = 0.2, training = True)
model = bottom_layer(top_ )
ValueError: Graph disconnected: cannot obtain value for tensor Tensor("input_72:0", shape=(None, 1, 15), dtype=float32) at layer "input_72". The following previous layers were accessed without issue: []
How to access the model with dropout layer?
How to disable training = False during evaluate? Do I need to load full model and old model weights?
Thank You!
I just realized my input is coming from intermediate layer (dropout layer), It should come directly from Input layer :
def top_block():
# scaled input
input_1 = tf.keras.Input(shape=(1,15), dtype='float32')
input_2 = tf.keras.Input(shape=(1,15), dtype='float32')
return [input_1, input_2]
def apply_dropout(layers_data, dropout_val, training):
layer_one = tf.keras.layers.Dropout(rate = dropout_val)(layers_data[0], training = training)
layer_two = tf.keras.layers.Dropout(rate = dropout_val)(layers_data[1], training = training)
return [layer_one, layer_two]
def bottom_layer(input_layers, data):
data = tf.reduce_mean(data, 0)
cls_layer = tf.keras.layers.Dense(1,
kernel_initializer = keras.initializers.glorot_uniform(seed=200),
activation = 'sigmoid')(data)
model = tf.keras.Model(input_layers, cls_layer , name = 'model_1')
model.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics=['accuracy'])
model.summary()
return model
It's working now
top_ = top_block()
dropout_ = apply_dropout(top_, 0.2, True)
model = bottom_layer(top_ , dropout_)

Keras/Tf Custom Loss Function at the level of tensor entries

I would like to write a custom loss function in Keras that takes in a tensor of predicted values and then for each process it through another function before adding to the loss. Here is what this looks like. This is a complete code example, please look at the function sample_loss.
class AE():
def __init__(self,inputSize=78,numNeurons = 50, latentSize=5,expSize = 10,batchSize = 1, activation="relu"):
K.clear_session()
self.inputSize = inputSize
self.latentSize = latentSize
self.activation = activation
self.numNeurons = numNeurons
self.expSize = expSize
self.batchSize = batchSize
self.encoder = self.createEncoder()
self.decoder = self.createDecoder()
self.filterModel = self.createFilterModel()
self.AE = self.createAE()
def sample_loss(self,y_pred ):
loss = 0.0
for j in tf.range(0,self.inputSize ,2):
pt = y_pred[j:j+2]
loss += K.sum(self.filterModel.predict(pt))
return loss
def createEncoder(self):
# create the encoder
xIn = Input(shape=(self.inputSize,), name="data_in")
y_train = Input(shape=(self.latentSize,), name='y_train')
x = Dense(self.numNeurons,activation=self.activation)(xIn)
xlatent = Dense(self.latentSize,activation=self.activation)(x)
# create the encoder
encoder = Model(xIn,xlatent)
return encoder
def createDecoder(self):
# create the decoder
latentIn = Input(shape=(self.latentSize,))
x = Dense(self.numNeurons,activation=self.activation)(latentIn)
out = Dense(self.inputSize,activation="linear")(x)
# create a decoder
decoder = Model(latentIn,out)
return decoder
def createAE(self):
xIn = Input(shape=(self.inputSize,), name="ae_data_in")
# create the total model
latentOutput = self.encoder(xIn)
dataOutput = self.decoder(latentOutput)
model = Model(inputs=xIn,outputs=dataOutput)
model.add_loss( self.sample_loss( dataOutput) )
model.summary()
return model
def createFilterModel(self):
xIn = Input(shape=(2,), name="filter_data_in")
x = Dense(4, activation='sigmoid')(xIn)
x = Dense(1, activation='sigmoid')(x)
model = Model(xIn,x)
model.compile(optimizer='adam', loss='binary_crossentropy')
return model
modelAE = AE()
modelAE.AE.compile(optimizer='adam',loss="mse", metrics=['accuracy'])
So I have a dictionary of models, filterModels. These are actually predictive models in Keras. For each model in this dictionary, I want to pass it the corresponding part of y_pred and then add its output to the loss.
How can I do this?
Here is the error the current code gives me:
'''
ValueError: When feeding symbolic tensors to a model, we expect the tensors to have a static batch size. Got tensor with shape: (None, 78)
'''

How to get the output of all layers of a Keras model?

I have a Keras functional model defined as:
# Construct DNN
spec_input = keras.layers.Input(shape=(1, ctx, fft), name='spec')
x = keras.layers.Flatten(data_format)(spec_input)
for layer in range(len(args.dnn_struct)):
x = Dense(args.dnn_struct[layer])(x)
x = BatchNormalization()(x)
x = keras.layers.ReLU()(x)
out = Dense(fft, activation="sigmoid", name=f'spp')(x)
model = Model(inputs=spec_input, outputs=out)
I would like to get the output of each layer in the model for a given input and the answers given in Keras, How to get the output of each layer? do not work for a functional model. I am currently using Tensorflow 1.14
When I try using
from keras import backend as K
inp = model.input # input placeholder
outputs = [layer.output for layer in model.layers] # all layer outputs
functor = K.function([inp, K.learning_phase()], outputs ) # evaluation function
# Testing
test = np.random.random(input_shape)[np.newaxis,...]
layer_outs = functor([test, 1.])
print(layer_outs)
I get the error
Traceback (most recent call last):
File "/home/xyz/anaconda3/envs/tf/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 1446, in __init__
session._session, options_ptr)
tensorflow.python.framework.errors_impl.InvalidArgumentError: spec:0 is both fed and fetched.
UPDATE: You cannot fetch outputs of all layers, because "all layers" includes Input - and the error message is self-explanatory. Use:
outputs = get_all_outputs(model, input_data, 1)
Below should work for any model, Model or Sequential:
def get_all_outputs(model, input_data, learning_phase=1):
outputs = [layer.output for layer in model.layers[1:]] # exclude Input
layers_fn = K.function([model.input, K.learning_phase()], outputs)
return layers_fn([input_data, learning_phase])
Layer-level solutions:
def get_layer_outputs(model, layer_name, input_data, learning_phase=1):
outputs = [layer.output for layer in model.layers if layer_name in layer.name]
layers_fn = K.function([model.input, K.learning_phase()], outputs)
return layers_fn([input_data, learning_phase])
# or, for passing in a layer directly
def get_layer_outputs(model, layer, input_data, learning_phase=1):
layer_fn = K.function([model.input, K.learning_phase()], layer.output)
return layer_fn([input_data, learning_phase])

Categories

Resources