I have a very simple question. I have a Keras model (TF backend) defined for classification. I want to dump the training images fed into my model during training for debugging purposes. I am trying to create a custom callback that writes Tensorboard image summaries for this.
But how can I obtain the real training data inside the callback?
Currently I am trying this:
class TensorboardKeras(Callback):
    """Keras callback that tries to log the model's input images to TensorBoard.

    Uses the TF1 graph-mode summary API (``tf.summary.image``,
    ``tf.summary.merge_all`` and ``tf.summary.FileWriter``).
    """

    def __init__(self, model, log_dir, write_graph=True):
        """Register an image summary on the model input and open the writer.

        Args:
            model: Keras model whose ``input`` tensor is summarised.
            log_dir: Directory the event files are written to.
            write_graph: When true, the session graph is passed to the writer.
        """
        self.model = model
        self.log_dir = log_dir
        self.session = K.get_session()
        tf.summary.image('input_image', self.model.input)
        self.merged = tf.summary.merge_all()
        # Exactly one writer is created; without the ``else`` branch the
        # graph-carrying writer was immediately replaced by a plain one.
        if write_graph:
            self.writer = tf.summary.FileWriter(self.log_dir, K.get_session().graph)
        else:
            self.writer = tf.summary.FileWriter(self.log_dir)

    def on_batch_end(self, batch, logs=None):
        """Evaluate the merged summaries and write them for this batch."""
        # NOTE: the merged summary depends on the model's input placeholder,
        # and nothing is fed here (empty feed_dict).
        summary = self.session.run(self.merged, feed_dict={})
        self.writer.add_summary(summary, batch)
But I am getting the error:
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'input_1' with dtype float and shape [?,224,224,3]
There must be a way to see what the model gets as input, right?
You don't need callbacks for this. All you need to do is implement a function that yields an image and its label as a tuple. The flow_from_directory function has a parameter called save_to_dir which could satisfy all of your needs; in case it doesn't, here is what you can do:
def trainGenerator(batch_size, train_path, image_size, seed=None):
    """Yield ``(batch_imgs, batch_labels)`` tuples read from ``train_path``.

    Args:
        batch_size: Number of images per yielded batch.
        train_path: Directory passed to ``flow_from_directory``.
        image_size: Target size forwarded as ``target_size``.
        seed: Optional random seed forwarded to ``flow_from_directory``.

    Yields:
        The ``(batch_imgs, batch_labels)`` pairs produced by the Keras
        directory iterator, unchanged.
    """
    # preprocessing: see the Keras ImageDataGenerator documentation for details
    image_datagen = ImageDataGenerator(horizontal_flip=True)
    # create image generator: see the flow_from_directory documentation for details
    train_generator = image_datagen.flow_from_directory(
        train_path,
        class_mode="categorical",
        target_size=image_size,
        batch_size=batch_size,
        save_prefix="augmented_train",
        seed=seed)
    for (batch_imgs, batch_labels) in train_generator:
        # do other stuff such as dumping images or further augmenting images
        yield (batch_imgs, batch_labels)
How to access a tf.data.Dataset within a Keras custom callback?

I have written a custom Keras callback to check the augmented data from a generator. (See this answer for the full code.) However, when I tried to use the same callback for a tf.data.Dataset, it gave me an error:
File "/path/to/", line 16, in on_batch_end
imgs = self.train[batch][images_or_labels]
TypeError: 'PrefetchDataset' object is not subscriptable
Do keras callbacks in general only work with generators, or is it something about the way I've written my one? Is there a way to modify either my callback or the dataset to make it work?
I think there are three pieces to this puzzle. I'm open to changes to any and all of them. Firstly, the init function in the custom callback class:
class TensorBoardImage(tf.keras.callbacks.Callback):
    """Callback that holds the training/validation data and a summary writer."""

    def __init__(self, logdir, train, validation=None):
        """Store the data handles and create the TensorBoard file writer.

        Args:
            logdir: Directory passed to ``tf.summary.create_file_writer``.
            train: Training data object kept as ``self.train``.
            validation: Optional validation data kept as ``self.validation``.
        """
        super(TensorBoardImage, self).__init__()
        self.logdir = logdir
        self.file_writer = tf.summary.create_file_writer(logdir)
        self.train = train
        self.validation = validation
Secondly, the on_batch_end function within that same class
def on_batch_end(self, batch, logs):
    """Fetch the images of the batch that just finished from ``self.train``."""
    # self.train is indexed by batch number, then by position in the
    # (images, labels) pair; this requires a subscriptable data object.
    images_or_labels = 0  # 0=images, 1=labels
    imgs = self.train[batch][images_or_labels]
Thirdly, instantiating the callback
import tensorflow_image_callback
# No trailing comma after the call: it would bind a 1-tuple instead of the callback.
tensorboard_image_callback = tensorflow_image_callback.TensorBoardImage(
    logdir=tensorboard_log_dir, train=train_dataset, validation=valid_dataset)
Some related threads which haven't led me to an answer yet:
Accessing validation data within a custom callback
Create keras callback to save model predictions and targets for each batch during training
What ended up working for me was the following, using tfds:
the __init__ function:
def __init__(self, logdir, train, validation=None):
    """Create the file writer and convert the tf.data input to NumPy via tfds."""
    super(TensorBoardImage, self).__init__()
    self.logdir = logdir
    self.file_writer = tf.summary.create_file_writer(logdir)
    # #from keras generator
    # self.train = train
    # self.validation = validation
    #from tf.Data
    my_data = tfds.as_numpy(train)
    # NOTE(review): indexing with 'image' assumes as_numpy returns a mapping
    # with that key — confirm against the dataset structure.
    imgs = my_data['image']
then on_batch_end:
def on_batch_end(self, batch, logs):
    """Write a sample of the batch's images to TensorBoard.

    Reads the batch from ``self.train`` and posts every image whose dataset
    index is a multiple of 1000, tagged with its filename and stepped by epoch.
    """
    images_or_labels = 0  # 0=images, 1=labels
    imgs = self.train[batch][images_or_labels]

    #calculate epoch
    n_batches_per_epoch = self.train.samples / self.train.batch_size
    epoch = math.floor(self.train.total_batches_seen / n_batches_per_epoch)

    #since the training data is shuffled each epoch, we need to use the index_array to find something which uniquely
    #identifies the image and is constant throughout training
    first_index_in_batch = batch * self.train.batch_size
    last_index_in_batch = first_index_in_batch + self.train.batch_size
    # clamp so the final (possibly shorter) batch does not run past the array
    last_index_in_batch = min(last_index_in_batch, len(self.train.index_array))
    img_indices = self.train.index_array[first_index_in_batch : last_index_in_batch]

    with self.file_writer.as_default():
        for ix, img in enumerate(imgs):
            #only post 1 out of every 1000 images to tensorboard
            if (img_indices[ix] % 1000) == 0:
                #instead of img_filename, I could just use str(img_indices[ix]) as a unique identifier
                #but this way makes it easier to find the unaugmented image
                img_filename = self.train.filenames[img_indices[ix]]

                #convert float to uint8, shift range to 0-255
                img -= tf.reduce_min(img)
                img *= 255 / tf.reduce_max(img)
                img = tf.cast(img, tf.uint8)
                img_tensor = tf.expand_dims(img, 0)  #tf.summary needs a 4D tensor

                tf.summary.image(img_filename, img_tensor, step=epoch)
I didn't need to make any changes to the instantiation.
save and load custom attention model lstm in keras

I want to run a seq2seq model using LSTM for a customer journey analysis. I am able to run the model, but I am unable to load the saved model in a different notebook.
Code for attention model is here:
# RNN "Cell" classes in Keras perform the actual data transformations at each timestep. Therefore, in order to add attention to LSTM, we need to make a custom subclass of LSTMCell.
class AttentionLSTMCell(LSTMCell):
    """LSTMCell subclass that appends an attention context vector to its input.

    ``setInputSequence`` must be called before ``call`` so that the encoder
    output sequence and its transformed copy are available.
    """

    def __init__(self, **kwargs):
        self.attentionMode = False
        super(AttentionLSTMCell, self).__init__(**kwargs)

    # Build is called to initialize the variables that our cell will use. We will let other Keras
    # classes (e.g. "Dense") actually initialize these variables.
    def build(self, input_shape):
        # Converts the input sequence into a sequence which can be matched up to the internal
        # hidden state.
        self.dense_constant = TimeDistributed(Dense(self.units, name="AttLstmInternal_DenseConstant"))
        # Transforms the internal hidden state into something that can be used by the attention
        # mechanism.
        self.dense_state = Dense(self.units, name="AttLstmInternal_DenseState")
        # Transforms the combined hidden state and converted input sequence into a vector of
        # probabilities for attention.
        self.dense_transform = Dense(1, name="AttLstmInternal_DenseTransform")
        # We will augment the input into LSTMCell by concatenating the context vector. Modify
        # input_shape to reflect this.
        batch, input_dim = input_shape[0]
        batch, timesteps, context_size = input_shape[-1]
        lstm_input = (batch, input_dim + context_size)
        # The LSTMCell superclass expects no constant input, so strip that out.
        return super(AttentionLSTMCell, self).build(lstm_input)

    # This must be called before call(). The "input sequence" is the output from the
    # encoder. This function will do some pre-processing on that sequence which will
    # then be used in subsequent calls.
    def setInputSequence(self, input_seq):
        self.input_seq = input_seq
        self.input_seq_shaped = self.dense_constant(input_seq)
        self.timesteps = tf.shape(self.input_seq)[-2]

    # This is a utility method to adjust the output of this cell. When attention mode is
    # turned on, the cell outputs attention probability vectors across the input sequence.
    def setAttentionMode(self, mode_on=False):
        self.attentionMode = mode_on

    # This method sets up the computational graph for the cell. It implements the actual logic
    # that the model follows.
    def call(self, inputs, states, constants):
        # Separate the state list into the two discrete state vectors.
        # ytm is the "memory state", stm is the "carry state".
        ytm, stm = states
        # We will use the "carry state" to guide the attention mechanism. Repeat it across all
        # input timesteps to perform some calculations on it.
        stm_repeated = K.repeat(self.dense_state(stm), self.timesteps)
        # Now apply our "dense_transform" operation on the sum of our transformed "carry state"
        # and all encoder states. This will squash the resultant sum down to a vector of size
        # [batch,timesteps,1]
        # Note: Most sources I encounter use tanh for the activation here. I have found with this dataset
        # and this model, relu seems to perform better. It makes the attention mechanism far more crisp
        # and produces better translation performance, especially with respect to proper sentence termination.
        combined_stm_input = self.dense_transform(
            keras.activations.relu(stm_repeated + self.input_seq_shaped))
        # Performing a softmax over axis 1 generates a probability for each encoder output to
        # receive attention.
        score_vector = keras.activations.softmax(combined_stm_input, 1)
        # In this implementation, we grant "partial attention" to each encoder output based on
        # its probability computed above. Other options would be to only give attention
        # to the highest probability encoder output or some similar set.
        context_vector = K.sum(score_vector * self.input_seq, 1)
        # Finally, mutate the input vector. It will now contain the traditional inputs (like the seq2seq
        # we trained above) in addition to the attention context vector we calculated earlier in this method.
        inputs = K.concatenate([inputs, context_vector])
        # Call into the super-class to invoke the LSTM math.
        res = super(AttentionLSTMCell, self).call(inputs=inputs, states=states)
        # This if statement switches the return value of this method if "attentionMode" is turned on.
        if self.attentionMode:
            return (K.reshape(score_vector, (-1, self.timesteps)), res[1])
        return res
# Custom implementation of the Keras LSTM that adds an attention mechanism.
# This is implemented by taking an additional input (using the "constants" of the RNN class) into the LSTM: the encoder output vectors across the entire input sequence.
class LSTMWithAttention(RNN):
    """RNN layer that wraps an ``AttentionLSTMCell`` of the given size."""

    def __init__(self, units, **kwargs):
        cell = AttentionLSTMCell(units=units)
        self.units = units
        super(LSTMWithAttention, self).__init__(cell, **kwargs)

    def build(self, input_shape):
        self.input_dim = input_shape[0][-1]
        self.timesteps = input_shape[0][-2]
        return super(LSTMWithAttention, self).build(input_shape)

    # This call is invoked with the entire time sequence. The RNN sub-class is responsible
    # for breaking this up into calls into the cell for each step.
    # The "constants" variable is the key to our implementation. It was specifically added
    # to Keras to accommodate the "attention" mechanism we are implementing.
    def call(self, x, constants, **kwargs):
        # Keep the first element when a list of inputs is given; without the
        # ``else`` the list branch was always overwritten.
        if isinstance(x, list):
            self.x_initial = x[0]
        else:
            self.x_initial = x
        # The only difference in the LSTM computational graph really comes from the custom
        # LSTM Cell that we utilize.
        self.cell._dropout_mask = None
        self.cell._recurrent_dropout_mask = None
        return super(LSTMWithAttention, self).call(inputs=x, constants=constants, **kwargs)
Code defining encoder and decoder model:
# Encoder Layers
encoder_inputs = Input(shape=(None, len_input), name="attenc_inputs")
encoder = LSTM(units=units, return_sequences=True, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
encoder_states = [state_h, state_c]

# define inference encoder
encoder_model = Model(encoder_inputs, encoder_states)
encoder_model.save('atten_enc_model.h5')

# define training decoder
decoder_inputs = Input(shape=(None, n_output))
Attention_dec_lstm = LSTMWithAttention(units=units, return_sequences=True, return_state=True)
# Note that the only real difference here is that we are feeding attenc_outputs to the decoder now.
# NOTE(review): the keyword arguments after ``inputs`` were lost in the original
# text; ``constants``/``initial_state`` are reconstructed from context — confirm.
attdec_lstm_out, _, _ = Attention_dec_lstm(inputs=decoder_inputs,
                                           constants=encoder_outputs,
                                           initial_state=encoder_states)
decoder_dense1 = Dense(units, activation="relu")
decoder_dense2 = Dense(n_output, activation='softmax')
decoder_outputs = decoder_dense2(Dropout(rate=.10)(decoder_dense1(Dropout(rate=.10)(attdec_lstm_out))))
atten_model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
atten_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Defining inference decoder
state_input_h = Input(shape=(units,), name="state_input_h")
state_input_c = Input(shape=(units,), name="state_input_c")
decoder_states_inputs = [state_input_h, state_input_c]
attenc_seq_out = Input(shape=encoder_outputs.get_shape()[1:], name="attenc_seq_out")
inf_attdec_inputs = Input(shape=(None, n_output), name="inf_attdec_inputs")
# NOTE(review): arguments after ``inputs`` reconstructed from the model inputs
# listed below — confirm.
attdec_res, attdec_h, attdec_c = Attention_dec_lstm(inputs=inf_attdec_inputs,
                                                    constants=attenc_seq_out,
                                                    initial_state=decoder_states_inputs)
decoder_states = [attdec_h, attdec_c]
decoder_model = Model(inputs=[inf_attdec_inputs, state_input_h, state_input_c, attenc_seq_out],
                      outputs=[attdec_res, attdec_h, attdec_c])
Code for model fit and save:
history =[encoder_input_data, decoder_input_data], decoder_target_data,
Code to load the encoder decoder model with custom Attention layer:
# Read the serialized architecture, then rebuild the model with the custom
# classes registered so Keras can resolve them by name.
with open('atten_model_lstm.json') as mdl:
    json_string = mdl.read()
model = model_from_json(json_string, custom_objects={'AttentionLSTMCell': AttentionLSTMCell, 'LSTMWithAttention': LSTMWithAttention})
This code to load is giving error :
TypeError: int() argument must be a string, a bytes-like object or a number, not 'AttentionLSTMCell'
Here's a solution inspired by the link in my comment:
# serialize model to JSON
atten_model_json = atten_model.to_json()
with open("atten_model.json", "w") as json_file:
    json_file.write(atten_model_json)
# serialize weights to HDF5
# NOTE(review): the weights file name was lost in the original text;
# "atten_model.h5" is a placeholder — confirm.
atten_model.save_weights("atten_model.h5")
print("Saved model to disk")

# Different part of your code or different file
# load json and create model
with open('atten_model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("atten_model.h5")
Keras model.fit() with tf.dataset fails while using tf.train works fine

Summary: according to the documentation, Keras should accept tf.dataset as input (I am using TF version 1.12.0). I can train my model if I manually do the training steps, but when using model.fit() on the same model, I get an error I cannot resolve.
Here is a sketch of what I did: my dataset, which is too big to fit in memory, consists of many files, each with a different number of rows of (100 features, label). I'd like to use tf.data to build my data pipeline:
def data_loader(filename):
    '''load a single data file with many rows'''
    features, labels = load_hdf5(filename)
    return features, labels
def make_dataset(filenames, batch_size):
    '''read files one by one, pick individual rows, batch them and repeat'''
    dataset = tf.data.Dataset.from_tensor_slices(filenames)
    dataset = dataset.map(  # Problem here! See edit for solution
        lambda filename: tuple(tf.py_func(data_loader, [filename], [tf.float32, tf.float32])))
    # Split each file's (features, labels) arrays into individual rows.
    dataset = dataset.flat_map(
        lambda features, labels: tf.data.Dataset.from_tensor_slices((features, labels)))
    dataset = dataset.batch(batch_size)
    dataset = dataset.repeat()
    dataset = dataset.prefetch(1000)
    return dataset
training_set = make_dataset(training_files, batch_size=_BATCH_SIZE)
I'd like to try a very basic logistic regression model:
inputs = tf.keras.layers.Input(shape=(100,))
outputs = tf.keras.layers.Dense(1, activation='softmax')(inputs)
model = tf.keras.Model(inputs, outputs)
If I train it manually everything works fine, e.g.:
labels = tf.placeholder(tf.float32)
loss = tf.reduce_mean(tf.keras.backend.categorical_crossentropy(labels, outputs))
train_step = tf.train.GradientDescentOptimizer(.05).minimize(loss)
iterator = training_set.make_one_shot_iterator()
next_element = iterator.get_next()
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    # NOTE(review): the statements using sess were garbled in the original
    # text; reconstructed from the names defined above — confirm.
    sess.run(init_op)
    for i in range(training_size // _BATCH_SIZE):
        x, y = sess.run(next_element)
        train_step.run({inputs: x, labels: y})
However, if I instead try to use model.fit() like this:
model.compile('adam', 'categorical_crossentropy', metrics=['acc'])
# NOTE(review): the start of the fit call and any arguments after
# steps_per_epoch were lost in the original text; reconstructed — confirm.
model.fit(training_set.make_one_shot_iterator(),
          steps_per_epoch=training_size // _BATCH_SIZE)
I get the error message "ValueError: Cannot take the length of Shape with unknown rank." inside Keras's _standardize_user_data function.
I have tried quite a few things but could not resolve the issue. Any ideas?
Edit: based on @kvish's answer, the solution was to change the map from a lambda to a function that specifies the correct tensor dimensions, e.g.:
def data_loader(filename):
    """Load one file through tf.py_func and attach static shapes to the outputs."""
    def loader_impl(filename):
        features, labels, _ = load_hdf5(filename)
        return features, labels

    features, labels = tf.py_func(loader_impl, [filename], [tf.float32, tf.float32])
    # Declare the shapes explicitly so downstream code can see the rank.
    features.set_shape((None, 100))
    labels.set_shape((None, 1))
    return features, labels
and now, all that is needed is to call this function from map:
dataset = dataset.map(data_loader)
Probably tf.py_func produces an unknown shape which Keras cannot infer. We can set the shape of the tensor returned by it using set_shape(your_shape) method and that would help Keras infer the shape of the result.

Tensorflow Estimator: Cache bottlenecks

When following the tensorflow image classification tutorial, at first it caches the bottleneck of each image:
def: cache_bottlenecks())
I have rewritten the training using tensorflow's Estimator. This really simplified all the code. However I want to cache the bottleneck features here.
Here is my model_fn. I want to cache the results of the dense layer so I can make changes to the actual training without having to compute the bottlenecks each time.
How can I accomplish that?
def model_fn(features, labels, mode, params):
    """Estimator model function: hub module features -> dense logits -> head.

    Args:
        features: Mapping that contains the input under the ``'image'`` key.
        labels: Labels forwarded to the head.
        mode: A ``tf.estimator.ModeKeys`` value.
        params: Mapping with ``'label_vocab'``, ``'module_spec'`` and
            ``'train_module'`` entries.

    Returns:
        The estimator spec created by the multi-class head.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN
    num_classes = len(params['label_vocab'])
    module = hub.Module(params['module_spec'], trainable=is_training and params['train_module'])
    bottleneck_tensor = module(features['image'])
    with tf.name_scope('final_retrain_ops'):
        logits = tf.layers.dense(bottleneck_tensor, units=num_classes, trainable=is_training)  # save this?

    def train_op_fn(loss):
        optimizer = tf.train.AdamOptimizer()
        return optimizer.minimize(loss, global_step=tf.train.get_global_step())

    head = tf.contrib.estimator.multi_class_head(n_classes=num_classes, label_vocabulary=params['label_vocab'])
    return head.create_estimator_spec(
        features, mode, logits, labels, train_op_fn=train_op_fn
    )
TF cannot do this the way your code is written. You should:
Export bottleneck to file from the raw net.
Use bottleneck result as input, use another net to train your data.
To expand on what @Feng said:
see TFRecords and TFExamples and Load Images
Something like this should work (untested):
# Serialize the data into two tfrecord files
feature_extractor = ...
features_file = tf.python_io.TFRecordWriter('features.tfrec')
label_file = tf.python_io.TFRecordWriter('labels.tfrec')
for images, labels in dataset:
features = feature_extractor(images)
# Parse the files and zip them together
def parse(type, shape):
    """Build a function that decodes a serialized tensor.

    Args:
        type: dtype passed to ``tf.parse_tensor`` as ``out_type``.
        shape: Shape the decoded tensor is reshaped to.

    Returns:
        A one-argument callable suitable for ``Dataset.map``.
    """
    def _parse(x):
        # out_type must be the dtype, not the shape; the reshape uses the
        # caller-supplied shape rather than a module-level constant.
        result = tf.parse_tensor(x, out_type=type)
        result = tf.reshape(result, shape)
        return result
    return _parse
features_ds ='features.tfrec')
features_ds =, FEATURE_SHAPE), num_parallel_calls=AUTOTUNE)
labels_ds ='labels.tfrec')
labels_ds =, FEATURE_SHAPE), num_parallel_calls=AUTOTUNE)
ds =, labels_ds)
ds = ds.unbatch().shuffle().repeat().batch().prefetch()...
Determining input nodes when freezing Tensorflow graphs using

I'm using the TensorFlow Dataset API as my input pipeline, as follows:
train_dataset =,trn_y))
train_dataset =,num_parallel_calls=12)
train_dataset =
train_dataset = train_dataset.apply(
train_dataset = train_dataset.prefetch(buffer_size=600)
val_dataset =,val_y))
val_dataset =,num_parallel_calls=4)
val_dataset = val_dataset.repeat(1)
val_dataset = val_dataset.apply(
val_dataset = val_dataset.prefetch(buffer_size=200)
handle = tf.placeholder(tf.string, shape=[])
iterator =
handle, train_dataset.output_types,
images,labels = iterator.get_next()
train_iter = train_dataset.make_initializable_iterator()
val_iter = val_dataset.make_initializable_iterator()
Then use this code to switch between training and validation datasets:
# Define training and validation handlers
training_handle =
validation_handle =
loss =[train_op],feed_dict={handle:training_handle,
After training, I save the weights and then freeze the graph from a saved checkpoint (.meta) into the .pb format. Subsequently, I run the tool provided in the TensorFlow repo. This script requires input_nodes_names to be defined. I am unable to determine which is the correct input node for the graph. Here are the nodes for my graph:
The output nodes can be easily determined, but not the input nodes.
handle = tf.placeholder(tf.string, shape=[]) is your input, so the tensor is most likely 'Placeholder:0'.
However it would make more sense to write:
handle = tf.placeholder(tf.string, shape=[], name="input_placeholder")
