I'm new to machine learning and I've been working on this tutorial for some time; it needs more than 45 GB of RAM to run, so I've tried progressive loading from this tutorial.
Here is the error I'm getting:
ValueError: Error when checking input: expected input_1 to have 2 dimensions, but got array with shape (13, 224, 224, 3)
Here is the model function:
# define the captioning model
def define_model(vocab_size, max_length):
    # feature extractor model
    inputs1 = Input(shape=(4096,))
    fe1 = Dropout(0.5)(inputs1)
    fe2 = Dense(256, activation='relu')(fe1)
    # sequence model
    inputs2 = Input(shape=(max_length,))
    se1 = Embedding(vocab_size, 256, mask_zero=True)(inputs2)
    se2 = Dropout(0.5)(se1)
    se3 = LSTM(256)(se2)
    # decoder model
    decoder1 = add([fe2, se3])
    decoder2 = Dense(256, activation='relu')(decoder1)
    outputs = Dense(vocab_size, activation='softmax')(decoder2)
    # tie it together [image, seq] -> [word]
    model = Model(inputs=[inputs1, inputs2], outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    # summarize model
    print(model.summary())
    plot_model(model, to_file='model.png', show_shapes=True)
    return model
The load_photo function:
# load a single photo intended as input for the VGG feature extractor model
def load_photo(filename):
    image = load_img(filename, target_size=(224, 224))
    # convert the image pixels to a numpy array
    image = img_to_array(image)
    # reshape data for the model
    image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
    # prepare the image for the VGG model
    image = preprocess_input(image)[0]
    # get image id
    image_id = filename.split('/')[-1].split('.')[0]
    return image, image_id
The create_sequences and data_generator functions:
# create sequences of images, input sequences and output words for an image
def create_sequences(tokenizer, max_length, desc, image):
    Ximages, XSeq, y = list(), list(), list()
    vocab_size = len(tokenizer.word_index) + 1
    # integer encode the description
    seq = tokenizer.texts_to_sequences([desc])[0]
    # split one sequence into multiple X,y pairs
    for i in range(1, len(seq)):
        # select
        in_seq, out_seq = seq[:i], seq[i]
        # pad input sequence
        in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
        # encode output sequence
        out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
        # store
        Ximages.append(image)
        XSeq.append(in_seq)
        y.append(out_seq)
    Ximages, XSeq, y = array(Ximages), array(XSeq), array(y)
    return [Ximages, XSeq, y]
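To make the splitting step concrete, here is a hypothetical illustration (the word ids are made up) of what create_sequences produces for a single caption:

# hypothetical caption 'startseq dog runs endseq' -> seq = [1, 5, 9, 2]
# pairs generated before padding / one-hot encoding:
#   in_seq = [1]        -> out_seq = 5   ('dog')
#   in_seq = [1, 5]     -> out_seq = 9   ('runs')
#   in_seq = [1, 5, 9]  -> out_seq = 2   ('endseq')
# the same image array is appended alongside each in_seq row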
# data generator, intended to be used in a call to model.fit_generator()
def data_generator(descriptions, tokenizer, max_length):
    # loop for ever over images
    directory = 'Flicker8k_Dataset'
    while 1:
        for name in listdir(directory):
            # load an image from file
            filename = directory + '/' + name
            image, image_id = load_photo(filename)
            # create word sequences
            desc = descriptions[image_id]
            in_img, in_seq, out_word = create_sequences(tokenizer, max_length, desc, image)
            yield [[in_img, in_seq], out_word]
And finally:
model = define_model(vocab_size, max_length)
# define checkpoint callback
filepath = 'model-ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
# fit model
model.fit_generator(data_generator(descriptions, tokenizer, max_length), steps_per_epoch=70000)
The error says that your input data consists of 13 images of shape (224, 224, 3), while the input layer of the feature extractor model expects a 4096-length vector.
To fix the problem, first reshape the images to 1D in load_photo(filename):
image = image.reshape((1, 224*224*3))
Then, in define_model(vocab_size, max_length), change the first input layer to:
inputs1 = Input(shape=(224*224*3,))
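Putting both edits together, a minimal sketch of the changed parts (slightly adjusted so that each sample stacks to shape (n, 150528) inside create_sequences; everything else stays as in your code):

def load_photo(filename):
    image = load_img(filename, target_size=(224, 224))
    image = img_to_array(image)
    # keep the 4D shape for preprocess_input, then drop the batch axis
    image = preprocess_input(image.reshape((1, 224, 224, 3)))[0]
    # flatten to a 1D vector of length 224*224*3 = 150528 so that
    # create_sequences stacks samples into shape (n, 150528)
    image = image.reshape((224*224*3,))
    image_id = filename.split('/')[-1].split('.')[0]
    return image, image_id

# and in define_model, the matching input layer:
inputs1 = Input(shape=(224*224*3,))

A more faithful alternative to the tutorial would be to run each photo through the VGG16 feature extractor and feed the resulting 4096-length vector instead, but the flattened-pixel version above is the smallest change that removes the shape error.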
I'm facing an issue where, using my own data (JPEG images) fed from my own local directory, I get this error when running the model:
(0) INVALID_ARGUMENT: Expected dimension in the range [0, 0), but got 0
[[{{node ArgMax}}]]
[[IteratorGetNext]]
[[IteratorGetNext/_1965]]
(1) INVALID_ARGUMENT: Expected dimension in the range [0, 0), but got 0
[[{{node ArgMax}}]]
[[IteratorGetNext]]
However, if I run it with tensorflow_datasets as input, it runs with no issues at all.
I wonder what the reason behind this is?
For background, I'm running this on an HPC system.
Code snippets:
Input function using my own local data:
list_ds = tf.data.Dataset.list_files(str(data_dir+'/*/*'), shuffle=False)
class_names = np.array(sorted([item.split('/')[-1] for item in glob.glob(data_dir + '/*')]))
val_size = int(image_count * 0.2)

def get_label(file_path):
    # Convert the path to a list of path components
    parts = tf.strings.split(file_path, os.path.sep)
    one_hot = parts[-2] == class_names
    one_hot = tf.cast(one_hot, tf.int32)
    return tf.argmax(one_hot)
    # return one_hot

def decode_img(img):
    # Convert the compressed string to a 3D uint8 tensor
    img = tf.io.decode_jpeg(img, channels=3)
    img = tf.cast(img, tf.float32)
    img = (img/127.5) - 1
    # img = tf.keras.applications.mobilenet.preprocess_input(img)
    # Resize the image to the desired size
    return tf.image.resize(img, [image_param['img_height'], image_param["img_width"]])

def process_path(file_path):
    label = get_label(file_path)
    # Load the raw data from the file as a string
    img = tf.io.read_file(file_path)
    img = decode_img(img)
    return img, label

ds = ds.map(
    process_path,
    # num_parallel_calls=num_parallel_calls
    # if num_parallel_calls > 0
    # else tf.data.experimental.AUTOTUNE,
)
ds = ds.batch(32, drop_remainder=True)
ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
ds = ds.cache()
return ds
Input function using TFDS:
IMG_SIZE = 160  # All images will be resized to 160x160

def preprocess(image, label):
    image = tf.cast(image, tf.float32)
    image = (image/127.5) - 1
    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    return image, label

def train_input_fn(batch_size):
    # data = tfds.load('cats_vs_dogs', as_supervised=True)
    data = tfds.load('stanford_dogs',
                     as_supervised=True,
                     data_dir='dog_breed')
    train_data = data['train']
    train_data = train_data.map(preprocess).shuffle(500).batch(batch_size)
    return train_data
Model function:
keras_mobilenet_v2 = tf.keras.applications.MobileNetV2(
    input_shape=(160, 160, 3), include_top=False)
keras_mobilenet_v2.trainable = False

estimator_model = tf.keras.Sequential([
    keras_mobilenet_v2,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(1)
])

# Compile the model
estimator_model.compile(
    optimizer=tf.keras.optimizers.legacy.Adam(),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=['accuracy'])

est_mobilenet_v2 = tf.keras.estimator.model_to_estimator(keras_model=estimator_model)
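One way to narrow this down (a debugging sketch, not a definitive answer): compare what the two pipelines actually yield. The ArgMax node in the trace comes from the tf.argmax(one_hot) in get_label, so it is worth checking the rank of the labels and the class_names array on the HPC filesystem. make_own_dataset() below is a stand-in name for your own input function, whose definition line isn't shown above.

# hypothetical name: make_own_dataset() stands in for the input function
# that returns `ds` above; train_input_fn is the TFDS one from the question
own_ds = make_own_dataset()
tfds_ds = train_input_fn(32)

# compare the structures the model actually receives
print("own pipeline :", own_ds.element_spec)
print("tfds pipeline:", tfds_ds.element_spec)

# the ArgMax in the trace lives in get_label, so inspect the labels directly
print("class_names:", class_names)
for images, labels in own_ds.take(1):
    print("images:", images.shape, "labels:", labels.shape, labels[:5])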
I am trying to ensemble five pre-trained transfer-learning DNN models (the base models were trained on ImageNet) using the code below:
def define_stacked_model(members):
    # update all layers in all models to not be trainable
    for i in range(len(members)):
        model = members[i]
        for layer in model.layers:
            # make not trainable
            layer.trainable = False
            # rename to avoid 'unique layer name' issue
            layer._name = 'ensemble_' + str(i+1) + '_' + layer.name
    # define multi-headed input
    ensemble_visible = [model.input for model in members]
    # concatenate merge output from each model
    ensemble_outputs = [model.output for model in members]
    merge = concatenate(ensemble_outputs)
    hidden = tf.keras.layers.Dense(10, activation='relu')(merge)
    output = tf.keras.layers.Dense(1)(hidden)
    model = tf.keras.Model(inputs=ensemble_visible, outputs=output)
    # plot graph of ensemble
    plot_model(model, show_shapes=True, to_file='model_graph.png')
    # compile
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                  metrics=["accuracy"])
    return model
# define ensemble model
stacked_model = define_stacked_model(members)
stacked_model.summary()
Each model accepts an input image of size 160x160. I read the images using:
IMG_WIDTH=160
IMG_HEIGHT=160
def create_dataset(img_folder):
    img_data_array = []
    class_name = []
    for dir1 in os.listdir(img_folder):
        for file in os.listdir(os.path.join(img_folder, dir1)):
            image_path = os.path.join(img_folder, dir1, file)
            image = cv2.imread(image_path, cv2.IMREAD_COLOR)
            image = cv2.resize(image, (IMG_HEIGHT, IMG_WIDTH), interpolation=cv2.INTER_AREA)
            image = np.array(image)
            image = image.astype('float32')
            image /= 255
            img_data_array.append(image)
            class_name.append(dir1)
    return img_data_array, class_name
# extract the image array and class name
img_data, class_name = create_dataset(TRAIN_DIR)
target_dict={k: v for v, k in enumerate(np.unique(class_name))}
target_val= [target_dict[class_name[i]] for i in range(len(class_name))]
target_val = np.array(list(map(int,target_val))[0:300], np.float32)
Note: I used only 300 images because of memory constraints.
After that, I stacked the images using:
stacked_img_data = np.dstack([img_data[0:300],
                              img_data[0:300],
                              img_data[0:300],
                              img_data[0:300],
                              img_data[0:300]])
And finally I trained the ensemble model using:
history = stacked_model.fit(x = stacked_img_data, y = target_val, epochs=10)
I faced this error:
ValueError: Layer model expects 5 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None, 160, 800, 3) dtype=float32>]
I think the above method is close to the correct approach, but I am not sure how to solve the issue.
Well, you can make this work just by changing history = stacked_model.fit(x = stacked_img_data, y = target_val, epochs=10) to this:
history = stacked_model.fit(x = [img_data[0:300],img_data[0:300],img_data[0:300],img_data[0:300],img_data[0:300]], y = target_val, epochs=10)
That being said, you can also do this:
input = tf.keras.layers.Input(shape=(160,160,3))
ensemble_outputs = [model(input) for model in members]
.
.
model = tf.keras.Model(inputs=input, outputs=output)
and only pass the image once.
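For completeness, here is a sketch of that single-input variant spelled out (my own expansion of the snippet above, not the original answer's full code), reusing the same dense head and layer sizes as define_stacked_model:

def define_stacked_model_single_input(members):
    # freeze and rename the base models, as in the question
    for i, member in enumerate(members):
        for layer in member.layers:
            layer.trainable = False
            layer._name = 'ensemble_' + str(i + 1) + '_' + layer.name
    # one shared image input fed to every member model
    single_input = tf.keras.layers.Input(shape=(160, 160, 3))
    ensemble_outputs = [member(single_input) for member in members]
    merge = tf.keras.layers.concatenate(ensemble_outputs)
    hidden = tf.keras.layers.Dense(10, activation='relu')(merge)
    output = tf.keras.layers.Dense(1)(hidden)
    model = tf.keras.Model(inputs=single_input, outputs=output)
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                  metrics=['accuracy'])
    return model

# with this version the images are passed a single time, e.g.:
# stacked_model = define_stacked_model_single_input(members)
# history = stacked_model.fit(x=np.array(img_data[0:300]), y=target_val, epochs=10)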
I have some code for a mixed model: one branch trains on an EfficientNet and the other on some external data, and the two are combined. The following is an example of the model:
def create_model():
    # Define parameters
    inputShape = (256, 256, 3)
    inputDim = 8
    # define MLP network
    model = Sequential()
    model.add(Dense(8, input_dim=inputDim, activation="relu"))
    model.add(Dense(4, activation="relu"))
    cnnModel = Sequential()
    cnnModel.add(EfficientNetB5(include_top=False, input_shape=inputShape))
    cnnModel.add(Flatten())
    cnnModel.add(Dense(units=16, activation='relu'))
    cnnModel.add(Dense(units=4, activation='relu'))
    # Concatenate them
    fullModel = concatenate([cnnModel.output, model.output])
    fullModel = Dense(4, activation="relu")(fullModel)
    fullModel = Dense(1, activation="sigmoid")(fullModel)
    model = Model(inputs=[cnnModel.input, model.input], outputs=fullModel)
    return model
However, when I run this through the fit_generator function I receive the following error:
batch_size = 16
train_steps = TrainData.shape[0]//batch_size
valid_steps = TrainData.shape[0]//batch_size
model = create_model()
opt = Adam(lr=1e-3, decay=1e-3 / 200)
model.compile(loss="binary_crossentropy", optimizer=opt)
print("[INFO] training model...")
model.fit_generator(
    train_dl,
    epochs=3,
    steps_per_epoch=train_steps
)
model.save("models/final_model")
InvalidArgumentError: Incompatible shapes: [16,3,256,256] vs. [1,1,1,3]
[[node model_47/efficientnetb5/normalization_52/sub (defined at <ipython-input-262-76be6a4af4a4>:11) ]] [Op:__inference_train_function_1072272]
I'm unsure whether this error is coming from the data loader or from the EfficientNet. Any ideas?
Edit to include data loader:
def data_generator(image_dir, dataframe, min_max, binary, category, transforms=None, batch_size=16):
    i = 0
    samples_per_epoch = dataframe.shape[0]
    number_of_batches = samples_per_epoch / batch_size
    while True:
        batch = {'images': [], 'data': [], 'labels': []}  # use a dict for multiple inputs
        # Randomly sample images in dataframe
        idx = i
        img_path = f"{image_dir}/{dataframe.iloc[idx]['image_name']}.jpg"
        img = Image.open(img_path)
        if transforms:
            img = transforms(**{"image": np.array(img)})["image"]
        img = np.asarray(img, dtype="int32")
        # make data into tensors
        dataframe2 = dataframe.iloc[idx]
        data_cont = min_max.transform(np.array(dataframe2['age_approx']).reshape(1, -1))
        data_bina = binary.transform(dataframe2['sex'])
        data_cate = category.transform(dataframe2['anatom_site_general_challenge'])
        data_total = np.concatenate((data_cont, data_bina, data_cate), axis=1)
        label = dataframe2['target']
        batch['images'].append(img)
        batch['data'].append(data_total)
        batch['labels'].append(label)
        batch['images'] = np.array(batch['images'])  # convert each list to array
        batch['data'] = np.array(batch['data'])
        batch['labels'] = np.array(batch['labels'])
        i += 1
        if i >= number_of_batches:
            i = 0
            yield [batch['images'], batch['data']], batch['labels']

def get_data(train_df, valid_df, train_tfms, test_tfms, batch_size, min_max, binary, category):
    train_dl = data_generator(image_dir='train/', dataframe=train_df, batch_size=batch_size, min_max=min_max, binary=binary, category=category, transforms=train_tfms)
    valid_dl = data_generator(image_dir='train/', dataframe=valid_df, batch_size=batch_size*2, min_max=min_max, binary=binary, category=category, transforms=test_tfms)
    return train_dl, valid_dl
I seem to have the same issue when I use just the images and the EfficientNet. It seems like using Keras's built-in image data loader functions is the only way I can get it to work (with images alone).
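One quick way to narrow this down (a debugging sketch using the train_dl generator above): pull a single batch and print its shapes before calling fit. The [16,3,256,256] vs [1,1,1,3] mismatch suggests the images are arriving channels-first, while EfficientNet's built-in normalization expects channels-last (height, width, 3).

import numpy as np

# inspect one batch from the generator before training
(images, tabular), labels = next(train_dl)
print("images :", np.asarray(images).shape)   # expected (batch, 256, 256, 3)
print("tabular:", np.asarray(tabular).shape)  # expected (batch, 8)
print("labels :", np.asarray(labels).shape)

# if the images come out as (batch, 3, 256, 256), transposing each image to
# channels-last inside the generator should match the EfficientNet input, e.g.
# img = np.transpose(img, (1, 2, 0))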
I am using this tutorial code:
import tensorflow as tf
import numpy as np
import os
import time
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')
# Read, then decode for py2 compat.
text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
# length of text is the number of characters in it
print('Length of text: {} characters'.format(len(text)))
# Take a look at the first 250 characters in text
print(text[:250])
# The unique characters in the file
vocab = sorted(set(text))
print('{} unique characters'.format(len(vocab)))
# Creating a mapping from unique characters to indices
char2idx = {u:i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)
text_as_int = np.array([char2idx[c] for c in text])
print('{')
for char,_ in zip(char2idx, range(20)):
    print('  {:4s}: {:3d},'.format(repr(char), char2idx[char]))
print('  ...\n}')
# Show how the first 13 characters from the text are mapped to integers
print('{} ---- characters mapped to int ---- > {}'.format(repr(text[:13]), text_as_int[:13]))
# The maximum length sentence you want for a single input in characters
# seq_length = 100
seq_length = 50
examples_per_epoch = len(text)//(seq_length+1)
# Create training examples / targets
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
for i in char_dataset.take(5):
    print(idx2char[i.numpy()])
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)
for item in sequences.take(5):
    print(repr(''.join(idx2char[item.numpy()])))
def split_input_target(chunk):
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text
dataset = sequences.map(split_input_target)
for input_example, target_example in dataset.take(1):
    print('Input data: ', repr(''.join(idx2char[input_example.numpy()])))
    print('Target data:', repr(''.join(idx2char[target_example.numpy()])))
for i, (input_idx, target_idx) in enumerate(zip(input_example[:5], target_example[:5])):
    print("Step {:4d}".format(i))
    print("  input: {} ({:s})".format(input_idx, repr(idx2char[input_idx])))
    print("  expected output: {} ({:s})".format(target_idx, repr(idx2char[target_idx])))
# Batch size
BATCH_SIZE = 64
# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)
dataset
# Length of the vocabulary in chars
vocab_size = len(vocab)
# The embedding dimension
embedding_dim = 128
#embedding_dim = 256
# Number of RNN units
rnn_units = 128
#rnn_units = 1024
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(vocab_size, embedding_dim,
                                  batch_input_shape=[batch_size, None]),
        tf.keras.layers.GRU(rnn_units,
                            return_sequences=True,
                            stateful=True,
                            recurrent_initializer='glorot_uniform'),
        tf.keras.layers.GRU(rnn_units,
                            return_sequences=True,
                            stateful=True,
                            recurrent_initializer='glorot_uniform'),
        tf.keras.layers.Dense(vocab_size)
    ])
    return model
model = build_model(
    vocab_size=len(vocab),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=BATCH_SIZE)
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")
model.summary()
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices,axis=-1).numpy()
sampled_indices
print("Input: \n", repr("".join(idx2char[input_example_batch[0].numpy()])))
print()
print("Next Char Predictions: \n", repr("".join(idx2char[sampled_indices ])))
def loss(labels, logits):
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)
example_batch_loss = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("scalar_loss: ", example_batch_loss.numpy().mean())
model.compile(optimizer='adam', loss=loss)
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)
#EPOCHS = 30
EPOCHS = 10
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])
tf.train.latest_checkpoint(checkpoint_dir)
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
model.build(tf.TensorShape([1, None]))
model.summary()
I have set the parameters so it runs quickly for testing. I would like to measure the perplexity of the model, say on the training set itself or some other test text. How can I do that?
To make the question completely self-contained, given the model made above, how would you compute the perplexity of the string "where"?
The solution can be found in this keras issue: https://github.com/keras-team/keras/issues/8267
def loss(labels, logits):
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

def perplexity(labels, logits):
    cross_entropy = loss(labels, logits)
    perplexity = tf.keras.backend.exp(cross_entropy)
    return perplexity
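To make this concrete for the string "where", here is a minimal sketch (my own, assuming the batch_size=1 model rebuilt and restored from the checkpoint as in the code above, and the char2idx mapping from the question). You can also pass perplexity as a metric, e.g. model.compile(optimizer='adam', loss=loss, metrics=[perplexity]), and read it off model.evaluate(dataset).

import numpy as np
import tensorflow as tf

def string_perplexity(model, text, char2idx):
    # integer-encode the string; inputs are all characters except the last,
    # targets are all characters except the first (next-character prediction)
    ids = np.array([char2idx[c] for c in text])
    inputs = ids[:-1][np.newaxis, :]    # shape (1, len(text) - 1)
    targets = ids[1:][np.newaxis, :]    # shape (1, len(text) - 1)
    model.reset_states()                # the GRU layers are stateful
    logits = model(inputs)              # shape (1, len(text) - 1, vocab_size)
    cross_entropy = tf.keras.losses.sparse_categorical_crossentropy(
        targets, logits, from_logits=True)
    # perplexity = exp(mean per-character cross-entropy)
    return float(tf.exp(tf.reduce_mean(cross_entropy)))

# with the batch_size=1 model restored from the checkpoint:
print(string_perplexity(model, "where", char2idx))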
I'm very new to TensorFlow and trying to make a simple binary classifier for my own image set. The images are all grayscale 226x226 PNGs. I keep getting the error "ValueError: The last dimension of the inputs to Dense should be defined. Found None". I have been stuck on this for days and can't seem to shape my model/dataset in a way that works. Can someone help a newbie out? Any possibly relevant code is below. Thanks in advance.
## img parser
def _parse_function(filename, label):
    image_string = tf.read_file(filename)
    image_decoded = tf.image.decode_png(image_string)
    image_decoded = tf.image.resize_images(image_decoded, [226, 226])
    return image_decoded, label

# img processor function
# input: dir
# output: dataset
def imgPrcs(dir):
    labelArr = []
    filenames = []
    src = dir
    for fname in os.listdir(src):
        png = os.path.join(src, fname)
        filenames.append(png)
        if os.path.isfile(png):
            # extract the label stored in the PNG's iTXt metadata chunk
            # (chunk_iter iterates over the PNG chunks; defined elsewhere)
            with open(png, 'rb') as fobj:
                data = fobj.read()
            data_arr = []
            for chunk_type, chunk_data in chunk_iter(data):
                if chunk_type == b'iTXt':
                    data_arr.append(chunk_data.decode())
            label = int(data_arr[1][-1:])
            # add label
            labelArr.append(label)
    labels = tf.constant(labelArr)
    filename_q = tf.constant(filenames)
    dataset = tf.data.Dataset.from_tensor_slices((filename_q, labels))
    dataset = dataset.map(_parse_function)
    # return variables
    return dataset
# create labels and datasets
print('Compiling images and labels...\n')
trainData = imgPrcs('./train/')
testData = imgPrcs('./test/')
valData = imgPrcs('./validate/')

# Create Model
print('Creating Model...\n')
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(226, 226, None)),
    keras.layers.Dense(128, kernel_initializer='normal', activation='relu'),
    keras.layers.Dense(1, kernel_initializer='normal', activation='sigmoid')
])

print('compile...\n')
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

print('train..\n')
# Train Model
model.fit(trainData.make_one_shot_iterator(), epochs=5, steps_per_epoch=385)

print('test')
# Test Model
test_loss, test_acc = model.evaluate(testData.make_one_shot_iterator())
print('Test accuracy:', test_acc)
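A sketch of the change I would try first, assuming the PNGs are single-channel grayscale: the error means the channel dimension is unknown, so decoding with a fixed channel count and giving Flatten a fully defined input shape lets the Dense layer infer its weights. With a single sigmoid output you would also normally use loss='binary_crossentropy' rather than 'sparse_categorical_crossentropy'.

def _parse_function(filename, label):
    image_string = tf.read_file(filename)
    # force a known channel count so the last dimension is defined
    image_decoded = tf.image.decode_png(image_string, channels=1)
    image_decoded = tf.image.resize_images(image_decoded, [226, 226])
    return image_decoded, label

model = keras.Sequential([
    keras.layers.Flatten(input_shape=(226, 226, 1)),  # concrete channel dim, not None
    keras.layers.Dense(128, kernel_initializer='normal', activation='relu'),
    keras.layers.Dense(1, kernel_initializer='normal', activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])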