Target array shape different from expected output using TensorFlow - python

I'm trying to make a CNN (still a beginner). When trying to fit the model I am getting this error:
ValueError: A target array with shape (10000, 10) was passed for output of shape (None, 6, 6, 10) while using as loss categorical_crossentropy. This loss expects targets to have the same shape as the output.
The shape of labels = (10000, 10)
The shape of the image data = (10000, 32, 32, 3)
Code:
import pickle
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Dense, Dropout, Activation, Flatten,
                                     Conv2D, MaxPooling2D)
from tensorflow.keras.callbacks import TensorBoard
from keras.utils import to_categorical
import numpy as np
import time

MODEL_NAME = f"_________{int(time.time())}"
BATCH_SIZE = 64


class ConvolutionalNetwork():
    '''
    A convolutional neural network to be used to classify images
    from the CIFAR-10 dataset.
    '''

    def __init__(self):
        '''
        self.training_images -- a 10000x3072 numpy array of uint8s. Each row
                                of the array stores a 32x32 colour image.
                                The first 1024 entries contain the red channel
                                values, the next 1024 the green, and the final
                                1024 the blue. The image is stored in row-major
                                order, so that the first 32 entries of the array
                                are the red channel values of the first row of
                                the image.
        self.training_labels -- a list of 10000 numbers in the range 0-9.
                                The number at index i indicates the label
                                of the ith image in the array data.
        '''
        # List of image categories
        self.label_names = (self.unpickle("cifar-10-batches-py/batches.meta",
                                          encoding='utf-8')['label_names'])

        self.training_data = self.unpickle("cifar-10-batches-py/data_batch_1")
        self.training_images = self.training_data[b'data']
        self.training_labels = self.training_data[b'labels']

        # Reshaping the images + scaling
        self.shape_images()

        # Converts labels to one-hot
        self.training_labels = np.array(to_categorical(self.training_labels))

        self.create_model()

        self.tensorboard = TensorBoard(log_dir=f'logs/{MODEL_NAME}')

    def unpickle(self, file, encoding='bytes'):
        '''
        Unpickles the dataset files.
        '''
        with open(file, 'rb') as fo:
            training_dict = pickle.load(fo, encoding=encoding)
        return training_dict

    def shape_images(self):
        '''
        Reshapes the images and scales by 255.
        '''
        images = list()
        for d in self.training_images:
            image = np.zeros((32, 32, 3), dtype=np.uint8)
            image[..., 0] = np.reshape(d[:1024], (32, 32))      # Red channel
            image[..., 1] = np.reshape(d[1024:2048], (32, 32))  # Green channel
            image[..., 2] = np.reshape(d[2048:], (32, 32))      # Blue channel
            images.append(image)

        for i in range(len(images)):
            images[i] = images[i] / 255

        images = np.array(images)
        self.training_images = images
        print(self.training_images.shape)

    def create_model(self):
        '''
        Creating the ConvNet model.
        '''
        self.model = Sequential()

        self.model.add(Conv2D(64, (3, 3), input_shape=self.training_images.shape[1:]))
        self.model.add(Activation("relu"))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))

        self.model.add(Conv2D(64, (3, 3)))
        self.model.add(Activation("relu"))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))

        # self.model.add(Flatten())
        # self.model.add(Dense(64))
        # self.model.add(Activation('relu'))

        self.model.add(Dense(10))
        self.model.add(Activation(activation='softmax'))

        self.model.compile(loss="categorical_crossentropy", optimizer="adam",
                           metrics=['accuracy'])

    def train(self):
        '''
        Fits the model.
        '''
        print(self.training_images.shape)
        print(self.training_labels.shape)
        self.model.fit(self.training_images, self.training_labels, batch_size=BATCH_SIZE,
                       validation_split=0.1, epochs=5, callbacks=[self.tensorboard])


network = ConvolutionalNetwork()
network.train()
I would appreciate the help; I have been trying to fix this for an hour.

You need to uncomment the Flatten layer when creating your model. Essentially, this layer takes a 4D input (batch_size, height, width, num_filters) and unrolls it into a 2D one (batch_size, height * width * num_filters). This is needed to get the output shape you want.
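For example, here is a minimal standalone sketch (layer sizes chosen to match the question) showing how the shapes evolve and how Flatten() lets the final Dense layer produce (None, 10):
import tensorflow as tf

inputs = tf.keras.Input(shape=(32, 32, 3))
x = tf.keras.layers.Conv2D(64, (3, 3), activation='relu')(inputs)   # (None, 30, 30, 64)
x = tf.keras.layers.MaxPooling2D((2, 2))(x)                          # (None, 15, 15, 64)
x = tf.keras.layers.Conv2D(64, (3, 3), activation='relu')(x)         # (None, 13, 13, 64)
x = tf.keras.layers.MaxPooling2D((2, 2))(x)                          # (None, 6, 6, 64)
x = tf.keras.layers.Flatten()(x)                                     # (None, 6 * 6 * 64) = (None, 2304)
outputs = tf.keras.layers.Dense(10, activation='softmax')(x)         # (None, 10), matching the labels
print(tf.keras.Model(inputs, outputs).output_shape)                  # (None, 10)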

Un-comment the Flatten layer before your output layer in create_model(self). Conv layers don't output 1D tensors/arrays, so to get an output layer of the right shape you have to add a Flatten() layer right before it, like this:
def create_model(self):
    '''
    Creating the ConvNet model.
    '''
    self.model = Sequential()

    self.model.add(Conv2D(64, (3, 3), input_shape=self.training_images.shape[1:], activation='relu'))
    # self.model.add(Activation("relu"))
    self.model.add(MaxPooling2D(pool_size=(2, 2)))

    self.model.add(Conv2D(64, (3, 3), activation='relu'))
    # self.model.add(Activation("relu"))
    self.model.add(MaxPooling2D(pool_size=(2, 2)))

    # self.model.add(Dense(64))
    # self.model.add(Activation('relu'))

    self.model.add(Flatten())
    self.model.add(Dense(10, activation='softmax'))
    # self.model.add(Activation(activation='softmax'))

    self.model.compile(loss="categorical_crossentropy", optimizer="adam",
                       metrics=['accuracy'])

    print('model output shape:', self.model.output_shape)  # prints out the output shape of your model
The code above will give you a model with an output shape of (None, 10).
Also, please pass activation as a layer parameter in the future (as above), rather than adding separate Activation layers.

Use model.summary() to inspect the output shapes of your model. Without the commented-out Flatten() layer, the layers retain the spatial dimensions of the image and the shape of the output layer is (None, 6, 6, 10).
What you want to do here is roughly:
1. start with a shape of (batch_size, img_width, img_height, channels)
2. use convolutions to detect patterns through the image by applying a filter
3. reduce the img width and height with max pooling
4. Flatten() the dimensions of the image so that instead of (width, height, features) you end up with just a set of features
5. match against your classes.
The commented-out code does step 4; when you remove the Flatten() layer you end up with the wrong set of dimensions at the end.
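For instance, with the class from the question you could run something like this (a minimal check; it assumes the CIFAR-10 batch files are on disk) to see the difference:
network = ConvolutionalNetwork()
network.model.summary()            # per-layer output shapes
print(network.model.output_shape)  # (None, 6, 6, 10) as posted; (None, 10) once Flatten() is added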

You have to get your model output into the same shape as your labels.
Perhaps the simplest solution would be to ensure the model ends with these layers:
model.add(Flatten())
## possibly an extra dense layer or 2 with 'relu' activation
model.add(Dense(10, activation='softmax'))
This is amongst the most common 'endings' to a categorisation model and is arguably the most straightforward to understand.
It's not clear why you commented out this section:
# self.model.add(Flatten())
# self.model.add(Dense(64))
# self.model.add(Activation('relu'))
which would appear to give you the required output shape?

Related

How to treat batch of inputs in keras?

I have a CNN trained on inputs of shape (None, 128, 128, 1) (grayscale images). I want to use this model's weights as a basis in another model to extract features (without training the first model), which accepts two inputs of shape [(None, 128, 128, 10), (None, 10, 8)]; here 10 is the number of images in a single sample.
Basically it accepts 10 images of shape (128, 128). I treated each image as a channel, since the Conv layer in Keras accepts 4D input.
My model would look something like this:
def create_model():
    trajectory_input = Input(shape=(10, 8), name='trajectory_input')
    image_input = Input(shape=(128, 128, 10), name='image_input')

    x_aware = (Conv2D(32, kernel_size=(3, 3), weights=model1_weights, activation='relu'))(image_input)

    x = concatenate([trajectory_input, x_aware])

    x_reg = (Dense(8, activation='relu', kernel_regularizer=regularizers.l2(0.001)))(x)

    model = Model(inputs=[trajectory_input, image_input], outputs=[x_reg])
Is there any way in Keras to treat this single sample as a batch of images, instead of one single sample, while extracting features from model1 (here the x_aware layer)?
Assuming you want a [None, 126, 126, 32, 10] sized tensor at the end, you'll need to define a custom layer for this.
class CustomLayer(tf.keras.layers.Layer):
    def __init__(self, conv_layer):
        super(CustomLayer, self).__init__()
        self.conv_layer = conv_layer

    def build(self, input_shape):
        pass

    def call(self, x):
        return tf.stack([self.conv_layer(tf.expand_dims(t, axis=-1))
                         for t in tf.unstack(x, axis=-1)], axis=-1)
and call it as,
x_aware = CustomLayer(conv_layer)(image_input)
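A quick shape check of the layer above (a sketch that uses a freshly initialised Conv2D as a stand-in for the pre-trained layer, together with the CustomLayer definition above):
import tensorflow as tf

conv_layer = tf.keras.layers.Conv2D(32, (3, 3), activation='relu')  # stand-in for model1's conv layer
image_input = tf.keras.Input(shape=(128, 128, 10), name='image_input')
x_aware = CustomLayer(conv_layer)(image_input)
print(x_aware.shape)  # (None, 126, 126, 32, 10)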

Reshape Tensorflow model batch dimension into time series

I'm trying to reshape a Tensorflow model's input along the batch dimension. I want to combine some of the batch samples into a time-series so I can feed it into an LSTM layer.
Specifically, I have 1024 samples and I'd like to put them into groups of 64 timesteps with the result being 16 batches of 64 timesteps, each timestep having the original 24 features.
#input tensor is (1024, 24)
inputLayer = Input(shape=(24,))
#I want it to be (16, 64, 24)
reshapedLayer = layers.Reshape([64, 24])(inputLayer)
lstmLayer = layers.LSTM(128, activation='relu')(reshapedLayer)
This compiles but throws a runtime error
tensorflow.python.framework.errors_impl.InvalidArgumentError:
Input to reshape is a tensor with 24576 values, but the requested shape has 1572864
I understand what the error is telling me, but I'm not sure the right way to go about fixing it.
Perhaps this could work for you:
import tensorflow as tf
inputs = tf.keras.layers.Input(shape=(24,))
x = tf.reshape(inputs, (16, 64, 24))
x = tf.keras.layers.LSTM(128, activation='relu')(x)
model = tf.keras.Model(inputs=inputs, outputs=x)
# dummy data
inputs = tf.random.uniform(shape=(1024, 24))
outputs = model(inputs)
The key change is replacing the Reshape layer with tf.reshape: the Reshape layer only reshapes each sample and leaves the batch dimension untouched, whereas tf.reshape can fold the batch dimension into the time dimension as you want here.
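A different option, not used in the answer above, is to do the grouping on the data itself with numpy before calling the model, so the model only ever sees sequences of shape (64, 24) and the batch size stays flexible:
import numpy as np
import tensorflow as tf

data = np.random.rand(1024, 24).astype('float32')   # dummy stand-in for the real samples
sequences = data.reshape(16, 64, 24)                 # 16 sequences of 64 timesteps x 24 features

inputs = tf.keras.layers.Input(shape=(64, 24))
x = tf.keras.layers.LSTM(128, activation='relu')(inputs)
model = tf.keras.Model(inputs=inputs, outputs=x)
print(model(sequences).shape)                        # (16, 128)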

Is there a way in which I can turn the pixel values of an image into a 4D array (including the 1 channel) instead of a 3D array for Keras?

I made a CNN from scratch in Python (or near enough) using a homemade gradient descent algorithm. It's because of this last point that the accuracy was only 50%. So I'm using Keras to get access to the 'adam' optimiser.
The use of model.fit apparently requires an array with the number of images, the two spatial dimensions and also the number of channels. In the following code, I'm not extracting the channels of the image, and therefore I only have 3 dimensions to the array and it doesn't work. How do I add the 4th (channel) dimension? I am using the following code at the moment.
import numpy as np
#%tensorflow_version 1.x
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
import glob
from PIL import Image

#!unzip not_dog4

images = []
image_data = []
for filename in glob.glob('not_dog/*.jpg'):
    im = Image.open(filename)
    images.append(im)

for image in images:
    images2 = image.resize((28, 28))
    gs_image = images2.convert(mode='L')
    image_vector = np.array(gs_image)
    image_data.append(image_vector)

image_data = np.array(image_data)
image_data_normalised = []
image_data = image_data.astype('float32')
image_data_normalised = image_data / 255 - 0.5

y = [1]

filter_size = 3
pool_filter_size = 2

model = Sequential()
model.add(Conv2D(num_filters, filter_size, strides=(1, 1), input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=(3, 3), strides=(1, 1), padding='valid'))
model.add(Flatten())
model.add(Dense(1, activation='softmax'))

# compile the model
model.compile('adam', loss='categorical_crossentropy')
model.fit(image_data_normalised, y, epochs=3)
The error specifically is: "Error when checking input: expected conv2d_6_input to have 4 dimensions, but got array with shape (120, 28, 28)"
That's 120 images, each 28 by 28, but it needs to be (120, 28, 28, 1) to specify that I have one channel. How do I achieve this?
On a side note, the next part is the answer, or y. I am using two classes, to make a hot dog / not hot dog sort of thing. The answer for all of these images is therefore one, or a probability of 1 for my y. So can I just put 1 in an array, and it will calculate the loss for the optimiser based on the current probability and the wanted probability (1), like I have done?
You can unsqueeze another dimension like this:
image_data = np.array(image_data)[..., None]
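np.expand_dims does the same thing and is a little more explicit; a quick check of the shapes with dummy data:
import numpy as np

image_data = np.zeros((120, 28, 28), dtype='float32')   # dummy stand-in for the loaded images
image_data = np.expand_dims(image_data, axis=-1)         # equivalent to image_data[..., None]
print(image_data.shape)                                   # (120, 28, 28, 1)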

Keras input_tensor shape for transfer learning

I am running a CNN for classification of medical scans using Keras and transfer learning with imagenet and InceptionV3. I am building the model with some practice data of size X_train = (624, 128, 128, 1) and Y_train = (624, 2).
I am trying to resize the input_tensor to suit the shape of my images (128 x 128 x 1) using the below code.
input_tensor = Input(shape=(128, 128, 1))
base_model = InceptionV3(input_tensor=input_tensor,weights='imagenet',include_top=False)
Doing this I get a value error:
ValueError: Dimension 0 in both shapes must be equal, but are 3 and 32. Shapes
are [3,3,1,32] and [32,3,3,3]. for 'Assign_753' (op: 'Assign') with input
shapes: [3,3,1,32], [32,3,3,3]
Is there a way to allow this model to accept my images in their format?
Edit:
For what it's worth, here is the code to generate the training data.
X = []
Y = []
for subj, subj_slice in slices.items():
    # X.extend([s[:, :, np.newaxis, np.newaxis] for s in slice])
    subj_slice_norm = [((imageArray - np.min(imageArray)) / np.ptp(imageArray)) for imageArray in subj_slice]
    X.extend([s[:, :, np.newaxis] for s in subj_slice_norm])
    subj_status = labels_df['deadstatus.event'][labels_df['PatientID'] == subj]
    subj_status = np.asanyarray(subj_status)
    # print(subj_status)
    Y.extend([subj_status] * len(subj_slice))

X = np.stack(X, axis=0)
Y = to_categorical(np.stack(Y, axis=0))

n_samp_train = int(X.shape[0] * 0.8)
X_train, Y_train = X[:n_samp_train], Y[:n_samp_train]
Edit2:
I think the other alternative would be to take my X, which has shape (780, 128, 128, 1), and clone the single channel of each of the 780 images, appending two copies as dummies, resulting in (780, 128, 128, 3). Is this possible?
You can use existing Keras layers to convert the image shape to the shape expected by the pre-trained model, rather than using numpy to replicate the channels. Replicating the channels before training may consume 3x the memory, whereas doing this processing inside the model at runtime saves that memory.
You can proceed this way.
Step 1: Create a Keras Model that converts your input images to the shape that can be fed as the input for the base_model as follows:
from keras.models import Model
from keras.layers import RepeatVector, Input, Reshape, Permute

inputs = Input(shape=(128, 128, 1))
reshaped1 = Reshape(target_shape=(128 * 128 * 1,))(inputs)
repeated = RepeatVector(n=3)(reshaped1)                 # (batch, 3, 128*128)
reshaped2 = Reshape(target_shape=(3, 128, 128))(repeated)
channels_last = Permute((2, 3, 1))(reshaped2)           # (batch, 128, 128, 3), channels last as InceptionV3 expects
input_model = Model(inputs=inputs, outputs=channels_last)
Step 2: Define pre-trained model InceptionV3 as follows:
base_model = InceptionV3(input_tensor=input_model.output, weights='imagenet', include_top=False)
Step 3: Combine both the models as follows:
combined_model = Model(inputs=input_model.input, outputs=base_model.output)
The advantage of this method is that the Keras model itself takes care of the image processing, such as channel replication, at runtime. Thus, you don't need to replicate the image channels yourself with numpy, and the result is memory efficient.
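If you do prefer the numpy route from Edit2 instead (at the cost of roughly 3x the memory), replicating the single channel is a one-liner; a sketch with dummy data standing in for the scan array:
import numpy as np

X = np.zeros((780, 128, 128, 1), dtype='float32')  # dummy stand-in for the scans
X_rgb = np.repeat(X, 3, axis=-1)
print(X_rgb.shape)  # (780, 128, 128, 3)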

ValueError: Error when checking input: expected time_distributed_46_input to have 5 dimensions, but got array with shape (200, 200, 3)

I am tinkering with the TimeDistributed layer and having a hard time. I am trying to create a very simple model that takes a 200 x 200 RGB image and reads the character written on it.
I keep getting the following error and I'm not sure how to fix it:
ValueError: Error when checking input: expected time_distributed_46_input to have 5 dimensions, but got array with shape (200, 200, 3)
Here is my keras code:
num_timesteps = len(chars)  # length of sequence
img_width = 200
img_height = 200
img_channels = 3

def model():
    # define CNN model
    cnn = Sequential()
    cnn.add(Conv2D(64, (3, 3), activation='relu', padding='same',
                   input_shape=(img_width, img_height, img_channels)))
    cnn.add(MaxPooling2D(pool_size=(3, 3)))
    cnn.add(Flatten())

    # define LSTM model
    model = Sequential()
    model.add(TimeDistributed(cnn, input_shape=(num_timesteps, img_width, img_height, img_channels)))
    model.add(LSTM(num_timesteps))
    model.add(Dense(26))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
I then fit the model like so:
model().fit_generator(generator=images_generator(), steps_per_epoch=20, epochs=2)
where I generate the images like so:
def image_sample():
    rand_str = random_str()
    blank = Image.new("RGB", (200, 200), (255, 255, 255))
    font = ImageFont.truetype("StatePlate.ttf", 100)
    draw = ImageDraw.Draw(blank)
    draw.text((30, 40), rand_str, (0, 0, 0), font=font)
    draw = ImageDraw.Draw(blank)
    # datagen = ImageDataGenerator(rotation_range=90)
    # datagen.fit(blank)
    return (np.asarray(blank), one_hot_char(rand_str))

def one_hot_char(char):
    zeros = np.zeros(len(chars))
    zeros[chars.index(char)] = 1
    return zeros

def images_generator():
    yield image_sample()
Any help is appreciated! Thanks.
Currently, a single image is returned by the generator. The input generated by the generator should have shape: [batch_size, num_timesteps, img_width, img_height, img_channels].
A quick fix for this dummy data would be changing np.asarray(blank) to np.asarray([[blank]]).
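More generally, the generator should yield (inputs, targets) batches that already carry the batch and timestep axes. A minimal sketch with random data, assuming the num_timesteps, img_width, img_height and img_channels variables from the question:
import numpy as np

def batched_generator(batch_size=4):
    # inputs: (batch_size, num_timesteps, img_width, img_height, img_channels)
    # targets: (batch_size, 26), matching the final Dense(26) layer
    while True:
        x = np.random.rand(batch_size, num_timesteps, img_width, img_height, img_channels)
        y = np.random.randint(0, 2, size=(batch_size, 26))
        yield x, y

model().fit_generator(generator=batched_generator(), steps_per_epoch=20, epochs=2)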
