Cifar100 only has 16 training images and 16 training labels - python

I'm using TensorFlow with Python 3.7, and I am trying to make an image classifier with CIFAR-100. I want to stay away from Keras as much as possible because it only ships with a limited number of datasets that I can use. This is my code:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import matplotlib.pyplot as plt
import PIL.Image as Image
from tensorflow import keras
# Question script: load CIFAR-100 through tensorflow_datasets and train a small
# dense classifier on it. The label file and the lobster image prepared below
# are not used again in the code shown here.
tf.compat.v1.enable_eager_execution()
shape = (224, 224)
# Read the label file and keep one entry per line.
labels = '/home/pi/tf/cifar_labels.txt'
labels = np.array(open(labels).read().splitlines())
# Open the sample image, resize it to `shape` and divide the pixel values by 255.
img = '/home/pi/tf/lobster.jpeg'
img = Image.open(img).resize(shape)
img = np.array(img)/255.0
img = np.reshape(img, (224, 224, 3))
# Loaded without as_supervised, so each element is indexed by feature name below.
train = tfds.load(name="cifar100", split="train")
test = tfds.load(name="cifar100", split="test")
# Shuffle with a 1024-element buffer, group into batches of 32, and prefetch.
train = train.shuffle(1024).batch(32).prefetch(tf.data.experimental.AUTOTUNE)
test = test.shuffle(1024).batch(32).prefetch(tf.data.experimental.AUTOTUNE)
# NOTE(review): every iteration rebinds train_images/train_labels, so once the
# loop finishes they hold only the final batch (at most 32 examples), not the
# whole training split.
for features in train:
train_images, train_labels = features["image"], features["label"]
# Same pattern for the test split: only the last batch is kept.
for features in test:
test_images, test_labels = features["image"], features["label"]
# Flatten each 32x32x3 input, one 128-unit ReLU layer, 100-way softmax output.
model = keras.Sequential([
keras.layers.Flatten(input_shape=(32, 32, 3)),
keras.layers.Dense(128, activation='relu'),
keras.layers.Dense(100, activation='softmax')
])
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
# NOTE(review): fit/evaluate receive only the tensors left over from the loops
# above, and no scaling of the image values is visible here — confirm the
# expected input range.
history = model.fit(train_images, train_labels, epochs=200, verbose=2)
test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
print('\nTest accuracy:', test_acc)
I'm guessing that there is something wrong with the `for features in train` loop. When I print the length of the training images/labels, I get 16. Because of this, my model reaches a training accuracy of 0% and a loss of 16.1181. Can anybody help?

To use CIFAR-100 in your Keras model directly, call the tfds.load function with the as_supervised=True parameter. The dataset will then yield (image, label) tuples instead of feature dictionaries. By default, each CIFAR-100 example is a dictionary with three keys:
FeaturesDict({
'coarse_label': ClassLabel(shape=(), dtype=tf.int64, num_classes=20),
'image': Image(shape=(32, 32, 3), dtype=tf.uint8),
'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=100),
})
A dataset of such dictionaries cannot be fed into model.fit() directly, because Keras expects (inputs, targets) pairs. With as_supervised set to True, the returned dataset yields 2-tuples (image, label) built from the 'image' and 'label' features.
To sum up,
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras

# Only has an effect on TF 1.x; kept so the snippet runs on both major versions.
tf.compat.v1.enable_eager_execution()


def scale_image(image, label):
    """Convert a uint8 image to float32 in [0, 1]; the label passes through."""
    return tf.cast(image, tf.float32) / 255.0, label


# as_supervised=True makes every element an (image, label) tuple, which is the
# structure model.fit() and model.evaluate() accept from a tf.data.Dataset.
train = tfds.load(name="cifar100", split="train", as_supervised=True)
test = tfds.load(name="cifar100", split="test", as_supervised=True)

# Scale the pixels before batching so the dense layers see small inputs.
train = (train.map(scale_image)
         .shuffle(1024)
         .batch(32)
         .prefetch(tf.data.experimental.AUTOTUNE))
test = (test.map(scale_image)
        .shuffle(1024)
        .batch(32)
        .prefetch(tf.data.experimental.AUTOTUNE))

model = keras.Sequential([
    keras.layers.Flatten(input_shape=(32, 32, 3)),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(100, activation='softmax'),
])
# Sparse loss: the dataset provides integer class ids, not one-hot vectors.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# The dataset is already batched, so no batch_size is passed to fit/evaluate.
history = model.fit(train, epochs=200, verbose=1)
test_loss, test_acc = model.evaluate(test, verbose=1)
print('\nTest accuracy:', test_acc)
Note:
To use the dataset without as_supervised set to True, you can use model.train_on_batch function. e.g.
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras

# Only has an effect on TF 1.x; kept so the snippet runs on both major versions.
tf.compat.v1.enable_eager_execution()

EPOCHS = 200

# Without as_supervised each element is a dict keyed by feature name.
train = tfds.load(name="cifar100", split="train")
test = tfds.load(name="cifar100", split="test")
# No .repeat() here: the epoch loop below iterates the dataset once per epoch,
# so repeating it as well would multiply the number of passes.
train = train.shuffle(1024).batch(32).prefetch(tf.data.experimental.AUTOTUNE)
test = test.shuffle(1024).batch(32).prefetch(tf.data.experimental.AUTOTUNE)

model = keras.Sequential([
    keras.layers.Flatten(input_shape=(32, 32, 3)),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(100, activation='softmax'),
])
# Sparse loss: the dataset provides integer class ids, not one-hot vectors.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

for epoch in range(EPOCHS):
    for features in train:
        # Pick the two features the model needs and scale pixels to [0, 1].
        image_batch = tf.cast(features["image"], tf.float32) / 255.0
        label_batch = features["label"]
        loss, acc = model.train_on_batch(image_batch, label_batch)

# Evaluate once, after training has finished.
for features in test:
    image_batch = tf.cast(features["image"], tf.float32) / 255.0
    label_batch = features["label"]
    loss, acc = model.test_on_batch(image_batch, label_batch)

Related

Why can't I save my model with tensorflow lite?

Once the training is finished, what I need is to save and convert the model to later export it, but I get the following error:
converter = tf.lite.TFLiteConverter.from_keras_model_file('models/modelo.h5')
AttributeError: type object 'TFLiteConverterV2' has no attribute 'from_keras_model_file'
To be honest, I found a similar problem on the web, but it doesn't match my case, and the person who answered it was not very explicit.
Here is my code:
import tensorflow as tf
from tensorflow import keras
# Question script: train a small dense MNIST classifier, save it as HDF5 and
# try to convert the saved file to a TensorFlow Lite model.
#dataset
mnist = keras.datasets.mnist
(x_train, y_train),(x_test, y_test) = mnist.load_data()
# Divide the pixel values of both splits by 255.
x_train, x_test = x_train / 255.0, x_test / 255.0
# Early-stopping callback, currently disabled (commented out).
#class myCallback(tf.keras.callbacks.Callback):
# def on_epoch_end(self, epoch, logs={}):
# If you are using Tensorflow 1.x, replace 'accuracy' for 'acc' in the next line
# if(logs.get('accuracy')>0.99):
# print("\nReached 99.0% accuracy so cancelling training!")
# self.model.stop_training = True
# Flatten 28x28 inputs, 128-unit ReLU layer, dropout of 0.2, 10-way softmax.
model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(input_shape=(28, 28)),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Dropout(0.2),
tf.keras.layers.Dense(10, activation='softmax')
])
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
# Train the model
model.fit(x_train,
y_train,
epochs=25,)
# callbacks=[myCallback()])
# Evaluate the model
model.evaluate(x_test, y_test)
# Save the model
model.save('models/modelo.h5')
# Convert the model.
# NOTE(review): the traceback quoted with this script reports that
# TFLiteConverterV2 has no from_keras_model_file; TF 2.x documents
# from_keras_model(model) for in-memory Keras models — confirm against the
# installed TensorFlow version.
converter = tf.lite.TFLiteConverter.from_keras_model_file('models/modelo.h5')
tflite_model = converter.convert()
# NOTE(review): the file object returned by open() is never closed explicitly.
open("models/converted_mnist_model.tflite", "wb").write(tflite_model)

how do i use k-fold with flow_from_directory

It's a school project.
I have split my dataset using an ImageDataGenerator (datagen) with validation_split.
After compiling and fitting my model, I want to apply k-fold cross-validation. How do I use k-fold with flow_from_directory?
from tensorflow import keras
# Forming datasets
# One generator rescales pixels by 1/255 and holds out 30% of the images as a
# validation subset.
datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1/255, validation_split=0.3)
# Training and validation dataset
# Both subsets are read from the same TrainD directory at 224x224.
train1 = datagen.flow_from_directory('C:/Users/hamza/Desktop/kkk/TrainD', target_size=(224,224), subset='training')
val = datagen.flow_from_directory('C:/Users/hamza/Desktop/kkk/TrainD', target_size=(224,224), subset='validation')
# Test dataset for evaluation
datagen2 = keras.preprocessing.image.ImageDataGenerator(rescale=1/255)
# NOTE(review): no target_size is passed here, unlike the two generators above —
# confirm the test images end up at the 224x224 size the model is built for.
test = datagen2.flow_from_directory('C:/Users/hamza/Desktop/kkk/TestD')
# NOTE(review): these imports come from the standalone `keras` package while
# `keras` above was imported from tensorflow — confirm the two are compatible.
from keras.layers import Dense,GlobalMaxPool2D,Dropout
from keras.models import Model
input_shape = (224,224,3)
# Build the base network: MobileNetV2 without its top classification layers.
base_model = keras.applications.MobileNetV2(input_shape=input_shape,
include_top=False
)
# Freeze the base network so only the layers added below are trainable.
base_model.trainable = False
# New head: global max pooling, a 1024-unit ReLU layer, 3-way softmax output.
x = base_model.output
x = GlobalMaxPool2D()(x)
x = Dense(1024, activation='relu')(x)
pred = Dense(3, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=pred)
model.summary()
from keras.optimizers import SGD
# Model Compiling
# NOTE(review): `lr=` and a bare string for `metrics` are accepted by some
# Keras versions only — confirm against the installed version.
model.compile(loss='categorical_crossentropy', optimizer= SGD(lr=0.01, momentum=0.9), metrics='accuracy')
# Model Fitting
# NOTE(review): train1 is a generator that already yields batches; passing
# batch_size here as well may be rejected — confirm.
history=model.fit(train1, batch_size=32, epochs=20, validation_data=val)

Accuracy is zero for cifar10 dataset with Keras Sequential Model

My accuracy is zero for all the 15 epochs in spite of using multiple Conv2D and Max Pooling Layers. I am using ImageDataGenerator for Data Augmentation.
Complete code is given below:
# importing all the required libraries
import tensorflow as tf
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPool2D, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
# Question script: train a small CNN on CIFAR-10 with augmented batches from
# ImageDataGenerator.
# Loading the Data from the in built library
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()
# Normalize the Pixel Data
train_images = train_images/255.0
test_images = test_images/255.0
# Instantiate the Image Data Generator Class with the Data Augmentation
# Shifts, rotations and flips; 20% of the data is reserved for validation.
datagen = ImageDataGenerator(width_shift_range = 0.2, height_shift_range = 0.2,
rotation_range = 20, horizontal_flip = True,
vertical_flip = True, validation_split = 0.2)
# Apply the Data Augmentation to the Training Images
# NOTE(review): no featurewise/samplewise option is set above, so this fit()
# call may be unnecessary — confirm.
datagen.fit(train_images)
# Create the Generator for the Training Images
# NOTE(review): train_labels is passed here before the to_categorical call
# further down, so the generators receive the labels exactly as load_data()
# returned them, not the one-hot version.
train_gen = datagen.flow(train_images, train_labels, batch_size = 32,
subset = 'training')
# Create the Generator for the Validation Images
val_gen = datagen.flow(train_images, train_labels, batch_size = 8,
subset = 'validation')
num_classes = 10
# One Hot Encoding of Labels using to_categorical
# This rebinds train_labels/test_labels after the generators were created.
train_labels = to_categorical(train_labels, num_classes)
test_labels = to_categorical(test_labels, num_classes)
# img_height and img_width are not referenced again in the code shown.
img_height = 32
img_width = 32
# Building the Keras Model
# Three Conv2D layers with two max-pooling steps, then a dense head.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
model.add(MaxPool2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPool2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
#model.add(Dropout(rate = 0.2))
model.add(Dense(units = num_classes, activation = 'softmax'))
model.summary()
# categorical_crossentropy is compiled here while the generators were built
# from the pre-encoding labels (see the note above).
model.compile(loss = 'categorical_crossentropy', optimizer = 'adam',
metrics = ['accuracy'])
# 80% of the training images divided by the training batch size of 32; the
# multiplication by 0.8 makes the result a float.
steps_per_epoch = len(train_images) * 0.8//32
history = model.fit(train_gen, validation_data = val_gen,
steps_per_epoch = steps_per_epoch, epochs = 15)
Your problem is you ran this code
train_gen = datagen.flow(train_images, train_labels, batch_size = 32,
subset = 'training')
# Create the Generator for the Validation Images
val_gen = datagen.flow(train_images, train_labels, batch_size = 8,
subset = 'validation')
but only after this did you convert the labels to categorical. So take the code
num_classes = 10
# One Hot Encoding of Labels using to_categorical
train_labels = to_categorical(train_labels, num_classes)
test_labels = to_categorical(test_labels, num_classes)
and place it PRIOR to the train_gen and val_gen code. On a finer point you have the code
datagen.fit(train_images)
You only need to fit the generator if you have any of the parameters
featurewise_center, samplewise_center, featurewise_std_normalization, or
samplewise_std_normalization set to true.
Transform your label to one hot right before the .flow.
...
# One Hot Encoding of Labels using to_categorical
train_labels = to_categorical(train_labels, num_classes)
test_labels = to_categorical(test_labels, num_classes)
# Create the Generator for the Training Images
train_gen = datagen.flow(train_images, train_labels, batch_size = 32,
subset = 'training')
# Create the Generator for the Validation Images
val_gen = datagen.flow(train_images, train_labels, batch_size = 8,
subset = 'validation')
...

Our training/validation loss curves are great but the testing performance suffers

We are currently working on an image classification task for detecting tuberculosis from chest x-ray images. You can see our code below. We used 0.7 for the train set, 0.2 for the validation set, and 0.1 for the test set. Our training and validation loss is here
But when we try it on our test data set, this is what we got:
Is there something wrong with our code? Thank you in advance.
from tensorflow import keras
from keras.applications.mobilenet_v2 import MobileNetV2
from keras.applications.mobilenet_v2 import preprocess_input
from keras.layers import Dense, Flatten
from keras.models import Sequential
from keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from datetime import datetime, date
from keras.callbacks import ModelCheckpoint
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np
#Loading a pre-trained model
# MobileNetV2 with ImageNet weights and no top layers, for 224x224x3 inputs.
image_size = 224
base_model = MobileNetV2(input_shape=(image_size,image_size,3), weights='imagenet', include_top=False)
# Freeze every layer of the base network.
for layer in base_model.layers:
layer.trainable = False
# Classification head: flatten, 1000-unit ReLU layer, 2-unit sigmoid output.
model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(Dense(1000, activation='relu'))
# NOTE(review): two sigmoid units are paired with BinaryCrossentropy below —
# confirm this matches the label encoding used by the generators.
model.add(Dense(2, activation="sigmoid"))
loss_func = BinaryCrossentropy()
opt = Adam(learning_rate=0.001)
model.compile(loss=loss_func,
optimizer=opt,
metrics=['accuracy'])
#Training
# NOTE(review): the directory used for the training generator below is stored
# in a variable named test_path — confirm it points at the training images.
test_path = '...'
val_path = '...'
# Rescale by 1/255 and augment with horizontal flips, shear and zoom.
# NOTE(review): the same augmenting generator also feeds the validation set.
datagen = ImageDataGenerator(rescale = 1./255,horizontal_flip = True, shear_range = 0.2, zoom_range=0.2)
batch_size=32
validation_size=8
train_set = datagen.flow_from_directory(test_path,
target_size = (image_size, image_size),
batch_size=batch_size,
class_mode = 'categorical')
validation_set = datagen.flow_from_directory(val_path,
target_size = (image_size, image_size),
batch_size=validation_size,
class_mode = 'categorical')
#Fitting the data to the model
model_name = 'MobileNetV2'
date_today= date.today().strftime('%m_%d_%Y')
# Keep only the checkpoint with the lowest validation loss.
checkpoint = ModelCheckpoint(filepath=f'Models/{model_name}_{date_today}.h5',
monitor='val_loss',
mode='min',
verbose=1,
save_best_only=True)
# NOTE(review): len() of a directory iterator is presumably already a count of
# batches; dividing it by the batch size again would shorten each epoch — confirm.
model_history = model.fit(train_set,
validation_data=validation_set,
epochs=100,
steps_per_epoch=len(train_set)//batch_size,
validation_steps=len(validation_set)//validation_size,
callbacks=[checkpoint],
verbose=1)
#Testing the model on the test set
test_path = '...'
# NOTE(review): unlike the training generator, this one applies no
# rescale=1./255, so test images reach the model in a different value range.
test_datagen = ImageDataGenerator()
# NOTE(review): no shuffle argument is given; if the iterator shuffles by
# default, the order of `predictions` will not match test_set.classes — confirm
# and consider shuffle=False.
test_set = test_datagen.flow_from_directory(test_path,
target_size = (image_size, image_size),
class_mode = 'categorical')
predictions = model.predict(test_set, verbose=1)
# Take the index of the larger of the two outputs as the predicted class.
y_pred = np.argmax(predictions, axis=1)
class_labels = list(test_set.class_indices.keys())
print('Classification Report')
clsf = classification_report(test_set.classes, y_pred, target_names=class_labels)
print(clsf)
print('\n')
print('Confusion Matrix')
cfm = confusion_matrix(test_set.classes, y_pred)
print(cfm)
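The code is mostly correct, but there is one mistake: you have assigned 2 units to the sigmoid output layer. For a binary classification problem trained with BinaryCrossentropy, the usual choice is a single sigmoid unit. If you make this change, also load the labels with class_mode='binary' in flow_from_directory so that each label is a single 0/1 value matching the one-unit output, and threshold the predictions at 0.5 instead of using argmax. Like this: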
model.add(Dense(1, activation="sigmoid"))
Tuberculosis is a complex object with sophisticated features. Therefore, the testing set may produce unexpected results. To circumvent this, you must modify your network and incorporate additional training images. You can experiment with transfer learning, but if the network from which you want to transfer the parameters was trained on objects entirely unrelated to tuberculosis, it might not be appropriate.

Keras correct input shape for multilayer perceptron

I'm trying to make a basic MLP example in keras. My input data has the shape train_data.shape = (2000,75,75) and my testing data has the shape test_data.shape = (500,75,75). 2000 and 500 are the numbers of samples of training and test data (in other words, the shape of the data is (75,75), but there are 2000 and 500 pieces of training and testing data). The output should have two classes.
I'm unsure what value to use for the input_shape parameter on the first layer of the network. Using the code from the mnist example in the keras repository, I have (updated):
from six.moves import cPickle
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.utils import np_utils
from keras.optimizers import RMSprop
# Globals
NUM_CLASSES = 2   # number of classes passed to to_categorical
NUM_EPOCHS = 10   # passed to model.fit as nb_epoch
BATCH_SIZE = 250  # passed to model.fit as batch_size
def loadData(data_path='data.pkl', labels_path='labels.pkl', num_train=2000):
    """Load the pickled samples and labels and split them into train/test parts.

    Args:
        data_path: Path of the pickle file holding the samples.
        labels_path: Path of the pickle file holding the labels.
        num_train: Number of leading items used for training; everything
            after that index becomes the test split.

    Returns:
        A tuple ``(train_data, train_labels, test_data, test_labels)``.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    # The context managers close both files even if unpickling raises.
    with open(data_path, 'rb') as fData:
        data = cPickle.load(fData)
    with open(labels_path, 'rb') as fLabels:
        labels = cPickle.load(fLabels)
    train_data = data[:num_train]
    train_labels = labels[:num_train]
    test_data = data[num_train:]
    test_labels = labels[num_train:]
    return (train_data, train_labels, test_data, test_labels)
# Load data and corresponding labels for model
train_data, train_labels, test_data, test_labels = loadData()
# One-hot encode the labels into NUM_CLASSES columns.
train_labels = np_utils.to_categorical(train_labels, NUM_CLASSES)
test_labels = np_utils.to_categorical(test_labels, NUM_CLASSES)
print(train_data.shape)
print(test_data.shape)
# Two 512-unit ReLU layers with dropout of 0.2, then a 2-way softmax output.
model = Sequential()
# NOTE(review): the first layer declares flat inputs of length 5625, but no
# reshape of train_data/test_data is visible before fit() — see the error
# quoted with this script.
model.add(Dense(512, input_shape=(5625,)))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(2))
model.add(Activation('softmax'))
model.summary()
model.compile(loss='categorical_crossentropy',
optimizer=RMSprop(),
metrics=['accuracy'])
# NOTE(review): nb_epoch is presumably the older spelling of `epochs` — confirm
# against the installed Keras version.
history = model.fit(train_data, train_labels, validation_data=(test_data, test_labels),
batch_size=BATCH_SIZE, nb_epoch=NUM_EPOCHS,
verbose=1)
# evaluate() returns the loss followed by the compiled metric (accuracy).
score = model.evaluate(test_data, test_labels, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])
where 5625 is 75 * 75 (emulating the MNIST example). The error I get is:
Error when checking model input: expected dense_input_1 to have 2 dimensions, but got array with shape (2000, 75, 75)
Any ideas?
From keras MLP example, https://github.com/fchollet/keras/blob/master/examples/mnist_mlp.py
# the data, shuffled and split between train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(60000, 784)
X_test = X_test.reshape(10000, 784)
And the model input
model = Sequential()
model.add(Dense(512, input_shape=(784,)))
So you should reshape your train and test to (2000,75*75) and (500,75*75) with
train_data = train_data.reshape(2000, 75*75)
test_data = test_data.reshape(500, 75*75)
and then set the model input shape as you did
model.add(Dense(512, input_shape=(75*75,)))

Categories

Resources