I am trying to train the mnist dataset on ResNet50 using the Keras library.
The shape of mnist is (28, 28, 1) however resnet50 required the shape to be (32, 32, 3)
How can I convert the mnist dataset to the required shape?
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], x_train.shape[2], 1)
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], x_test.shape[2], 1)
x_train = x_train/255.0
x_test = x_test/255.0
from keras.utils import to_categorical
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
model = models.Sequential()
# model.add(InputLayer(input_shape=(28, 28)))
# model.add(Reshape(target_shape=(32, 32, 3)))
# model.add(Conv2D())
model.add(conv_base)
model.add(Flatten())
model.add(BatchNormalization())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(BatchNormalization())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(BatchNormalization())
model.add(Dense(10, activation='softmax'))
model.compile(optimizer=optimizers.RMSprop(lr=2e-5), loss='binary_crossentropy', metrics=['acc'])
history = model.fit(x_train, y_train, epochs=5, batch_size=20, validation_data=(x_test, y_test))
ValueError: Input 0 is incompatible with layer sequential_10: expected shape=(None, 32, 32, 3), found shape=(20, 28, 28, 1)
You need to resize the MNIST data set. Note that minimum size actually depends on the ImageNet model. For example: Xception requires at least 72, where ResNet is asking for 32. Apart from that, the MNIST is a grayscale image, but it may conflict if you're using the pretrained weight of these models. So, good and safe side is to resize and convert grayscale to RGB.
Full working code for you.
Data Set
We will resize MNIST from 28 to 32. Also, make 3 channels instead of keeping 1.
import tensorflow as tf
import numpy as np
(x_train, y_train), (_, _) = tf.keras.datasets.mnist.load_data()
# expand new axis, channel axis
x_train = np.expand_dims(x_train, axis=-1)
# [optional]: we may need 3 channel (instead of 1)
x_train = np.repeat(x_train, 3, axis=-1)
# it's always better to normalize
x_train = x_train.astype('float32') / 255
# resize the input shape , i.e. old shape: 28, new shape: 32
x_train = tf.image.resize(x_train, [32,32]) # if we want to resize
# one hot
y_train = tf.keras.utils.to_categorical(y_train , num_classes=10)
print(x_train.shape, y_train.shape)
(60000, 32, 32, 3) (60000, 10)
ResNet 50
input = tf.keras.Input(shape=(32,32,3))
efnet = tf.keras.applications.ResNet50(weights='imagenet',
include_top = False,
input_tensor = input)
# Now that we apply global max pooling.
gap = tf.keras.layers.GlobalMaxPooling2D()(efnet.output)
# Finally, we add a classification layer.
output = tf.keras.layers.Dense(10, activation='softmax', use_bias=True)(gap)
# bind all
func_model = tf.keras.Model(efnet.input, output)
Train
func_model.compile(
loss = tf.keras.losses.CategoricalCrossentropy(),
metrics = tf.keras.metrics.CategoricalAccuracy(),
optimizer = tf.keras.optimizers.Adam())
# fit
func_model.fit(x_train, y_train, batch_size=128, epochs=5, verbose = 2)
Epoch 1/5
469/469 - 56s - loss: 0.1184 - categorical_accuracy: 0.9690
Epoch 2/5
469/469 - 21s - loss: 0.0648 - categorical_accuracy: 0.9844
Epoch 3/5
469/469 - 21s - loss: 0.0503 - categorical_accuracy: 0.9867
Epoch 4/5
469/469 - 21s - loss: 0.0416 - categorical_accuracy: 0.9888
Epoch 5/5
469/469 - 21s - loss: 0.1556 - categorical_accuracy: 0.9697
<tensorflow.python.keras.callbacks.History at 0x7f316005a3d0>
Related
I have 2 versions of models built using keras that seem to build well, but when it comes to compiling and fitting, I receive the same error for both of them. I'm not sure what the problem is.
def build_cnn():
model = models.Sequential([
layers.Conv2D(32, (3, 3), input_shape=(32, 32, 3), padding='same', name='conv1'),
layers.MaxPooling2D((2, 2), name='maxpooling1'),
layers.Conv2D(64, (3, 3), activation='relu', name='conv2'),
layers.MaxPooling2D((2, 2), name = 'maxpooling2'),
layers.Conv2D(64, (3, 3), activation='relu', name = 'conv3'),
layers.Flatten(name = 'flatten'),
layers.Dense(64, activation='relu', name='dense1'),
layers.Dense(10, name='dense2')
], name='CNN')
return model
def build_cnn2():
model = models.Sequential([
tf.keras.layers.Input(shape=(32,32,3)),
tf.keras.layers.Conv2D(32, (3,3), padding='same', name='conv2d'),
tf.keras.layers.MaxPool2D(pool_size=(2,2), name='maxpooling'),
tf.keras.layers.Flatten(name='flatten'),
tf.keras.layers.Dense(10, activation='softmax', name='dense'),
], name='conv2d')
return model
def train(model):
model.compile(optimizer='adam',
loss='binary_crossentropy',
metrics=['acc'])
return model.fit(x_train, y_train,
epochs=5,
validation_split=0.1)
model2 = build_cnn()
log2 = train(model2)
I receive this order for both model2 = build_cnn() and model2 = build_cnn2():
ValueError: Input 0 of layer CNN is incompatible with the layer: : expected min_ndim=4, found ndim=2. Full shape received: (None, 30)
According to the error, it seems like you didn't preprocess your data properly - as it says, it expects 4D - (None, h, w, c) but yours somehow (None, 30).
Also, one of your model's last activation is None, another is set as softmax but yet you set the loss function binary_crossentropy rather than CategoricalCrossentropy.
Here is a possible solution for you (by resolving your above issue).
def build_cnn():
model = Sequential([
layers.Conv2D(32, (3, 3), input_shape=(32, 32, 3),
padding='same', name='conv1'),
layers.MaxPooling2D((2, 2), name='maxpooling1'),
layers.Conv2D(64, (3, 3), activation='relu', name='conv2'),
layers.MaxPooling2D((2, 2), name = 'maxpooling2'),
layers.Conv2D(64, (3, 3), activation='relu', name = 'conv3'),
layers.Flatten(name = 'flatten'),
layers.Dense(64, activation='relu', name='dense1'),
layers.Dense(10, activation='softmax', name='dense2')
], name='CNN')
return model
def train(model, x_train, y_train):
model.compile(optimizer='adam',
loss=tf.keras.losses.CategoricalCrossentropy(),
metrics=['acc'])
return model.fit(x_train, y_train,
epochs=5, verbose=2,
validation_split=0.1)
model2 = build_cnn()
I use mnist for demonstration. Its shape is (28 x 28) but as your model takes (32, 32, 3) - the mnist needs to be preprocessed. Hopefully, you can adapt to your case.
DataSet
(x_train, y_train), (_, _) = tf.keras.datasets.mnist.load_data()
print(x_train.shape, y_train.shape)
# expand new axis, channel axis
x_train = np.expand_dims(x_train, axis=-1)
print(x_train.shape)
# need 3 channel (instead of 1)
x_train = np.repeat(x_train, 3, axis=-1)
print(x_train.shape)
# it's always better to normalize
x_train = x_train.astype('float32') / 255
print(x_train.shape)
# resize the input shape , i.e. old shape: 28, new shape: 32
x_train = tf.image.resize(x_train, [32,32]) # if we want to resize
print(x_train.shape)
# train set / target
y_train = tf.keras.utils.to_categorical(y_train , num_classes=10)
print(y_train.shape)
(60000, 28, 28) (60000,)
(60000, 28, 28, 1)
(60000, 28, 28, 3)
(60000, 28, 28, 3)
(60000, 32, 32, 3)
(60000, 10)
Now, you can train your model.
log2 = train(model2, x_train , y_train)
Epoch 1/5
4ms/step - loss: 0.2792 - acc: 0.9133 - val_loss: 0.0676 - val_acc: 0.9815
Epoch 2/5
4ms/step - loss: 0.0454 - acc: 0.9864 - val_loss: 0.0400 - val_acc: 0.9883
Epoch 3/5
4ms/step - loss: 0.0336 - acc: 0.9892 - val_loss: 0.0415 - val_acc: 0.9900
Epoch 4/5
4ms/step - loss: 0.0235 - acc: 0.9926 - val_loss: 0.0359 - val_acc: 0.9907
Epoch 5/5
4ms/step - loss: 0.0163 - acc: 0.9948 - val_loss: 0.0295 - val_acc: 0.9918
I'm testing simple networks on Keras with TensorFlow backend and I ran into an issue with using sigmoid activation function
The network isn't learning for first 5-10 epochs, and then everything is fine.
I tried using initializers and regularizers, but that only made it worse.
I use the network like this:
import numpy as np
import keras
from numpy import expand_dims
from keras.preprocessing.image import ImageDataGenerator
from matplotlib import pyplot
# load the image
(x_train, y_train), (x_val, y_val), (x_test, y_test) = netowork2_ker.load_data_shared()
# expand dimension to one sample
x_train = expand_dims(x_train, 2)
x_train = np.reshape(x_train, (50000, 28, 28))
x_train = expand_dims(x_train, 3)
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)
datagen = ImageDataGenerator(
rescale=1./255,
width_shift_range=[-1, 0, 1],
height_shift_range=[-1, 0, 1],
rotation_range=10)
epochs = 20
batch_size = 50
num_classes = 10
model = keras.Sequential()
model.add(keras.layers.Conv2D(64, (3, 3), padding='same',
input_shape=x_train.shape[1:],
activation='sigmoid'))
model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(keras.layers.Conv2D(100, (3, 3),
activation='sigmoid'))
model.add(keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(100,
activation='sigmoid'))
#model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.Dense(num_classes,
activation='softmax'))
model.compile(loss='categorical_crossentropy',
optimizer='adam',
metrics=['accuracy'])
model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
steps_per_epoch=len(x_train) / batch_size, epochs=epochs,
verbose=2, shuffle=True)
With the code above I get results like these:
Epoch 1/20
- 55s - loss: 2.3098 - accuracy: 0.1036
Epoch 2/20
- 56s - loss: 2.3064 - accuracy: 0.1038
Epoch 3/20
- 56s - loss: 2.3068 - accuracy: 0.1025
Epoch 4/20
- 56s - loss: 2.3060 - accuracy: 0.1079
...
For 7 epochs (different every time) and then the loss rapidly goes downward and i achieve 0.9623 accuracy in 20 epochs.
But if I change activation from sigmoid to relu it works great and gives me 0.5356 accuracy in the first epoch.
This issue makes sigmoid almost unusable for me and I'd like to know, I can do something about it. Is this a bug or am I doing something wrong?
Activation function suggestion:
In practice, the sigmoid non-linearity has recently fallen out of favor and it is rarely ever used. ReLU is the most common choice, if there are a large fraction of “dead” units in network, try Leaky ReLU and tanh. Never use sigmoid.
Reasons for not using the sigmoid:
A very undesirable property of the sigmoid neuron is that when the neuron’s activation saturates at either tail of 0 or 1, the gradient at these regions is almost zero. In addition, Sigmoid outputs are not zero-centered.
I am trying the the training and evaluation example on the tensorflow website.
Specifically, this part:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = x_train.reshape(60000, 784).astype('float32') / 255
x_test = x_test.reshape(10000, 784).astype('float32') / 255
y_train = y_train.astype('float32')
y_test = y_test.astype('float32')
def get_uncompiled_model():
inputs = keras.Input(shape=(784,), name='digits')
x = layers.Dense(64, activation='relu', name='dense_1')(inputs)
x = layers.BatchNormalization()(x)
x = layers.Dense(64, activation='relu', name='dense_2')(x)
outputs = layers.Dense(10, activation='softmax', name='predictions')(x)
model = keras.Model(inputs=inputs, outputs=outputs)
return model
def get_compiled_model():
model = get_uncompiled_model()
model.compile(optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),
loss='sparse_categorical_crossentropy',
metrics=['sparse_categorical_accuracy'])
return model
sample_weight = np.ones(shape=(len(y_train),))
sample_weight[y_train == 5] = 2.
# Create a Dataset that includes sample weights
# (3rd element in the return tuple).
train_dataset = tf.data.Dataset.from_tensor_slices(
(x_train, y_train, sample_weight))
# Shuffle and slice the dataset.
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)
model = get_compiled_model()
model.fit(train_dataset, epochs=3)
It appears that if I add the batch normalization layer (this line: x = layers.BatchNormalization()(x)) I get the following error:
InvalidArgumentError: The second input must be a scalar, but it has shape [64]
[[{{node batch_normalization_2/cond/ReadVariableOp/Switch}}]]
Any ideas?
The same code works for me.
The only lines I changed are :
model.compile(optimizer=keras.optimizers.RMSprop(learning_rate=1e-3)
to model.compile(optimizer=keras.optimizers.RMSprop(lr=1e-3)
(which is version specific)
Then
model.fit(train_dataset, epochs=3) to model.fit(train_dataset, epochs=3, steps_per_epoch=30)
Reason : When using iterators as input to a model, you should specify the steps_per_epoch argument
If you just want to use sample weights, you don't have to use tf.data.Dataset, you can simply run:
model.fit(x=x_train, y=y_train, sample_weight=sample_weight, batch_size=64, epochs=3)
and it works for me (when I change learning_rate to lr as #ASHu2 mentioned).
It gets 97% accuracy after 3 epochs:
...
57408/60000 [===========================>..] - ETA: 0s - loss: 0.1010 - sparse_categorical_accuracy: 0.9709
58816/60000 [============================>.] - ETA: 0s - loss: 0.1011 - sparse_categorical_accuracy: 0.9708
60000/60000 [==============================] - 2s 37us/sample - loss: 0.1007 - sparse_categorical_accuracy: 0.9709
I used TF 1.14.0 on windows.
The problem was solved when I updated tensorflow from version 1.14.1 to 2.0.0-rc1.
I trained a feedforward neural network in Keras module, but there are some problems with it. The problem is the incorrect prediction of images with self-written digits.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
RESHAPED = 784
X_train = X_train.reshape(60000, RESHAPED)
X_test = X_test.reshape(10000, RESHAPED)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)
model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()
model.compile(loss='categorical_crossentropy',
optimizer=OPTIMIZER,
metrics=['accuracy'])
history = model.fit(X_train, Y_train,
batch_size=BATCH_SIZE, epochs=NB_EPOCH,
verbose=VERBOSE, validation_split=VALIDATION_SPLIT)
score = model.evaluate(X_test, Y_test, verbose=VERBOSE)
//output
....
Epoch 20/20
48000/48000 [==============================] - 2s 49us/step - loss: 0.0713 -
acc: 0.9795 - val_loss: 0.1004 - val_acc: 0.9782
10000/10000 [==============================] - 1s 52us/step
Test score: 0.09572517980986121
Test accuracy: 0.9781
Next, I upload my image, of dimension 28x28, which was self-written, for example - 2, and executed this script:
img_array = imageio.imread('2.png', as_gray=True)
predictions=model.predict(img_array.reshape(1,784))
print (np.argmax(predictions))
//output for example 3, but I expect - 2
I tried other pictures with different numbers, which also gave wrong predictions. What is wrong? Model shows-Test accuracy: 0.9781. Help me please)))
You are using the MNIST Dataset. This set was originally created for the american post office. You'll have to deal with regional handwriting variation so make sure you have written american numbers.
On the other hand errors are normal. Make a bigger set of handwritten digits and make some tests.
This is a common problem of data mismatch , in order to have a prefect accuracy on your own images , you need to make some preprocessing before , if you noticed one thing in the mnist dataset all images are white in black background , this is one of the modification you should make to your own image , for more information check this awesome medium article :
https://medium.com/#o.kroeger/tensorflow-mnist-and-your-own-handwritten-digits-4d1cd32bbab4
I'm trying to convert this tflearn DCNN sample (using image preprocessing and augmemtation) to keras:
Tflearn sample:
import tflearn
from tflearn.data_utils import shuffle, to_categorical
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.estimator import regression
from tflearn.data_preprocessing import ImagePreprocessing
from tflearn.data_augmentation import ImageAugmentation
# Data loading and preprocessing
from tflearn.datasets import cifar10
(X, Y), (X_test, Y_test) = cifar10.load_data()
X, Y = shuffle(X, Y)
Y = to_categorical(Y, 10)
Y_test = to_categorical(Y_test, 10)
# Real-time data preprocessing
img_prep = ImagePreprocessing()
img_prep.add_featurewise_zero_center()
img_prep.add_featurewise_stdnorm()
# Real-time data augmentation
img_aug = ImageAugmentation()
img_aug.add_random_flip_leftright()
img_aug.add_random_rotation(max_angle=25.)
# Convolutional network building
network = input_data(shape=[None, 32, 32, 3],
data_preprocessing=img_prep,
data_augmentation=img_aug)
network = conv_2d(network, 32, 3, activation='relu')
network = max_pool_2d(network, 2)
network = conv_2d(network, 64, 3, activation='relu')
network = conv_2d(network, 64, 3, activation='relu')
network = max_pool_2d(network, 2)
network = fully_connected(network, 512, activation='relu')
network = dropout(network, 0.5)
network = fully_connected(network, 10, activation='softmax')
network = regression(network, optimizer='adam',
loss='categorical_crossentropy',
learning_rate=0.001)
# Train using classifier
model = tflearn.DNN(network, tensorboard_verbose=0)
model.fit(X, Y, n_epoch=50, shuffle=True, validation_set=(X_test, Y_test),
show_metric=True, batch_size=96, run_id='cifar10_cnn')
This yielded the following results after 50 epochs:
Training Step: 26050 | total loss: 0.35260 | time: 144.306s
| Adam | epoch: 050 | loss: 0.35260 - acc: 0.8785 | val_loss: 0.64622 - val_acc: 0.8212 -- iter: 50000/50000
I then tried to convert it to Keras using the same DCNN layers, parameters and image preprocessing/augmentation:
import numpy as np
from keras.datasets import cifar10
from keras.callbacks import TensorBoard
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, UpSampling2D, AtrousConvolution2D
from keras.layers.advanced_activations import LeakyReLU, PReLU
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K
import matplotlib
from matplotlib import pyplot as plt
np.random.seed(1337)
batch_size = 96 # how many images to process at once
nb_classes = 10 # how many types of objects we can detect in this set
nb_epoch = 50 # how long we train the system
img_rows, img_cols = 32, 32 # image dimensions
nb_filters = 32 # number of convolutional filters to use
pool_size = (2, 2) # size of pooling area for max pooling
kernel_size = (3, 3) # convolution kernel size
(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()
X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 3)
X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 3)
input_shape = (img_rows, img_cols, 3)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(Y_train, nb_classes)
Y_test = np_utils.to_categorical(Y_test, nb_classes)
datagen = ImageDataGenerator(featurewise_center=True,
featurewise_std_normalization=True,
horizontal_flip=True,
rotation_range=25)
datagen.fit(X_train)
model = Sequential()
model.add(Conv2D(nb_filters, kernel_size, padding='valid', input_shape=input_shape, activation='relu'))
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Conv2D(nb_filters*2, kernel_size, activation='relu'))
model.add(Conv2D(nb_filters*2, kernel_size, activation='relu'))
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes, activation='softmax'))
model.compile(loss='categorical_crossentropy',
optimizer='adam',
metrics=['accuracy'])
# Set up TensorBoard
tb = TensorBoard(log_dir='./logs')
history = model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size), epochs=nb_epoch, shuffle=True, verbose=1, validation_data=(X_test, Y_test), callbacks=[tb])
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print("Accuracy: %.2f%%" % (score[1]*100))
plt.plot(history.epoch,history.history['val_acc'],'-o',label='validation')
plt.plot(history.epoch,history.history['acc'],'-o',label='training')
plt.legend(loc=0)
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.grid(True)
plt.show()
This yielded far worse validation accuracy results:
Epoch 50/50
521/521 [==============================] - 84s 162ms/step - loss: 0.4723 - acc: 0.8340 - val_loss: 3.2970 - val_acc: 0.2729
Test score: 3.2969648239135743
Accuracy: 27.29%
Can anyone help me understand why? Have I misapplied/misunderstood image preprocessing/augmentation in Keras?
In your Keras model, you have forgotten to normalize the validation data as well. You can do this either by using datagen.mean and datagen.std computed over the training data:
# normalize test data; add a small constant to avoid division by zero,
# you can alternatively use `keras.backend.epsilon()`
X_test = (X_test - datagen.mean) / (datagen.std + 1e-8)
or you can use the datagen.standardize() method to normalize test data:
X_test = datagen.standardize(X_test)
Look at this question on SO for more info: How does data normalization work in keras during prediction?
Don't forget that you should normalize test data by the statistics computed over the training data. NEVER EVER normalize test data by its own statistics.
Caveat: It seems that standardize alters its argument as well... yes, you can confirm this in the source code.