How to see which test data is classified incorrectly - python

The accuracy at the output is not 100%, so some texts are classified incorrectly by the network. How can I inspect these misclassified texts after the network has made its predictions?
import numpy as np
import keras
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense, Activation,Dropout
from keras.preprocessing.text import Tokenizer
import matplotlib.pyplot as plt
from keras import optimizers
from keras.layers import Conv1D, GlobalMaxPooling1D
# Seed NumPy's global random number generator.
np.random.seed(42)
# NOTE(review): none of the constants below are referenced later in this
# snippet (model.fit uses the literals batch_size=128 and epochs=5).
max_features = 10000
maxlen = 400
batch_size = 64
embedding_dims = 200
filters = 150
kernel_size = 5
hidden_dims = 50
epochs =5
# Load the IMDB dataset with num_words=1000 and show its shapes and one sample.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=1000)
print(x_train.shape)
print(x_test.shape)
print(x_train[0])
print(y_train[0])
# Convert each review sequence into one fixed-width row (mode='binary');
# the first Dense layer below expects 1000 input features.
tokenizer = Tokenizer(num_words=1000)
x_train = tokenizer.sequences_to_matrix(x_train, mode='binary')
x_test = tokenizer.sequences_to_matrix(x_test, mode='binary')
print(x_train[0])
# Encode the labels as categorical vectors with num_classes (= 2) columns.
num_classes = 2
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
print(y_train.shape)
print(y_test.shape)
# Fully connected network: 512 -> 128 -> 64 -> 32 units, each followed by
# Dropout(0.2), ending in a layer with num_classes sigmoid outputs.
model = Sequential()
model.add(Dense(512,input_dim = 1000,activation = 'relu'))
model.add(Dropout(0.2))
model.add(Dense(128,activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64,activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_classes,activation='sigmoid'))
model.summary()
# NOTE(review): lr=0.1 looks unusually large for Adam -- confirm it is intended.
opt = optimizers.Adam(lr=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
model.compile(loss="binary_crossentropy", optimizer=opt, metrics=['accuracy'])
# Train for 5 epochs; the test split doubles as validation data.
clf = model.fit(x_train, y_train, batch_size=128, epochs=5, validation_data=(x_test, y_test))
# Evaluate on the test split; score[1] is reported as the accuracy.
score = model.evaluate(x_test, y_test, verbose=1)
print("Accuracy: ", score[1])
I tried this code, but got an error
# Run the trained model on the test set.
y_pred = model.predict(x_test)
# boolean mask
# NOTE: y_pred and y_test have num_classes (= 2) columns while x_test has
# 1000, so this element-wise mask cannot be used to select rows of x_test;
# that is the mismatch reported by the IndexError quoted below.
mask = y_pred != y_test
# print rows that were classified incorrectly
print(x_test[mask])
print(x_test[mask]) IndexError: boolean index did not match indexed
array along dimension 1; dimension is 1000 but corresponding boolean
dimension is 2

I changed your complete code so that it runs with only one output class (since this is a binary problem) and so that you can inspect the wrongly classified samples. The result shows that you are using a completely unsuitable model for your task.
import numpy as np
import keras
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense, Activation,Dropout
from keras.preprocessing.text import Tokenizer
import matplotlib.pyplot as plt
from keras import optimizers
from keras.layers import Conv1D, GlobalMaxPooling1D
import pandas as pd
# Seed NumPy's global random number generator.
np.random.seed(42)
# NOTE(review): none of the constants below are referenced later in this
# snippet (model.fit uses the literals batch_size=128 and epochs=5).
max_features = 10000
maxlen = 400
batch_size = 64
embedding_dims = 200
filters = 150
kernel_size = 5
hidden_dims = 50
epochs =5
# Load the IMDB dataset with num_words=1000 and show its shapes and one sample.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=1000)
print(x_train.shape)
print(x_test.shape)
print(x_train[0])
print(y_train[0])
# Convert each review sequence into one fixed-width row (mode='binary');
# the first Dense layer below expects 1000 input features.
tokenizer = Tokenizer(num_words=1000)
x_train = tokenizer.sequences_to_matrix(x_train, mode='binary')
x_test = tokenizer.sequences_to_matrix(x_test, mode='binary')
print(x_train[0])
# Single output unit: the labels are left as loaded instead of being
# expanded with to_categorical (the two calls below are disabled on purpose).
num_classes = 1
#y_train = keras.utils.to_categorical(y_train, num_classes)
#y_test = keras.utils.to_categorical(y_test, num_classes)
print(y_train.shape)
print(y_test.shape)
# Fully connected network: 512 -> 128 -> 64 -> 32 units, each followed by
# Dropout(0.2), ending in a single sigmoid output.
model = Sequential()
model.add(Dense(512,input_dim = 1000,activation = 'relu'))
model.add(Dropout(0.2))
model.add(Dense(128,activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64,activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_classes,activation='sigmoid'))
model.summary()
# Adam is constructed with no arguments, i.e. with its default settings.
opt = optimizers.Adam()
model.compile(loss="binary_crossentropy", optimizer=opt, metrics=['accuracy'])
# Train for 5 epochs; the test split doubles as validation data.
clf = model.fit(x_train, y_train, batch_size=128, epochs=5, validation_data=(x_test, y_test))
# Evaluate on the test split; score[1] is reported as the accuracy.
score = model.evaluate(x_test, y_test, verbose=1)
print("Accuracy: ", score[1])
# The model ends in a single sigmoid unit, so model.predict returns one
# probability per sample. Threshold it into a 0/1 label before comparing with
# y_test: comparing the raw floats with the integer labels would mark almost
# every row as "wrong", even the ones the network got right.
y_prob = model.predict(x_test).ravel()
y_pred = (y_prob > 0.5).astype(int)
# One row per test sample: the 1000 input features, the true label ('test'),
# the predicted label ('pred') and the underlying probability ('prob').
df_test_pred = pd.concat(
    [
        pd.DataFrame(x_test),
        pd.DataFrame(y_test, columns=['test']),
        pd.DataFrame(y_pred, columns=['pred']),
        pd.DataFrame(y_prob, columns=['prob']),
    ],
    axis=1,
)
# Keep only the samples whose predicted label differs from the true label.
df_wrong = df_test_pred[df_test_pred['test'] != df_test_pred['pred']]

Related

How can I see the scores of every class after running predict_classes

I have a classification network for the MNIST dataset (CSV) with 10 labels, the digits (0,1,2,3,4,5,6,7,8,9). After training the network, I run predict_classes on the test data. For each sample in the test set, I want to know the score of every label (0,1,2,3,4,5,6,7,8,9) in y_pred.
For example, if predict_classes says that the label of the first sample is "7", what is the score of 7, and what are the scores of the other labels (0,1,2,3,4,5,6,8,9)?
How can I write the code for this?
from keras import models
import numpy as np
from keras import layers
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
mnist = tf.keras.datasets.mnist
# Load dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Normalize the data by dividing by 255.0
x_train = x_train / 255.0
x_test = x_test / 255.0
# Build model: two stacked LSTM layers followed by two Dense layers.
model = Sequential()
# The input shape is taken from the data (every axis except the sample axis).
model.add(LSTM(15, input_shape=(x_train.shape[1:]), return_sequences=True, activation="tanh", unroll=True))
model.add(LSTM(15, dropout=0.2, activation="tanh", unroll=True))
#model.add(LSTM(1, activation='tanh'))
#model.add(LSTM(2, activation='tanh'))
model.add(Dense(5, activation='tanh' ))
# NOTE(review): 10 sigmoid outputs are independent per-class scores and need
# not sum to 1; softmax is the usual choice with
# sparse_categorical_crossentropy -- confirm which is intended.
model.add(Dense(10, activation='sigmoid'))
model.summary()
opt = tf.keras.optimizers.Adam(lr=1e-3, decay=1e-5)
model.compile(loss='sparse_categorical_crossentropy', optimizer=opt,
metrics=['accuracy'])
# Train for 2 epochs; the test split doubles as validation data.
model.fit(x_train, y_train, epochs=2, validation_data=(x_test, y_test))
# predict_classes yields only the chosen label per sample, not the scores.
y_pred = model.predict_classes(x_test)
print(y_pred)
Instead of using model.predict_classes(), you can use model.predict() (https://www.tensorflow.org/api_docs/python/tf/keras/Model#predict).
This returns an array with the probability for all of the possible classes.

Keras model using GridSearchCV stuck in infinite loop

I have a keras model and I use sklearn.model_selection.GridSearchCV for tuning the hyperparameters, but it gets stuck in an infinite loop.
This is my model:
from keras import Sequential
from keras.layers import Dense, Activation
def make_model(optimizer='rmsprop'):
    """Build and compile the dense classifier used for the grid search.

    The network takes flattened 28 * 28 inputs and ends in a 10-way softmax
    layer, compiled with categorical cross-entropy.

    Args:
        optimizer: Optimizer name or instance handed to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(Dense(9, activation='relu', input_dim=28 * 28))
    model.add(Dense(27, activation='relu'))
    model.add(Dense(27, activation='relu'))
    model.add(Dense(81, activation='relu'))
    model.add(Dense(10, activation='softmax'))
    # metrics is passed as a list, the documented form, rather than a bare string.
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['acc'])
    return model
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
# Wrap the Keras model factory so scikit-learn can treat it as an estimator.
model = KerasClassifier(build_fn=make_model)
# Search space: only the number of epochs varies (25 vs. 40); the other
# entries are single-valued.
param_grid = {
    'optimizer': ['rmsprop'],
    'epochs': [25, 40],
    'batch_size': [45],
    'validation_split': [.2],
}
grid_model = GridSearchCV(estimator=model, param_grid=param_grid)
And when I call fit on the model, instead of running with 25 and 40 epochs it will get stuck in an infinite loop.
I used keras.datasets.fashion_mnist as my dataset as below:
# fashion_mnist is used below, so it has to be imported explicitly.
from keras.datasets import fashion_mnist
from keras.utils.np_utils import to_categorical
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
# One-hot encode the labels into 10 columns to match the softmax output layer.
y_train = to_categorical(train_labels, num_classes=10)
y_test = to_categorical(test_labels, num_classes=10)
# Scale the pixel values by 1/255.
x_train = train_images / 255.0
x_test = test_images / 255.0
# Flatten every image into one row; the sample count is taken from the array
# instead of being hard-coded as 60000 / 10000.
x_train = x_train.reshape(len(x_train), -1)
x_test = x_test.reshape(len(x_test), -1)
I used epochs=[2, 3] to demonstrate the loop; the rest of the code is the same as before.
This is the result:

Keras model not compiling

I am trying to build a Keras model for a classification problem, and I get an error when I try to fit the data.
ValueError: Shapes (None, 99) and (None, 2) are incompatible
Code:
import warnings
warnings.filterwarnings(action = 'ignore')
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
# Load the training table and drop the ST_CASE column.
df = pd.read_csv('train.csv')
del df['ST_CASE']
# Bare expression; presumably displays the frame when run in a notebook.
df
target_column = ['MVISOBSC']
# Every column except the target is a predictor.
# NOTE: the order comes from a set difference and is therefore not guaranteed.
predictors = list(set(list(df.columns))-set(target_column))
# Scale each predictor column by its own maximum.
df[predictors] = df[predictors]/df[predictors].max()
X = df[predictors].values
y = df[target_column].values
# 70/30 train/test split with a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)
print(X_train.shape); print(X_test.shape)
# NOTE(review): no num_classes is given, so the width of the encoded labels is
# inferred from the data; the reported error shows 99 columns against the
# model's 2 outputs -- presumably MVISOBSC holds codes up to 98; confirm.
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
# Dense network: 6 inputs -> 500 -> 100 -> 50 -> 2 softmax outputs.
model = Sequential()
model.add(Dense(500, activation='relu', input_dim=6))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(optimizer='adam',
loss='categorical_crossentropy',
metrics=['accuracy'])
# Fit the model and print the exception instead of raising, so the error
# message can be read directly in the output.
try:
    model.fit(X_train, y_train, epochs=20)
except Exception as e:
    print(e)
Shape values:
X_train = (1282, 6)
X_test = (550, 6)
y_train = (1282)
y_test = (550)
I have also tried reshaping the X_train and X_test, but it does not have any effect on the error.
The no. of units in the last Dense layer must match the dimensionality of the outputs.
# The labels were one-hot encoded with to_categorical, so their second
# dimension is the number of classes the network has to output. Size the last
# layer from it instead of hard-coding 2. (A single softmax unit would not
# work either: softmax over one value is always 1.0.)
# If y_train and y_test may contain different label values, pass the same
# num_classes= to both to_categorical calls so their widths agree.
num_classes = y_train.shape[1]
model = Sequential()
model.add(Dense(500, activation='relu', input_dim=6))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
# Output width must equal the label width (99 in the reported error, not 2).
model.add(Dense(num_classes, activation='softmax'))
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

Python CNN LSTM (Value Error strides should be of length 1, 1 or 3 but was 2)

I've been trying to train a convlstm model on mnist dataset to broaden my knowledge in model development. I cannot escape the error that I have included in the title. Any help or hint is appreciated!
I know the default for strides is (1,1) but am unsure how 2 is being set.
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, CuDNNLSTM, TimeDistributed, Reshape
from keras.utils import to_categorical
from keras.layers.convolutional import Conv2D, Conv3D
from keras.layers.pooling import MaxPooling2D, MaxPool3D
from keras.layers.core import Flatten
def prep_pixels(train, test):
    """Convert two pixel arrays to float32 and divide them by 255.

    For 8-bit image data this maps the values into the range 0-1. The input
    arrays are not modified; new arrays are returned.

    Args:
        train: Array of training images (anything with an ``astype`` method).
        test: Array of test images (anything with an ``astype`` method).

    Returns:
        A ``(train_norm, test_norm)`` tuple of float32 arrays.
    """
    # convert from integers to floats, then normalize to range 0-1
    train_norm = train.astype('float32') / 255.0
    test_norm = test.astype('float32') / 255.0
    # return normalized images
    return train_norm, test_norm
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Add a trailing channel axis: every sample becomes (28, 28, 1).
x_train = x_train.reshape((x_train.shape[0], 28, 28, 1))
x_test = x_test.reshape((x_test.shape[0], 28, 28, 1))
# One-hot encode the labels for categorical_crossentropy.
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
# Convert the pixels to float32 and scale them by 1/255.
x_train, x_test = prep_pixels(x_train, x_test)
model = Sequential()
# NOTE(review): the TimeDistributed layers presumably expect an extra time
# axis in front of the image axes, but x_train is 4-D (samples, 28, 28, 1)
# here -- likely the source of the reported strides error; confirm.
model.add(TimeDistributed(Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1))))
model.add(TimeDistributed((MaxPooling2D((2, 2)))))
model.add(TimeDistributed(Flatten()))
# return_sequences=True is set on this LSTM, so it emits an output per step.
model.add(LSTM(32, activation='relu', return_sequences=True))
model.add(Dropout(0.2))
model.add(Dense(10, activation='softmax'))
opt = tf.keras.optimizers.Adam(lr=1e-3, decay=1e-5)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
# Train for 1 epoch; the test split doubles as validation data.
model.fit(x_train, y_train, epochs=1, validation_data=(x_test, y_test))
ERROR
model.fit(x_train, y_train, epochs=1, validation_data=(x_test, y_test))
strides = _get_sequence(strides, n, channel_index, "strides")
ValueError: strides should be of length 1, 1 or 3 but was 2
It seems like you haven't created a windowed dataset for your ConvLSTM. So you might want to do this before calling model.fit
# Wrap the arrays in generators built with length=5 (the window size), so
# the model is fed windows rather than single images.
d_train = tf.keras.preprocessing.sequence.TimeseriesGenerator(x_train, y_train, length=5, batch_size=64) # window size = 5
# The test generator uses the same window length; no batch_size is passed.
d_test = tf.keras.preprocessing.sequence.TimeseriesGenerator(x_test, y_test, length=5)
# Train and validate on the generators instead of the raw arrays.
model.fit(d_train, epochs=1, validation_data=d_test)
To be consistent with your loss function, you will need to disable return sequences (or add another layer that does not return sequences).
# Same LSTM layer as in the question, but with return_sequences=False.
model.add(tf.keras.layers.LSTM(32, activation='relu', return_sequences=False))

Keras correct input shape for multilayer perceptron

I'm trying to make a basic MLP example in keras. My input data has the shape train_data.shape = (2000,75,75) and my testing data has the shape test_data.shape = (500,75,75). 2000 and 500 are the numbers of samples of training and test data (in other words, the shape of the data is (75,75), but there are 2000 and 500 pieces of training and testing data). The output should have two classes.
I'm unsure what value to use for the input_shape parameter on the first layer of the network. Using the code from the mnist example in the keras repository, I have (updated):
from six.moves import cPickle
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.utils import np_utils
from keras.optimizers import RMSprop
# Globals
# Number of label columns produced by to_categorical.
NUM_CLASSES = 2
# Passed to model.fit as the number of epochs.
NUM_EPOCHS = 10
# Passed to model.fit as the batch size.
BATCH_SIZE = 250
def loadData():
    """Load the pickled data and labels and split them into train and test.

    Reads ``data.pkl`` and ``labels.pkl`` from the current working directory.
    The first 2000 entries form the training set; everything after them forms
    the test set.

    Returns:
        A tuple ``(train_data, train_labels, test_data, test_labels)``.

    Raises:
        OSError: If either pickle file cannot be opened.
    """
    # NOTE(security): unpickling can execute arbitrary code; only load files
    # that come from a trusted source.
    # `with` closes each file as soon as it has been read, even on error.
    with open('data.pkl', 'rb') as fData:
        data = cPickle.load(fData)
    with open('labels.pkl', 'rb') as fLabels:
        labels = cPickle.load(fLabels)
    train_data = data[0:2000]
    train_labels = labels[0:2000]
    test_data = data[2000:]
    test_labels = labels[2000:]
    return (train_data, train_labels, test_data, test_labels)
# Load data and corresponding labels for model
train_data, train_labels, test_data, test_labels = loadData()
# One-hot encode the labels into NUM_CLASSES columns.
train_labels = np_utils.to_categorical(train_labels, NUM_CLASSES)
test_labels = np_utils.to_categorical(test_labels, NUM_CLASSES)
print(train_data.shape)
print(test_data.shape)
# MLP: 5625 inputs -> 512 -> 512 -> 2 softmax outputs, with Dropout(0.2)
# after each hidden activation.
# NOTE: the first layer expects flat vectors of length 5625, while the
# question states that train_data has shape (2000, 75, 75); this is the
# mismatch named in the reported error.
model = Sequential()
model.add(Dense(512, input_shape=(5625,)))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(2))
model.add(Activation('softmax'))
model.summary()
model.compile(loss='categorical_crossentropy',
optimizer=RMSprop(),
metrics=['accuracy'])
# NOTE(review): nb_epoch looks like the older Keras spelling of `epochs`;
# newer releases may reject it -- confirm against the installed version.
history = model.fit(train_data, train_labels, validation_data=(test_data, test_labels),
batch_size=BATCH_SIZE, nb_epoch=NUM_EPOCHS,
verbose=1)
# Evaluate on the test split; score[0] and score[1] are printed as the
# test score and the test accuracy.
score = model.evaluate(test_data, test_labels, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])
where 5625 is 75 * 75 (emulating the MNIST example). The error I get is:
Error when checking model input: expected dense_input_1 to have 2 dimensions, but got array with shape (2000, 75, 75)
Any ideas?
From keras MLP example, https://github.com/fchollet/keras/blob/master/examples/mnist_mlp.py
# the data, shuffled and split between train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(60000, 784)
X_test = X_test.reshape(10000, 784)
And the model input
model = Sequential()
model.add(Dense(512, input_shape=(784,)))
So you should reshape your train and test to (2000,75*75) and (500,75*75) with
# Flatten every 75x75 sample into a vector of length 75*75 = 5625. The number
# of samples is read from each array, so the same two lines work for any
# train/test split size (here they give (2000, 5625) and (500, 5625)).
train_data = train_data.reshape(len(train_data), 75*75)
test_data = test_data.reshape(len(test_data), 75*75)
and then set the model input shape as you did
model.add(Dense(512, input_shape=(75*75,)))

Categories

Resources