Keras model not compiling - python

I am trying to build a Keras model for a classification task, and I get an error when I try to fit the data.
ValueError: Shapes (None, 99) and (None, 2) are incompatible
Code:
import warnings
warnings.filterwarnings(action = 'ignore')
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
df = pd.read_csv('train.csv')
del df['ST_CASE']
df
target_column = ['MVISOBSC']
predictors = list(set(list(df.columns))-set(target_column))
df[predictors] = df[predictors]/df[predictors].max()
X = df[predictors].values
y = df[target_column].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)
print(X_train.shape); print(X_test.shape)
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
model = Sequential()
model.add(Dense(500, activation='relu', input_dim=6))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(optimizer='adam',
loss='categorical_crossentropy',
metrics=['accuracy'])
try:
model.fit(X_train, y_train, epochs = 20)
except Exception as e:
print(e)
Shape values:
X_train = (1282, 6)
X_test = (550, 6)
y_train = (1282)
y_test = (550)
I have also tried reshaping the X_train and X_test, but it does not have any effect on the error.

The number of units in the last Dense layer must match the dimensionality of the targets.
# Use plain integer class ids as targets. If the labels were already one-hot
# encoded with to_categorical, collapse them back to ids; otherwise just
# flatten the (n, 1) label column to shape (n,).
y_train = np.asarray(y_train)
y_test = np.asarray(y_test)
if y_train.ndim == 2 and y_train.shape[1] > 1:
    y_train = y_train.argmax(axis=1)
    y_test = y_test.argmax(axis=1)
else:
    y_train = y_train.ravel().astype('int64')
    y_test = y_test.ravel().astype('int64')
# The output layer needs one unit per class id. The original error
# "Shapes (None, 99) and (None, 2)" shows the label ids run up to 98, so a
# 2-unit (or 1-unit) output cannot match them.
num_classes = int(max(y_train.max(), y_test.max())) + 1
model = Sequential()
model.add(Dense(500, activation='relu', input_dim=6))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
# A softmax over a single unit is always 1.0, so the layer is sized to the
# number of classes instead.
model.add(Dense(num_classes, activation='softmax'))
# sparse_categorical_crossentropy takes integer ids directly, so train and
# test targets never disagree on one-hot width.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

Related

ValueError: Input 0 of layer "sequential_1" is incompatible with the layer: expected shape=(None, 614, 8), found shape=(None, 8)

This is my first time creating an AI and I keep getting this error; I don't know what it means or how to fix it.
I am using Google Colab (colab.research.google.com) to run this (Python).
Code:
# Load the data set and separate the feature columns from the diagnosis label.
import pandas as pd
dataset = pd.read_csv('cancer.csv')
x = dataset.drop(columns=["diagnosis(1=m, 0=b)"])
y = dataset["diagnosis(1=m, 0=b)"]
# Hold out 20% of the rows for testing.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
import tensorflow as tf
model = tf.keras.models.Sequential()
# NOTE(review): input_shape receives the full x_train.shape, i.e. (rows, columns).
# The reported error (expected shape=(None, 614, 8), found shape=(None, 8)) is
# consistent with the row count being treated as an extra input axis.
# Presumably this should be input_shape=(x_train.shape[1],) -- confirm.
model.add(tf.keras.layers.Dense(256, input_shape=x_train.shape, activation='sigmoid'))
model.add(tf.keras.layers.Dense(256, activation='sigmoid'))
# Single sigmoid unit paired with binary cross-entropy for the 1/0 diagnosis label.
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# This is the call that raises the ValueError quoted in the question.
model.fit(x_train, y_train, epochs=1000)
On the last line, where I try to train the model, I get the error shown above.
Any help would be appreciated, thanks.

Python CNN LSTM (Value Error strides should be of length 1, 1 or 3 but was 2)

I've been trying to train a convlstm model on mnist dataset to broaden my knowledge in model development. I cannot escape the error that I have included in the title. Any help or hint is appreciated!
I know the default for strides is (1,1) but am unsure how 2 is being set.
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, CuDNNLSTM, TimeDistributed, Reshape
from keras.utils import to_categorical
from keras.layers.convolutional import Conv2D, Conv3D
from keras.layers.pooling import MaxPooling2D, MaxPool3D
from keras.layers.core import Flatten
def prep_pixels(train, test):
    """Return float32 copies of two pixel arrays scaled by 1/255.

    Args:
        train: Array of training pixels; must support ``astype``.
        test: Array of test pixels; must support ``astype``.

    Returns:
        A ``(train_norm, test_norm)`` tuple of float32 arrays. For inputs in
        the 0-255 range the results lie in [0, 1]. The inputs are not
        modified.
    """
    # Convert from integers to floats before dividing so the result is
    # float32 rather than float64.
    train_norm = train.astype('float32') / 255.0
    test_norm = test.astype('float32') / 255.0
    return train_norm, test_norm
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape((x_train.shape[0], 28, 28, 1))
x_test = x_test.reshape((x_test.shape[0], 28, 28, 1))
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
x_train, x_test = prep_pixels(x_train, x_test)
model = Sequential()
model.add(TimeDistributed(Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1))))
model.add(TimeDistributed((MaxPooling2D((2, 2)))))
model.add(TimeDistributed(Flatten()))
model.add(LSTM(32, activation='relu', return_sequences=True))
model.add(Dropout(0.2))
model.add(Dense(10, activation='softmax'))
opt = tf.keras.optimizers.Adam(lr=1e-3, decay=1e-5)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
model.fit(x_train, y_train, epochs=1, validation_data=(x_test, y_test))
ERROR
model.fit(x_train, y_train, epochs=1, validation_data=(x_test, y_test))
strides = _get_sequence(strides, n, channel_index, "strides")
ValueError: strides should be of length 1, 1 or 3 but was 2
It seems like you haven't created a windowed dataset for your ConvLSTM. So you might want to do this before calling model.fit
d_train = tf.keras.preprocessing.sequence.TimeseriesGenerator(x_train, y_train, length=5, batch_size=64) # window size = 5
d_test = tf.keras.preprocessing.sequence.TimeseriesGenerator(x_test, y_test, length=5)
model.fit(d_train, epochs=1, validation_data=d_test)
To be consistent with your loss function, you will need to disable return sequences (or add another layer that does not return sequences).
model.add(tf.keras.layers.LSTM(32, activation='relu', return_sequences=False))

How to see which test data is classified incorrectly

The accuracy at the output is not 100%, hence there are texts that are classified by the network incorrectly. How can I look at these texts after the network?
import numpy as np
import keras
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense, Activation,Dropout
from keras.preprocessing.text import Tokenizer
import matplotlib.pyplot as plt
from keras import optimizers
from keras.layers import Conv1D, GlobalMaxPooling1D
np.random.seed(42)
max_features = 10000
maxlen = 400
batch_size = 64
embedding_dims = 200
filters = 150
kernel_size = 5
hidden_dims = 50
epochs =5
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=1000)
print(x_train.shape)
print(x_test.shape)
print(x_train[0])
print(y_train[0])
tokenizer = Tokenizer(num_words=1000)
x_train = tokenizer.sequences_to_matrix(x_train, mode='binary')
x_test = tokenizer.sequences_to_matrix(x_test, mode='binary')
print(x_train[0])
num_classes = 2
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
print(y_train.shape)
print(y_test.shape)
model = Sequential()
model.add(Dense(512,input_dim = 1000,activation = 'relu'))
model.add(Dropout(0.2))
model.add(Dense(128,activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64,activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_classes,activation='sigmoid'))
model.summary()
opt = optimizers.Adam(lr=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
model.compile(loss="binary_crossentropy", optimizer=opt, metrics=['accuracy'])
clf = model.fit(x_train, y_train, batch_size=128, epochs=5, validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=1)
print("Accuracy: ", score[1])
I tried this code, but got an error
y_pred = model.predict(x_test)
# bolean mask
mask = y_pred != y_test
# print rows that was classified incorrectly
print(x_test[mask])
print(x_test[mask]) IndexError: boolean index did not match indexed
array along dimension 1; dimension is 1000 but corresponding boolean
dimension is 2
I changed your complete code so that it runs through with a single output (since we are studying a binary problem) and you can study the wrongly classified samples. The result proves that you are using a completely unsuitable model for your task.
import numpy as np
import keras
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense, Activation,Dropout
from keras.preprocessing.text import Tokenizer
import matplotlib.pyplot as plt
from keras import optimizers
from keras.layers import Conv1D, GlobalMaxPooling1D
import pandas as pd
np.random.seed(42)
max_features = 10000
maxlen = 400
batch_size = 64
embedding_dims = 200
filters = 150
kernel_size = 5
hidden_dims = 50
epochs =5
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=1000)
print(x_train.shape)
print(x_test.shape)
print(x_train[0])
print(y_train[0])
tokenizer = Tokenizer(num_words=1000)
x_train = tokenizer.sequences_to_matrix(x_train, mode='binary')
x_test = tokenizer.sequences_to_matrix(x_test, mode='binary')
print(x_train[0])
num_classes = 1
#y_train = keras.utils.to_categorical(y_train, num_classes)
#y_test = keras.utils.to_categorical(y_test, num_classes)
print(y_train.shape)
print(y_test.shape)
model = Sequential()
model.add(Dense(512,input_dim = 1000,activation = 'relu'))
model.add(Dropout(0.2))
model.add(Dense(128,activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(64,activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(num_classes,activation='sigmoid'))
model.summary()
opt = optimizers.Adam()
model.compile(loss="binary_crossentropy", optimizer=opt, metrics=['accuracy'])
clf = model.fit(x_train, y_train, batch_size=128, epochs=5, validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=1)
print("Accuracy: ", score[1])
# The final layer is a single sigmoid unit, so model.predict returns one
# probability per sample rather than a 0/1 label. Threshold at 0.5 first;
# comparing raw probabilities with the integer labels would flag almost
# every row as wrong.
y_prob = model.predict(x_test).ravel()
y_pred = (y_prob > 0.5).astype(int)
# One row per test sample: the 1000 input features, the true label and the
# predicted label side by side.
df_test_pred = pd.concat(
    [
        pd.DataFrame(x_test),
        pd.DataFrame(y_test, columns=['test']),
        pd.DataFrame(y_pred, columns=['pred']),
    ],
    axis=1,
)
# Rows where the predicted label disagrees with the true label.
df_wrong = df_test_pred[df_test_pred['test'] != df_test_pred['pred']]

Shaping neural network classification output dimensions?

I am receiving the following error when I fit the network - ValueError: Error when checking target: expected dense_6 to have shape (2,) but got array with shape (22,)
As far as I can tell the shape should be correct given how the dataset is split? Any help is greatly appreciated, thanks!
The dataset can be found here: https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data
from keras.layers import Dense
from keras.models import Sequential
import keras.utils
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
# seed weights
np.random.seed(3)
# import dataset
data = pd.read_csv('agaricus-lepiota.csv', delimiter=',')
# encode labels as integers so the can be one-hot-encoded which takes int matrix
le = preprocessing.LabelEncoder()
data = data.apply(le.fit_transform)
# one-hot-encode string data (now type int)
ohe = preprocessing.OneHotEncoder(sparse=False)
data = ohe.fit_transform(data)
X = data[:, 1:23]
Y = data[:, 0:1]
# split into test and train set
x_train, y_train, x_test, y_test = train_test_split(X, Y, test_size=.2, random_state=5)
# create model
model = Sequential()
model.add(Dense(500, input_dim=22, activation='relu'))
model.add(Dense(300, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(25, activation='relu'))
model.add(Dense(2, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=1000, batch_size=25)
I found 2 errors in your code.
1)
x_train, y_train, x_test, y_test = train_test_split(X, Y, test_size=.2, random_state=5)
must be
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=.2, random_state=5)
check this to learn more about the function.
2)
You have only one column in y_train. But the last layer in your model adds two columns. So instead of
model = Sequential()
model.add(Dense(500, input_dim=22, activation='relu'))
model.add(Dense(300, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(25, activation='relu'))
model.add(Dense(2, activation='sigmoid'))
use this:
model = Sequential()
model.add(Dense(500, input_dim=22, activation='relu'))
model.add(Dense(300, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(25, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

Shape mismatch in LSTM in keras

I am trying to run an LSTM using Keras on my custom feature set. I have train and test features in separate files. Each CSV file contains 11 columns, with the last column as the class label. There are 40 classes in total in my dataset. The problem is that I am not able to figure out the correct input_shape for the first layer. I have searched Stack Overflow and GitHub but am still not able to solve this.
Below is my complete code.
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
numpy.random.seed(7)
train_dataset = numpy.loadtxt("train.csv", delimiter=",")
X_train = train_dataset[:, 0:10]
y_train = train_dataset[:, 10]
test_dataset = numpy.loadtxt("test.csv", delimiter=",")
X_test = test_dataset[:, 0:10]
y_test = test_dataset[:, 10]
model = Sequential()
model.add(LSTM(32, return_sequences=True, input_shape=X_train.shape))
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(32))
model.add(Dense(1, activation='softmax'))
model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=10, epochs=1)
score, acc = model.evaluate(X_test, y_test, batch_size=10)
print('Test score:', score)
print('Test accuracy:', acc * 100)
Whatever I change in the input_shape parameter, I get an error either in the first LSTM layer or in the fit method.
You don't have a time dimension in your input.
The input to an RNN should have shape (batch_size, time_steps, features), while your input has shape (batch_size, features).
If you want to feed your 10 columns one at a time, you should reshape the array with
numpy.reshape(train_dataset, (-1, train_dataset.shape[1], 1))
Try this code:
# Split features and labels BEFORE reshaping: only the 10 feature columns get
# the extra axis, and the label column stays a flat vector of class ids.
train_dataset = numpy.loadtxt("train.csv", delimiter=",")
X_train = train_dataset[:, 0:10]
y_train = train_dataset[:, 10].astype("int64")
test_dataset = numpy.loadtxt("test.csv", delimiter=",")
X_test = test_dataset[:, 0:10]
y_test = test_dataset[:, 10].astype("int64")
# LSTM input is (batch_size, time_steps, features): treat the 10 columns as
# 10 time steps carrying one feature each.
X_train = X_train.reshape((-1, X_train.shape[1], 1))
X_test = X_test.reshape((-1, X_test.shape[1], 1))
# One output unit per class id. Sizing from the largest id works whether the
# labels start at 0 or at 1.
# NOTE(review): assumes labels are non-negative integers -- confirm.
num_classes = int(max(y_train.max(), y_test.max())) + 1
model = Sequential()
model.add(LSTM(32, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(32))
# A softmax over a single unit is always 1.0, so a multi-class problem needs
# num_classes units together with a categorical cross-entropy loss.
model.add(Dense(num_classes, activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=10, epochs=1)
score, acc = model.evaluate(X_test, y_test, batch_size=10)
print('Test score:', score)
print('Test accuracy:', acc * 100)

Categories

Resources