Shaping neural network classification output dimensions? - python

I am receiving the following error when I fit the network - ValueError: Error when checking target: expected dense_6 to have shape (2,) but got array with shape (22,)
As far as I can tell, the shapes should be correct given how the dataset is split. Any help is greatly appreciated, thanks!
The dataset can be found here: https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data
from keras.layers import Dense
from keras.models import Sequential
import keras.utils
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
# seed weights
np.random.seed(3)
# import dataset
data = pd.read_csv('agaricus-lepiota.csv', delimiter=',')
# encode string labels as integers so they can be one-hot-encoded (OneHotEncoder expects an integer matrix)
le = preprocessing.LabelEncoder()
data = data.apply(le.fit_transform)
# one-hot-encode string data (now type int)
ohe = preprocessing.OneHotEncoder(sparse=False)
data = ohe.fit_transform(data)
X = data[:, 1:23]
Y = data[:, 0:1]
# split into test and train set
x_train, y_train, x_test, y_test = train_test_split(X, Y, test_size=.2, random_state=5)
# create model
model = Sequential()
model.add(Dense(500, input_dim=22, activation='relu'))
model.add(Dense(300, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(25, activation='relu'))
model.add(Dense(2, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=1000, batch_size=25)

I found 2 errors in your code.
1)
x_train, y_train, x_test, y_test = train_test_split(X, Y, test_size=.2, random_state=5)
must be
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=.2, random_state=5)
Check the train_test_split documentation to learn more about the function's return order.
2)
You have only one column in y_train, but the last layer in your model outputs two. So instead of
model = Sequential()
model.add(Dense(500, input_dim=22, activation='relu'))
model.add(Dense(300, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(25, activation='relu'))
model.add(Dense(2, activation='sigmoid'))
use this:
model = Sequential()
model.add(Dense(500, input_dim=22, activation='relu'))
model.add(Dense(300, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(25, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
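With the split order fixed and a single-unit sigmoid output, the target shape matches the binary_crossentropy loss already in the compile call. As a quick sanity check before fitting (a minimal sketch reusing the variables from the question):
# The last Dense layer's unit count must equal the number of target columns.
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=.2, random_state=5)
print(x_train.shape, y_train.shape)  # expect (..., 22) for X and (..., 1) for Y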

Related

Keras model not compiling

I am trying to build a Keras model for a classification task, and I get an error when I try to fit the data.
ValueError: Shapes (None, 99) and (None, 2) are incompatible
Code:
import warnings
warnings.filterwarnings(action = 'ignore')
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
df = pd.read_csv('train.csv')
del df['ST_CASE']
df
target_column = ['MVISOBSC']
predictors = list(set(list(df.columns))-set(target_column))
df[predictors] = df[predictors]/df[predictors].max()
X = df[predictors].values
y = df[target_column].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)
print(X_train.shape); print(X_test.shape)
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
model = Sequential()
model.add(Dense(500, activation='relu', input_dim=6))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
try:
    model.fit(X_train, y_train, epochs=20)
except Exception as e:
    print(e)
Shape values:
X_train = (1282, 6)
X_test = (550, 6)
y_train = (1282)
y_test = (550)
I have also tried reshaping the X_train and X_test, but it does not have any effect on the error.
The number of units in the last Dense layer must match the dimensionality of the targets.
import numpy as np

# Keep the integer labels and reshape them to (N, 1)
# (this replaces the to_categorical calls above)
y_train = np.expand_dims(y_train, axis=1)
y_test = np.expand_dims(y_test, axis=1)

model = Sequential()
model.add(Dense(500, activation='relu', input_dim=6))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
# A single unit needs sigmoid, not softmax (softmax over one unit is always 1.0),
# and a one-unit output pairs with binary_crossentropy
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
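If the labels actually span many classes (the (None, 99) in the error suggests integer labels up to 98), an alternative sketch, not from the original answer, keeps to_categorical and sizes the output layer from the targets:
from keras.utils import to_categorical

y_train = to_categorical(y_train)  # yields (N, 99) if labels run 0..98
y_test = to_categorical(y_test)

model = Sequential()
model.add(Dense(500, activation='relu', input_dim=6))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(y_train.shape[1], activation='softmax'))  # width read off the targets
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])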

Unable to pass appropriate shape to tensor flow model with tf.data.Dataset.from_generator

When using the tf.data.Dataset.from_generator API to generate train and test datasets, I am not able to pass the appropriate shape to the TensorFlow model.
Following is my code:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
import pandas as pd
def fetchValuesFromDatabase(path):
    df = pd.read_csv(path, header=None)
    rows_from_csv = df.values[:]
    rows_list = rows_from_csv.tolist()
    rows_list = rows_list[1:]

    def castFunction(val):
        try:
            return int(val)
        except:
            return int(float(val))

    result_column = [list(map(lambda x: castFunction(x), value[-1])) for value in rows_list]
    train_columns = [list(map(lambda x: castFunction(x), value[3:-1])) for value in rows_list]
    print(train_columns)
    X_train, X_test, y_train, y_test = train_test_split(train_columns, result_column, test_size=0.20, shuffle=True)
    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(32)
    test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(32)
    return train_dataset, test_dataset

def createModel():
    model = Sequential()
    model.add(Dense(10, input_shape=(10,), activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

model = createModel()
train_dataset, test_dataset = fetchValuesFromDatabase("ModifiedHrTrainData.csv")
model.fit(train_dataset, epochs=10, validation_data=test_dataset)
I am getting the following error:
ValueError: Input 0 of layer sequential is incompatible with the layer: expected axis -1 of input shape to have value 10 but received input with shape [10, 1]
The same error doesn't occur if tf.data.Dataset.from_tensor_slices is used and passed to the model. I need help achieving the same with the tf.data.Dataset.from_generator API.
Following is the dataset link
https://mega.nz/file/DZkVWSTT#MhjiuFcDMbe80gZ34AkMCjWD3h3y87ytpn9q4AT1bu4
Please help me understand the issue.
Add a tf.keras.Input layer to your model: tf.keras.Input(shape=(10,))
def createModel():
    model = tf.keras.models.Sequential()
    model.add(tf.keras.Input(shape=(10,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
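The answer above declares the input shape on the model side. If the goal is specifically the tf.data.Dataset.from_generator API, the usual way to fix the shape mismatch is to declare the element shapes explicitly with output_signature. A minimal sketch, assuming 10 float features and 1 label per row (the generator name is hypothetical):
def rowGenerator():
    # Yields one (features, label) pair per row; stands in for
    # iterating over X_train / y_train from the question.
    for features, label in zip(X_train, y_train):
        yield features, label

train_dataset = tf.data.Dataset.from_generator(
    rowGenerator,
    output_signature=(
        tf.TensorSpec(shape=(10,), dtype=tf.float32),  # 10 features per example
        tf.TensorSpec(shape=(1,), dtype=tf.float32),   # single binary label
    ),
).batch(32)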

text binary classification error:logits and labels must have the same shape

I'm trying to build a model to classify my text as hate (1) or not (0) using a neural network.
About the data: it consists of tweets and a class label (hate (1) or not (0)):
sentences = df['comment']
y = df['isHate']
sentences_train, sentences_test, train_y, test_y = train_test_split(sentences, y, test_size=0.25, random_state=42)
The text goes through word embeddings, and I applied pad_sequences to the tweets and LabelEncoder to the labels.
The problem is that when I run it, I get this error:
ValueError: logits and labels must have the same shape ((None, 1) vs (None, 2))
the code of the model:
emb_dim = 16
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim= emb_dim, input_length=maxlen))
model.add(Flatten())
model.add(Dense(2, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.summary()
The error happens in this part:
history = model.fit(X_train, y_train,
                    batch_size=32,
                    epochs=15,
                    validation_data=(X_test, y_test))
Any help?
In your code:
model.add(Dense(1, activation='sigmoid'))
Your last Dense layer has only 1 unit, but your labels are one-hot encoded with 2 classes. So you need to change it to:
model.add(Dense(2, activation='softmax'))
You also need to change your loss function, because the labels are one-hot encoded:
loss='categorical_crossentropy'
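Alternatively (an option not in the original answer), the labels can be left as a single integer column, in which case the 2-unit softmax head pairs with sparse_categorical_crossentropy, which expects integer class indices:
# Sketch: integer labels (0/1), no one-hot encoding needed
model.add(Dense(2, activation='softmax'))
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',  # takes integer class indices
              metrics=['accuracy'])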

pandas dataframe to tensorflow input

I want to use a pandas DataFrame as input to a neural net.
My neural net model is:
def build_model():
    model = Sequential()
    model.add(Dense(128, activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(64, activation="relu"))
    model.add(Dropout(0.1))
    model.add(Dense(32, activation="softmax"))
    model.compile(
        optimizer='adam',
        loss=['binary_crossentropy'],
        metrics=['accuracy']
    )
    return model

tensorboard = TensorBoard(log_dir=f"logs/{time.time()}", histogram_freq=1)
model = build_model()
history = model.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=32,
    validation_data=(x_val, y_val),
    callbacks=[tensorboard]
)
and I pass my DataFrame as input like this:
y_val, x_val, y_train, x_train = (
    test_data.drop(['gender', 'comorbidities_count',
                    'comorbidities_significant_count', 'medication_count'], axis=1),
    test_data.drop(['fried'], axis=1),
    training_data.drop(['gender', 'comorbidities_count',
                        'comorbidities_significant_count', 'medication_count'], axis=1),
    training_data.drop(['fried'], axis=1)
)
but i get this error:
ValueError: Please provide as model inputs either a single array or a list of arrays.
Does anyone know how to turn this DataFrame into an array so I can feed it? Or is there some other issue I am not aware of?
Use:
import numpy as np

y_val, x_val, y_train, x_train = (
    test_data.drop(['gender', 'comorbidities_count',
                    'comorbidities_significant_count', 'medication_count'], axis=1).to_numpy().astype(np.float32),
    test_data.drop(['fried'], axis=1).to_numpy().astype(np.float32),
    training_data.drop(['gender', 'comorbidities_count',
                        'comorbidities_significant_count', 'medication_count'], axis=1).to_numpy().astype(np.float32),
    training_data.drop(['fried'], axis=1).to_numpy().astype(np.float32)
)
The .to_numpy() method of a pandas DataFrame turns it into a NumPy array.
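A slightly shorter equivalent (a sketch for just one of the four frames; DataFrame.to_numpy accepts a dtype argument directly):
# Equivalent: pass the target dtype straight to to_numpy()
x_train = training_data.drop(['fried'], axis=1).to_numpy(dtype=np.float32)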

Why did this Keras python program fail?

I followed a tutorial on YouTube and accidentally left out model.add(Dense(6, activation='relu')) in my Keras code, getting 36% accuracy. After I added this line, accuracy rose to 86%. Why did this happen?
This is the code:
from sklearn.model_selection import train_test_split
import keras
from keras.models import Sequential
from keras.layers import Dense
import numpy as np
np.random.seed(3)
classifications = 3
dataset = np.loadtxt('wine.csv', delimiter=",")
X = dataset[:,1:14]
Y = dataset[:,0:1]
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.66, random_state=5)
y_train = keras.utils.to_categorical(y_train-1, classifications)
y_test = keras.utils.to_categorical(y_test-1, classifications)
model = Sequential()
model.add(Dense(10, input_dim=13, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(6, activation='relu')) # This is the code I missed
model.add(Dense(6, activation='relu'))
model.add(Dense(4, activation='relu'))
model.add(Dense(2, activation='relu'))
model.add(Dense(classifications, activation='softmax'))
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=15, epochs=2500, validation_data=(x_test, y_test))
The number of layers is a hyperparameter, just like the learning rate or the number of neurons. These play an important role in determining the accuracy. So in your case,
model.add(Dense(6, activation='relu'))
played the key role. We cannot say exactly what each of these layers is doing internally. The best we can do is hyperparameter tuning to find the best combination of hyperparameters.
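For example, a minimal hand-rolled search over depth (a sketch; the layer widths, depth range, and epoch count are illustrative assumptions, reusing the variables from the question):
# Try a few depths and keep the one with the best held-out accuracy.
best_acc, best_depth = 0.0, None
for depth in range(1, 6):
    model = Sequential()
    model.add(Dense(10, input_dim=13, activation='relu'))
    for _ in range(depth):
        model.add(Dense(6, activation='relu'))
    model.add(Dense(classifications, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(x_train, y_train, batch_size=15, epochs=100, verbose=0)
    _, acc = model.evaluate(x_test, y_test, verbose=0)
    if acc > best_acc:
        best_acc, best_depth = acc, depth
print(best_depth, best_acc)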
In my opinion, it may also be the ratio of your training set to your test set. With 66% of the data held out for testing, the model can underfit, so one fewer Dense layer produces a larger swing in accuracy. Set test_size=0.2 and check again how much the missing layer changes the accuracy.
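Concretely, that suggestion only changes the split call:
# Hold out 20% for testing instead of 66%
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=5)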
