I am new to programming. I am trying to classify two classes (Crash, Non-Crash) based on two features (Length, Traffic_Volume) using 1 dimensional CNN. When I am trying to train the following model,
# Training and Testing Data
X_train, y_train = train[['Traffic_Volume', 'length']].values, train['Crash'].values
X_test, y_test = SH[['Traffic_Volume', 'length']].values, SH['Crash'].values
print ('Training data shape : ', X_train.shape, y_train.shape)
print ('Testing data shape : ', X_test.shape, y_test.shape)
# Training data shape : (316, 2) (316,)
# Testing data shape : (343, 2) (343,)
# Fit and Evaluate a Model
def baseline_model(n_features=343, seed=100):
    """Build and compile the 1D-CNN used as the AdaBoost base estimator.

    Each sample is treated as a sequence of ``n_features`` steps with a
    single channel, so the network expects input of shape
    ``(batch, n_features, 1)``.

    Args:
        n_features: Number of feature columns per sample.
        seed: Seed applied to the NumPy and TensorFlow random generators.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    numpy.random.seed(seed)
    tensorflow.random.set_seed(seed)
    # create model
    model = Sequential()
    # Conv1D needs 3D input (batch, steps, channels). input_shape describes
    # ONE sample, so it is derived from the feature count and never from the
    # number of rows in the data set.
    model.add(Conv1D(32, 3, padding="same", input_shape=(n_features, 1)))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(2))
    model.add(Activation('softmax'))
    # Compile model (re-seed so compilation starts from a known RNG state)
    numpy.random.seed(seed)
    tensorflow.random.set_seed(seed)
    # NOTE(review): categorical_crossentropy needs one-hot targets; presumably
    # Ada_CNN encodes the integer labels itself -- confirm against the library.
    model.compile(loss='categorical_crossentropy', optimizer='adagrad', metrics=['accuracy'])
    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()
    return model


# Classification
n_features = 2
n_classes = 2
batch_size = 10
from multi_adaboost_CNN import AdaBoostClassifier as Ada_CNN

n_estimators = 10
epochs = 1

# Add the channel axis Conv1D requires:
# (samples, n_features) -> (samples, n_features, 1)
X_train_r = X_train.reshape(X_train.shape[0], n_features, 1)

bdt_real_test_CNN = Ada_CNN(
    base_estimator=baseline_model(n_features=n_features),
    n_estimators=n_estimators,
    learning_rate=1,
    epochs=epochs)
bdt_real_test_CNN.fit(X_train_r, y_train, batch_size)
# Predict once and reuse the result for the accuracy report.
y_pred_CNN = bdt_real_test_CNN.predict(X_train_r)
print('\n Training accuracy of bdt_real_test_CNN (AdaBoost+CNN): {}'.format(accuracy_score(y_train, y_pred_CNN)))
I found this ValueError:
ValueError: Input 0 of layer sequential is incompatible with the layer: : expected min_ndim=3, found ndim=2. Full shape received: (None, 2)
I want to know what I should change to get an efficient model (Data.shape, n_features, n_classes, etc.)?
Related
I'm trying to build a binary classifier by combining a CNN (Conv1D) with a GRU. My dataset looks like this:
X_train shape : (223461, 5)
y_train shape :(223461,)
the X_train is like that and the Y_train is a labels (0,1) like that
first I convert that train dataset :
dataset = X_train.values
dataset=dataset[1:]
dataset = dataset.astype('float32')
dataset
the same for y-train:
dataset_target = y_train.values
dataset_target=dataset_target[1:]
dataset_target = dataset_target.astype('float32')
dataset_target
now the shapes are dataset.shape =(223460, 5)
, dataset_target.shape = (223460,)
Then my model structure is:
verbose, epochs, batch_size = 0, 100, 64
n_timesteps, n_features, n_outputs = dataset.shape[0], dataset.shape[1], dataset_target.shape[0]
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape= (n_timesteps,n_features)))
model.add(MaxPooling1D(pool_size=2))
model.add(GRU(64))
model.add(Dropout(0.4))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(1, activation='sigmoid'))
opt = Adam(learning_rate=0.01)
model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=opt , metrics=['accuracy'])
model.summary()
and when I want to fit dataset to my model:
# fit network
model.fit(dataset, dataset_target, epochs=epochs, batch_size=batch_size, verbose=1)
# evaluate model
_, accuracy = model.evaluate(X_test, y_test, batch_size=batch_size, verbose=1)
#accuracy
I get an error Input 0 of layer "sequential" is incompatible with the layer: expected shape=(None, 223460, 5), found shape=(64, 5)
Is the first axis of the dataset (223460 samples) actually time steps, and do you have 5 'channels' of data? In that case, it would help to slice the dataset along the first axis and then assign a single label to each slice, for example the last value of y_train that falls inside the slice. Then n_timesteps would be the length of a slice, and the shape of the dataset would be something like (n_samples, n_timesteps, 5). Basically, Conv1D expects each training sample to be 2D, but in your case it's 1D, because the first dimension is just the number of samples.
I might have interpreted the dataset the wrong way. In that case, please clarify how it works so I would fix my suggestion.
Here's the example:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.layers import Conv1D, MaxPooling1D, GRU, \
Dropout, Flatten, Dense
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam
import numpy as np
X_train = np.random.normal(0, 1, (223461, 5))
y_train = np.random.randint(0, 2, 223461)
dataset = X_train[1:]
dataset_target = y_train[1:]
n_timesteps = 10
# Slice dataset and target into consecutive windows of n_timesteps rows.
# np.split(a, k) yields k sections (not sections of length k), so the window
# count is computed explicitly and the arrays are reshaped instead.
n_samples = len(dataset) // n_timesteps
usable_rows = n_samples * n_timesteps  # drops an incomplete trailing window
dataset = dataset[:usable_rows].reshape(n_samples, n_timesteps, dataset.shape[1])
# Each window is labelled with the target of its last row.
dataset_target = dataset_target[:usable_rows].reshape(n_samples, n_timesteps)[:, -1]
Define and train the model:
def get_model(dataset, n_timesteps):
    """Build and compile the Conv1D + GRU binary classifier.

    Args:
        dataset: Array with at least three axes; axes 1 and 2 give the
            per-sample input shape ``(timesteps, features)`` of the model.
        n_timesteps: Accepted for backward compatibility only. The window
            length actually used is read from ``dataset.shape[1]`` so the
            model always matches the data it is given.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    window_length, n_features = dataset.shape[1], dataset.shape[2]
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(window_length, n_features)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(GRU(64))
    model.add(Dropout(0.4))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.4))
    # Single sigmoid unit paired with binary cross-entropy for 0/1 targets.
    model.add(Dense(1, activation='sigmoid'))
    opt = Adam(learning_rate=0.01)
    model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=opt, metrics=['accuracy'])
    model.summary()
    return model
verbose, epochs, batch_size = 0, 1, 64
model = get_model(dataset, n_timesteps)
model.fit(dataset, dataset_target, epochs=epochs, batch_size=batch_size, verbose=1)
Hope it helps!
I am trying to build a 1D CNN for numerical dataset. My dataset has 520 rows and 13 features. Here is the code below.
It gives
"ValueError: Input 0 of layer sequential_21 is incompatible with the layer: : expected min_ndim=3, found ndim=2. Full shape received: (1, 13)" error.
How do I need to set input shape, or do I have to reshape X_train? Any help is highly appreciated.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)
n_features = 13
# Conv1D expects (batch, steps, channels). Each row becomes one step with
# n_features channels, matching input_shape=(1, n_features) below.
# Assumes X is a NumPy array -- TODO confirm (a DataFrame needs .values first).
X_train = X_train.reshape(-1, 1, n_features)
X_test = X_test.reshape(-1, 1, n_features)
model = Sequential()
model.add(Conv1D(filters=1, kernel_size=1, activation='relu', input_shape=(1, n_features)))
model.add(Conv1D(filters=1, kernel_size=1, activation='relu'))
model.add(Dropout(0.5))
model.add(MaxPooling1D(pool_size=1))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(2, activation='softmax'))
# NOTE(review): a 2-unit softmax with binary_crossentropy presumably needs
# one-hot encoded y -- confirm how y is encoded.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=20, batch_size=1)
# predict_classes() is gone from recent Keras releases; take the arg-max of
# the class probabilities instead, and predict on the held-out X_test split.
yhat_classes = model.predict(X_test, verbose=0).argmax(axis=-1)
The problem is that batching your NumPy dataset yields rows. You use a batch size of 1, so the generator yields one row, resulting in an array of shape (1, n_features), but you want shape (batch_size, 1, n_features).
Adding a dimension to the dataset before splitting it should fix the problem:
X = X.reshape(-1, 1, n_features)
I'm trying to build a model to classify my text as hate (1) or not (0) using a neural network.
Information about the data: it consists of tweets and a class label (hate (1) or not (0)):
sentences = df['comment']
y = df['isHate']
sentences_train, sentences_test, train_y, test_y = train_test_split(sentences, y, test_size=0.25, random_state=42)
the text get through a lot of Word Embeddings and I applied pad sequences on the tweets and LabelEncoder on the labels.
the problem is when I do the run I get this error:
ValueError: logits and labels must have the same shape ((None, 1) vs (None, 2))
the code of the model:
emb_dim = 16
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim= emb_dim, input_length=maxlen))
model.add(Flatten())
model.add(Dense(2, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam',
loss='binary_crossentropy',
metrics=['accuracy'])
model.summary()
the problem happened in this part:
history = model.fit(X_train, y_train,
batch_size=32,
epochs=15,
validation_data=(X_test, y_test))
Any help?
In your code:
model.add(Dense(1, activation='sigmoid'))
Your last dense layer has only 1 unit, but your labels are one-hot encoded and consist of 2 classes. So you need to change it to:
model.add(Dense(2, activation='softmax'))
You also need to change your loss function, because they are one-hot-encoded:
loss='categorical_crossentropy'
I'm trying to build a 1D CNN model by processing ECG signals to diagnose sleep apnea.
I am using the sklearn library and encountered an error in train_test_split. Here is my code:
# loading the file
with open("ApneaData.csv") as csvDataFile:
csvReader = csv.reader(csvDataFile)
for line in csvReader:
lis.append(line[0].split()) # create a list of lists
# making a list of all x-variables
for i in range(1, len(lis)):
data.append(list(map(int, lis[i])))
# a list of all y-variables (either 0 or 1)
target = Extract(data) # sleep apn or not
# converting to numpy arrays
data = np.array(data)
target = np.array(target)
# stacking data into 3D
loaded = dstack(data)
change = dstack(target)
trainX, testX, trainy, testy = train_test_split(loaded, change, test_size=0.3)
# the model
verbose, epochs, batch_size = 0, 10, 32
n_timesteps, n_features, n_outputs = trainX.shape[0], trainX.shape[1], trainy.shape[0]
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps,n_features)))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Dropout(0.5))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(n_outputs, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fitting the model
model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
# evaluate model
_, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
I get the error:
ValueError: Error when checking input: expected conv1d_15_input to have shape (11627, 6001) but got array with shape (6001, 1)
I don't understand what I'm doing wrong? Any help would be much appreciated.
I think that n_timesteps and n_features should be shape[1] and shape[2], the first dimension is your number of samples
First,
# a list of all y-variables (either 0 or 1)
target = Extract(data) # sleep apn or not
This suggests you're doing binary classification, and it seems you haven't applied one-hot encoding, so your last layer should use a sigmoid activation.
Second, the first dimension denotes the number of samples. So,
# Keep samples on axis 0 and timesteps on axis 1; -1 supplies the feature axis.
trainX = trainX.reshape(trainX.shape[0], trainX.shape[1], -1)
(add a third dimension if not there already)
n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], 1
Finally, change your model.
model.add(Dense(n_outputs, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
I am trying to run a LSTM using Keras on my custom features set. I have train and test features in separate files. Each csv file contains 11 columns with last column as class label. There are total 40 classes in my dataset. The problem is I am not able to figure out the correct input_shape to the first layer. I had explored all the stackoverflow and github but still not able to solve this
Below is my complete code.
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
numpy.random.seed(7)
train_dataset = numpy.loadtxt("train.csv", delimiter=",")
X_train = train_dataset[:, 0:10]
y_train = train_dataset[:, 10]
test_dataset = numpy.loadtxt("test.csv", delimiter=",")
X_test = test_dataset[:, 0:10]
y_test = test_dataset[:, 10]
model = Sequential()
model.add(LSTM(32, return_sequences=True, input_shape=X_train.shape))
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(32))
model.add(Dense(1, activation='softmax'))
model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=10, epochs=1)
score, acc = model.evaluate(X_test, y_test, batch_size=10)
print('Test score:', score)
print('Test accuracy:', acc * 100)
Whatever I change in the input_shape parameter, I either get an error in the first LSTM layer or in the fit method.
you don't have a time dimension in your input.
Input for RNN should be (batch_size, time_step, features) while your input has dimension (batch_size, features).
If you want to use your 10 columns one at a time you should reshape the array with
numpy.reshape(train_dataset, (-1, train_dataset.shape[1], 1))
Try this code:
NUM_FEATURES = 10  # columns 0-9 are features, column 10 is the class label
NUM_CLASSES = 40   # the data set is described as having 40 classes

train_dataset = numpy.loadtxt("train.csv", delimiter=",")
# Split features from labels first, then give the features a trailing axis so
# every column is fed to the LSTM as one time step with a single feature.
X_train = train_dataset[:, :NUM_FEATURES].reshape(-1, NUM_FEATURES, 1)
# assumes labels are integer class ids in 0..NUM_CLASSES-1 -- TODO confirm
y_train = train_dataset[:, NUM_FEATURES].astype(int)
test_dataset = numpy.loadtxt("test.csv", delimiter=",")
X_test = test_dataset[:, :NUM_FEATURES].reshape(-1, NUM_FEATURES, 1)
y_test = test_dataset[:, NUM_FEATURES].astype(int)
model = Sequential()
model.add(LSTM(32, return_sequences=True, input_shape=(NUM_FEATURES, 1)))
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(32))
# Softmax over a single unit always outputs 1.0; use one unit per class.
model.add(Dense(NUM_CLASSES, activation='softmax'))
# Sparse categorical cross-entropy takes the integer class ids directly.
model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=10, epochs=1)
score, acc = model.evaluate(X_test, y_test, batch_size=10)
print('Test score:', score)
print('Test accuracy:', acc * 100)