I'm trying to build a 1D CNN model that processes ECG signals to diagnose sleep apnea.
I'm using the sklearn library's train_test_split together with Keras, and I run into an error when training. Here is my code:
import csv
import numpy as np
from numpy import dstack
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Dropout, Flatten, Dense

lis, data = [], []
# loading the file
with open("ApneaData.csv") as csvDataFile:
    csvReader = csv.reader(csvDataFile)
    for line in csvReader:
        lis.append(line[0].split())  # create a list of lists
# making a list of all x-variables
for i in range(1, len(lis)):
    data.append(list(map(int, lis[i])))
# a list of all y-variables (either 0 or 1)
target = Extract(data)  # sleep apnea or not
# converting to numpy arrays
data = np.array(data)
target = np.array(target)
# stacking data into 3D
loaded = dstack(data)
change = dstack(target)
trainX, testX, trainy, testy = train_test_split(loaded, change, test_size=0.3)
# the model
verbose, epochs, batch_size = 0, 10, 32
n_timesteps, n_features, n_outputs = trainX.shape[0], trainX.shape[1], trainy.shape[0]
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps,n_features)))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Dropout(0.5))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(n_outputs, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# fitting the model
model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
# evaluate model
_, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
I get the error:
ValueError: Error when checking input: expected conv1d_15_input to have shape (11627, 6001) but got array with shape (6001, 1)
I don't understand what I'm doing wrong. Any help would be much appreciated.
I think that n_timesteps and n_features should be shape[1] and shape[2]; the first dimension is your number of samples.
First,
# a list of all y-variables (either 0 or 1)
target = Extract(data) # sleep apnea or not
This suggests you're doing binary classification, and it seems you haven't applied one-hot encoding. So your last layer should use a sigmoid activation.
Next, the first dimension denotes the number of samples, so:
trainX = trainX.reshape(trainX.shape[0], trainX.shape[1], -1)
(add a third dimension if not there already)
n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], 1
Finally, change your model.
model.add(Dense(n_outputs, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
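Putting the fixes together, here is a minimal end-to-end sketch. The data is a random stand-in shaped like the 11627 x 6001 array from the error message, scaled down to 200 samples so it runs quickly, and the Extract step is omitted:

import numpy as np
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Dropout, Flatten, Dense
from sklearn.model_selection import train_test_split

data = np.random.randn(200, 6001)        # stand-in for the real ECG matrix
target = np.random.randint(0, 2, 200)    # integer 0/1 labels, no one-hot

# add a channel axis: each sample becomes (timesteps, features) = (6001, 1)
data = data.reshape(data.shape[0], data.shape[1], 1)

trainX, testX, trainy, testy = train_test_split(data, target, test_size=0.3)
n_timesteps, n_features = trainX.shape[1], trainX.shape[2]

model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu',
                 input_shape=(n_timesteps, n_features)))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Dropout(0.5))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # one unit for a binary target
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(trainX, trainy, epochs=10, batch_size=32, verbose=0)
_, accuracy = model.evaluate(testX, testy, verbose=0)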
I'm trying to do binary classification by combining a CNN (Conv1D) with a GRU. My dataset looks like this:
X_train shape: (223461, 5)
y_train shape: (223461,)
X_train holds the feature values and y_train holds the labels (0 or 1).
First I convert the training dataset:
dataset = X_train.values
dataset = dataset[1:]
dataset = dataset.astype('float32')
The same for y_train:
dataset_target = y_train.values
dataset_target = dataset_target[1:]
dataset_target = dataset_target.astype('float32')
Now the shapes are dataset.shape = (223460, 5) and dataset_target.shape = (223460,).
Then my model structure is:
verbose, epochs, batch_size = 0, 100, 64
n_timesteps, n_features, n_outputs = dataset.shape[0], dataset.shape[1], dataset_target.shape[0]
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape= (n_timesteps,n_features)))
model.add(MaxPooling1D(pool_size=2))
model.add(GRU(64))
model.add(Dropout(0.4))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(1, activation='sigmoid'))
opt = Adam(learning_rate=0.01)
model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=opt , metrics=['accuracy'])
model.summary()
And when I fit the dataset to my model:
# fit network
model.fit(dataset, dataset_target, epochs=epochs, batch_size=batch_size, verbose=1)
# evaluate model
_, accuracy = model.evaluate(X_test, y_test, batch_size=batch_size, verbose=1)
#accuracy
I get the error: Input 0 of layer "sequential" is incompatible with the layer: expected shape=(None, 223460, 5), found shape=(64, 5)
Is the first axis of the dataset (223460 samples) actually time steps, and do you have 5 'channels' of data? In that case, it would help to slice the dataset along the first axis and assign a single label to each slice, for example the last value of the corresponding slice of y_train. n_timesteps would then be the length of a slice, and the shape of the dataset something like (n_samples, n_timesteps, 5). Basically, Conv1D expects each training sample to be 2D, but in your case it's 1D, because the first dimension is just the number of samples.
I might have interpreted the dataset the wrong way; if so, please clarify how it works and I'll fix my suggestion.
Here's the example:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.layers import Conv1D, MaxPooling1D, GRU, \
Dropout, Flatten, Dense
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam
import numpy as np
X_train = np.random.normal(0, 1, (223461, 5))
y_train = np.random.randint(0, 2, 223461)
dataset = X_train[1:]
dataset_target = y_train[1:]
n_timesteps = 10
# Slice dataset and target into windows of n_timesteps rows each;
# each window gets the label of its last time step
n_windows = len(dataset) // n_timesteps
dataset = np.stack(np.split(dataset[:n_windows * n_timesteps], n_windows))
dataset_target = np.stack([y[-1] for y in np.split(dataset_target[:n_windows * n_timesteps], n_windows)])
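A quick shape check (the numbers assume the 223460-row random data above):

print(dataset.shape)         # (22346, 10, 5)  -> (n_samples, n_timesteps, n_features)
print(dataset_target.shape)  # (22346,)        -> one label per window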
Define and train the model:
def get_model(dataset, n_timesteps):
    n_timesteps, n_features = dataset.shape[1], dataset.shape[2]
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu',
                     input_shape=(n_timesteps, n_features)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(GRU(64))
    model.add(Dropout(0.4))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(1, activation='sigmoid'))
    opt = Adam(learning_rate=0.01)
    model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=opt, metrics=['accuracy'])
    model.summary()
    return model
verbose, epochs, batch_size = 0, 1, 64
model = get_model(dataset, n_timesteps)
model.fit(dataset, dataset_target, epochs=epochs, batch_size=batch_size, verbose=1)
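As a usage note (a sketch, following the window shapes above): the sigmoid head returns one probability per window, which you can threshold at 0.5:

probs = model.predict(dataset[:5])       # shape (5, 1), values in [0, 1]
labels = (probs > 0.5).astype('int32')   # hard 0/1 class labels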
Hope it helps!
I am new to programming. I am trying to classify two classes (Crash, Non-Crash) based on two features (Length, Traffic_Volume) using a 1-dimensional CNN. When I try to train the following model,
# Training and Testing Data
X_train, y_train = train[['Traffic_Volume', 'length']].values, train['Crash'].values
X_test, y_test = SH[['Traffic_Volume', 'length']].values, SH['Crash'].values
print ('Training data shape : ', X_train.shape, y_train.shape)
print ('Testing data shape : ', X_test.shape, y_test.shape)
# Training data shape : (316, 2) (316,)
# Testing data shape : (343, 2) (343,)
# Fit and Evaluate a Model
def baseline_model(n_features=343, seed=100):
    numpy.random.seed(seed)
    tensorflow.random.set_seed(seed)
    # create model
    model = Sequential()
    model.add(Conv1D(32, 3, padding="same", input_shape=(343, 2)))
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(2))
    model.add(Activation('softmax'))
    # Compile model
    numpy.random.seed(seed)
    tensorflow.random.set_seed(seed)
    model.compile(loss='categorical_crossentropy', optimizer='adagrad', metrics=['accuracy'])
    print(model.summary())
    return model
# Classification
n_features=2
n_classes=2
batch_size=10
from multi_adaboost_CNN import AdaBoostClassifier as Ada_CNN
n_estimators =10
epochs =1
bdt_real_test_CNN = Ada_CNN(
    base_estimator=baseline_model(n_features=n_features),
    n_estimators=n_estimators,
    learning_rate=1,
    epochs=epochs)
bdt_real_test_CNN.fit(X_train, y_train, batch_size)
y_pred_CNN = bdt_real_test_CNN.predict(X_train)
print('\n Training accuracy of bdt_real_test_CNN (AdaBoost+CNN): {}'.format(accuracy_score(bdt_real_test_CNN.predict(X_train),y_train)))
I found this ValueError:
ValueError: Input 0 of layer sequential is incompatible with the layer: : expected min_ndim=3, found ndim=2. Full shape received: (None, 2)
What should I change (the data shape, n_features, n_classes, etc.) to get a working model?
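This is the same missing-third-dimension issue as in the threads above: Conv1D wants (samples, timesteps, channels), but X_train here is 2D. A minimal sketch of the reshape it is asking for, assuming the two features are treated as two time steps of one channel (the multi_adaboost_CNN wrapper is left out, and whether this encoding makes sense for Length and Traffic_Volume is a separate modelling question):

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Activation, MaxPooling1D, Flatten, Dense
from tensorflow.keras.utils import to_categorical

X_train = np.random.randn(316, 2)        # stand-in for (Traffic_Volume, length)
y_train = np.random.randint(0, 2, 316)   # stand-in for Crash / Non-Crash

X_train = X_train.reshape(-1, 2, 1)      # (samples, timesteps, channels)
y_train = to_categorical(y_train, 2)     # softmax head expects one-hot labels

model = Sequential()
model.add(Conv1D(32, 3, padding="same", input_shape=(2, 1)))
model.add(Activation('relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adagrad', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=1, batch_size=10)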
I'm trying to build a model to classify my text as hate (1) or not (0) using a neural network.
About the data: it consists of tweets and a class label (hate (1) or not (0)):
sentences = df['comment']
y = df['isHate']
sentences_train, sentences_test, train_y, test_y = train_test_split(sentences, y, test_size=0.25, random_state=42)
The text goes through word embeddings; I applied pad_sequences to the tweets and a LabelEncoder to the labels.
The problem is that when I run it, I get this error:
ValueError: logits and labels must have the same shape ((None, 1) vs (None, 2))
The code of the model:
emb_dim = 16
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim= emb_dim, input_length=maxlen))
model.add(Flatten())
model.add(Dense(2, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.summary()
The problem happens in this part:
history = model.fit(X_train, y_train,
                    batch_size=32,
                    epochs=15,
                    validation_data=(X_test, y_test))
Any help?
In your code:
model.add(Dense(1, activation='sigmoid'))
Your last dense layer has only 1 unit, but your labels are one-hot encoded with 2 classes. So you need to change it to:
model.add(Dense(2, activation='softmax'))
You also need to change your loss function, because the labels are one-hot encoded:
loss='categorical_crossentropy'
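For reference, a sketch of the corrected model with both fixes applied (vocab_size and maxlen are hypothetical placeholders). Alternatively, you could keep Dense(1, activation='sigmoid') with binary_crossentropy and feed plain 0/1 integer labels instead of one-hot vectors:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense

vocab_size, maxlen, emb_dim = 10000, 100, 16   # hypothetical values

model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=emb_dim, input_length=maxlen))
model.add(Flatten())
model.add(Dense(2, activation='relu'))
model.add(Dense(2, activation='softmax'))      # 2 units to match the one-hot labels
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])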
I changed the data type, but I could not resolve the error.
I tried one-hot encoding, but it didn't work either.
I don't know what's wrong. :(
seed = 0
np.random.seed(seed)
tf.set_random_seed(seed)
df = pd.read_csv('HW01_dataset_tae.txt', sep=',' ,header=None, names = ["Native", "Instructor", "Course", "Semester", "Class Size", "Evaluation"])
dataset = df.values # dataframe to int64
X = dataset[:,0:5] # attribute
Y_Eva = dataset[:,5] # class
e = LabelEncoder()
e.fit(Y_Eva)
Y = e.transform(Y_Eva)
K = 10
kFold = StratifiedKFold(n_splits=K, shuffle=True, random_state=seed)
accuracy = []
for train_index, test_index in kFold.split(X, Y):
    model = Sequential()
    model.add(Dense(16, input_dim=5, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='mean_squared_error',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.fit(X[train_index], Y[train_index], epochs=100, batch_size=2)
The error:
Error when checking target: expected dense_2 to have shape (3,) but got array with shape (1,)
is raised at model.fit(X[train_index], Y[train_index], epochs=100, batch_size=2).
What should I do?
I solved the problem.
In this code,
model.fit(X[train_index], Y[train_index], epochs=100, batch_size=2)
the target Y[train_index] must have three columns, one per class, because the output layer has three units.
The error came up because each label in Y[train_index] was a single integer rather than a vector of three values.
So I used one-hot encoding and changed the code like this:
e = LabelEncoder()
e.fit(Y_Eva)
Y = e.transform(Y_Eva)
Y_encoded = np_utils.to_categorical(Y) # changed code
K = 10
kFold = StratifiedKFold(n_splits=K, shuffle=True, random_state=seed)
accuracy = []
for train_index, test_index in kFold.split(X, Y):
    model = Sequential()
    model.add(Dense(32, input_dim=5, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.fit(X[train_index], Y_encoded[train_index], epochs=100, batch_size=2)  # changed code
Finally, I was able to run the code.
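As an aside, a sketch of an alternative that avoids the one-hot step entirely: Keras's sparse_categorical_crossentropy accepts integer class labels directly (random stand-in data below):

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

X = np.random.randn(100, 5).astype('float32')   # stand-in for the 5 attributes
Y = np.random.randint(0, 3, 100)                # integer labels 0..2, no one-hot needed

model = Sequential()
model.add(Dense(32, input_dim=5, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(3, activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.fit(X, Y, epochs=5, batch_size=2)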
TensorFlow has documentation on the Dense layer; if you pass input_shape instead of input_dim, you can specify the preferred shape:
https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense
model = Sequential()
model.add(Dense(16, input_shape=(5,)))  # your data then has to be of shape (batch, 5)
When you then add another Dense layer, you actually don't have to provide the input_shape:
model.add(Dense(10))
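For instance, a minimal sketch (layer sizes taken from the code above) to confirm what each layer expects:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(16, input_shape=(5,), activation='relu'))  # expects input of shape (batch, 5)
model.add(Dense(10, activation='relu'))                    # shape inferred from the layer above
model.add(Dense(3, activation='softmax'))
model.summary()  # first layer output: (None, 16)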
Note: first time posting. I've tried to be thorough in my description.
I've been trying to set up what I thought would be a very simple CNN by following this tutorial:
https://machinelearningmastery.com/cnn-models-for-human-activity-recognition-time-series-classification/
My Xtrain dataset is a time series stored as a numpy array with 34396 rows (samples) and 600 columns (time steps). My Ytrain dataset is just an array containing the labels 0, 1, or 2 (as ints). I'm trying to use the CNN to perform multi-class classification.
I'm running into errors like
Input 0 is incompatible with layer conv1d_39: expected ndim=3, found ndim=4
when input_shape=(n_timesteps, n_features, n_outputs), or
Error when checking input: expected conv1d_40_input to have 3 dimensions, but got array with shape (34396, 600)
when input_shape=(n_timesteps, n_features).
I've been searching online for hours, but I can't seem to find a solution. I think it's a simple problem with my data format and the input_shape values, but I haven't been able to fix it.
I've tried setting input_shape to
(None, 600, 1)
(34396,600, 1)
(34396,600)
(None,600)
among various other combinations.
train_df = pd.read_csv('training.csv')
test_df = pd.read_csv('test.csv')
x_train=train_df.iloc[:,2:].values
y_train=train_df.iloc[:,1].values
x_test=train_df.iloc[:,2:].values
y_test=train_df.iloc[:,1].values
n_rows=len(x_train)
n_cols=len(x_train[0])
def evaluate_model(trainX, trainy, testX, testy):
    verbose, epochs, batch_size = 0, 10, 32
    n_timesteps, n_features, n_outputs = trainX.shape[0], trainX.shape[1], 3
    print(n_timesteps, n_features, n_outputs)
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps, n_features, n_outputs)))
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
    model.add(Dropout(0.5))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # fit network
    model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
    # evaluate model
    _, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
    return accuracy
evaluate_model(x_train,y_train,x_test,y_test)
As given in the Keras docs for Conv1D, input_shape=(10, 128), for example, describes time series sequences of 10 time steps with 128 features per step.
So in your case, since you have 600 time steps each with 1 feature, it should be input_shape=(600, 1).
You also have to feed your labels (the y's) one-hot encoded.
Working code
from keras.utils import to_categorical
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(600,1)))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Dropout(0.5))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(10, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam', metrics=['accuracy'])
x = np.random.randn(100,600)
y = np.random.randint(0,10, size=(100))
# Reshape to no:of sample, time_steps, 1 and convert y to one hot encoding
model.fit(x.reshape(100,600,1), to_categorical(y))
# Same as model.fit(np.expand_dims(x, 2), to_categorical(y))
Output:
Epoch 1/1
100/100 [===========================] - 0s 382us/step - loss: 2.3245 - acc: 0.0800