I am trying to train a neural network to predict the next point of a sine wave using MLPRegressor, looking at performances I cannot get the R2 to be above 0.90, any help would be greatly appreciated!
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import numpy as np
import pandas as pd
# Create an `np.array` containing a sequence of 10 values of the sine function.
seq = np.array([np.sin(2*np.pi*t/10) for t in range(10)])
#generate 100 pairs of two-point sequences from the values in `seq`.
num_sequences = 100
x_train = np.array([])
y_train = np.array([])
for i in range(num_sequences):
rand = np.random.randint(10)
x_train = np.append(x_train, [[seq[rand], seq[np.mod(rand+1, 10)]]])
y_train = np.append(y_train, seq[np.mod(rand+1, 10)])
x_train = np.resize(x_train, (100, 2))
x_test = np.array([seq, np.roll(seq, -1)])
y_test = np.array(np.roll(seq, -2))
x_test = np.reshape(x_test, (-1, 2))
# Place the data in a `pandas` `DataFrame`.
pdata = pd.DataFrame({'x1':x_train[:,0], 'x2':x_train[:,1],'y':y_train})
reg_model = MLPRegressor(hidden_layer_sizes = (64,64,64), activation = "relu", random_state = 2, max_iter = 400)
reg_model.fit(x_train, y_train)
y_pred = reg_model.predict(x_test)
print(' R2 = ', r2_score(y_pred, y_test))
print('MAE = ', mean_absolute_error(y_pred, y_test))
print('MSE = ', mean_squared_error(y_pred, y_test))
Your data was not correctly set up.
You can use this code to create a sine wave:
sin_wave = np.sin(np.arange(-1000, 1000, 0.1))
Then use this code to create data for training and testing:
input_seq = np.array([sin_wave[x:x+2] for x in range(len(sin_wave)-2)])
output = np.array([sin_wave[x+2] for x in range(len(sin_wave)-2)])
x_train = input_seq[:1500]
y_train = output[:1500]
x_test = input_seq[1500:]
y_test = output[1500:]
Finally, use this code to build, train and evaluate your model:
reg_model = MLPRegressor(hidden_layer_sizes = (64,64,64), activation = "relu", random_state = 2, max_iter = 400)
reg_model.fit(x_train, y_train)
y_pred = reg_model.predict(x_test)
print(' R2 = ', r2_score(y_pred, y_test))
print('MAE = ', mean_absolute_error(y_pred, y_test))
print('MSE = ', mean_squared_error(y_pred, y_test))
The complete code:
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import numpy as np
sin_wave = np.sin(np.arange(-1000, 1000, 0.1))
input_seq = np.array([sin_wave[x:x+2] for x in range(len(sin_wave)-2)])
output = np.array([sin_wave[x+2] for x in range(len(sin_wave)-2)])
# Split into Train and test
x_train = input_seq[:1500]
y_train = output[:1500]
x_test = input_seq[1500:]
y_test = output[1500:]
reg_model = MLPRegressor(hidden_layer_sizes = (64,64,64), activation = "relu", random_state = 2, max_iter = 400)
reg_model.fit(x_train, y_train)
y_pred = reg_model.predict(x_test)
print(' R2 = ', r2_score(y_pred, y_test))
print('MAE = ', mean_absolute_error(y_pred, y_test))
print('MSE = ', mean_squared_error(y_pred, y_test))
Related
I am trying to generate a LSTM model using Keras. I create a simple sine wave example which contain more thang 1000 point to predict the next point. But the result is not good as i expected. When i fit the model the result is moves between 0~1 not like the sine wave. I have tried to change parameter like epoch, batchsize, learning rate, but it is not better.
model predict image
What am I doing wrong?
import joblib
import numpy as np
import matplotlib.pyplot as plt
import copy
import gc
import os
import sys
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras
import tensorflow as tf
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from keras.callbacks import Callback
learning_rate = 0.001
len_train = 30
total_predict = 300
len_test = 400
epoch = 100
batch_size = 32
workers = -1
class Callback_Class(Callback):
def load_data(self, x_test, y_test):
self.x_test = x_test
self.y_test = np.array(y_test)
def model_predict(self, data_close):
output_predict = []
for i in range(total_predict):
if (i==0):
data_close_ = data_close.reshape(-1, len_train, 1)
else:
data_close_ = np.delete(data_close_, 0)
data_close_ = np.append(data_close_, pred_close)
data_close_ = data_close_.reshape(-1, len_train, 1)
pred_close = model.predict(data_close_)
pred_close = pred_close.ravel()
pred_close = np.array(pred_close).reshape(len(pred_close), 1)
pred_cl = sc.inverse_transform(pred_close)
output_predict.append(pred_cl)
output_predict = np.array(output_predict)
return output_predict
def on_epoch_end(self, epoch, logs=None):
if (epoch % 20 == 0):
output_predict = self.model_predict(self.x_test)
fig, ax = plt.subplots(figsize=(12,6))
ax.grid(True)
plt.title(f"Model predict")
plt.plot(output_predict.ravel(), color="red", label='Predict')
plt.plot(self.y_test.ravel(), color="blue", label='REAL')
fig.tight_layout()
plt.legend(loc='lower left')
plt.savefig(f'Demo_lstm_epoch_{epoch}.png')
plt.clf()
plt.close()
def lstm_reg(input_shape=(60, 1), unit=40, clustering_params=None):
inputs = Input(input_shape)
lstm1f = Bidirectional(LSTM(units=32, return_sequences=True))(inputs)
lstm1f = Bidirectional(LSTM(units=32, return_sequences=False))(lstm1f)
outputs = Dense(units=1, activation='linear')(lstm1f)
model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate), loss='mean_squared_error', metrics=["accuracy"])
return model
def create_data_train(data_time_series):
data_time_series = np.array(data_time_series).ravel()
X_train = []
y_train = []
for i in range(len_train, len(data_time_series)):
X_train.append(data_time_series[i-len_train:i])
y_train.append(data_time_series[i])
X_train, y_train = np.array(X_train), np.array(y_train)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
return X_train, y_train
x = np.linspace(-20*np.pi, 20*np.pi, 2001)
sin_alpha = np.sin(x).ravel()
sin_alpha_train = np.array(copy.deepcopy(sin_alpha))[:len(sin_alpha)-len_test]
sin_alpha_train = np.array(sin_alpha_train).reshape(len(sin_alpha_train), 1)
sc = MinMaxScaler(feature_range=(0, 1))
sin_alpha_train = sc.fit_transform(sin_alpha_train)
X_train, y_train = create_data_train(sin_alpha_train)
joblib.dump(sc, f'Demo_MinMaxScaler.gz')
sc = joblib.load(f"Demo_MinMaxScaler.gz")
X_test = np.array(copy.deepcopy(sin_alpha))[len(sin_alpha)-len_test:len(sin_alpha)-len_test+len_train]
X_test = np.array(X_test).reshape(len(X_test), 1)
X_test = sc.fit_transform(X_test)
y_test = np.array(copy.deepcopy(sin_alpha))[len(sin_alpha)-len_test+len_train:len(sin_alpha)-len_test+len_train+total_predict]
model = lstm_reg(input_shape=(len_train, 1), unit=int(2*(len_train+len(y_train))/3))
model.summary()
callback_class = Callback_Class()
callback_class.load_data(X_test, y_test)
model.fit(X_train, y_train, epochs=epoch, use_multiprocessing=True, verbose=1, callbacks=[callback_class], workers=workers, batch_size=batch_size)
It seems like you are normalizing your features and your labels in these lines
sc = MinMaxScaler(feature_range=(0, 1))
sin_alpha_train = sc.fit_transform(sin_alpha_train)
X_train, y_train = create_data_train(sin_alpha_train)
Try it without scaling your label set. Due to your output layer using the linear activation function, which is correct as you're working on a regression problem, the model should be able to handle non scaled labels. The model only learns your data in a range of 0 to 1 while your sine wave goes from -1 to 1.
Here is my code.
import pandas as pd
import numpy as np
import json
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import StandardScaler
training_data = pd.read_csv('/Users/aus10/Desktop/MLB_Data/Test_Training_Data/MLB_Training_Data.csv')
df_model = training_data.copy()
scaler = StandardScaler()
features = [['OBS', 'Runs']]
for feature in features:
df_model[feature] = scaler.fit_transform(df_model[feature])
test_data = pd.read_csv('/Users/aus10/Desktop/MLB_Data/Test_Training_Data/Test_Data.csv')
X = training_data.iloc[:,1] #independent columns
y = training_data.iloc[:,-1] #target column
X = X.values.reshape(-1,1)
results = []
# fit final model
model = XGBRegressor(objective="reg:squarederror", random_state=42)
model.fit(X, y)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=4)
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print('MSE train: %.3f, test: %.3f' % (
round(mean_squared_error(y_train, y_train_pred),2),
round(mean_squared_error(y_test, y_test_pred),2)
))
print('R^2 train: %.3f, test: %.3f' % (r2_score(y_train, y_train_pred), r2_score(y_test, y_test_pred)))
# define one new data instance
index = 0
count = 0
while count < len(test_data):
team = test_data.loc[index].at['Team']
OBS = test_data.loc[index].at['OBS']
Xnew = [[ OBS ]]
# make a prediction
ynew = model.predict(Xnew)
# show the inputs and predicted outputs
results.append(
{
'Team': team,
'Runs': (round(ynew[0],2))
})
index += 1
count += 1
sorted_results = sorted(results, key=lambda k: k['Runs'], reverse=True)
df = pd.DataFrame(sorted_results, columns=[
'Team', 'Runs'])
writer = pd.ExcelWriter('/Users/aus10/Desktop/MLB_Data/ML/Results/Projected_Runs_XGBoost.xlsx', engine='xlsxwriter') # pylint: disable=abstract-class-instantiated
df.to_excel(writer, sheet_name='Sheet1', index=False)
df.style.set_properties(**{'text-align': 'center'})
pd.set_option('display.max_colwidth', 100)
pd.set_option('display.width', 1000)
writer.save()
and the error I'm getting is TypeError: Input data can not be a list.
The data coming from test_data is a csv with a team name and obs which is a float
like this NYY 0.324
Every way to solve it I've seen is just to put it in a 2d array like I did - Xnew = [[ OBS ]],
but I'm still getting the error.
Is there something else I need to do to the test_data coming in? I tried using values.reshape, but that didn't fix it either.
You need to transform your Xnew:
Xnew = np.array(Xnew).reshape((1,-1))
I simulated successfully my classification function to predict the single value of output binary by ANN utilizing pandas and sklearn libraries. Now I want to simulate my model to predict another feature which is not binary, as the input columns are (0,1,4,6,7,8,11,12,13,14) and the output column is (15) of my data set. A typical example of the input data is [4096,0.07324,1.7,20,5.2,64,0.142,0.5,35,30,584.232] as some values are float. How can I predict 584.232 by the first ten numbers utilizing logistic regression?
thank you all.
dataset = pd.read_csv("DataSet.csv")
X = dataset.iloc[:, [0,1,4,6,7,8,11,12,13,14]].values
y = dataset.iloc[:, 15].values
for avoiding type error, I converted the input values into float using the following way:
dataset['ColumnsName'] = dataset['ColumnsName'].astype(float)
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
labelEncoder_X_delay_1 = LabelEncoder()
X[:, 1] = labelEncoder_X_1.fit_transform(X[:, 1])
labelEncoder_X_delay_2 = LabelEncoder()
X[:, 2] = labelEncoder_X_2.fit_transform(X[:, 2])
# normalizing the input
X = X.T
X = X / np.amax(X, axis=1)
X = X.T
# splitting the dataset into the training set and test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test =
train_test_split(X, y, test_size = 0.2, random_state = 0)
# feature scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# fitting logestic regression to the training set
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, y_train)
but after compiling the code up to now, it gives the error:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, y_train)
Traceback (most recent call last):
File "<ipython-input-5-f18c8875152f>", line 3, in <module>
classifier.fit(X_train, y_train)
File "C:\Users\ali\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1528, in fit
check_classification_targets(y)
File "C:\Users\ali\anaconda3\lib\site-packages\sklearn\utils\multiclass.py", line 169, in check_classification_targets
raise ValueError("Unknown label type: %r" % y_type)
ValueError: Unknown label type: 'continuous'
I have already converted the predefined columns from string to float!
dataset = pd.read_csv("DataSet.csv")
X = dataset.iloc[:, [0,1,4,6,7,8,11,12,13,14]].values
y = dataset.iloc[:, 15].values
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
labelEncoder_X_delay_1 = LabelEncoder()
X[:, 1] = labelEncoder_X_1.fit_transform(X[:, 1])
labelEncoder_X_delay_2 = LabelEncoder()
X[:, 2] = labelEncoder_X_2.fit_transform(X[:, 2])
# normalizing the input
X = X.T
X = X / np.amax(X, axis=1)
X = X.T
# splitting the dataset into the training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
# Activation Function
model = Sequential()
model.add(Dense(6, input_dim=9, activation= "relu"))
model.add(Dense(6, activation= "relu"))
model.add(Dense(6, activation= "relu"))
model.add(Dense(1))
# splitting the dataset into the training set and test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test =
train_test_split(X, y, test_size = 0.2, random_state = 0)
# feature scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# fitting logestic regression to the training set
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, y_train)
I have created a very simple Artificial Neural Network in Python. In the example below I get an accuracy based off of values in a confusion matrix. These are results of the confusion matrix:
array([[3990, 2],
[ 56, 172]])
I am interested in finding the rows where it was marked as false positive(2) and false negative(56).
The following is my code:
#Import the dataset
X = DBF2.iloc[:, 1:2].values
y = DBF2.iloc[:, 2].values
#Encoding categorical data
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_X_1 = LabelEncoder()
X[:, 0] = labelencoder_X_1.fit_transform(X[:, 0])
#Create dummy variables
onehotencoder = OneHotEncoder(categorical_features = [0])
X = onehotencoder.fit_transform(X).toarray()
#Remove 2 variables to avoid falling into the dummy variable trap
X = np.delete(X, [0], axis=1)
#Splitting the dataset
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2,
random_state = 0)
#Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
#Make the ANN
import keras
from keras.models import Sequential
from keras.layers import Dense
#Initialising the ANN
classifier = Sequential()
#Adding the input layer and the first hidden layer
classifier.add(Dense(units=200, kernel_initializer='uniform', activation='relu', input_dim=400))
#Adding a second hidden layer
classifier.add(Dense(units=200, kernel_initializer='uniform',
activation='relu'))
#Adding the output layer
classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
#Compiling the ANN
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
#Fitting the ANN to the training set
classifier.fit(X_train, y_train, batch_size=10, epochs=20)
#Predicting the Test set results
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)
#Making the confusion matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
#Find accuracy of test set
TruePos = cm[0,0]
FalsePos = cm[0,1]
TrueNeg = cm[1,1]
FalseNeg = cm[1,0]
accuracy = float(TruePos + TrueNeg) / float(TruePos + FalsePos + TrueNeg + FalseNeg)
accuracy = accuracy*100
print "Test Accuracy: " ,accuracy
In order to do that, you can use a mask on ypred and ytest:
X_test[(y_test == 1) & (y_pred[:,0].T == 0)]
X_test[(y_test == 0) & (y_pred[:,0].T == 1)]
Or if you don't care about separating FN from FP:
X_test[(y_test != y_pred[:,0].T).T]
A longer but smoother option would be to feed the actual and predicted values into 2 separate lists and generate miss-classifications.
Index the miss-classifications and then use the "loc" function to extract that certain row!
Mark all row:
def mark_ravel_data(y_true:np.array, predicted:np.array)-> np.array:
#np.hstack(y_true, predicted)
#numba.jit
def mark_ravel(y_true, predicted):
#print (rg, ' val:', y_true, '>>> pred:', predicted,)
return y_true | predicted << 1
tp = mark_ravel(True , True )
tn = mark_ravel(False , True )
fp = mark_ravel(True , False )
fn = mark_ravel(False , False )
print (f'tp:{tp} tn:{tn} fp:{fp} fn:{fn}')
v_mark_ravel = np.vectorize(mark_ravel, otypes=[int])
return v_mark_ravel(y_true=y_true, predicted=predicted)
print ('tp, tn, fp, fn, tp,')
mark_ravel_data(np.array([1,0,1,0, 1]), np.array([1,1,0,0,1]))
output:
tp, tn, fp, fn, tp,
tp:3 tn:2 fp:1 fn:0
array([3, 2, 1, 0, 3])
This is the code I used for fault detection. I need to design a CNN for fault detection with non-image dataset and I am unable to do so. Do I need to shape my input into 4D? I am getting the above error. Actually I have different Training and Testing samples. As Training, I have 480*52 and as Testing 960*52, So if I use them both, I am having another error saying target destination has different dimension.
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from keras.models import Sequential,Input,Model
from keras.layers import Dense,Dropout,Flatten
from keras.layers import Conv1D,MaxPooling1D
from keras.layers import LeakyReLU
# importing Dataset
from lib_read import read_data
folderk = 'TE_process/'
train, test = read_data( folderk )
#training datasets
#i have 10 files of datasets each of rows a480 and columns 52
#53 column i have assigned labels manually for training datasets
X_train = train.iloc[:,:-1].values
y_train = train.iloc[:,52].values
#testing datasets
#i have 10 files for testing easch of rows 960 and columns 52.
#53 column i have assigned label using the code from lib_read
#X_test = test.iloc[:,:-1].values
#y_test = test.iloc[:,52].values
# for 4_faults dataset: f1_small, f8_medium, f13_incipient, f17_big + no-faults
#Encoding the trianing datsets
labelencoder_X= LabelEncoder()
#Train
X_train[:,0] = labelencoder_X.fit_transform(X_train[:,0])
X_train[:,1] = labelencoder_X.fit_transform(X_train[:,1])
X_train[:,2] = labelencoder_X.fit_transform(X_train[:,2])
X_train[:,3] = labelencoder_X.fit_transform(X_train[:,3])
X_train[:,4] = labelencoder_X.fit_transform(X_train[:,4])
X_train[:,5] = labelencoder_X.fit_transform(X_train[:,5])
X_train[:,6] = labelencoder_X.fit_transform(X_train[:,6])
X_train[:,7] = labelencoder_X.fit_transform(X_train[:,7])
X_train[:,8] = labelencoder_X.fit_transform(X_train[:,8])
X_train[:,9] = labelencoder_X.fit_transform(X_train[:,9])
X_train[:,10] = labelencoder_X.fit_transform(X_train[:,10])
X_train[:,11] = labelencoder_X.fit_transform(X_train[:,11])
X_train[:,12] = labelencoder_X.fit_transform(X_train[:,12])
X_train[:,13] = labelencoder_X.fit_transform(X_train[:,13])
X_train[:,14] = labelencoder_X.fit_transform(X_train[:,14])
X_train[:,15] = labelencoder_X.fit_transform(X_train[:,15])
X_train[:,16] = labelencoder_X.fit_transform(X_train[:,16])
X_train[:,17] = labelencoder_X.fit_transform(X_train[:,17])
X_train[:,18] = labelencoder_X.fit_transform(X_train[:,18])
X_train[:,19] = labelencoder_X.fit_transform(X_train[:,19])
X_train[:,20] = labelencoder_X.fit_transform(X_train[:,20])
X_train[:,21] = labelencoder_X.fit_transform(X_train[:,21])
X_train[:,22] = labelencoder_X.fit_transform(X_train[:,22])
X_train[:,23] = labelencoder_X.fit_transform(X_train[:,23])
X_train[:,24] = labelencoder_X.fit_transform(X_train[:,24])
X_train[:,25] = labelencoder_X.fit_transform(X_train[:,25])
X_train[:,26] = labelencoder_X.fit_transform(X_train[:,26])
X_train[:,27] = labelencoder_X.fit_transform(X_train[:,27])
X_train[:,28] = labelencoder_X.fit_transform(X_train[:,28])
X_train[:,29] = labelencoder_X.fit_transform(X_train[:,29])
X_train[:,30] = labelencoder_X.fit_transform(X_train[:,30])
X_train[:,31] = labelencoder_X.fit_transform(X_train[:,31])
X_train[:,32] = labelencoder_X.fit_transform(X_train[:,32])
X_train[:,33] = labelencoder_X.fit_transform(X_train[:,33])
X_train[:,34] = labelencoder_X.fit_transform(X_train[:,34])
X_train[:,35] = labelencoder_X.fit_transform(X_train[:,35])
X_train[:,36] = labelencoder_X.fit_transform(X_train[:,36])
X_train[:,37] = labelencoder_X.fit_transform(X_train[:,37])
X_train[:,38] = labelencoder_X.fit_transform(X_train[:,38])
X_train[:,39] = labelencoder_X.fit_transform(X_train[:,39])
X_train[:,40] = labelencoder_X.fit_transform(X_train[:,40])
X_train[:,41] = labelencoder_X.fit_transform(X_train[:,41])
X_train[:,42] = labelencoder_X.fit_transform(X_train[:,42])
X_train[:,43] = labelencoder_X.fit_transform(X_train[:,43])
X_train[:,44] = labelencoder_X.fit_transform(X_train[:,44])
X_train[:,45] = labelencoder_X.fit_transform(X_train[:,45])
X_train[:,46] = labelencoder_X.fit_transform(X_train[:,46])
X_train[:,47] = labelencoder_X.fit_transform(X_train[:,47])
X_train[:,48] = labelencoder_X.fit_transform(X_train[:,48])
X_train[:,49] = labelencoder_X.fit_transform(X_train[:,49])
X_train[:,50] = labelencoder_X.fit_transform(X_train[:,50])
X_train[:,51] = labelencoder_X.fit_transform(X_train[:,51])
labelencoder_yt = LabelEncoder()
y_train = labelencoder_yt.fit_transform(y_train)
yt_encoded = OneHotEncoder(categorical_features=[0])
y_train = yt_encoded.fit_transform(y_train.reshape(-1,1)).toarray()
#Spliting the datasets
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size= 0.2, random_state=0)
#num_train,height,width,depth=X_train.shape
#num_test=X_test.shape[0]
#Standardize
ss_X=StandardScaler()
X_train = ss_X.fit_transform(X_train)
X_test = ss_X.transform(X_test)
#tryning to rshape the datasets from 2D to 4D
import numpy as np
X_train=np.array(X_train)
X_train=X_train.reshape(3856,52)
#X_train = X_train.reshape(X_train.shape[0], 1, 20, 52)
#X_test = X_test.reshape(X_test.shape[0], 1, 480, 52)
#X_train = X_train.astype('float32')
#X_test = X_test.astype('float32')
#initializing CNN
fault_classifier = Sequential()
# Adding the input layer
fault_classifier.add(Conv1D(64, kernel_size=(3), activation="relu",input_shape=(3856,52)))
fault_classifier.add(LeakyReLU(0.1))
fault_classifier.add(Conv1D(64, kernel_size=(3), activation="relu"))
fault_classifier.add(LeakyReLU(0.1))
fault_classifier.add(MaxPooling1D((2)))
#fault_classifier.add(Conv2D(128, kernel_size=(3,3), activation="relu",input_shape=(50,20,1)))
#fault_classifier.add(LeakyReLU(0.1))
#fault_classifier.add(MaxPooling2D((2,1)))
#fully connected layer
fault_classifier.add(Flatten())
fault_classifier.add(Dense(300, activation="relu"))
fault_classifier.add(LeakyReLU(0.1))
fault_classifier.add(Dense(10, activation='softmax'))
# sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
fault_classifier.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics = ['accuracy'])
#Fit
history = fault_classifier.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, batch_size=10)
# Predicting the Test set results
y_pred = fault_classifier.predict(X_test)
y_pred = (y_pred > 0.5)
pred_acc = accuracy_score(y_test, y_pred)