I am new to machine learning and created a neural network for regression output. I have ~95000 training examples and ~24000 test examples. I want to know how can I evaluate my model and get train and test errors? How to know the accuracy of this regression model? My Y variable values ranges between 100-200 and X have 9 input features in the dataset.
Here is my code:
import pandas as pd
from keras.layers import Dense, Activation,Dropout
from keras.models import Sequential
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from matplotlib import pyplot
# Importing the dataset
dataset = pd.read_csv('data2csv.csv')
X = dataset.iloc[:,1:10].values
y = dataset.iloc[:, :1].values
# Splitting the dataset into the Training set and Test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size = 0.2, random_state = 0) # split validation data out of the training set, not the full data, so it does not overlap the test set
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
X_val = sc.transform(X_val) # the validation set must be scaled too, since it is passed to fit() below
# Initialising the ANN
model = Sequential()
# Adding the input layer and the first hidden layer
model.add(Dense(10, activation = 'relu', input_dim = 9))
# Adding the second hidden layer
model.add(Dense(units = 5, activation = 'sigmoid'))
model.add(Dropout(0.2))
# Adding the third hidden layer
model.add(Dense(units = 5, activation = 'relu'))
model.add(Dropout(0.2))
model.add(Dense(units = 5, activation = 'relu'))
model.add(Dense(units = 5, activation = 'relu'))
# Adding the output layer
model.add(Dense(units = 1))
#model.add(Dense(1))
# Compiling the ANN
model.compile(optimizer = 'adam', loss = 'mean_squared_error',metrics=['mae','mse','mape','cosine'])
# Fitting the ANN to the Training set
history=model.fit(X_train, y_train,validation_data=(X_val, y_val) ,batch_size = 1000, epochs = 100)
test_loss = model.evaluate(X_test,y_test)
loss = history.history['loss']
acc = history.history['mean_absolute_error']
val_loss = history.history['val_loss']
val_acc = history.history['val_mean_absolute_error']
mape_loss=history.history['mean_absolute_percentage_error']
cosine_los=history.history['cosine_proximity']
pyplot.plot(history.history['mean_squared_error'])
pyplot.plot(history.history['mean_absolute_error'])
pyplot.plot(history.history['mean_absolute_percentage_error'])
pyplot.plot(history.history['cosine_proximity'])
pyplot.show()
epochs = range(1, len(loss)+1)
plt.plot(epochs, loss, 'ro', label='Training loss')
plt.legend()
plt.show()
y_pred = model.predict(X_test)
plt.plot(y_test, color = 'red', label = 'Real data')
plt.plot(y_pred, color = 'blue', label = 'Predicted data')
plt.title('Prediction')
plt.legend()
plt.show()
My test results after model.evaluate. Note that evaluate returns 5 values here: the loss plus the 4 metrics listed in compile, in that order.
1) 84.69654303799824 (loss = mean_squared_error)
2) 7.030169963975834 (mean_absolute_error)
3) 84.69654303799824 (mean_squared_error)
4) 5.241855282313331 (mean_absolute_percentage_error)
5) -0.9999999996023872 (cosine_proximity)
To evaluate your model you can use evaluate method:
test_loss = model.evaluate(X_test, y_test)
It returns the loss on the given test data computed using the same loss function you used during training (i.e. mean_squared_error).
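For example, you can pair each value returned by evaluate with its name (a minimal sketch using the model and test data defined above):
# model.metrics_names lists the loss first, then the metrics in compile order
results = model.evaluate(X_test, y_test)
for name, value in zip(model.metrics_names, results):
    print(name, ':', value)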
Further, if you want to get the training loss at the end of each epoch, you can use the History object returned by the fit method:
history = model.fit(...)
loss = history.history['loss']
The loss is a list containing the training loss values at the end of each epoch. If you used validation data when training the model (i.e. model.fit(..., validation_data=(X_val, y_val))) or any other metric like mean absolute error (i.e. model.compile(..., metrics=['mae'])), you can also access their values (depending on your Keras version the history key may be 'mae' or 'mean_absolute_error'; print history.history.keys() to check):
acc = history.history['mae']
val_loss = history.history['val_loss']
val_acc = history.history['val_mae']
Bonus: To plot the training loss curve:
epochs = range(1, len(loss)+1)
plt.plot(epochs, loss, 'ro', label='Training loss')
plt.legend()
plt.show()
To show validation loss while training:
model.fit(X_train, y_train, batch_size = 1000, epochs = 100, validation_data = (X_val, y_val))
I don't think you can easily judge accuracy by plotting, since your input is 9-dimensional. You could plot the predicted y against each feature; just turn off the lines that join the dots, i.e. plt.plot(x, y, 'k.') (the 'k.' format gives dots with no connecting line), but I'm not sure how useful that will be.
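A minimal sketch of that idea (assuming X_test, y_test and y_pred from the code above; feature index 0 is just an example):
# scatter predictions and actuals against one input feature at a time;
# 'k.' and 'r.' draw dots with no connecting line
plt.plot(X_test[:, 0], y_pred, 'k.', label='Predicted')
plt.plot(X_test[:, 0], y_test, 'r.', label='Actual')
plt.xlabel('feature 0 (scaled)')
plt.legend()
plt.show()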
Related
Every time I change the dataset, I get a different accuracy: sometimes 97%, sometimes 50%, sometimes 92%. It is a text classification task. Why does this happen? The other ~95% results come from two datasets that are the same size and give almost the same result.
#Split DatA
X_train, X_test, label_train, label_test = train_test_split(X, Y, test_size=0.2,random_state=42)
#Size of train and test data:
print("Training:", len(X_train), len(label_train))
print("Testing: ", len(X_test), len(label_test))
#Function defined to test the models in the test set
def test_model(model, epoch_stop):
    # note: this fits on the test set itself, which leaks test data into the evaluation
    model.fit(X_test, Y_test,
              epochs=epoch_stop,
              batch_size=batch_size,
              verbose=0)
    results = model.evaluate(X_test, Y_test)
    return results
####################
maxlen = 300
#Bidirectional LSTM model
embedding_dim = 100
dropout = 0.5
opt = 'adam'
####################
#embed_dim = 128 #dimension of the word embedding vector for each word in a sequence
lstm_out = 196 #number of LSTM units
lstm_model = Sequential()
#Adding dropout
#lstm_model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
lstm_model.add(layers.Embedding(input_dim=num_words,
output_dim=embedding_dim,
input_length=X_train.shape[1]))
#lstm_model.add(Bidirectional(LSTM(lstm_out, return_sequences=True, dropout=0.2, recurrent_dropout=0.2)))
#lstm_model.add(Bidirectional(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2)))
lstm_model.add(Bidirectional(LSTM(64, return_sequences=True)))
lstm_model.add(layers.GlobalMaxPool1D())
#Adding a regularized dense layer
lstm_model.add(layers.Dense(32,kernel_regularizer=regularizers.l2(0.001),activation='relu'))
lstm_model.add(layers.Dropout(0.25))
lstm_model.add(Dense(3,activation='softmax'))
lstm_model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
print(lstm_model.summary())
#TRAINING
history = lstm_model.fit(X_train, label_train,
                         epochs=4,
                         verbose=True,
                         validation_data=(X_test, label_test),
                         batch_size=64)
loss, accuracy = lstm_model.evaluate(X_train, label_train, verbose=True)
print("Training Accuracy: {:.4f}".format(accuracy))
loss_val, accuracy_val = lstm_model.evaluate(X_test, label_test, verbose=True)
print("Testing Accuracy: {:.4f}".format(accuracy_val))
ML models base their predictions on the data they were trained on, so it is only natural that the outcome differs when the training data changes. It may also be the case that a different dataset performs better with different hyperparameters.
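One way to see how much of the variation comes from the split itself, rather than the dataset, is to repeat the split with different seeds and look at the spread (a minimal sketch, assuming X, Y, and a hypothetical build_model() factory that returns a freshly compiled model):
import numpy as np
from sklearn.model_selection import train_test_split

accuracies = []
for seed in range(5):
    # a fresh split and a fresh model for every seed
    X_tr, X_te, y_tr, y_te = train_test_split(X, Y, test_size=0.2, random_state=seed)
    model = build_model()  # hypothetical factory returning a compiled model
    model.fit(X_tr, y_tr, epochs=4, batch_size=64, verbose=0)
    _, acc = model.evaluate(X_te, y_te, verbose=0)
    accuracies.append(acc)
print('mean accuracy:', np.mean(accuracies), '+/-', np.std(accuracies))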
I would like to integrate a custom loss function for my LSTM in python. The code shows my approach so far.
How would I best implement the loss function shown in the images? How would I handle the constraint <0?
Thanks for any help!
Code
# Importing the libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import gridspec
ep=25 #Epochs
bs=32 #Batch-Size
vs=0.2 #Validation-Split
r=ep+1 #Range
# Importing the training set
dataset_train = pd.read_csv(r'C:\Users\Name\Desktop\Recurrent Neural Networks\JPM_train.csv',delimiter =';')
training_set = dataset_train.iloc[:, 1:2].values
# Feature Scaling
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range = (0, 1))
training_set_scaled = sc.fit_transform(training_set)
# Creating a data structure with 60 timesteps and 1 output
X_train = []
y_train = []
X_val=[]
y_val=[]
for i in range(60, 1516):
    X_train.append(training_set_scaled[i-60:i, 0])
    y_train.append(training_set_scaled[i, 0])
X_train, y_train, X_val, y_val = np.array(X_train), np.array(y_train), np.array(X_val), np.array(y_val)
# Reshaping
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
# Importing the Keras libraries and packages
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
def custom_loss(y_true, y_pred):
    if(#HERE):
        loss=(predicted_stock_price-real_stock_price)^2
    else:
        loss=0
    return loss
# Initialising the RNN
model = Sequential()
# Adding the first LSTM layer and some Dropout regularisation
model.add(LSTM(units = 50, return_sequences = True, input_shape = (X_train.shape[1], 1)))
model.add(Dropout(0.2))
# Adding a second LSTM layer and some Dropout regularisation
model.add(LSTM(units = 50, return_sequences = True))
model.add(Dropout(0.2))
# Adding a third LSTM layer and some Dropout regularisation
model.add(LSTM(units = 50, return_sequences = True))
model.add(Dropout(0.2))
# Adding a fourth LSTM layer and some Dropout regularisation
model.add(LSTM(units = 50))
model.add(Dropout(0.2))
# Adding the output layer
model.add(Dense(units = 1))
# Compiling the RNN
model.compile(optimizer = 'adam', loss = custom_loss ,metrics=['accuracy'])
# Fitting the RNN to the Training set
history=model.fit(X_train, y_train, epochs = ep, batch_size = bs, validation_split=vs)
# Getting the real stock price of 2017
dataset_test = pd.read_csv(r'C:\Users\Name\Desktop\Recurrent Neural Networks\JPM_test.csv',delimiter =';')
real_stock_price = dataset_test.iloc[:, 1:2].values
dataset_total = pd.concat((dataset_train['Preis'], dataset_test['Preis']), axis = 0)
inputs = dataset_total[len(dataset_total) - len(dataset_test) - 60:].values
inputs = inputs.reshape(-1,1)
inputs = sc.transform(inputs)
X_test = []
for i in range(60, 80):
    X_test.append(inputs[i-60:i, 0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predicted_stock_price = model.predict(X_test)
predicted_stock_price = sc.inverse_transform(predicted_stock_price)
history_dict = history.history
print(history_dict.keys())
accuracy = history_dict['accuracy']
validation_accuracy = history_dict['val_accuracy']
loss = history_dict['loss']
validation_loss = history_dict['val_loss']
gs = gridspec.GridSpec(2, 2)
#plt.tight_layout()
#plt.subplots_adjust(hspace=1.0)
fig = plt.figure(figsize=(16,16))
# Visualising the results
ax = plt.subplot(gs[1, :]) # row 1, span all columns
plt.plot(real_stock_price, color = 'red', label = 'Real Google Stock Price')
plt.plot(predicted_stock_price, color = 'blue', label = 'Predicted Google Stock Price')
plt.title('Google Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('Google Stock Price')
plt.legend()
plt.show()
Only the Custom loss function
def custom_loss(y_true, y_pred):
    if(#HERE):
        loss=(predicted_stock_price-real_stock_price)^2
    else:
        loss=0
    return loss
Pictures of the targeted loss function
Here is the link to the original text:
https://www.researchgate.net/publication/342094242_Deep_Stock_Predictions
You can use a loss function along these lines, which compares the current prediction (t) against the previous real_stock_price (t-1). Two things in your draft won't work in a Keras loss: a plain Python if cannot branch on tensors, and ^ is bitwise XOR in Python, not power. Expressing the < 0 constraint you mention and the square with backend ops, and assuming you build y_true with two columns (the real price at t and at t-1):
import keras.backend as K

def custom_loss(y_true, y_pred):
    # y_true[:, 0]: real price at t, y_true[:, 1]: real price at t-1
    direction = (y_true[:, 0] - y_true[:, 1]) * (y_pred[:, 0] - y_true[:, 1])
    squared_error = K.square(y_pred[:, 0] - y_true[:, 0])
    # penalise only samples whose predicted move goes against the real move (direction < 0)
    mask = K.cast(direction < 0, K.floatx())
    return K.mean(mask * squared_error)
I think the derivatives in backpropagation will not be affected by this shift in time.
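A quick way to sanity-check such a loss outside training (a minimal sketch; the two-column y_true layout is the assumption described above):
import numpy as np
import keras.backend as K

# one sample where the prediction moves with the real price, one where it moves against it
y_true = K.constant(np.array([[10.0, 9.0], [10.0, 11.0]], dtype='float32'))  # columns: price at t, price at t-1
y_pred = K.constant(np.array([[9.5], [11.5]], dtype='float32'))
print(K.eval(custom_loss(y_true, y_pred)))  # only the second sample is penalised: (11.5-10)^2 / 2 = 1.125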
I want to make predictions on an entire test set. Here the test set is only 20% of dataset A, which I understand is because the rest is reserved for training. When I save the weights and then make predictions on another dataset B, will it also split dataset B into a test set?
How can I make predictions on the entirety of dataset B using the weights learned on dataset A?
Thanks.
X = dataset.iloc[:, :-1].values
# Dependent Variable:
y = dataset.iloc[:, -1].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# Initialising the ANN
classifier = Sequential()
# Adding the input layer and the first hidden layer
classifier.add(Dense(units = 27, kernel_initializer = 'uniform', activation = 'relu', input_dim = 6))
# Adding the second hidden layer
classifier.add(Dense(units = 27, kernel_initializer = 'uniform', activation = 'relu'))
# Adding the output layer
classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
# Compiling the ANN
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
# Fitting the ANN to the Training set
classifier.fit(X_train, y_train, batch_size = 10, epochs = 20)
#making predictions on test data
classifier.predict(X_test)
If I am understanding correctly, you want to use your trained model on a completely new dataset?
Keras provides several ways to do this, but I think the most common one would be to save your trained model to an HDF5 file using the command
model.save("filepath/model.h5")
Now you can load and use your model wherever you want with
from keras.models import load_model
model = load_model("filepath/model.h5")
score = model.evaluate(X, Y)
where X is the feature columns of dataset B and Y is the response, to get your scoring. If dataset B is in the same instance, you can always just use
model.predict(X)
where X is now the feature columns of dataset B.
From what I understand you are asking 2 questions here:
First, the splitting of "dataset B" into a train and test set is done manually by you in the line
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0).
If, when you use your "dataset B", you want to test your classifier on ALL the data points of "dataset B", you do not have to do this train test split, and can simply pass the X values of "dataset B" to your classifier.
As for how to do this, as per your second question, it is the same as what you have already done with "dataset A"'s test set:
classifier.predict(X) will make predictions using the fit it already learned on "dataset A", assuming you do not recompile or call .fit() again.
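One detail worth flagging (a minimal sketch; dataset_B.csv is a hypothetical filename): the StandardScaler was fit on dataset A's training features, so dataset B must be transformed with that same scaler rather than refit:
import pandas as pd

# load dataset B in full; no train/test split is needed for pure prediction
dataset_B = pd.read_csv('dataset_B.csv')  # hypothetical file
X_B = dataset_B.iloc[:, :-1].values
X_B = sc.transform(X_B)  # reuse the scaler fit on dataset A's training set
predictions_B = classifier.predict(X_B)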
here is my code
def create_dataset(signal_data, look_back=1):
    dataX, dataY = [], []
    for i in range(len(signal_data) - look_back):
        dataX.append(signal_data[i:(i + look_back), 0])
        dataY.append(signal_data[i + look_back, 0])
    return np.array(dataX), np.array(dataY)
df = pd.read_csv('time_series.csv')
signal_data = df.Close.values.astype('float32')
signal_data = signal_data.reshape(len(df), 1)
scaler = MinMaxScaler(feature_range=(0, 1))
signal_data = scaler.fit_transform(signal_data)
train_size = int(len(signal_data) * 0.80)
test_size = len(signal_data) - train_size
# val_size = len(signal_data) - train_size - test_size
train = signal_data[0:train_size]
# val = signal_data[train_size:train_size+val_size]
test = signal_data[train_size:len(signal_data)]
x_train, y_train = create_dataset(train, look_back)
# x_val, y_val = create_dataset(val, look_back)
x_test, y_test = create_dataset(test, look_back)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
# x_val = np.reshape(x_val, (x_val.shape[0], x_val.shape[1], 1))
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
Now I want to add df.Open, df.High, df.Low and df.Volume.
How can I implement this in the code?
Should I just add them to signal_data? I'm wondering how to add the data so that I can train on multiple features, but I don't know where or how to implement it. I need your help.
Your valuable opinions and thoughts will be very much appreciated.
I made several modifications to your code. This should work. In summary:
I fixed the lines of code where you were hardcoding the selection of column 0. Now the target variable sits in the last position and the other features in the preceding ones
I fixed the reshapes: some of them were not needed, and the others were adjusted to keep all the dimensions
I fixed the model input shape: now you have 5 variables instead of 1
My general recommendations:
I would not use MinMaxScaler: it is risky because a single outlier can distort your whole distribution. Instead, use StandardScaler. More info here: http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html
I would scale the data afterwards, once train_x, test_x and their respective y counterparts are built. The reason is that otherwise you compute the scaling statistics using both the train and the test set, i.e. future information. That is by all means different from what you'll find when you run your code in a real situation, where you'll have to scale new data with past statistics. It is better to build a test set as close to reality as possible; see the sketch after these recommendations.
How do you know that your model is big enough to model your data? I would get rid of the dropouts and run the model to see if it can overfit the data. If the model can overfit the training data, it means the model is big enough, and you can then start regularising it to enhance generalisation. More info in this book: https://www.deeplearning.ai/machine-learning-yearning/
For the model metrics you chose accuracy, which is a classification metric. I would use one that matches the type of problem (regression), for example mean absolute error.
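A minimal sketch of that scaling order (fit on the training windows only, then transform everything with those statistics; variable names follow the code below):
from sklearn.preprocessing import StandardScaler

# fit the scaler on training data only, so no validation/test statistics leak in
scaler = StandardScaler()
scaler.fit(x_train.reshape(-1, x_train.shape[-1]))

def scale_windows(x):
    # flatten (samples, look_back, features) to 2D, scale, and restore the shape
    return scaler.transform(x.reshape(-1, x.shape[-1])).reshape(x.shape)

x_train, x_val, x_test = scale_windows(x_train), scale_windows(x_val), scale_windows(x_test)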
I hope I managed to help you :D
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM, Dropout, Conv2D, Reshape, TimeDistributed, Flatten, Conv1D, ConvLSTM2D, MaxPooling1D
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
import matplotlib.pyplot as plt
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
sess = tf.Session(config=config)
def create_dataset(signal_data, look_back=1):
    dataX, dataY = [], []
    for i in range(len(signal_data) - look_back):
        dataX.append(signal_data[i:(i + look_back), :])
        dataY.append(signal_data[i + look_back, -1])
    return np.array(dataX), np.array(dataY)
look_back = 20
df = pd.read_csv('kospi.csv')
signal_data = df[["Open", "Low", "High", "Volume", "Close"]].values.astype('float32')
scaler = MinMaxScaler(feature_range=(0, 1))
signal_data = scaler.fit_transform(signal_data)
train_size = int(len(signal_data) * 0.80)
test_size = len(signal_data) - train_size - int(len(signal_data) * 0.05)
val_size = len(signal_data) - train_size - test_size
train = signal_data[0:train_size]
val = signal_data[train_size:train_size+val_size]
test = signal_data[train_size+val_size:len(signal_data)]
x_train, y_train = create_dataset(train, look_back)
x_val, y_val = create_dataset(val, look_back)
x_test, y_test = create_dataset(test, look_back)
model = Sequential()
model.add(LSTM(128, input_shape=(None, 5),return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(128))  # input_shape is only needed on the first layer
model.add(Dropout(0.3))
model.add(Dense(128))
model.add(Dropout(0.3))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])  # mae instead of accuracy, per recommendation 4 above
model.summary()
hist = model.fit(x_train, y_train, epochs=20, batch_size=32, verbose=2, validation_data=(x_val, y_val))
trainScore = model.evaluate(x_train, y_train, verbose=0)
model.reset_states()
print('Train Score: ', trainScore)
valScore = model.evaluate(x_val, y_val, verbose=0)
model.reset_states()
print('Validation Score: ', valScore)
testScore = model.evaluate(x_test, y_test, verbose=0)
model.reset_states()
print('Test Score: ', testScore)
p = model.predict(x_test)
print(mean_squared_error(y_test, p))
import matplotlib.pyplot as pplt
pplt.plot(y_test)
pplt.plot(p)
pplt.legend(['testY', 'p'], loc='upper right')
pplt.show()
I have built a regression model using an ANN that relates 8 input parameters to 1 output parameter.
code
X = data.iloc[:,:-1]
y = data.iloc[:,8:9]*100
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train_us, X_test_us, y_train_us, y_test_us = train_test_split(X, y, test_size = 0.2, random_state = 0)
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_Y = StandardScaler()
X_train = sc_X.fit_transform(X_train_us)
X_test = sc_X.transform(X_test_us)
y_train = sc_Y.fit_transform(y_train_us)
y_test = sc_Y.transform(y_test_us)
# Importing the Keras libraries and packages
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
def base_model():
    # Initialising the ANN
    regressor = Sequential()
    # Adding the input layer and the first hidden layer
    regressor.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 8))
    # Adding the second hidden layer
    regressor.add(Dense(units = 4, kernel_initializer = 'uniform', activation = 'relu'))
    # Adding the output layer
    regressor.add(Dense(units = 1, kernel_initializer = 'uniform'))
    # Compiling the ANN
    regressor.compile(optimizer = 'adam', loss = 'mse', metrics = ['mae'])
    return regressor
# Fitting the ANN to the Training set
regressor = KerasRegressor(build_fn=base_model, epochs=500, batch_size=32)
regressor.fit(X_train,y_train)
# Predicting the Test & Train set with regressor built
y_pred = regressor.predict(X_test)
y_pred = sc_Y.inverse_transform(y_pred)
y_test = sc_Y.inverse_transform(y_test)
#calculate r2_score
from sklearn.metrics import r2_score
score_test = r2_score(y_test,y_pred)
I get an r2_score of 98%. The unit of my output variable is currently metres. If I multiply it by 100 to convert it to centimetres, train the model, and calculate the r2_score, it is 91%.
Why is my r2_score changing with the unit of the dependent variable? Shouldn't scaling take care of this?
Thanks!!