I use an LSTM model in Keras to predict time series data. I use MinMaxScaler to normalize the data and build the model with the following code:
# imports added for completeness; 'train' and 'test' are my raw series
from keras import backend as K
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler

# fit the scaler on the training data only, then apply it to the test data
sc = MinMaxScaler()
train_sc = sc.fit_transform(train)
test_sc = sc.transform(test)

# predict each value from the previous one (one-step-ahead targets)
X_train = train_sc[:-1]
y_train = train_sc[1:]
X_test = test_sc[:-1]
y_test = test_sc[1:]

# reshape to (samples, timesteps=1, features=1) for the LSTM input
X_train_t = X_train[:, None]
X_test_t = X_test[:, None]

K.clear_session()
model = Sequential()
model.add(LSTM(12, input_shape=(1, 1)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train_t, y_train, epochs=200, batch_size=1, verbose=1)

y_pred = model.predict(X_test_t)

# convert predictions and targets back to the original scale
real_pred = sc.inverse_transform(y_pred)
real_test = sc.inverse_transform(y_test)
The output is shown in the image below. When I pass the test values to model.predict(), it returns predicted values, so I think each predicted value should be compared with the next test value, as in the table below.
The picture shows that the predicted values do not match the actual values; it looks as though the output simply copies the input values. How can I fix this?
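To make the intended comparison concrete, here is a minimal sketch of the table I have in mind, assuming the arrays defined above (the column labels are only for illustration):
import pandas as pd

# real_pred[t] is the forecast for the value that follows X_test[t], and
# real_test[t] is the actual next value (because of the one-step shift above),
# so the two arrays are already aligned row by row
comparison = pd.DataFrame({
    'input': sc.inverse_transform(X_test).ravel(),
    'actual_next': real_test.ravel(),
    'predicted_next': real_pred.ravel(),
})
print(comparison.head(10))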
Hi, I want to use ResNet for text data. I looked at code examples for lots of other data and in the end wrote the following code, but I'm not sure whether it is the correct way to build a ResNet or not.
NOTE: this part is optional. If I receive an opinion on it, that would be great, but I'm only going to try it once the code above is corrected. If the above is the correct way, then I want to implement it like this: the ResNet should contain 18 layers in total, divided into four stages, and each stage should consist of two convolutional blocks. Each convolutional block should contain two convolutional layers with batch normalization and a ReLU non-linearity in between. Then the ResNet should pass the output from the convolutional layers to two fully-connected layers that use the reduced data to classify the initial data into a given website class. Last but not least, you should use the Adam optimizer and categorical cross-entropy (typically used for multi-class classification problems). Make sure that you identify and use the optimal hyper-parameters for your ResNet. (A minimal sketch of this architecture is included after my code below.)
import pandas as pd
import os
import numpy as np
from sklearn import metrics
from scipy.stats import zscore
from sklearn.model_selection import KFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
class ResNet_class():
    def __init__(self):
        # Cross-Validate
        self.no_of_folds = int(input('enter no of K_fold: '))
        self.kf = KFold(self.no_of_folds, shuffle=True, random_state=42)  # Use for KFold classification
        self.EPOCHS = int(input('enter no of epochs: '))

    def check_test(self):
        df = pd.read_csv(
            "https://data.heatonresearch.com/data/t81-558/jh-simple-dataset.csv",
            na_values=['NA', '?'])
        df = pd.concat([df, pd.get_dummies(df['job'], prefix="job")], axis=1)
        df.drop('job', axis=1, inplace=True)
        df = pd.concat([df, pd.get_dummies(df['area'], prefix="area")], axis=1)
        df.drop('area', axis=1, inplace=True)
        df = pd.concat([df, pd.get_dummies(df['product'], prefix="product")], axis=1)
        df.drop('product', axis=1, inplace=True)
        med = df['income'].median()
        df['income'] = df['income'].fillna(med)
        df['income'] = zscore(df['income'])
        df['aspect'] = zscore(df['aspect'])
        df['save_rate'] = zscore(df['save_rate'])
        df['subscriptions'] = zscore(df['subscriptions'])
        x_columns = df.columns.drop('age').drop('id')
        x = df[x_columns].values
        y = df['age'].values

        oos_y = []
        oos_pred = []
        fold = 0
        for train, test in self.kf.split(x):
            fold += 1
            print(f"Fold #{fold}")
            x_train = x[train]
            y_train = y[train]
            x_test = x[test]
            y_test = y[test]

            model = Sequential()
            model.add(Dense(20, input_dim=x.shape[1], activation='relu'))
            model.add(Dense(10, activation='relu'))
            model.add(Dense(1))
            model.compile(loss='mean_squared_error', optimizer='adam')
            model.fit(x_train, y_train, validation_data=(x_test, y_test), verbose=0,
                      epochs=self.EPOCHS)

            pred = model.predict(x_test)
            oos_y.append(y_test)
            oos_pred.append(pred)

            score = np.sqrt(metrics.mean_squared_error(pred, y_test))
            print(f"Fold score (RMSE): {score}")

        oos_y = np.concatenate(oos_y)
        oos_pred = np.concatenate(oos_pred)
        score = np.sqrt(metrics.mean_squared_error(oos_pred, oos_y))
        print(f"Final, out of sample score (RMSE): {score}")

        oos_y = pd.DataFrame(oos_y)
        oos_pred = pd.DataFrame(oos_pred)
        oosDF = pd.concat([df, oos_y, oos_pred], axis=1)

resnet = ResNet_class()
resnet.check_test()
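For reference, here is the minimal sketch of the 18-layer architecture described in the note above (four stages of two residual blocks, each block holding two convolutional layers with batch normalization and a ReLU in between, followed by two fully-connected layers, the Adam optimizer, and categorical cross-entropy). It assumes the text has already been converted into fixed-length numeric sequences; the sequence length, filter counts, and number of classes are illustrative assumptions, not values from my data.
from tensorflow.keras import layers, models, optimizers

def residual_block(x, filters, downsample=False):
    # Two Conv1D layers with batch normalization and a ReLU in between,
    # plus a projection shortcut when the shape changes
    stride = 2 if downsample else 1
    shortcut = x
    y = layers.Conv1D(filters, 3, strides=stride, padding='same')(x)
    y = layers.BatchNormalization()(y)
    y = layers.ReLU()(y)
    y = layers.Conv1D(filters, 3, padding='same')(y)
    y = layers.BatchNormalization()(y)
    if downsample or shortcut.shape[-1] != filters:
        shortcut = layers.Conv1D(filters, 1, strides=stride, padding='same')(shortcut)
        shortcut = layers.BatchNormalization()(shortcut)
    return layers.ReLU()(layers.Add()([y, shortcut]))

def build_text_resnet(seq_len=128, channels=1, n_classes=10):
    # 4 stages x 2 blocks x 2 convolutions = 16 conv layers,
    # plus the two Dense layers below = 18 weight layers in total
    inputs = layers.Input(shape=(seq_len, channels))
    x = inputs
    for stage, filters in enumerate([64, 128, 256, 512]):
        x = residual_block(x, filters, downsample=(stage > 0))
        x = residual_block(x, filters)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dense(128, activation='relu')(x)
    outputs = layers.Dense(n_classes, activation='softmax')(x)
    model = models.Model(inputs, outputs)
    model.compile(optimizer=optimizers.Adam(1e-3),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model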
I am currently using Keras to provide a sequential model for my data, but I think my data is too skewed because one of my categories contains 7 values and one of those accounts for 85% of the data. I think I need to standardize the columns by adjusting the weights. Will the preprocessing module from sklearn be able to help with this?
Below is the current code I have so far:
# imports assumed by this snippet (omitted from my original post)
import pandas as pd
from pandas import read_csv
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from keras.models import Sequential
from keras.layers import Dense

# load the dataset as a pandas DataFrame
data = read_csv(filename)
data = pd.DataFrame(data,columns= ['A','B','C'])
datas = data
# retrieve numpy array
dataset = data.values
# split into input (X) and output (y) variables
X = dataset[:, :-1]
y = dataset[:,-1]
# format all fields as string
X = X.astype(str)
# reshape target to be a 2d array
y = y.reshape((len(y), 1))
# load the dataset
def load_dataset(filename):
# load the dataset as a pandas DataFrame
data = read_csv(filename, header=None)
# retrieve numpy array
dataset = data.values
# split into input (X) and output (y) variables
X = dataset[:, :-1]
y = dataset[:,-1]
# format all fields as string
X = X.astype(str)
# reshape target to be a 2d array
y = y.reshape((len(y), 1))
return X, y
# split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
stdscaler = preprocessing.StandardScaler().fit(X_train)
X_scaled = stdscaler.transform(X)
X_train_scaled = stdscaler.transform(X_train)
X_test_scaled = stdscaler.transform(X_test)
print('Train', X_train.shape, y_train.shape)
print('Test', X_test.shape, y_test.shape)
# prepare input data
def prepare_inputs(X_train, X_test):
ohe = OneHotEncoder(handle_unknown='ignore')
ohe.fit(X_train)
X_train_enc = ohe.transform(X_train)
X_test_enc = ohe.transform(X_test)
return X_train_enc, X_test_enc
# prepare target
def prepare_targets(y_train, y_test):
le = LabelEncoder()
le.fit(y_train)
y_train_enc = le.transform(y_train)
y_test_enc = le.transform(y_test)
return y_train_enc, y_test_enc
# prepare input data
X_train_enc, X_test_enc = prepare_inputs(X_train_scaled, X_test_scaled)
# prepare output data
y_train_enc, y_test_enc = prepare_targets(y_train, y_test)
# define the model
model = Sequential()
model.add(Dense(10, input_dim=X_train_enc.shape[1], activation='relu', kernel_initializer='he_normal'))
model.add(Dense(1, activation='sigmoid'))
# compile the keras model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit the keras model on the dataset
model.fit(X_train_enc, y_train_enc, epochs=100, batch_size=16, verbose=2)
# evaluate the keras model
_, accuracy = model.evaluate(X_test_enc, y_test_enc, verbose=0)
print('Accuracy: %.2f' % (accuracy*100))
I highly suggest going through the example in the Keras documentation. You are facing a class-imbalance problem; check out this URL.
You need to use the class_weight argument when fitting your model.
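For example, a minimal sketch of how the weights could be computed and passed to fit(), assuming the X_train_enc / y_train_enc arrays from the question:
import numpy as np
from sklearn.utils.class_weight import compute_class_weight

# weight each class inversely to its frequency so the dominant class
# (85% of the data) does not swamp the loss
classes = np.unique(y_train_enc)
weights = compute_class_weight(class_weight='balanced', classes=classes, y=y_train_enc)
class_weights = {int(c): w for c, w in zip(classes, weights)}

model.fit(X_train_enc, y_train_enc,
          epochs=100, batch_size=16, verbose=2,
          class_weight=class_weights)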
I have the following classification model built:
def build_classifier(input_size):
hl_1_dim = int(input_size / 2)
hl_2_dim = int(hl_1_dim / 2)
classifier = Sequential()
classifier.add(Dense(units = hl_1_dim, kernel_initializer = 'normal', activation='relu', input_dim = input_size))
classifier.add(Dense(units = hl_2_dim, kernel_initializer = 'normal', activation='relu'))
classifier.add(Dense(units = 1, kernel_initializer = 'normal', activation='sigmoid') )
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics=['accuracy'])
return classifier
that I supply with a training set containing 20,983 rows and 209 columns. All values in the data set are normalized to the [0, 1] range.
To split the data set I use the train_test_split function like so:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.3, shuffle = True)
I train the model using these hyperparameters:
classifier = build_classifier(len(X_train[0]))
classifier.fit(X_train, Y_train, batch_size = 10, epochs = 50)
Regardless of any hyperparameter tuning I tried, whenever I frame my problem as a classification problem, I get 1.0 accuracy on both the test and training sets by the second epoch out of 50, and the binary_crossentropy loss is always an extremely small number, in the vicinity of 1.2403e-09. However, if I change the model to a linear regression, I get a more or less adequate loss using MSE, as well as reasonable cross-validation results:
def build_regressor(input_size):
hl_1_dim = int(input_size / 2)
hl_2_dim = int(hl_1_dim / 2)
regressor = Sequential()
regressor.add(Dense(units = hl_1_dim, kernel_initializer = 'normal', activation='relu', input_dim = input_size))
regressor.add(Dense(units = hl_2_dim, kernel_initializer = 'normal', activation='relu'))
regressor.add(Dense(units = 1, kernel_initializer = 'normal') )
#optimizer = keras.optimizers.RMSprop()
regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')
return regressor
Each row in my data set corresponds to an article from a news website, with various article parameters as columns, such as doc2vec vector representations of the article's text and title, the number of images/videos/iframes in the article, the article's author, etc., as well as the article's view count. The idea is to predict whether an article is going to be 'popular' based on the number of views it gets.
To frame this as a classification problem, I calculate the arithmetic mean of all the articles' view counts; if an article has more views than the average, I set the value of a 'Popular' column in my dataset to 1, and if an article's view count is lower than the average, I give it a value of 0.
So the 'X' matrix of my dataset consists of all the above-mentioned values, excluding the view-count column (obviously, since the ANN would otherwise just find the correlation between the view count and popularity).
The 'Y' array contains the values of the 'Popular' column (1 or 0).
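For example, here is a minimal sketch of how I build the label and the X/Y arrays, assuming a DataFrame df with a 'views' column (the names here are only illustrative):
# mark an article as popular (1) when its view count exceeds the mean, else 0
mean_views = df['views'].mean()
df['Popular'] = (df['views'] > mean_views).astype(int)

# the features exclude the raw view count, so the label cannot be read off directly
X = df.drop(columns=['views', 'Popular']).values
Y = df['Popular'].values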
I use a sigmoid activation function for the classifier model and binary_crossentropy as the loss function.
For the regression problem, I eliminate the 'Popular' column and directly use the view count as the values in the Y array. I use mean_squared_error as the loss function.
The optimizer is set to 'adam' for both the regression and classification models.
For regression, I get a loss of around 2087 for the predicted Y values corresponding to the X_test matrix, using mean_absolute_error() from sklearn.metrics, which is more or less what I would expect. But for the classifier, I get 1.0 accuracy and extremely low loss on the training set, as well as 1.0 accuracy on the test set.
I triple-checked all the columns in the training set to make sure no column contains values that might give a straight answer to whether the article is 'popular' or not. What might be the issue here?
So, I have a little project going on about predicting the 2019 NBA champion, but it seems that my code is not clear enough for Keras to understand what I want. I passed a list of past champions into my dataset and made it the output class in order to predict the current champion.
I'm using a dataset of team stats from the 2014 to 2018 regular seasons, and I'm assuming I would need the 2019 stats to do it. I encoded my dataset so my NN can understand it, one-hot encoding every feature I think is useful.
x = pd.concat([df.drop(['Unnamed: 0','Team','Game','Date','Opponent','LastSeasonChamp'], axis = 1), df_ohc], axis = 1)
y = df['LastSeasonChamp']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.35)
x_train = tf.keras.utils.normalize(x_train.values, axis = 1)
x_test = tf.keras.utils.normalize(x_test.values, axis = 1)
n_classes = 30
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(x_train.shape[1], input_shape = (x_train.shape[0],x_train.shape[1]), activation = tf.nn.relu))
model.add(tf.keras.layers.Dense(np.mean([x_train.shape[1], n_classes], dtype = int), activation = tf.nn.relu))
model.add(tf.keras.layers.Dense(n_classes, activation = tf.nn.softmax))
model.compile(optimizer = 'adagrad' , loss = 'sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train.values, epochs = 3)
model.evaluate(x_test, y_test)
model.save('nba_champ_2019')
new_model = tf.keras.models.load_model('nba_champ_2019')
pred = new_model.predict(x_test)
y_pred = to_categorical(pred)
So, I expected my y_pred to be a column of 0s and 1s, but all I get is a column full of 1s.
The to_categorical function is used to convert a list of class IDs into a one-hot matrix. You don't need it here; you should get the output you expect by simply removing it in this case.
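If you do want a discrete class label for each sample, a minimal sketch (assuming the pred array from the question) would be:
import numpy as np

# pred has one softmax probability per class for each sample,
# so take the index of the most probable class instead of one-hot encoding again
pred = new_model.predict(x_test)
y_pred = np.argmax(pred, axis=1)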
I'm currently undertaking my first 'real' DL project of (surprise) predicting stock movements. I know the odds of making anything useful are 1000:1, but I'm enjoying it and want to see it through; I've learnt more in my few weeks of attempting this than I have in the prior 6 months of completing MOOCs.
I'm building an LSTM using Keras to predict the next step forward, and have attempted the task both as classification (up/down/steady) and now as a regression problem. Both hit a similar roadblock in that my validation loss never improves from epoch #1.
I can get the model to overfit so that the training loss approaches zero with MSE (or 100% accuracy if classification), but at no stage does the validation loss decrease. This screams overfitting to my untrained eye, so I added varying amounts of dropout, but all that does is stifle the model's learning/training accuracy and shows no improvement in validation accuracy.
I have attempted to change a significant number of hyperparameters: learning rate, optimiser, batch size, lookback window, number of layers, number of units, dropout, number of samples, etc. I have also tried subsets of the data and subsets of the features, but I just can't get it to work, so I'm very thankful for any help.
Code below (it's not pretty, I know):
# imports assumed below (not all were shown in my original snippet)
import numpy as np
import feather
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.callbacks import ReduceLROnPlateau

# Import saved full dataframe ~ 200 features
df = feather.read_dataframe('df_feathered')
df.set_index('time', inplace=True)
# Difference the dataset to make stationary
df = df.diff(periods=1, axis=0)
# MAKE LARGE SAMPLE FOR TESTING
df_train = df.loc['2017-3-1':'2017-6-30']
df_val = df.loc['2017-7-1':'2017-8-31']
df_test = df.loc['2017-9-1':'2017-9-30']
# Make x_train, x_val sets by dropping target variable
x_train = df_train.drop('close+1', axis=1)
x_val = df_val.drop('close+1', axis=1)
# Scale the training data first then fit the transform to the test set
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_val)
# scaler = MinMaxScaler(feature_range=(0,1))
# x_train = scaler.fit_transform(df_train1)
# x_test = scaler.transform(df_val1)
# Create y_train, y_test, simply target variable for regression
y_train = df_train['close+1']
y_test = df_val['close+1']
# Define Lookback window for LSTM input
sliding_window = 15
# Convert x_train, x_test, y_train, y_test into 3d arrays
# (samples, timesteps, features) for LSTM input
dataXtrain = []
for i in range(len(x_train)-sliding_window-1):
a = x_train[i:(i+sliding_window), 0:(x_train.shape[1])]
dataXtrain.append(a)
dataXtest = []
for i in range(len(x_test)-sliding_window-1):
a = x_test[i:(i+sliding_window), 0:(x_test.shape[1])]
dataXtest.append(a)
dataYtrain = []
for i in range(len(y_train)-sliding_window-1):
dataYtrain.append(y_train[i + sliding_window])
dataYtest = []
for i in range(len(y_test)-sliding_window-1):
dataYtest.append(y_test[i + sliding_window])
# Make data the divisible by a variety of batch_sizes for training
# Started at 1000 to not include replaced NaN values
dataXtrain = np.array(dataXtrain[1000:172008])
dataYtrain = np.array(dataYtrain[1000:172008])
dataXtest = np.array(dataXtest[1000:83944])
dataYtest = np.array(dataYtest[1000:83944])
# Checking input shapes
print('dataXtrain size is: {}'.format((dataXtrain).shape))
print('dataXtest size is: {}'.format((dataXtest).shape))
print('dataYtrain size is: {}'.format((dataYtrain).shape))
print('dataYtest size is: {}'.format((dataYtest).shape))
### ACTUAL LSTM MODEL
batch_size = 256
timesteps = dataXtrain.shape[1]
features = dataXtrain.shape[2]
# Model set-up, stacked 4 layer stateful LSTM
model = Sequential()
model.add(LSTM(512, return_sequences=True, stateful=True,
batch_input_shape=(batch_size, timesteps, features)))
model.add(LSTM(256,stateful=True, return_sequences=True))
model.add(LSTM(256,stateful=True, return_sequences=True))
model.add(LSTM(128,stateful=True))
model.add(Dense(1, activation='linear'))
model.summary()
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.9, patience=5, min_lr=0.000001, verbose=1)
def coeff_determination(y_true, y_pred):
from keras import backend as K
SS_res = K.sum(K.square( y_true-y_pred ))
SS_tot = K.sum(K.square( y_true - K.mean(y_true) ) )
return ( 1 - SS_res/(SS_tot + K.epsilon()) )
model.compile(loss='mse',
optimizer='nadam',
metrics=[coeff_determination,'mse','mae','mape'])
history = model.fit(dataXtrain, dataYtrain,validation_data=(dataXtest, dataYtest),
epochs=100,batch_size=batch_size, shuffle=False, verbose=1, callbacks=[reduce_lr])
score = model.evaluate(dataXtest, dataYtest,batch_size=batch_size, verbose=1)
print(score)
predictions = model.predict(dataXtest, batch_size=batch_size)
print(predictions)
import matplotlib.pyplot as plt
%matplotlib inline
#plt.plot(history.history['mean_squared_error'])
#plt.plot(history.history['val_mean_squared_error'])
plt.plot(history.history['coeff_determination'])
plt.plot(history.history['val_coeff_determination'])
#plt.plot(history.history['mean_absolute_error'])
#plt.plot(history.history['mean_absolute_percentage_error'])
#plt.plot(history.history['val_mean_absolute_percentage_error'])
#plt.title("MSE")
plt.ylabel("R2")
plt.xlabel("epoch")
plt.legend(["train", "val"], loc="best")
plt.show()
plt.plot(history.history["loss"][5:])
plt.plot(history.history["val_loss"][5:])
plt.title("model loss")
plt.ylabel("loss")
plt.xlabel("epoch")
plt.legend(["train", "val"], loc="best")
plt.show()
plt.figure(figsize=(20,8))
plt.plot(dataYtest)
plt.plot(predictions)
plt.title("Prediction")
plt.ylabel("Price")
plt.xlabel("Time")
plt.legend(["Truth", "Prediction"], loc="best")
plt.show()
Maybe you should remember that you are predicting stock returns, which very likely cannot be predicted at all. So the val_loss increasing is not overfitting at all. Instead of adding more dropout, maybe you should think about adding more layers to increase its power.
Try reducing the learning rate a lot (and remove the dropout for now).
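For example (a sketch assuming the tf.keras API; the exact value is something to tune):
from tensorflow.keras.optimizers import Nadam

# same model as above, compiled with an explicitly small learning rate
model.compile(loss='mse',
              optimizer=Nadam(learning_rate=1e-4),
              metrics=['mse', 'mae'])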
Why do you use shuffle=False in the fit() function?