I was following a python project to loosely predict the price of stocks when I encountered an index error with an LSTM model. This is the guide I was following and my code roughly matches: Prediction Tutorial. But for ease of access this is my code exactly:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 20, 10
df = pd.read_csv('HistoricalData_Apple.csv')
df = df[['Date', 'Close/Last']]
df = df.replace({'\$':''}, regex=True)
df = df.astype({"Close/Last": float})
df["Date"] = pd.to_datetime(df.Date, format="%m/%d/%Y")
df.index = df['Date']
df = df.sort_index(ascending=True, axis=0)
data = pd.DataFrame(index=range(0, len(df)), columns=['Date', 'Close/Last'])
for i in range(0, len(data)):
data["Date"][i] = df['Date'][i]
data["Close/Last"][i] = df["Close/Last"][i]
scaler = MinMaxScaler(feature_range=(0, 1))
data.index = data.Date
data.drop("Date", axis=1, inplace=True)
final_data = data.values
train_data = final_data[0:200, :]
valid_data = final_data[200:, :]
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(final_data)
x_train_data, y_train_data = [], []
for i in range(60, len(train_data)):
x_train_data.append(scaled_data[i-60:i, 0])
y_train_data.append(scaled_data[i, 0])
lstm_model = Sequential()
lstm_model.add(LSTM(units=50, return_sequences=True, input_shape=
(np.shape(x_train_data)[1], 1)))
lstm_model.add(LSTM(units=50))
lstm_model.add(Dense(1))
model_data = data[len(data)-len(valid_data)-60:].values
model_data = model_data.reshape(-1, 1)
model_data = scaler.transform(model_data)
lstm_model.compile(loss='mean_squared_error', optimizer='adam')
lstm_model.fit(x_train_data, y_train_data, epochs=1, batch_size=1, verbose=2)
X_test=[]
for i in range(60, model_data.shape[0]):
X_test.append(model_data[i-60:i, 0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predicted_stock_price = lstm_model.predict(X_test)
predicted_stock_price = scaler.inverse_transform(predicted_stock_price)
train_data = data[:200]
valid_data = data[200:]
valid_data['Predictions'] = predicted_stock_price
plt.plot(train_data["Close"])
plt.plot(valid_data[['Close', "Predictions"]])
plt.show()
This code should be working according to the tutorial I was following, but every time I run the code I receive this error message:
2021-11-08 14:57:34.659018: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations: AVX AVX2 To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
Traceback (most recent call last): File "C:/Users/ME/AppData/Roaming/JetBrains/PyCharmCE2021.2/scratches/scratch_1.py", line 42, in <module> lstm_model.add(LSTM(units=50, return_sequences=True, input_shape=(np.shape(x_train_data)[1], 1))) IndexError: tuple index out of range
I do not know what this means exactly or how to fix it.
The expected input shape of the LSTM layer is (batch, timesteps, features). As you have only one feature you can reshape your training sequences as follows
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
as you already did for the validation sequences.
import pandas as pd
import numpy as np
import yfinance as yf
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
pd.options.mode.chained_assignment = None
# download the data
df = yf.download(tickers=['AAPL'], period='10y')
# split the data
train_data = df[['Close']].iloc[: - 200, :]
valid_data = df[['Close']].iloc[- 200:, :]
# scale the data
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(train_data)
train_data = scaler.transform(train_data)
valid_data = scaler.transform(valid_data)
# extract the training sequences
x_train, y_train = [], []
for i in range(60, train_data.shape[0]):
x_train.append(train_data[i - 60: i, 0])
y_train.append(train_data[i, 0])
x_train = np.array(x_train)
y_train = np.array(y_train)
# extract the validation sequences
x_valid = []
for i in range(60, valid_data.shape[0]):
x_valid.append(valid_data[i - 60: i, 0])
x_valid = np.array(x_valid)
# reshape the sequences
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
x_valid = x_valid.reshape(x_valid.shape[0], x_valid.shape[1], 1)
# train the model
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=x_train.shape[1:]))
model.add(LSTM(units=50))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_train, y_train, epochs=50, batch_size=128, verbose=1)
# generate the model predictions
y_pred = model.predict(x_valid)
y_pred = scaler.inverse_transform(y_pred)
y_pred = y_pred.flatten()
# plot the model predictions
df.rename(columns={'Close': 'Actual'}, inplace=True)
df['Predicted'] = np.nan
df['Predicted'].iloc[- y_pred.shape[0]:] = y_pred
df[['Actual', 'Predicted']].plot(title='AAPL')
Related
I have the code below and I want to add a meta-heuristic method (like pso, apso, krill herd , ...) to it to predict better , but I don't know where and how I must add it
is there any one who can help me whit this , its too essential for me
I'm new on python and need help
may thanks to any one who write the code for me or give me advise about how to do it
''''
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pandas_datareader as web
import datetime as dt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
# Load Data
company = 'ETH-USD'
start = dt.datetime(2012,1,1)
end = dt.datetime(2020,1,1)
data = web.DataReader(company, 'yahoo', start, end)
#print(data.last(5))
#print(data.info())
# Prepare Data
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(data['Close'].values.reshape(-1,1))
prediction_days = 60
x_train = []
y_train = []
for x in range(prediction_days, len(scaled_data)):
x_train.append(scaled_data[x - prediction_days:x, 0])
y_train.append(scaled_data[x, 0])
x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
# Build The Model
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=1)) # Prediction of the next closing value
model.compile(optimizer='adam', loss= 'mean_squared_error')
model.fit(x_train, y_train, epochs=25, batch_size=32)
''' Test The Model Accuracy on Existing Data '''
# Load Test Data
test_start = dt.datetime(2020,1,1)
test_end = dt.datetime.now()
test_data = web.DataReader(company, 'yahoo', test_start, test_end)
actual_prices = test_data['Close'].values
total_dataset = pd.concat((data['Close'], test_data['Close']), axis=0)
model_inputs = total_dataset[len(total_dataset) - len(test_data) - prediction_days:].values
model_inputs = model_inputs.reshape(-1, 1)
model_inputs = scaler.transform(model_inputs)
# Make Predictions on Test Data
x_test = []
for x in range(prediction_days, len(model_inputs)):
x_test.append(model_inputs[x-prediction_days:x, 0])
x_test = np.array(x_test)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
predicted_prices = model.predict(x_test)
predicted_prices = scaler.inverse_transform(predicted_prices)
# Plot The Test Predictions
plt.plot(actual_prices, color="black", label=f"Actual {company} Price")
plt.plot(predicted_prices, color='green', label=f"Predicted {company} Price")
plt.title(f"{company} Share Price")
plt.xlabel('Time')
plt.ylabel(f'{company} Share Price')
plt.legend()
plt.show()
# Predict Next Day
real_data = [model_inputs[len(model_inputs) + 1 - prediction_days:len(model_inputs+1), 0]]
real_data = np.array(real_data)
real_data = np.reshape(real_data, (real_data.shape[0], real_data.shape[1],1))
prediction = model.predict(real_data)
prediction = scaler.inverse_transform(prediction)
print(f"Prediction: {prediction}")
''''
I think these errors havev something to do with the format of my data or the way my code is interacting with the data set, but I'm not a developer by any stretch of the imagination so I'm not really sure exactly what is going on.
/Users/kylehammerberg/PycharmProjects/LSTM1P/matplottest.py:54: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray
X_test = np.array(X_test)
Traceback (most recent call last):
File "/Users/kylehammerberg/PycharmProjects/LSTM1P/matplottest.py", line 55, in
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
IndexError: tuple index out of range
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import keras
url = 'https://raw.githubusercontent.com/khammerberg53/MLPROJ1/main/SP500.csv'
dataset_train = pd.read_csv(url)
training_set = dataset_train.iloc[:, 1:2].values
dataset_train.head()
print(dataset_train.head())
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0,1))
training_set_scaled = sc.fit_transform(training_set)
X_train = []
y_train = []
for i in range(60, 2000):
X_train.append(training_set_scaled[i-60:i, 0])
y_train.append(training_set_scaled[i, 0])
X_train, y_train = np.array(X_train), np.array(y_train)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import Dense
model = Sequential()
model.add(LSTM(units=50,return_sequences=True,input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50,return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50,return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=1))
model.compile(optimizer='adam',loss='mean_squared_error')
model.fit(X_train,y_train,epochs=100,batch_size=32)
url = 'https://raw.githubusercontent.com/khammerberg53/MLPROJ1/main/SP500%20test%20setcsv.csv'
dataset_test = pd.read_csv(url)
real_stock_price = dataset_test.iloc[:, 1:2].values
dataset_total = pd.concat((dataset_train['Value'], dataset_test['Value']), axis = 0)
inputs = dataset_total[len(dataset_total) - len(dataset_test) - 60:].values
inputs = inputs.reshape(-1,1)
inputs = sc.transform(inputs)
X_test = []
for i in range(3, 100):
X_test.append(inputs[i-60:i, 0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predicted_stock_price = model.predict(X_test)
predicted_stock_price = sc.inverse_transform(predicted_stock_price)
plt.plot(real_stock_price, color = 'black', label = 'TATA Stock Price')
plt.plot(predicted_stock_price, color = 'green', label = 'Predicted TATA Stock Price')
plt.title('TATA Stock Price Prediction')
plt.xlabel('Time')
plt.ylabel('TATA Stock Price')
plt.legend()
plt.show()
print(plt.show())
You cannot range from 3 to 100 if you define your X_test the way you do. If you change your code to:
inputs = inputs.reshape(-1,1)
inputs = sc.transform(inputs)
X_test = []
for i in range(60, 161):
X_test.append(inputs[i-60:i, 0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
the rest of the code will produce (I only took 2 epochs and might explain that the predictions aren't what you expected):
with 20 epochs, you'd get this:
I have a problem with the prediction. The program is designed to predict stock market prices. Here EUR / USD.
I trained the model and tested it with the test data. The results look good. At this point I can't get any further.
How can I make a prediction?
I tested the program with data from yesterday.
How can I predict today's data?
What should I enter and in what format?
import math
import matplotlib.pyplot as plt
import keras
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import *
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping
daten=open("C:")
df = pd.read_csv(daten)
df
training_set = df.iloc[436065:438073, 4:5]
test_set = df.iloc[438074:438170, 4:5]
sc = MinMaxScaler(feature_range = (0, 1))
training_set_scaled = sc.fit_transform(training_set)
X_train = []
y_train = []
for i in range(70, 2000):
X_train.append(training_set_scaled[i-70:i, 0])
y_train.append(training_set_scaled[i, 0])
X_train, y_train = np.array(X_train), np.array(y_train)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
n=50
dop=0.1
model = Sequential()#1
model.add(LSTM(units = n, return_sequences = True, input_shape = (X_train.shape[1], 1)))
model.add(Dropout(dop))#2
model.add(LSTM(units = n, return_sequences = True))
model.add(Dropout(dop))#3
model.add(LSTM(units = n, return_sequences = True))
model.add(Dropout(dop))#4
model.add(LSTM(units = n, return_sequences = True))
model.add(Dropout(dop))#5
model.add(LSTM(units = n, return_sequences = True))
model.add(Dropout(dop))#6
model.add(LSTM(units = n, return_sequences = True))
model.add(Dropout(dop))#7
model.add(LSTM(units = n))
model.add(Dropout(dop))
model.add(Dense(units = 1))
model.compile(optimizer = 'adam', loss = 'mean_squared_error')
model.fit(X_train, y_train, epochs = 100, batch_size =32)
dataset_total = pd.concat((training_set, test_set), axis = 0)
inputs = dataset_total[len(dataset_total) - len(test_set) - 70:].values
inputs = inputs.reshape(-1,1)
inputs = sc.transform(inputs)
type(inputs.shape)
X_test = []
for i in range(70, 167):
X_test.append(inputs[i-70:i, 0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
print(X_test.shape)
predicted_stock_price = model.predict(X_test)
predicted_stock_price = sc.inverse_transform(predicted_stock_price)
predicted_stock_price.shape
plt.plot(df.loc[438074:438170, "Close"])
plt.grid(True)
plt.plot(predicted_stock_price)
plt.grid(True)
I have this piece of code. But when I try to run the prediction value code there's an error
# Creating a data structure with n timesteps
X_test = []
for i in range(5, 25):
X_test.append(inputs[i-5:i, 0])
X_test = np.array(X_test)
# Reshape to a new dimension
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
# Identify the predicted values
predicted_number = regressor.predict(X_test)
# Inverse the scaling to put them back to the normal values
predicted_number = sc.inverse_transform(predicted_number)
the error is like this
AttributeError Traceback (most recent call last)
<ipython-input-364-17fa061596c6> in <module>()
1 # Identify the predicted values
----> 2 predicted_number = regressor.predict(X_test)
3 # Inverse the scaling to put them back to the normal values
4 predicted_number = sc.inverse_transform(predicted_number)
5 KerasRegressor.model
~\Anaconda3\lib\site-packages\keras\wrappers\scikit_learn.py in predict(self, x, **kwargs)
320 """
321 kwargs = self.filter_sk_params(Sequential.predict, kwargs)
--> 322 preds = np.array(self.model.predict(x, **kwargs))
323 if preds.shape[-1] == 1:
324 return np.squeeze(preds, axis=-1)
AttributeError: 'KerasRegressor' object has no attribute 'model'
in case if needed, below is the full script
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
dataset_train = pd.read_csv('Datatraining.csv')
training_set = dataset_train.iloc[:, 1:2].values
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range = (0, 1))
training_set_scaled = sc.fit_transform(training_set)
X_train = []
y_train = []
for i in range(60, 72):
X_train.append(training_set_scaled[i-60:i, 0])
y_train.append(training_set_scaled[i, 0])
X_train, y_train = np.array(X_train), np.array(y_train)
X_train = np.reshape(X_train, newshape = (X_train.shape[0], X_train.shape[1], 1))
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
regressor = Sequential()
# Adding the first LSTM layer and some Dropout regularisation
regressor.add(LSTM( units = 50, return_sequences = True, input_shape = (X_train.shape[1], 1) ))
regressor.add(Dropout(0.2))
# Adding the second LSTM layer and some Dropout regularisation
regressor.add(LSTM(units = 50, return_sequences = True))
regressor.add(Dropout(0.2))
# Adding the third LSTM layer and some Dropout regularisation
regressor.add(LSTM(units = 50, return_sequences = True))
regressor.add(Dropout(0.2))
# Adding the fourth LSTM layer and some Dropout regularisation
regressor.add(LSTM(units = 50))
regressor.add(Dropout(0.2))
regressor.add(Dense(units = 1))
regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')
regressor.fit(X_train, y_train, epochs = 300, batch_size = 32)
dataset_test = pd.read_csv('Datatesting.csv')
real_number_arrivals = dataset_test.iloc[:, 1:2].values
dataset_total = pd.concat( (downloads['China'], dataset_test['China']), axis = 0 )
inputs = dataset_total[len(dataset_total) - len(dataset_test) - 72:].values
inputs = inputs.reshape(-1, 1)
inputs = sc.transform(inputs)
# Creating a data structure with n timesteps
X_test = []
for i in range(5, 25):
X_test.append(inputs[i-5:i, 0])
X_test = np.array(X_test)
# Reshape to a new dimension
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
# Identify the predicted values
predicted_number = regressor.predict(X_test)
# Inverse the scaling to put them back to the normal values
predicted_number = sc.inverse_transform(predicted_number)
any solution would be really helpful. Thanks in advance
You have forgotten to fit the model first.
You did not share the whole code, but I believe you have some X_train and Y_train somewhere in your code. So try this line:
regressor.fit(X_train, Y_train)
And then you can run the prediction.
I am attempting a CNN for classification of biological data (EEG data). However, after importing and splitting the data into train/test/dev sets and building the CNN, I cannot get the input array shape to match the expected array shape.
Note: Data contains 5 trials/samples for each participant(ID's) in the study, so GSS was used to ensure data from each participant was not mixed train and test sets.
Code and error as follows:
#Load Data
def load_all_data(filename):
import numpy as np
a = np.load(filename)
d = (dict(zip(("data1{}".format(k) for k in a), (a[k] for k in a))))
return d
filename = ("dataname.npz")
X = load_all_data(filename)['array_0']
y = load_all_data(filename)['array_1']
IDs = load_all_data(filename)['array_2']
#Split Test Data with Groupshuffle Split
from sklearn.model_selection import GroupShuffleSplit
import numpy as np
test_size = 0.2
gss = GroupShuffleSplit(n_splits = 1, test_size = 0.2)
for train,test in gss.split(X, y, IDs):
X_train = X[train]
y_train = y[train]
IDs_train = IDs[train]
X_test = X[test]
y_test = y[test]
IDs_test = IDs[test]
fileoutname = 'train_test_data'
np.savez(fileoutname,X_train, y_train, X_test, y_test,IDs_train,IDs_test)
#Split Train, Test Data
gss = GroupShuffleSplit(1, test_size)
for train,test in gss.split(X, y, IDs):
X_train2 = X[train]
y_train2 = y[train]
IDs_train = IDs[train]
X_dev = X[test]
y_dev = y[test]
IDs_test = IDs[test]
#Add dimension to X and Convert y to Categorical
X_train2 = np.expand_dims(X_train2,axis=0)
y_train2 = keras.utils.to_categorical(y_train2,num_classes=2)
X_dev = np.expand_dims(X_test,axis=0)
y_dev = keras.utils.to_categorical(y_test,num_classes=2)
X = np.expand_dims(X,axis=0)
#Build the CNN
def simpleCNN(self, units = 10):
import keras
from keras.layers import Dense
from keras.layers import Conv2D
from keras.layers import Flatten
from keras.models import Model, Input
inp = Input(shape = self.shape[1:], name='inp')
#layer 1
x = Conv2D(units, kernel_size=(1,1), strides = (1,1), activation='relu', data_format='channels_last')(inp)
#layer 2
x = Conv2D(units, kernel_size=(2,2), strides = (1,1), activation='relu', data_format='channels_last')(x)
#layer 3
x = Flatten()(x)
#layer4
out = Dense(2, activation='softmax',name='out')(x)
model = Model(inputs = inp, outputs = out)
return model
#Fit the Data
model = simpleCNN(X)
from keras.optimizers import Adamax
adamax = Adamax(lr=3e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0); #3e-4; 2e-3 is a default.
model.compile(optimizer=adamax, loss='categorical_crossentropy', metrics=['acc'])
model.fit(X_train2, y_train2, epochs=20, batch_size=32, verbose = 1, validation_data = (X_dev, y_dev))
ValueError: Error when checking input: expected inp to have shape (11459, 26, 60) but got array with shape (9065, 26, 60)