I've made a predictive model using LSTM which predicts future prices for raw materials like cotton,fibre,yarn etc. At the end of code I used matplotlib library to plot graph which displays the original prices, predicted prices and future predicted prices.
This is the graph which shows future prices according to dates
How do I display this graph on Django framework? Because I need to deploy this model on a web application using Django but the tutorials I've seen so far show predictive models which take user input and don't really show anything related to plots or graphs.
Following is the code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
from datetime import datetime
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard
import os
import glob
import pandas
import numpy
from sklearn import preprocessing
import numpy as np
# Importing Training Set
dataset_train = pd.read_csv('201222-yarn-market-price-china--034.csv1.csv')
dataset_train.info()
# Select features (columns) to be involved intro training and predictions
cols = list(dataset_train)[1:5]
# Extract dates (will be used in visualization)
datelist_train = list(dataset_train.iloc[0])
datelist_train = [dt.datetime.strptime(date, '%m/%d/%Y').date() for date in datelist_train]
print('Training set shape == {}'.format(dataset_train.shape))
print('All timestamps == {}'.format(len(datelist_train)))
print('Featured selected: {}'.format(cols))
dataset_train = dataset_train[cols].astype(str)
for i in cols:
for j in range(0, len(dataset_train)):
dataset_train[i][j] = dataset_train[i][j].replace(',', '')
dataset_train = dataset_train.astype(float)
# Using multiple features (predictors)
training_set = dataset_train.values
print('Shape of training set == {}.'.format(training_set.shape))
training_set
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
training_set_scaled = sc.fit_transform(training_set)
sc_predict = StandardScaler()
sc_predict.fit_transform(training_set[:, 0:1])
# Creating a data structure with 90 timestamps and 1 output
X_train = []
y_train = []
n_future = 60 # Number of days we want top predict into the future
n_past = 90 # Number of past days we want to use to predict the future
for i in range(n_past, len(training_set_scaled) - n_future +1):
X_train.append(training_set_scaled[i - n_past:i, 0:dataset_train.shape[1] - 1])
y_train.append(training_set_scaled[i + n_future - 1:i + n_future, 0])
X_train, y_train = np.array(X_train), np.array(y_train)
print('X_train shape == {}.'.format(X_train.shape))
print('y_train shape == {}.'.format(y_train.shape))
# Import Libraries and packages from Keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from tensorflow.keras.optimizers import Adam
# Initializing the Neural Network based on LSTM
model = Sequential()
# Adding 1st LSTM layer
model.add(LSTM(units=64, return_sequences=True, input_shape=(n_past, dataset_train.shape[1]-1)))
# Adding 2nd LSTM layer
model.add(LSTM(units=10, return_sequences=False))
# Adding Dropout
model.add(Dropout(0.25))
# Output layer
model.add(Dense(units=1, activation='linear'))
# Compiling the Neural Network
model.compile(optimizer = Adam(learning_rate=0.01), loss='mean_squared_error')
es = EarlyStopping(monitor='val_loss', min_delta=1e-10, patience=10, verbose=1)
rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=1)
mcp = ModelCheckpoint(filepath='weights.h5', monitor='val_loss', verbose=1,
save_best_only=True, save_weights_only=True)
tb = TensorBoard('logs')
history = model.fit(X_train, y_train, shuffle=True, epochs=30, callbacks=[es, rlr, mcp, tb],
validation_split=0.2, verbose=1, batch_size=256)
# Generate list of sequence of days for predictions
datelist_future = pd.date_range(datelist_train[-1], periods=n_future, freq='1d').tolist()
'''
Remeber, we have datelist_train from begining.
'''
# Convert Pandas Timestamp to Datetime object (for transformation) --> FUTURE
datelist_future_ = []
for this_timestamp in datelist_future:
datelist_future_.append(this_timestamp.date())
# Perform predictions
predictions_future = model.predict(X_train[-n_future:])
predictions_train = model.predict(X_train[n_past:])
# Inverse the predictions to original measurements
# ---> Special function: convert <datetime.date> to <Timestamp>
def datetime_to_timestamp(x):
'''
x : a given datetime value (datetime.date)
'''
return datetime.strptime(x.strftime('%m%d%Y'), '%m%d%Y')
y_pred_future = sc_predict.inverse_transform(predictions_future)
y_pred_train = sc_predict.inverse_transform(predictions_train)
a=dataset_train.iloc[:, 3]
print(a)
PREDICTIONS_FUTURE = pd.DataFrame(y_pred_future, columns=['Cotton
Yarn1']).set_index(pd.Series(datelist_future))
PREDICTION_TRAIN = pd.DataFrame(y_pred_train, columns=['Cotton
Yarn1']).set_index(pd.Series(datelist_train[2 * n_past + n_future -1:]))
# Convert <datetime.date> to <Timestamp> for PREDCITION_TRAIN
PREDICTION_TRAIN.index = PREDICTION_TRAIN.index.to_series().apply(datetime_to_timestamp)
print(PREDICTION_TRAIN.head(3))
#plt.rcParams["figure.figsize"] = (20,3)
#rcParams['figure.figsize'] = 14, 5
# Plot parameters
START_DATE_FOR_PLOTTING = '12/24/2019'
dataset_train = pd.DataFrame(dataset_train, columns=cols)
dataset_train.index = datelist_train
dataset_train.index = pd.to_datetime(dataset_train.index)
plt.plot(PREDICTIONS_FUTURE.index, PREDICTIONS_FUTURE['Cotton Yarn1'], color='r',
label='Predicted Stock Price')
plt.plot(PREDICTION_TRAIN.loc[START_DATE_FOR_PLOTTING:].index,
PREDICTION_TRAIN.loc[START_DATE_FOR_PLOTTING:]['Cotton Yarn1'], color='orange',
label='Training predictions')
plt.plot(dataset_train.loc[START_DATE_FOR_PLOTTING:].index,
dataset_train.loc[START_DATE_FOR_PLOTTING:]['Cotton Yarn1'], color='b', label='Actual Stock
Price')
plt.axvline(x = min(PREDICTIONS_FUTURE.index), color='green', linewidth=2, linestyle='--')
plt.grid(which='major', color='#cccccc', alpha=0.5)
plt.legend(shadow=True)
plt.title('Predcitions and Acutal Stock Prices', family='Arial', fontsize=12)
plt.xlabel('Timeline', family='Arial', fontsize=10)
plt.ylabel('Stock Price Value', family='Arial', fontsize=10)
plt.xticks(rotation=45, fontsize=8)
plt.show()
Related
I am trying to generate price predictions for cryptocurrencies using keras. I have it working on 1 day intervals using 1 day data from the Kraken API. My problem comes when I try to generate predictions on other time frames. 1hr 4hr and 1week. The prediction timestamps come out as daily regardless of the input data. For hourly the output dataframe is completely scrambled. These are screenshots of the output.This is correctly working
This is the scrambled 1 hour dataframe
This is the 4hr dataframe with wrong timestamps
My code is as follows. Sorry it's not a minimum reproducible example.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout, GRU, SimpleRNN, Conv1D
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, Normalizer
import os
import tensorflow as tf
import krakenex
from pykrakenapi import KrakenAPI
#Pulling a DataFrame from the API
api = krakenex.API()
k = KrakenAPI(api)
ohlc, last = k.get_ohlc_data("ETHUSD", interval=60)
df = ohlc
last_close = ohlc
last_close = last_close[['close']]
last_close = last_close.iloc[::-1]
last_price = last_close.close[-1]
print(last_price)
#Clipping Dataframe to only containg index anc close prices
df = df[['close']]
#Reversing dataframe order
df = df.iloc[::-1]
df.tail()
#LSTM works better if data is scaled but removing for a moment
scaler = MinMaxScaler()
df = pd.DataFrame(scaler.fit_transform(df), columns = df.columns, index=df.index)
#print(df.columns)
df.tail()
#Visualizing price action
df.plot(figsize=(14,8))
plt.title('ETH prices')
plt.ylabel('normalized prices')
plt.show()
def split_sequence(seq, n_steps_in, n_steps_out):
"""
Splits the unzivariate time sequence
"""
# Creating a list for both variables
X, y = [], []
for i in range(len(seq)):
# Finding the end of the current sequence
end = i + n_steps_in
out_end = end + n_steps_out
# Breaking out of the loop if we have exceeded the dataset's length
if out_end > len(seq):
break
# Splitting the sequences into: x = past prices and indicators, y = prices ahead
seq_x, seq_y = seq[i:end], seq[end:out_end]
X.append(seq_x)
y.append(seq_y)
return np.array(X), np.array(y)
#Some charting for accuracy tests
def visualize_results(results):
history = results.history
plt.figure(figsize=(14,8))
plt.plot(history['val_loss'])
plt.plot(history['loss'])
plt.legend(['val_loss', 'loss'])
plt.title('Loss')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.show()
plt.figure(figsize=(14,8))
plt.plot(history['val_accuracy'])
plt.plot(history['accuracy'])
plt.legend(['val_accuracy', 'accuracy'])
plt.title('Accuracy')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.show()
#number of periods to look back at
n_per_in = 680
#number of periods to predict
n_per_out = 20
n_features = 1
#making a sequence for predictions
X, y = split_sequence(list(df.close), n_per_in, n_per_out)
#Giving the model the proper dimensions
X = X.reshape(X.shape[0], X.shape[1], n_features)
# Instantiating the model
d = 0.2
activ = 'softsign'
model = Sequential()
#model.add(Conv1D(100, kernel_size=2, input_shape=(n_per_in, n_features), activation=activ))
model.add(LSTM(60, input_shape=(n_per_in, n_features), activation=activ))
model.add(Dense(units=n_per_out))
# Model summary
model.summary()
model.compile(loss = "mse", optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), metrics=['accuracy'])
res = model.fit(X,y, epochs=10, batch_size=4, validation_split=0.2)
plt.figure(figsize=(14, 8))
yhat = model.predict(X[-1].reshape(1,n_per_in,n_features)).tolist()[0]
yhat = np.array(yhat).reshape(-1,1).tolist()
actual = y[-1].reshape(-1,1)
print("predicted ", yhat)
plt.plot(yhat, label='predicted')
print('actuals ', actual.tolist())
plt.plot(actual.tolist(), label='actual')
plt.title("Predicted vs Actual")
plt.ylabel('price')
plt.legend()
plt.show()
yhat = model.predict(np.array(df.head(n_per_in)).reshape(1, n_per_in, n_features)).tolist()[0]
yhat = np.array(yhat).reshape(-1,1).tolist()
preds = pd.DataFrame(yhat, index=pd.date_range(start=df.index[-1], periods=len(yhat)), columns=df.columns)
print(preds)
periods = 30
actual = pd.DataFrame(df[['close']].tail(periods), index = df.close.tail(periods).index, columns= df.columns).append(preds.head(1))
plt.figure(figsize=(14,8))
plt.plot(actual, label='actuals')
plt.plot(preds, label='predictions')
plt.ylabel("price")
plt.xlabel('times')
plt.title(f'Forecasting the next {len(yhat)} days')
plt.legend()
plt.show()
How can I get the correct time frames in my output? I've gone to the docs and scoured google for hours and can't come up with a solution other than switching to prophet rather than Keras, and I don't want to do that.
Hello I am new to building models in python and I am trying to learn because I need to train a model using Python and extract its weights and biases to build the model on FPGA
I was following this tutorial:
https://medium.com/#curiousily/human-activity-recognition-using-lstms-on-android-tensorflow-for-hackers-part-vi-492da5adef64
I have been trying to implement the same model in the previous link using Keras. However, when I tried to train the keras model the accuracy was 0.0905 eventhough it has the same structure as the tensorflow model.
import keras.layers
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from scipy import stats
from sklearn import metrics
import seaborn as sns
from keras.utils.vis_utils import plot_model
import pydot as py
RANDOM_SEED = 42
#Reading Dataset
columns = ['user', 'activity', 'timestamp', 'x_axis', 'y_axis', 'z_axis']
df = pd.read_csv('WISDM_ar_v1.1_raw.txt', header=None, names=columns)
df = df.dropna()
#data_preprocessing
N_TIME_STEPS = 200
N_FEATURES = 3
step = 20
segments = []
labels = []
for i in range(0, len(df) - N_TIME_STEPS, step):
xs = df['x_axis'].values[i:i + N_TIME_STEPS]
ys = df['y_axis'].values[i:i + N_TIME_STEPS]
zs = df['z_axis'].values[i:i + N_TIME_STEPS]
# Note that we take the most common activity and assign it as a label for the sequence.
label = stats.mode(df['activity'][i:i + N_TIME_STEPS])[0][0]
segments.append([xs, ys, zs])
labels.append(label)
#print(np.array(segments).shape)
#(54901,3,200)
reshaped_segments = np.array(segments, dtype=np.float32).reshape(-1, N_TIME_STEPS, N_FEATURES)
#print(reshaped_segments.shape)
#(54901,200,3)
# Labels one hot encoding
labels = np.array(pd.get_dummies(labels), dtype=np.float32)
#print(labels.shape)
#(54901,6)
X_train, X_test, y_train, y_test = train_test_split(reshaped_segments, labels, test_size=0.2, random_state=RANDOM_SEED)
N_CLASSES = 6
N_HIDDEN_UNITS = 64
model = Sequential()
model.add(
LSTM((N_HIDDEN_UNITS),input_shape=(N_TIME_STEPS,N_FEATURES),return_sequences=True,recurrent_activation='relu'))
model.add(LSTM(labels.shape[1],return_sequences=False,recurrent_activation='relu'))
print(model.summary())
opt = keras.optimizers.Adam(learning_rate=0.0025)
model.compile(loss= 'categorical_crossentropy',optimizer=opt,metrics=['categorical_accuracy'])
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)
history = model.fit(X_train,y_train,epochs=50,batch_size=1024)
print(model.get_weights())
predictions = model.predict(X_test)
plt.plot(history.history['loss'])
plt.show()
categories = ['Downstairs', 'Jogging', 'Sitting', 'Standing', 'Upstairs', 'Walking']
max_test = np.argmax(y_test, axis=1)
max_predictions = np.argmax(predictions, axis=1)
confusion_matrix = metrics.confusion_matrix(max_test, max_predictions)
plt.figure(figsize=(16, 14))
sns.heatmap(confusion_matrix, xticklabels=categories, yticklabels=categories, annot=True, fmt="d");
plt.title("Confusion matrix")
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
model.save('mymodel')
This is my Keras implemenation, if someone can guide me on what is the difference between both models or if I am missing something I would be very grateful
I have followed this tutorial https://www.youtube.com/watch?v=QIUxPv5PJOY to predict the stock price of Apple one day into the future. The code is:
#Import the libraries
import math
import pandas_datareader as web
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
#Get the stock quote
df = web.DataReader('AAPL', data_source='yahoo', start='2012-01-01', end='2020-12-07')
#Show the data
df
#Get the number of rows and columns in the data set
df.shape
#Visualize the closing price history
#We create a plot with name 'Close Price History'
plt.figure(figsize=(16,8))
plt.title('Close Price History')
#We give the plot the data (the closing price of our stock)
plt.plot(df['Close'])
#We label the axis
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
#We show the plot
plt.show()
#Create a new dataframe with only the 'Close' column
data = df.filter(['Close'])
#Convert the dataframe to a numpy array
dataset = data.values
#Get the number of rows to train the model on
training_data_len = math.ceil( len(dataset) * 0.8 )
training_data_len
#Scale the data
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(dataset)
scaled_data
#Create the training data set
#Create the scaled training data set
train_data = scaled_data[0:training_data_len, :]
#Split the data into x_train and y_train data sets
x_train = []
y_train = []
#We create a loop
for i in range(60, len(train_data)):
x_train.append(train_data[i-60:i, 0]) #Will conaint 60 values (0-59)
y_train.append(train_data[i, 0]) #Will contain the 61th value (60)
if i <= 60:
print(x_train)
print(y_train)
print()
#Convert the x_train and y_train to numpy arrays
x_train, y_train = np.array(x_train), np.array(y_train)
#Reshape the data
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
x_train.shape
#Build the LSTM model
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))
#Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')
#Train the model
model.fit(x_train, y_train, batch_size=1, epochs=1)
#Create the testing data set
#Create a new array containing scaled values from index 1738 to 2247
test_data = scaled_data[training_data_len - 60:]
#Create the data set x_test and y_test
x_test = []
y_test = dataset[training_data_len:, :]
for i in range(60, len(test_data)):
x_test.append(test_data[i-60:i, 0])
#Convert the data to a numpy array
x_test = np.array(x_test)
#Reshape the data
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
#Get the model's predicted price values for the x_test data set
predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)
predictions
#Evaluate model (get the root mean quared error (RMSE))
rmse = np.sqrt( np.mean( predictions - y_test )**2 )
rmse
#Plot the data
train = data[:training_data_len]
valid = data[training_data_len:]
valid['Predictions'] = predictions
#Visualize the data
plt.figure(figsize=(16,8))
plt.title('Model')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.plot(train['Close'])
plt.plot(valid[['Close', 'Predictions']])
plt.legend(['Train', 'Validation', 'Predictions'], loc='lower right')
plt.show()
Now I want to extend the predicted portion of the graph to show future dates as well ('x' days into the future). I think I could do it by getting the predicted price for the next day and then use that price in the input to get the next day, and then use that day to get the next day, and so on. How can I do it? I thought of appending the next day pred price to the dataset used to train the model, but I wasn't successful at this. Thank you for your help.
Your intuition is correct. I have done what you were thinking of in this way:
X_FUTURE = 100
predictions = np.array([])
last = x_test[-1]
for i in range(X_FUTURE):
curr_prediction = model.predict(np.array([last]))
print(curr_prediction)
last = np.concatenate([last[1:], curr_prediction])
predictions = np.concatenate([predictions, curr_prediction[0]])
predictions = scaler.inverse_transform([predictions])[0]
print(predictions)
I have basically constructed shifting arrays with the new predictions
After that I have constructed the dataframe that contains the new prediction:
import datetime
from datetime import timedelta
dicts = []
curr_date = data.index[-1]
for i in range(X_FUTURE):
curr_date = curr_date + timedelta(days=1)
dicts.append({'Predictions':predictions[i], "Date": curr_date})
new_data = pd.DataFrame(dicts).set_index("Date")
And I plotted the result:
#Plot the data
train = data
#Visualize the data
plt.figure(figsize=(16,8))
plt.title('Model')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.plot(train['Close'])
plt.plot(new_data['Predictions'])
plt.legend(['Train', 'Predictions'], loc='lower right')
plt.show()
Why it seems so bad (anyway we don't know the future...)? I did retrain the model on all the dataset, but the problem here is that the further I go the greater would be the uncertain. I am not too expert of time series prediction, but I think that the model has not learned any good pattern under the time series. But as example it does what it needs to do
I wrote a small
"Linear Regression Neural Network Tensorflow Keras Python program"
Input dataset is
y = mx + c straight line data.
Predicted y values are not correct and are giving horizontal line kind of
values, instead of a line with some slope.
I ran this program on Windows laptop with tensorflow, Keras and
Jupyter notebook.
What to do to fix this program please?
Thanks and best regards,
SSJ
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
n2 = 50
count = 20
n4 = n2 + count
p = 100
m = 10
c = 5
x = np.linspace(n2, n4, p)
y = m * x + c
x
y
plt.scatter(x,y)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
x_normalizer = preprocessing.Normalization(input_shape=[1,])
x_normalizer.adapt(x)
x_normalized = x_normalizer(x)
y_normalizer = preprocessing.Normalization(input_shape=[1,])
y_normalizer.adapt(y)
y_normalized = x_normalizer(y)
y_model = tf.keras.Sequential([
y_normalizer,
layers.Dense(1)
])
y_model.compile(optimizer='rmsprop', loss='mse', metrics = ['mae'])
y_hist = y_model.fit(x, y, epochs=100, verbose=0, validation_split = 0.2)
hist = pd.DataFrame(y_hist.history)
hist['epoch'] = y_hist.epoch
hist.head()
hist.tail()
xin = [51,53,59,64]
ypred = y_model.predict(xin)
ypred
plt.scatter(x, y)
plt.scatter(xin, ypred, color = 'r')
plt.grid(linestyle = '--')
Use StandardScaler instead of Normalization
Normalizer acts row-wise and StandardScaler column-wise.
Normalizer does not remove the mean and scale by deviation but scales
the whole row to unit norm.
Found here: Difference between StandardScaler and Normalizer
This is how you can process the data:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler
x = np.linspace(50, 70, 100).reshape(-1, 1)
y = 10 * x + 5
x_standard_scaler = StandardScaler().fit(x)
y_standard_scaler = StandardScaler().fit(y)
x_scaled = x_standard_scaler.transform(x)
y_scaled = y_standard_scaler.transform(y)
Remember that you need two separate scalers for x and y so don't use the same object for that. Also if you want to use that scaler to process new data for testing, save the scaler in some variable. A good practice is to not refit the scaler again on test data.
model = Sequential([
Dense(1, input_dim=1, activation='linear'),
])
model.compile(optimizer='rmsprop', loss='mse')
history = model.fit(x_scaled, y_scaled, epochs=1000, verbose=0, validation_split = 0.2).history
pd.DataFrame(history).plot()
plt.show()
As you can see the model is converging. Its worth to plot the loss history which helps to tell if your model is learning or not.
x_test = np.linspace(20, 100, 10).reshape(-1, 1)
y_test = 10 * x_test + 5
x_test_scaled = x_standard_scaler.transform(x_test)
y_test_scaled = y_standard_scaler.transform(y_test)
If you have a test data that you want to use for validation or just predict it, remember to use standard scaler again, but without fitting. It should be fitted on train data only in most cases.
y_test_pred_scaled = model.predict(x_test_scaled)
y_test_pred = y_standard_scaler.inverse_transform(y_test_pred_scaled)
plt.scatter(x_test, y_test, s=30, label='true')
plt.scatter(x_test, y_test_pred, s=15, label='pred')
plt.legend()
plt.show()
If you want to get your prediction rescaled back to its original range use inverse_transform. Notice that prediction on x_test after rescaling is very close to y_test.
I am designing a stock prediction program using tensorflow2.1. My predictions variable should be a 2-dimensional variable at the end of my program but is only a one dimensional. The error returned is:
Traceback (most recent call last):
File "/Users/owner/Desktop/algo/predict.py", line 120, in <module>
valid['Predictions'] = predictions
File "/usr/local/lib/python3.7/site-packages/pandas/core/frame.py", line 3487, in __setitem__
self._set_item(key, value)
File "/usr/local/lib/python3.7/site-packages/pandas/core/frame.py", line 3563, in _set_item
self._ensure_valid_index(value)
File "/usr/local/lib/python3.7/site-packages/pandas/core/frame.py", line 3540, in _ensure_valid_index
value = Series(value)
File "/usr/local/lib/python3.7/site-packages/pandas/core/series.py", line 314, in __init__
data = sanitize_array(data, index, dtype, copy, raise_cast_failure=True)
File "/usr/local/lib/python3.7/site-packages/pandas/core/internals/construction.py", line 729, in sanitize_array
raise Exception("Data must be 1-dimensional")
Exception: Data must be 1-dimensional
Here is my code:
import requests
import math
import numpy as np
import pandas as pd
import pandas_datareader as web
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Embedding
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import MinMaxScaler
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
from datetime import datetime, date
plt.style.use('fivethirtyeight')
today = date.today()
df = web.DataReader('GC=F', data_source='yahoo', start='2019-02-14', end=str(today))
#print(df)
plt.figure(figsize=(16,8))
plt.title('GOLD PRICE HISTORY')
plt.plot(df['Close'])
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close price USD($)', fontsize=18)
#plt.show()
#get closing price
data = df.filter(['Close'])
#get closing price values
dataset = data.values
#set training data length to 91% of total data set
training_data_len = math.ceil(len(dataset))
print(training_data_len)
#Scale the data
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(dataset)
#create training data set
#create the scaled training data set
train_data = scaled_data[0: training_data_len , :]
#split the data into x-train and y-train datasets
x_train = []
y_train = []
for i in range(20, len(train_data)):
x_train.append(train_data[i-20:i, 0])
y_train.append(train_data[i, 0])
if i<= 20:
#print(x_train)
#print(y_train)
pass
#convert x-train and y-train to numpy arrays to train models
x_train, y_train = np.array(x_train), np.array(y_train)
#reshape the data, LSTM model expects 3D dataset
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
#Build LSTM MODEL
model = tf.keras.Sequential([
#tf.keras.layers.Embedding(encoder.vocab_size, 64),
tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
tf.keras.layers.Dense(25, activation='relu'),
tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(loss='mean_squared_error',
optimizer=tf.keras.optimizers.Adam(1e-4),
metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=1, epochs=1)
#create testing data set
#creat new array containing scaled values
test_data = scaled_data[training_data_len - 20: , :]
print(test_data.shape)
#create the datasets x-test and y-test
x_test=[]
y_test=dataset[training_data_len:, :]
for i in range(20, len(test_data)+1):
x_test.append(test_data[i-20:i, 0])
#convert data to numpy array
x_test = np.array(x_test)
#reshape data to 3D
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
print(x_test.shape)
#Get predicted price values
predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)
print(predictions.shape)
#get root mean squared error
rmse = np.sqrt(((predictions - y_test) ** 2).mean())
print(rmse)
#Plot the data
train = data[:training_data_len]
valid = data[training_data_len:]
valid['Predictions'] = predictions
#Visualize the data
plt.figure(figsize=(16,8))
plt.title('Model')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.plot(train['Close'])
plt.plot(valid[['Close', 'Predictions']])
plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
plt.show()
Is this error due to Pandas, Numpy or Tensorflow? And what can be done to satisfy this error statement? I have been looking for help on GitHub but they redirected me to Stackoverflow.
Thank you.
Change valid['Predictions'] = predictions to valid['Predictions'] = np.squeeze(predictions). The error should go away.
Your predictions is a list of lists.
print(predictions) results in [[1597.7726]]
You can use numpy's squeeze method
...
print(predictions.shape)
predictions = np.squeeze(predictions)
...
or use s simple workaround
...
print(predictions.shape)
predictions = predictions[0]
...
to solve the error.