Hello everyone, I have a question: is it sensible, and is it possible, to implement a time series forecasting model using TensorFlow and a Long Short-Term Memory (LSTM) neural network to predict the monthly sales of a retail company?
Here is an example of the code I am using to train my model. Is this the correct way of doing it, or should I be doing something else? Any hints, tips, or help would be greatly appreciated.
""" my code uses the tensorflow library to create an LSTM neural network that predicts the monthly sales of a retail company. The data is loaded from a CSV file, normalized using the MinMaxScaler, split into training and testing sets, and prepared for the model. The LSTM model is then built, trained, and evaluated on the test data. """
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
# Load the retail sales data
sales_data = pd.read_csv("sales.csv")
# Normalize the data using MinMaxScaler
scaler = MinMaxScaler()
sales_data = scaler.fit_transform(sales_data)
# Split the data into training and testing sets
train_data = sales_data[:int(sales_data.shape[0]*0.8),:]
test_data = sales_data[int(sales_data.shape[0]*0.8):,:]
# Create the input data for the model
def create_input_data(data, window_size=12):
    X = []
    y = []
    for i in range(data.shape[0] - window_size):
        X.append(data[i:i + window_size, 0])
        y.append(data[i + window_size, 0])
    return np.array(X), np.array(y)
X_train, y_train = create_input_data(train_data)
X_test, y_test = create_input_data(test_data)
# Build the LSTM model
model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(units=50, input_shape=(12, 1)))
model.add(tf.keras.layers.Dense(units=1))
model.compile(optimizer="adam", loss="mean_squared_error")
# Train the model
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
model.fit(X_train, y_train, epochs=100, batch_size=32)
# Evaluate the model on the test data
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
test_loss = model.evaluate(X_test, y_test)
print("Test Loss:", test_loss)
I have not tested my model yet because I am uncertain whether I am going about this the right way.
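One detail worth checking, independent of the architecture: scaler.fit_transform(sales_data) fits the MinMaxScaler on the entire series, so information from the test period leaks into the scaling. A minimal sketch of fitting the scaler on the training slice only, assuming sales.csv contains a single numeric sales column (adjust the column handling to your real file):
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

sales = pd.read_csv("sales.csv").values.astype("float32")  # assumes one numeric sales column
split = int(len(sales) * 0.8)
scaler = MinMaxScaler()
train_data = scaler.fit_transform(sales[:split])  # fit on the training slice only
test_data = scaler.transform(sales[split:])       # reuse the same scaling for the test slice
# Later, predictions can be mapped back to real sales units with
# scaler.inverse_transform(model.predict(X_test)).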
I am working on an LSTM project for learning purposes, using time-series data with 3 columns [current, sma, target], where sma is the simple moving average. I extracted these values from the dataframe like so:
data = df[['current', 'sma', 'target']].values
# normalize the data
scaler = MinMaxScaler(feature_range=(0,1))
dataset = scaler.fit_transform(data)
# then split inputs from targets
X = dataset[:, :2]
y = dataset[:, 2]
# split into xtrain ytrain xtest ytest
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
Everything works fine so far, and I understand it, but the uncharted territory for me is converting the X_* and y_* arrays into 3-D arrays to feed the model. I am using a simple model just to make this work; I am not looking for impressive results, this is purely educational.
The model that I will use:
model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(128, input_shape=(timesteps, features), return_sequences=True))
model.add(tf.keras.layers.LSTM(64, return_sequences=False))
model.add(tf.keras.layers.Dense(features))
model.compile(loss='mean_squared_error', optimizer='adam')
How do I reshape the data to feed it to the model?
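One common way to go from 2-D arrays to the (samples, timesteps, features) shape an LSTM expects is a sliding window over the rows. A minimal sketch under the setup above, assuming timesteps=10 (an arbitrary choice here) and that each target is the value that follows its window:
import numpy as np

def make_windows(X, y, timesteps=10):
    # stack overlapping windows of rows so each sample has shape (timesteps, features)
    Xs, ys = [], []
    for i in range(len(X) - timesteps):
        Xs.append(X[i:i + timesteps])   # (timesteps, 2) for [current, sma]
        ys.append(y[i + timesteps])     # target right after the window
    return np.array(Xs), np.array(ys)

X_seq, y_seq = make_windows(X, y)  # X_seq: (samples, 10, 2), y_seq: (samples,)
Note that windows are usually built before the train/test split, and a chronological split keeps the time order intact, whereas train_test_split shuffles by default.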
I am learning how transfer learning works using this data: https://www.kaggle.com/competitions/santander-customer-satisfaction/data. This is my simple source-model code in TensorFlow, and I am saving this model:
import pandas as pd
pd.set_option('display.max_rows', None)
import numpy as np
from tensorflow import keras
import matplotlib.pyplot as plt
import tensorflow as tf
""" # Read in the csv data using pandas
train = pd.read_csv('Z:\ADwork2\python\PM/train.csv',index_col=0)
test = pd.read_csv('Z:\ADwork2\python\PM/test.csv', index_col=0)
sample = pd.read_csv('Z:\ADwork2\python\PM/sample_submission.csv')
"""
# Read in the csv data using pandas
train = pd.read_csv('train.csv',index_col=0)
test = pd.read_csv('test.csv', index_col=0)
sample = pd.read_csv('sample_submission.csv')
train.dtypes.value_counts()
train.select_dtypes(include=['int64']).nunique()
features_to_drop = train.nunique()
features_to_drop = features_to_drop.loc[features_to_drop.values==1].index
# now drop these columns from both the training and the test datasets
train = train.drop(features_to_drop,axis=1)
test = test.drop(features_to_drop,axis=1)
train.isnull().values.any()
X = train.iloc[:,:-1]
y = train['TARGET']
y.value_counts().to_frame().T
from imblearn.over_sampling import SMOTE
X_resampled, y_resampled = SMOTE().fit_resample(X, y)
y_resampled.value_counts().to_frame().T
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X_resampled, y_resampled,
train_size=0.5,
test_size=0.2,
random_state=42,
shuffle=True)
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
test = scaler.transform(test)
model = keras.Sequential(
[
keras.layers.Dense(units=9, activation="relu", input_shape=(X_train.shape[-1],) ),
# randomly delete 30% of the input units below
keras.layers.Dropout(0.3),
keras.layers.Dense(units=9, activation="relu"),
# the output layer, with a single neuron
keras.layers.Dense(units=1, activation="sigmoid"),
]
)
# save the initial weights for later
initial_weights = model.get_weights()
model.summary()
#keras.utils.plot_model(model, show_shapes=True)
learning_rate = 0.001
model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
loss="binary_crossentropy",
metrics=[keras.metrics.AUC()]
)
history = model.fit(X_train, y_train,
epochs=500,
batch_size=1000,
validation_data=(X_val, y_val),
verbose=0)
from tensorflow.keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(
min_delta = 0.0002, # minimum amount of change to count as an improvement
patience = 20, # how many epochs to wait before stopping
restore_best_weights=True,
)
model.set_weights(initial_weights)
history = model.fit(X_train, y_train,
epochs=500,
batch_size=1000,
validation_data=(X_val, y_val),
verbose=0,
# add in our early stopping callback
callbacks=[early_stopping]
)
sample['TARGET'] = model.predict(test)
sample.to_csv('submission.csv',index=False)
#tf.keras.models.save_model()
model.save('modelcentral.h5')
I am saving this model and then loading it into a new Python file as the target model:
import pandas as pd
pd.set_option('display.max_rows', None)
import numpy as np
from tensorflow import keras
import matplotlib.pyplot as plt
import tensorflow as tf
# Read in the csv data using pandas
train = pd.read_csv('train.csv',index_col=0)
test = pd.read_csv('test.csv', index_col=0)
sample = pd.read_csv('sample_submission.csv')
train.dtypes.value_counts()
train.select_dtypes(include=['int64']).nunique()
features_to_drop = train.nunique()
features_to_drop = features_to_drop.loc[features_to_drop.values==1].index
# now drop these columns from both the training and the test datasets
train = train.drop(features_to_drop,axis=1)
test = test.drop(features_to_drop,axis=1)
train.isnull().values.any()
X = train.iloc[:,:-1]
y = train['TARGET']
y.value_counts().to_frame().T
from imblearn.over_sampling import SMOTE
X_resampled, y_resampled = SMOTE().fit_resample(X, y)
y_resampled.value_counts().to_frame().T
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X_resampled, y_resampled,
train_size=0.5,
test_size=0.2,
random_state=42,
shuffle=True)
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
test = scaler.transform(test)
# keras.models.load_model() can be used to reconstruct the saved model identically.
model = keras.models.load_model("modelcentral.h5")
model.trainable=False
learning_rate = 0.001
model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
loss="binary_crossentropy",
metrics=[keras.metrics.AUC()]
)
history = model.fit(X_train, y_train,
epochs=500,
batch_size=1000,
validation_data=(X_val, y_val),
verbose=0)
model.summary()
For now I am just freezing all model layers, but what if I need to fine-tune the last layers? For example, I have binary classification in the source model; what if the target model requires multi-class classification? How can I fine-tune the last layers? I am following this repo https://github.com/rasbt/stat453-deep-learning-ss21/blob/main/L14/5-transfer-learning-vgg16_small.ipynb to learn fine-tuning of final layers for transfer learning, but that code is in PyTorch and works on image data, so I am confused:
model.classifier[1].requires_grad = True
model.classifier[3].requires_grad = True
# For the last layer, because the number of class labels differs compared to ImageNet, we replace the output layer with our own output layer:
model.classifier[6] = torch.nn.Linear(4096, 10)
Please help, and if there is any mistake in my current code then please guide me.
Given your source model:
import tensorflow as tf
model = tf.keras.Sequential(
[
tf.keras.layers.Dense(units=9, activation="relu", input_shape=(10,) ),
tf.keras.layers.Dropout(0.3),
tf.keras.layers.Dense(units=9, activation="relu"),
tf.keras.layers.Dense(units=1, activation="sigmoid"),
])
model.save('model.h5')
You can do something like this to replace your last layer with some other layer:
model = tf.keras.models.load_model("model.h5")
transfer_model = tf.keras.Sequential()
for idx, l in enumerate(model.layers):
    if idx == len(model.layers) - 1:
        # add an output layer with 10 different classes
        transfer_model.add(tf.keras.layers.Dense(units=10, activation="softmax"))
    else:
        transfer_model.add(l)
print(transfer_model.summary())
You can decide which layers you then want to freeze or make trainable using l.trainable = True / False. You could also do this all without the for loop if you prefer:
model.layers[0].trainable = True
model.layers[2].trainable = True
outputs = tf.keras.layers.Dense(units=10, activation="softmax")(model.layers[-2].output)
transfer_model = tf.keras.Model(inputs=model.input, outputs=outputs)
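After swapping the head, a typical next step is to freeze the reused layers, compile with a loss that matches the new task, and train only the new output layer. A minimal sketch, assuming 10 classes with integer labels and hypothetical X_train_new / y_train_new arrays for the target task:
for layer in transfer_model.layers[:-1]:
    layer.trainable = False  # keep the transferred weights fixed

transfer_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss="sparse_categorical_crossentropy",  # integer class labels
    metrics=["accuracy"],
)
# transfer_model.fit(X_train_new, y_train_new, epochs=20, validation_split=0.1)
Once the new head has converged, you can unfreeze some of the earlier layers and continue training with a much smaller learning rate if you want to fine-tune them as well.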
I am trying to build an LSTM model for cryptocurrency prediction, just for fun.
I managed to build and compile my LSTM model. However, I could not manage to predict future dates.
I have checked these solutions so far;
How to use the LSTM model for multi-step forecasting?
Forecast future values with LSTM in Python
How to predict actual future values after testing the trained LSTM model?
I couldn't adapt these solutions to my code.
A summary of my dataset is like (simple bitcoin prices):
open,close,high,low,volume,time,date
4331.6,4354.43,4394.47,4303.29,3841.525758,1543438799,2018-11-28 23:59:59
4356.23,4243.57,4359.13,4218.79,4434.861032,1543442399,2018-11-29 00:59:59
4243.57,4236.09,4266.0,4185.01,4347.171442,1543445999,2018-11-29 01:59:59
4236.4,4264.85,4279.9,4215.8,2999.814805,1543449599,2018-11-29 02:59:59
First preparing & scaling my data:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.optimizers import adam_v2
from keras.layers import Dense, LSTM, LeakyReLU, Dropout
data = pd.read_csv('bitcoin.csv')
price = data.filter(['close'])
min_max_scaler = MinMaxScaler()
norm_data = min_max_scaler.fit_transform(price.values)
And then splitting my test & train data from original data.
def univariate_data(dataset, start_index, end_index, history_size, target_size):
    data = []
    labels = []
    start_index = start_index + history_size
    if end_index is None:
        end_index = len(dataset) - target_size
    for i in range(start_index, end_index):
        indices = range(i - history_size, i)
        data.append(np.reshape(dataset[indices], (history_size, 1)))
        labels.append(dataset[i + target_size])
    return np.array(data), np.array(labels)
past_history = 5
future_target = 0
TRAIN_SPLIT = int(len(norm_data) * 0.75)
x_train, y_train = univariate_data(norm_data, 0, TRAIN_SPLIT, past_history, future_target)
x_test, y_test = univariate_data(norm_data, TRAIN_SPLIT, None, past_history, future_target)
And finally I compile my model and predict.
num_units = 64
learning_rate = 0.0001
activation_function = 'sigmoid'
adam = adam_v2.Adam(learning_rate=learning_rate)
loss_function = 'mse'
batch_size = 5
num_epochs = 64
model = Sequential()
model.add(LSTM(units = num_units, activation=activation_function, input_shape=(None, 1)))
model.add(LeakyReLU(alpha=0.5))
model.add(Dropout(0.1))
model.add(Dense(units = 1))
model.compile(optimizer=adam, loss=loss_function)
history = model.fit(
x_train,
y_train,
validation_split=0.1,
batch_size=batch_size,
epochs=num_epochs,
shuffle=False
)
model.save('bitcoin.h5')
test_predict = model.predict(x_test)
train_predict = model.predict(x_train)
The result is satisfying for me, but instead of predicting on the training data, I want to predict the future using this model (for example, the next 100 rows).
I am learning numpy & pandas and all other libraries used in this example.
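For predicting beyond the end of the data, one common approach is recursive forecasting: take the last past_history observations, predict one step, append the prediction to the window, and repeat. A minimal sketch using the names from the code above (model, norm_data, min_max_scaler, past_history = 5); it is only an illustration, and recursive forecasts compound their own errors the further out they go:
import numpy as np

def predict_future(model, scaled_series, n_steps=100, history=5):
    # roll the model forward n_steps, feeding each prediction back in as input
    window = scaled_series[-history:].reshape(1, history, 1)
    preds = []
    for _ in range(n_steps):
        next_scaled = model.predict(window, verbose=0)[0, 0]
        preds.append(next_scaled)
        window = np.append(window[:, 1:, :], [[[next_scaled]]], axis=1)
    return min_max_scaler.inverse_transform(np.array(preds).reshape(-1, 1))

future_close = predict_future(model, norm_data, n_steps=100)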
So my main goal is to use data from 2018 and try to predict data for 2019. I'm using a GRU model, and I have the following code. I have a few issues: I'm not sure whether the code is actually correct or whether I am missing something, and for model.fit I don't know whether I should use validation_split=0.1 or validation_data=(X_test, y_test), since I'm using a different dataframe for testing.
Also, the accuracy is very small, doesn't make any sense, and I have no idea why.
import pandas as pd
import tensorflow as tf
from keras.layers.core import Dense
from keras.layers.recurrent import GRU
from keras.models import Sequential
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorboardcolab import TensorBoardColab, TensorBoardColabCallback
df = pd.read_csv('IF 10 PERCENT.csv',index_col=None)
#Loading Second Dataframe
df2 = pd.read_csv('2019 10minutes IF 10 PERCENT.csv',index_col=None)
tbc=TensorBoardColab() # Tensorboard
X_train= df[['WindSpeed_mps','AmbTemp_DegC','RotorSpeed_rpm','RotorSpeedAve','NacelleOrientation_Deg','MeasuredYawError','Pitch_Deg','WindSpeed1','WindSpeed2','WindSpeed3','GeneratorTemperature_DegC','GearBoxTemperature_DegC']]
X_train=X_train.values
y_train= df['Power_kW']
y_train=y_train.values
X_test= df2[['WindSpeed_mps','AmbTemp_DegC','RotorSpeed_rpm','RotorSpeedAve','NacelleOrientation_Deg','MeasuredYawError','Pitch_Deg','WindSpeed1','WindSpeed2','WindSpeed3','GeneratorTemperature_DegC','GearBoxTemperature_DegC']]
X_test=X_test.values
y_test= df2['Power_kW']
y_test=y_test.values
# conversion to numpy array
# scaling values for model
x_scale = MinMaxScaler()
y_scale = MinMaxScaler()
X_train= x_scale.fit_transform(X_train)
y_train= y_scale.fit_transform(y_train.reshape(-1,1))
X_test=x_scale.fit_transform(X_test)
y_test=y_scale.fit_transform(y_test.reshape(-1,1))
X_train = X_train.reshape((-1,1,12))
X_test = X_test.reshape((-1,1,12))
# splitting train and test
# creating model using Keras
model = Sequential()
model.add(GRU(units=512, return_sequences=True, input_shape=(1,12)))
model.add(GRU(units=256, return_sequences=True))
model.add(GRU(units=256))
model.add(Dense(units=1, activation='sigmoid'))
model.compile(loss=['mse'], optimizer='adam',metrics=['accuracy'])
model.summary()
#model.fit(X_train, y_train, batch_size=250, epochs=10, validation_split=0.1, verbose=1, callbacks=[TensorBoardColabCallback(tbc)])
model.fit(X_train, y_train, batch_size=250, epochs=10, validation_data=(X_test,y_test), verbose=1, callbacks=[TensorBoardColabCallback(tbc)])
score, acc = model.evaluate(X_test, y_test)
print('Score: {}'.format(score))
print('Accuracy: {}'.format(acc))
y_predicted = model.predict(X_test)
y_predicted = y_scale.inverse_transform(y_predicted)
y_test = y_scale.inverse_transform(y_test)
plt.plot(y_predicted, label='Predicted')
plt.plot(y_test, label='Measurements')
plt.legend()
plt.show()
Thank you
It sounds to me like you are trying to solve a regression problem here. If so, it does not make sense to measure accuracy as a metric, since accuracy measures exact label matches. MSE should be fine for regression.
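Concretely, one way to adjust the compile/evaluate step in the question is to drop accuracy and track regression metrics such as MAE and RMSE instead; a small sketch (the metric choice is just a suggestion):
model.compile(loss='mse', optimizer='adam',
              metrics=[tf.keras.metrics.MeanAbsoluteError(),
                       tf.keras.metrics.RootMeanSquaredError()])
# ... train as before, then:
loss, mae, rmse = model.evaluate(X_test, y_test)
print('Test MSE: {:.4f}  MAE: {:.4f}  RMSE: {:.4f}'.format(loss, mae, rmse))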
I have a dataset with >16k vectors (21 dimensions).
I use 80% for training and 20% for testing.
I implemented a Neural Network and Naive Bayes with the above dataset.
Get dataset and split it
data_set = np.loadtxt("./data/_vector21.csv", delimiter=",")
inp_vec = data_set[:, 1:22]
out_vec = data_set[:, 22:]
# Split dataset into training set and test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(inp_vec, out_vec, test_size=0.2) # 80% training and 20% test
Neural Network
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
mlp = MLPClassifier(hidden_layer_sizes=(21, 100, 100, 6), max_iter=1000)
mlp.fit(X_train, y_train)
predictions = mlp.predict(X_test)
print("\nAccuracy: %.2f%%\n" % (accuracy_score(y_test, predictions)*100))
# Accuracy: 61.26%
Naive Bayes
# Create a Gaussian Classifier
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
model = GaussianNB()
# Train the model using the training sets
model.fit(X_train, y_train)
# Predict the response for test dataset
y_pred = model.predict(X_test)
print("\nAccuracy: %.3f%%" % (metrics.accuracy_score(y_test, y_pred)*100))
#Accuracy: 34.050%
I expected the outputs of the Neural Network model and the Naive Bayes model to be closer.
Can anyone tell me what I did wrong and how I can fix it?