LSTM model has poor prediction in simple example - python

I am trying to build an LSTM model using Keras. I created a simple sine-wave example containing more than 1000 points and want to predict the next point, but the result is not as good as I expected. When I fit the model, the predictions stay between 0 and 1 instead of following the sine wave. I have tried changing parameters such as the number of epochs, the batch size, and the learning rate, but it does not get better.
What am I doing wrong?
import joblib
import numpy as np
import matplotlib.pyplot as plt
import copy
import gc
import os
import sys
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras
import tensorflow as tf
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from keras.callbacks import Callback
learning_rate = 0.001
len_train = 30
total_predict = 300
len_test = 400
epoch = 100
batch_size = 32
workers = -1
class Callback_Class(Callback):
    """Keras callback that plots a recursive multi-step forecast while training.

    On every 20th epoch the model is rolled forward ``total_predict`` steps,
    starting from the seed window stored by :meth:`load_data`, and the
    forecast is plotted against the stored reference values.

    Relies on the module-level names ``model``, ``sc``, ``len_train`` and
    ``total_predict``.
    """

    def load_data(self, x_test, y_test):
        """Store the seed window and the reference values used for plotting."""
        self.x_test = x_test
        self.y_test = np.array(y_test)

    def model_predict(self, data_close):
        """Forecast ``total_predict`` points by feeding predictions back in.

        Args:
            data_close: Seed window holding ``len_train`` values; it is
                reshaped to ``(-1, len_train, 1)`` before the first prediction.

        Returns:
            ``np.ndarray`` with one inverse-scaled prediction per step.
        """
        output_predict = []
        for i in range(total_predict):
            if i == 0:
                data_close_ = data_close.reshape(-1, len_train, 1)
            else:
                # Slide the window one step: drop the oldest value and append
                # the prediction made in the previous iteration.
                data_close_ = np.delete(data_close_, 0)
                data_close_ = np.append(data_close_, pred_close)
                data_close_ = data_close_.reshape(-1, len_train, 1)
            pred_close = model.predict(data_close_)
            pred_close = pred_close.ravel()
            pred_close = np.array(pred_close).reshape(len(pred_close), 1)
            # Undo the scaling so the forecast is comparable with y_test.
            pred_cl = sc.inverse_transform(pred_close)
            output_predict.append(pred_cl)
        output_predict = np.array(output_predict)
        return output_predict

    def on_epoch_end(self, epoch, logs=None):
        """Plot forecast vs. reference values on every 20th epoch."""
        if epoch % 20 == 0:
            output_predict = self.model_predict(self.x_test)
            fig, ax = plt.subplots(figsize=(12,6))
            plt.title(f"Model predict")
            plt.plot(output_predict.ravel(), color="red", label='Predict')
            plt.plot(self.y_test.ravel(), color="blue", label='REAL')
            plt.legend(loc='lower left')
def lstm_reg(input_shape=(60, 1), unit=40, clustering_params=None):
    """Build and compile a two-layer bidirectional LSTM regressor.

    Args:
        input_shape: ``(timesteps, features)`` shape of one input window.
        unit: Unused; kept so existing callers keep working. Both LSTM layers
            use a fixed width of 32 units.
        clustering_params: Unused; kept for backward compatibility.

    Returns:
        A compiled Keras ``Model`` with a single linear output, trained with
        mean squared error and the module-level ``learning_rate``.
    """
    inputs = Input(input_shape)
    lstm1f = Bidirectional(LSTM(units=32, return_sequences=True))(inputs)
    lstm1f = Bidirectional(LSTM(units=32, return_sequences=False))(lstm1f)
    outputs = Dense(units=1, activation='linear')(lstm1f)
    model = Model(inputs=inputs, outputs=outputs)
    # "accuracy" is a classification metric and carries no meaning for a
    # continuous target, so report mean absolute error instead.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='mean_squared_error',
        metrics=["mae"],
    )
    return model
def create_data_train(data_time_series, window=None):
    """Turn a 1-D series into sliding-window samples for next-step prediction.

    Sample ``k`` consists of ``window`` consecutive values, and its target is
    the value that immediately follows them.

    Args:
        data_time_series: Array-like series; it is flattened before use.
        window: Number of past values per sample. Defaults to the
            module-level ``len_train``.

    Returns:
        Tuple ``(X_train, y_train)`` where ``X_train`` has shape
        ``(n_samples, window, 1)`` and ``y_train`` has shape ``(n_samples,)``.

    Raises:
        ValueError: If ``window`` is smaller than 1 or the series is too short
            to yield at least one sample.
    """
    if window is None:
        window = len_train
    series = np.array(data_time_series).ravel()
    if window < 1 or len(series) <= window:
        raise ValueError(
            f"need window >= 1 and more than {window} points, got {len(series)}"
        )
    X_train = np.array(
        [series[i - window:i] for i in range(window, len(series))]
    )
    y_train = series[window:]
    # Keras recurrent layers expect (samples, timesteps, features).
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    return X_train, y_train
# Build the sine series and hold out the last len_test points.
x = np.linspace(-20*np.pi, 20*np.pi, 2001)
sin_alpha = np.sin(x).ravel()
sin_alpha_train = np.array(copy.deepcopy(sin_alpha))[:len(sin_alpha)-len_test]
sin_alpha_train = np.array(sin_alpha_train).reshape(len(sin_alpha_train), 1)
# Fit the scaler on the training portion only.
sc = MinMaxScaler(feature_range=(0, 1))
sin_alpha_train = sc.fit_transform(sin_alpha_train)
X_train, y_train = create_data_train(sin_alpha_train)
joblib.dump(sc, f'Demo_MinMaxScaler.gz')
sc = joblib.load(f"Demo_MinMaxScaler.gz")
# Seed window: the first len_train points of the held-out segment.
X_test = np.array(copy.deepcopy(sin_alpha))[len(sin_alpha)-len_test:len(sin_alpha)-len_test+len_train]
X_test = np.array(X_test).reshape(len(X_test), 1)
# Reuse the scaler fitted on the training data; refitting on this short window
# would map it to a different range than the one the model was trained on.
X_test = sc.transform(X_test)
# Reference values (unscaled) for the total_predict points after the seed window.
y_test = np.array(copy.deepcopy(sin_alpha))[len(sin_alpha)-len_test+len_train:len(sin_alpha)-len_test+len_train+total_predict]
model = lstm_reg(input_shape=(len_train, 1), unit=int(2*(len_train+len(y_train))/3))
callback_class = Callback_Class()
callback_class.load_data(X_test, y_test)
model.fit(X_train, y_train, epochs=epoch, use_multiprocessing=True, verbose=1, callbacks=[callback_class], workers=workers, batch_size=batch_size)

It seems like you are normalizing your features and your labels in these lines
sc = MinMaxScaler(feature_range=(0, 1))
sin_alpha_train = sc.fit_transform(sin_alpha_train)
X_train, y_train = create_data_train(sin_alpha_train)
Try it without scaling your label set. Due to your output layer using the linear activation function, which is correct as you're working on a regression problem, the model should be able to handle non scaled labels. The model only learns your data in a range of 0 to 1 while your sine wave goes from -1 to 1.


TypeError: Cannot iterate over a Tensor with unknown first dimension

Getting the following error when I execute the below code:
TypeError: Cannot iterate over a Tensor with unknown first dimension.
How to solve this? The error is in the line output_gcn = gcn(input_layer)
I tried reshaping the input_layer, but it didn't work.
What is the problem and how to solve it?
Please let me know the solution as early as possible, as I am doing something apart from learning and have deadlines to meet
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from import UniformRandomWalk
#from stellargraph.layer import GCN
from stellargraph import StellarGraph
from tensorflow.keras import layers, Model, optimizers
from stellargraph.mapper import FullBatchNodeGenerator
from stellargraph.layer import GCN
from stellargraph.layer import node2vec
from stellargraph import StellarGraph
#from stellargraph.draw import draw
#generator = PaddedGraphGenerator(graphs=graphs)`
pro_tweets = pprocess[0:10000]
labels = df_encoded[['label_mild', 'label_moderate', 'label_non-depressed',
vectorizer = TfidfVectorizer()
vectors = vectorizer.fit_transform(pro_tweets)
similarity_matrix = cosine_similarity(vectors)
adj_matrix = np.zeros(similarity_matrix.shape)
adj_matrix[similarity_matrix > 0] = similarity_matrix[similarity_matrix > 0]
graph = StellarGraph(adj_matrix, node_features=vectors)
rw = UniformRandomWalk(graph)
walks =[0])), length=5, n=1)
gcn = GCN(layer_sizes=[32, 16], activations=["relu", "relu"], generator =
FullBatchNodeGenerator(graph, method="gcn"))
#input_layer = GCN.get_input_layer(graph)
input_layer = layers.Input(shape = (vectors.shape[1],), dtype="float32", name="input")
#reshaped_input_layer = tf.reshape(input_layer, [vectors.shape[1],])
import tensorflow as tf
output_gcn = gcn(input_layer)
#input_layer = layers.Input(shape=(adj_matrix.shape[0],adj_matrix.shape[1]),
dtype="int32", name="input")
#output_layer = gcn(input_layer)
output_embedding = node2vec(output_dim=16)(output_gcn)
dense_layer = layers.Dense(16, activation="relu")(output_embedding)
output_layer = layers.Dense(4, activation="softmax")(dense_layer)
'''create the final dense layer
dense_layer = layers.Dense(16, activation="relu")(output_layer)
output_layer = layers.Dense(1, activation="sigmoid")(dense_layer)'''
model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer=optimizers.Adam(lr=0.01), loss="binary_crossentropy", metrics=
X_train, X_test, y_train, y_test = train_test_split(walks, labels, test_size=0.2,
random_state=42), y_train, validation_data=(X_test, y_test), epochs=50)
test_predictions = model.predict(X_test)
test_predictions = np.round(test_predictions)
accuracy = (test_predictions == y_test).mean()
print("Accuracy: {:.4f}".format(accuracy))
train_predictions = model.predict(X_train)
train_predictions = np.round(train_predictions)
accuracy = (train_predictions == y_train).mean()
print("Accuracy: {:.4f}".format(accuracy))]

Predict sine wave with Python

I am trying to train a neural network to predict the next point of a sine wave using MLPRegressor. Looking at the performance metrics, I cannot get the R2 above 0.90. Any help would be greatly appreciated!
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import numpy as np
import pandas as pd
# Create an `np.array` containing a sequence of 10 values of the sine function.
seq = np.array([np.sin(2*np.pi*t/10) for t in range(10)])
#generate 100 pairs of two-point sequences from the values in `seq`.
num_sequences = 100
x_train = np.array([])
y_train = np.array([])
for i in range(num_sequences):
rand = np.random.randint(10)
x_train = np.append(x_train, [[seq[rand], seq[np.mod(rand+1, 10)]]])
y_train = np.append(y_train, seq[np.mod(rand+1, 10)])
x_train = np.resize(x_train, (100, 2))
x_test = np.array([seq, np.roll(seq, -1)])
y_test = np.array(np.roll(seq, -2))
x_test = np.reshape(x_test, (-1, 2))
# Place the data in a `pandas` `DataFrame`.
pdata = pd.DataFrame({'x1':x_train[:,0], 'x2':x_train[:,1],'y':y_train})
reg_model = MLPRegressor(hidden_layer_sizes = (64,64,64), activation = "relu", random_state = 2, max_iter = 400)
reg_model.fit(x_train, y_train)
y_pred = reg_model.predict(x_test)
# scikit-learn metrics take (y_true, y_pred); R2 is not symmetric in its arguments.
print(' R2 = ', r2_score(y_test, y_pred))
print('MAE = ', mean_absolute_error(y_test, y_pred))
print('MSE = ', mean_squared_error(y_test, y_pred))
Your data was not correctly set up.
You can use this code to create a sine wave:
sin_wave = np.sin(np.arange(-1000, 1000, 0.1))
Then use this code to create data for training and testing:
input_seq = np.array([sin_wave[x:x+2] for x in range(len(sin_wave)-2)])
output = np.array([sin_wave[x+2] for x in range(len(sin_wave)-2)])
x_train = input_seq[:1500]
y_train = output[:1500]
x_test = input_seq[1500:]
y_test = output[1500:]
Finally, use this code to build, train and evaluate your model:
reg_model = MLPRegressor(hidden_layer_sizes = (64,64,64), activation = "relu", random_state = 2, max_iter = 400)
reg_model.fit(x_train, y_train)
y_pred = reg_model.predict(x_test)
# scikit-learn metrics take (y_true, y_pred); R2 is not symmetric in its arguments.
print(' R2 = ', r2_score(y_test, y_pred))
print('MAE = ', mean_absolute_error(y_test, y_pred))
print('MSE = ', mean_squared_error(y_test, y_pred))
The complete code:
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import numpy as np

sin_wave = np.sin(np.arange(-1000, 1000, 0.1))
# Each sample is two consecutive points; the target is the point that follows.
input_seq = np.array([sin_wave[x:x+2] for x in range(len(sin_wave)-2)])
output = np.array([sin_wave[x+2] for x in range(len(sin_wave)-2)])
# Split into Train and test
x_train = input_seq[:1500]
y_train = output[:1500]
x_test = input_seq[1500:]
y_test = output[1500:]
reg_model = MLPRegressor(hidden_layer_sizes = (64,64,64), activation = "relu", random_state = 2, max_iter = 400)
reg_model.fit(x_train, y_train)
y_pred = reg_model.predict(x_test)
# scikit-learn metrics take (y_true, y_pred); R2 is not symmetric in its arguments.
print(' R2 = ', r2_score(y_test, y_pred))
print('MAE = ', mean_absolute_error(y_test, y_pred))
print('MSE = ', mean_squared_error(y_test, y_pred))

How to plot training and test convergence of a multilayer perceptron

I couldn't find anything helpful about plotting the convergence of training and test error for sklearn.neural_network.MLPRegressor. I found that there is a loss_curve_ attribute, but what about validation data?
I have built a simple model in which both inputs and outputs are randomly selected (say x = numpy.linspace(0, numpy.pi, 100), y = numpy.sin(x)). I wrote the code below to obtain the variation of sklearn.metrics.mean_squared_error for different hidden-layer sizes.
How can I overcome this problem?
from sklearn.preprocessing import RobustScaler
inputs /= 10
ERE /= 10
scaler = RobustScaler()
inputs = scaler.fit_transform(inputs)
X_train, X_test, y_train, y_test = train_test_split(inputs, ERE,
from sklearn.neural_network import MLPRegressor
hidden_layer_size = (10, )
activation = "tanh"
solver = "adam"
alpha = 1e-4
batch_size = 6
learning_rate = "adaptive"
learning_rate_init = 1e-4
power_t = "sgd"
max_iter = 1000
shuffle = True
random_state = 123
verbose = True
early_stopping = True
validation_fraction = 0.15
n_iter_no_change = 35
from sklearn.metrics import mean_squared_error as mse
import numpy as np
error_scores = np.zeros(shape = (11,))
for _iterator, hidden_layer_size in enumerate(range(1, 110, 10)):
mlr = MLPRegressor(hidden_layer_sizes=hidden_layer_size,
alpha=alpha), y_train)
error_scores[_iterator] = mse(y_test, mlr.predict(X_test))
The MLPRegressor class (well, BaseMultilayerPerceptron really) has an undocumented validation_scores_ attribute which keeps track of the scores on the validation data. However, it is only populated if you pass early_stopping=True when initialising the estimator.

InvalidArgumentError: Incompatible shapes with Keras LSTM Net

I want to predict the pressure of a machine. I have 18 input values and the pressure as output. So I have 19 columns and 7657 rows as the database consists of 7657 time steps and each counts for 1 sec.
I have a problem with the following code:
import tensorflow as tf
import pandas as pd
from matplotlib import pyplot
from sklearn.preprocessing import MinMaxScaler
from sklearn import linear_model
from keras.models import Sequential
from keras.layers import Dense #Standard neural network layer
from keras.layers import LSTM
from keras.layers import Activation
from keras.layers import Dropout
df = pd.read_csv('Testdaten_2_Test.csv',delimiter=';')
feature_col_names=['LSDI','LZT1I', ..... ,'LZT5I']
predicted_class_names = ['LMDI']
x = df[feature_col_names].values
y = df[predicted_class_names].values
x_train_size = 6400
x_train, x_test = x[0:x_train_size], x[x_train_size:len(x)]
y_train_size = 6400
y_train, y_test = y[0:y_train_size], y[y_train_size:len(y)]
# Linear-regression baseline: fit on the training split, predict the test split.
nb_model = linear_model.LinearRegression()
nb_model.fit(X=x_train, y=y_train)
nb_predict_train = nb_model.predict(x_test)
from sklearn import metrics
def scale(x, y):
    """Scale features and targets independently to the range [-1, 1].

    Args:
        x: 2-D array of input features.
        y: 2-D array of target values.

    Returns:
        Tuple ``(x_scaler, y_scaler, x_scaled, y_scaled)``: the two fitted
        ``MinMaxScaler`` objects (needed later for ``inverse_transform``)
        followed by the scaled arrays.
    """
    # fit scaler
    x_scaler = MinMaxScaler(feature_range=(-1, 1))
    x_scaler = x_scaler.fit(x)
    x_scaled = x_scaler.transform(x)
    # fit scaler
    y_scaler = MinMaxScaler(feature_range=(-1, 1))
    y_scaler = y_scaler.fit(y)
    y_scaled = y_scaler.transform(y)
    return x_scaler, y_scaler, x_scaled, y_scaled
x_scaler, y_scaler, x_scaled, y_scaled = scale(x, y)
x_train, x_test = x_scaled[0:x_train_size], x_scaled[x_train_size:len(x)]
y_train, y_test = y_scaled[0:y_train_size], y_scaled[y_train_size:len(y)]
# Two stacked LSTM layers; the second one emits the single regression output.
model = Sequential()
model.add(LSTM(10, return_sequences=True,batch_input_shape=(32,1,18)))
model.add(LSTM(1,return_sequences=True, activation='linear'))
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=10,batch_size=32)
score = model.evaluate(x_test, y_test,batch_size=32)
predicted = model.predict(x_test)
predicted = y_scaler.inverse_transform(predicted)
predicted = [x if x > 0 else 0 for x in predicted]
correct_values = y_scaler.inverse_transform(y_test)
correct_values = [x if x > 0 else 0 for x in correct_values]
I Get the Error:
ValueError: Error when checking input: expected lstm_1_input to have 3
dimensions, but got array with shape (1257, 18)
After the last line of code.
I also tried to reshape the test data but then I get a very similar error.
I think I'm missing something very easy or basic, but I can't figure it out at the moment, as I'm just a beginner in coding neural networks.
I need this for my master's thesis, so I would be very thankful if anyone could help me out.
The problem is that the batch size in your model's batch_input_shape is fixed. Your test set has 1257 samples, which is not divisible by 32. It should be changed as follows:
model.add(LSTM(10, return_sequences=True,batch_input_shape=(None,1,18)))
You should also reshape the test data before evaluating the model on it.
x_test= x_test.reshape(len(x)-x_train_size,1,18)
y_test= y_test.reshape(len(y)-x_train_size,1,1)
score = model.evaluate(x_test, y_test,batch_size=32)
Of course, you have to reshape predicted and y_test before inverse_transform.
predicted = model.predict(x_test)
predicted= predicted.reshape(len(y)-x_train_size,1)
y_test= y_test.reshape(len(y)-x_train_size,1)

ValueError: Error when checking input: expected conv1d_1_input to have 3 dimensions, but got array with shape (3856, 52)

This is the code I used for fault detection. I need to design a CNN for fault detection with a non-image dataset, and I am unable to do so. Do I need to reshape my input into 4D? I am getting the above error. I actually have separate training and testing samples: the training data is 480*52 and the testing data is 960*52. If I use them both, I get another error saying the target has a different dimension.
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from keras.models import Sequential,Input,Model
from keras.layers import Dense,Dropout,Flatten
from keras.layers import Conv1D,MaxPooling1D
from keras.layers import LeakyReLU
# importing Dataset
from lib_read import read_data
folderk = 'TE_process/'
train, test = read_data( folderk )
#training datasets
#i have 10 files of datasets each of rows a480 and columns 52
#53 column i have assigned labels manually for training datasets
X_train = train.iloc[:,:-1].values
y_train = train.iloc[:,52].values
#testing datasets
#i have 10 files for testing easch of rows 960 and columns 52.
#53 column i have assigned label using the code from lib_read
#X_test = test.iloc[:,:-1].values
#y_test = test.iloc[:,52].values
# for 4_faults dataset: f1_small, f8_medium, f13_incipient, f17_big + no-faults
#Encoding the trianing datsets
# Label-encode each of the first 52 feature columns in place. The same encoder
# object is refitted for every column, so afterwards it only holds the classes
# of the last column processed.
labelencoder_X = LabelEncoder()
for col in range(52):
    X_train[:, col] = labelencoder_X.fit_transform(X_train[:, col])
# Map the class labels to integers, then one-hot encode them.
labelencoder_yt = LabelEncoder()
y_train = labelencoder_yt.fit_transform(y_train)
# `categorical_features` has been removed from scikit-learn; with a single
# input column the default encoder already encodes exactly that column.
yt_encoded = OneHotEncoder()
y_train = yt_encoded.fit_transform(y_train.reshape(-1,1)).toarray()
#Spliting the datasets
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size= 0.2, random_state=0)
# Standardise with statistics from the training split only.
ss_X = StandardScaler()
X_train = ss_X.fit_transform(X_train)
X_test = ss_X.transform(X_test)
#tryning to rshape the datasets from 2D to 4D
import numpy as np
#X_train = X_train.reshape(X_train.shape[0], 1, 20, 52)
#X_test = X_test.reshape(X_test.shape[0], 1, 480, 52)
#X_train = X_train.astype('float32')
#X_test = X_test.astype('float32')
#initializing CNN
fault_classifier = Sequential()
# Adding the input layer
fault_classifier.add(Conv1D(64, kernel_size=(3), activation="relu",input_shape=(3856,52)))
fault_classifier.add(Conv1D(64, kernel_size=(3), activation="relu"))
#fault_classifier.add(Conv2D(128, kernel_size=(3,3), activation="relu",input_shape=(50,20,1)))
#fully connected layer
fault_classifier.add(Dense(300, activation="relu"))
fault_classifier.add(Dense(10, activation='softmax'))
# sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
fault_classifier.compile(optimizer = 'adam', loss = 'mean_squared_error', metrics = ['accuracy'])
# Train the classifier and keep the Keras History object for later inspection.
history = fault_classifier.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, batch_size=10)
# Predicting the Test set results
y_pred = fault_classifier.predict(X_test)
y_pred = (y_pred > 0.5)
pred_acc = accuracy_score(y_test, y_pred)

