This is my code below. I worked with the exact same model, and it converged well when I didn't scale the data: the loss started around 1.0 and went down to 0.45. However, after rewriting my code, the loss is a bit weird: it starts around 0.4 and does not converge. I'm wondering if I introduced any errors while rewriting. Can someone please help me find where I went wrong?
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.initializers import HeUniform, GlorotUniform
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import (EarlyStopping, ReduceLROnPlateau,
                                        ModelCheckpoint, TensorBoard)

label = np.array(df['label'])
features = np.array(df.iloc[:, 0:8])

X_train, X_rem, y_train, y_rem = train_test_split(features, label, test_size=0.3, random_state=seed)
X_val, X_test, y_val, y_test = train_test_split(X_rem, y_rem, test_size=0.5, random_state=seed)

# Fit the scaler on the training set only, then apply it to all three splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

def DNN():
    input_layer = Input(shape=(8,), name='input_layer')
    dense_1 = Dense(64, activation='relu', kernel_initializer=HeUniform(seed=1))(input_layer)
    dense_2 = Dense(32, activation='relu', kernel_initializer=HeUniform(seed=1))(dense_1)
    output_layer = Dense(1, activation='sigmoid', kernel_initializer=GlorotUniform(seed=1))(dense_2)
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.001),
                  metrics=['acc', 'Recall', 'AUC'])
    return model

model = DNN()

my_callbacks = [
    EarlyStopping(patience=10),
    ReduceLROnPlateau(monitor="val_loss", factor=0.3, patience=5, cooldown=3,
                      mode="min", min_lr=0.0001, verbose=1),
    ModelCheckpoint(monitor="val_loss", save_best_only=True, filepath='./checkpoint'),
    TensorBoard(log_dir='./logs')
]

history = model.fit(X_train, y_train, batch_size=64, epochs=150,
                    validation_data=(X_val, y_val), callbacks=my_callbacks)
This outputs: (training log screenshot omitted)
Dear all: I'm very new to deep learning. I am trying to add a for loop that tests all possible combinations to find the best result. Currently what I have is the following.
from tensorflow.keras import backend as K

def coeff_determination(y_true, y_pred):
    # R^2 (coefficient of determination) as a Keras metric
    SS_res = K.sum(K.square(y_true - y_pred))
    SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
    return 1 - SS_res / (SS_tot + K.epsilon())
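As a quick sanity check (a small sketch of mine, assuming TF2 eager execution; the toy values are made up), the metric can be evaluated directly on constant tensors:

import numpy as np
from tensorflow.keras import backend as K

# Toy tensors: predictions close to the targets should give R^2 near 1
y_true = K.constant(np.array([1.0, 2.0, 3.0, 4.0]))
y_pred = K.constant(np.array([1.1, 1.9, 3.2, 3.8]))
print(K.eval(coeff_determination(y_true, y_pred)))  # ~0.98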
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
x_train = x_train.to_numpy()
x_test = x_test.to_numpy()
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)
(37118, 105)
(37118,)
(15908, 105)
(15908,)
timesteps = 3
features = 35  # number of features per timestep (3 * 35 = 105 original columns)

x_train = x_train.reshape((x_train.shape[0], timesteps, features))
x_test = x_test.reshape((x_test.shape[0], timesteps, features))

dense_layers = [0, 1, 2]
layer_sizes = [32, 64, 128]
LSTM_layers = [1, 2, 3]

for dense_layer in dense_layers:
    for layer_size in layer_sizes:
        for LSTM_layer in LSTM_layers:
            NAME = "{}-lstm-{}-nodes-{}-dense-{}".format(LSTM_layer, layer_size,
                                                         dense_layer, int(time.time()))
            tensorboard = TensorBoard(log_dir=f"LSTM_logs\\{NAME}")
            print(NAME)

            model = Sequential()
            model.add(LSTM(layer_size, input_shape=(x_train.shape[1], x_train.shape[2]),
                           return_sequences=True))
            for i in range(LSTM_layer - 1):
                model.add(LSTM(layer_size, return_sequences=True))
            for i in range(dense_layer):
                model.add(Dense(layer_size))
            model.add(Dense(1))

            model.compile(loss='mae', optimizer='adam', metrics=[coeff_determination])
            epochs = 10
            result = model.fit(x_train, y_train, epochs=epochs, batch_size=72,
                               validation_data=(x_test, y_test), verbose=2, shuffle=False)
However, I got a traceback that says the following:
ValueError: Error when checking target: expected dense_192 to have 3 dimensions, but got array with shape (37118, 1)
and the error occurs in the following line.
---> 19 result = model.fit(x_train, y_train, epochs=epochs, batch_size=72, validation_data=(x_test, y_test), verbose=2, shuffle=False)
Could anyone kindly give me a hint on how to solve this problem? Thanks a lot for your time and support.
Sincerely,
Wilson
Use return_sequences=False for your last LSTM layer so that it returns only a vector with the last hidden state; a sketch is below.
Sincerely,
Alexander
more details: How to use return_sequences option and TimeDistributed layer in Keras?
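For illustration, a minimal sketch of the question's model-building loop with that fix applied (a sketch of mine, not the original answer's code; the names layer_size, LSTM_layer, dense_layer, and x_train follow the question's code, and the imports are as in the question):

model = Sequential()
# The first LSTM returns the full sequence only if more LSTM layers follow
model.add(LSTM(layer_size, input_shape=(x_train.shape[1], x_train.shape[2]),
               return_sequences=(LSTM_layer > 1)))
# Intermediate LSTM layers (if any) keep return_sequences=True
for i in range(LSTM_layer - 2):
    model.add(LSTM(layer_size, return_sequences=True))
# The last LSTM returns only the final hidden state: shape (batch, layer_size)
if LSTM_layer > 1:
    model.add(LSTM(layer_size, return_sequences=False))
for i in range(dense_layer):
    model.add(Dense(layer_size))
model.add(Dense(1))  # output shape (batch, 1) now matches y_train's (37118, 1)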
I am getting this error for an LSTM model.
The data has three columns:
Sentence (input)
Value (output)
Label (output)
I am using tensorflow 2.3.0. I tried 2.0.0 as suggested, but I ran into dependency errors. Please help me resolve the error below, quoted here:
"ValueError: Data cardinality is ambiguous:
x sizes: 720
y sizes: 89
Please provide data which shares the same first dimension."
### Create sequences
vocab_size = 20000
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(df['Sentence'])
sequences = tokenizer.texts_to_sequences(df['Sentence'])
data = pad_sequences(sequences, maxlen=100)

le = LabelEncoder()
df['label'] = le.fit_transform(df['label'])

X = df['Sentence']
y = df[['value','label']]
X_train, y_train, X_test, y_test = train_test_split(X, y, test_size=0.1)
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X_train)
X_train = tokenizer.texts_to_sequences(X_train)
X_test = tokenizer.texts_to_sequences(X_test)
vocab_size = len(tokenizer.word_index) + 1
maxlen = 200
X_train = pad_sequences(X_train, padding='post', maxlen=maxlen)
X_test = pad_sequences(X_test, padding='post', maxlen=maxlen)
#print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
model = Sequential()
model.add(Embedding(vocab_size, 128))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Flatten())
model.add(Dense(2, activation='sigmoid'))
# try using different optimizers and different optimizer configs
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
print(model.summary())

model.fit(X_train, y_train, epochs=3, batch_size=8, validation_split=0.1)

accr = model.evaluate(X_test, y_test)
print('Test set\n Loss: {:0.3f}\n Accuracy: {:0.3f}'.format(accr[0], accr[1]))
Your data has two outputs (the Value and Label columns), but your model has only one output tensor, so the two columns must be supplied as a single target array.
This code works:
import tensorflow as tf

# Dummy data with the shapes the model expects
X_train = tf.random.uniform([100, 100], 0, 100, dtype=tf.int32)
y_train = tf.random.uniform([100, 2])
model.fit(X_train, y_train, epochs=3, batch_size=8, validation_split=0.1)
Check the shape of y_train. It should be [batch_size, 2].
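Building on that, here is a small sketch of how the question's split could produce a correctly shaped target. Note that sklearn's train_test_split returns X_train, X_test, y_train, y_test in that order, while the question unpacks it as X_train, y_train, X_test, y_test; the names below follow the question's code, and this is a sketch, not the original answer's code:

import numpy as np
from sklearn.model_selection import train_test_split

# Pack both output columns into one (n_samples, 2) array
y = df[['value', 'label']].to_numpy().astype('float32')

# Note the return order: both X splits first, then both y splits
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

print(y_train.shape)  # (n_train, 2) -- matches the model's Dense(2) output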
I have 62,796 training samples (x_train) and 15,684 test samples (x_test), and I want to forecast the values that come after them using an LSTM in Keras. Here is my code:
...
look_back = 20
train_size = int(len(data) * 0.80)
test_size = len(data) - train_size
train = data[0:train_size]
test = data[train_size:len(data)]
x_train, y_train = create_dataset(train, look_back)
x_test, y_test = create_dataset(test, look_back)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
# Repeat each target 20 times so y matches the (samples, 20, 1) sequence output
y_train = np.repeat(y_train.reshape(-1, 1), 20, axis=1).reshape(-1, 20, 1)
y_test = np.repeat(y_test.reshape(-1, 1), 20, axis=1).reshape(-1, 20, 1)
...
model = Sequential()
model.add(LSTM(512, return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(512, return_sequences=True))
model.add(Dropout(0.3))
model.add(LSTM(1, return_sequences=True))
model.compile(loss='mean_squared_error', optimizer='rmsprop', metrics=['accuracy'])
model.summary()
model.fit(x_train, y_train, epochs=10, batch_size=64)
p = model.predict(x_test)
So I ran predictions = model.predict(x_train); the result's shape is (62796, 20, 1). Then I tried this code:
future = []
currentStep = predictions[-20:, :, :]  # -20 is the last look_back number
for i in range(10):
    currentStep = model.predict(currentStep)
    future.append(currentStep)
The resulting values in future are very different from the first 4,000 values of p = model.predict(x_test) (the two plots I compared are omitted here). I want to know how to predict the exact next value, but the difference between the two results is very large, and I don't know where the code went wrong. Here is the full source: https://gist.github.com/Lay4U/654f70bd1fb9c4f7d5bdb21ddcb588ab
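For reference, a common pattern for this kind of recursive forecasting keeps the model input shaped (1, look_back, 1) and slides the window forward by one step per iteration. Below is a minimal sketch under that assumption (not the original code; it assumes this model's sequence output, where the last timestep holds the newest prediction):

import numpy as np

window = x_test[-1:].copy()    # shape (1, look_back, 1): the last known window
future = []
for _ in range(10):
    step = model.predict(window)       # shape (1, look_back, 1) for this model
    next_value = step[0, -1, 0]        # prediction for the final timestep
    future.append(next_value)
    # Drop the oldest value and append the new prediction to the window
    new_step = np.array(next_value, dtype=window.dtype).reshape(1, 1, 1)
    window = np.concatenate([window[:, 1:, :], new_step], axis=1)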
I am trying to run an LSTM using Keras on my custom feature set. I have train and test features in separate files. Each CSV file contains 11 columns, with the last column being the class label. There are 40 classes in total in my dataset. The problem is that I am not able to figure out the correct input_shape for the first layer. I have explored Stack Overflow and GitHub but still cannot solve this.
Below is my complete code.
import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
numpy.random.seed(7)
train_dataset = numpy.loadtxt("train.csv", delimiter=",")
X_train = train_dataset[:, 0:10]
y_train = train_dataset[:, 10]
test_dataset = numpy.loadtxt("test.csv", delimiter=",")
X_test = test_dataset[:, 0:10]
y_test = test_dataset[:, 10]
model = Sequential()
model.add(LSTM(32, return_sequences=True, input_shape=X_train.shape))
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(32))
model.add(Dense(1, activation='softmax'))
model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=10, epochs=1)
score, acc = model.evaluate(X_test, y_test, batch_size=10)
print('Test score:', score)
print('Test accuracy:', acc * 100)
Whatever I change the input_shape parameter to, I get an error either in the first LSTM layer or in the fit method.
You don't have a time dimension in your input. The input to an RNN should have shape (batch_size, time_steps, features), while your input has shape (batch_size, features). If you want to feed your 10 columns one at a time, you should reshape the array with
numpy.reshape(train_dataset, (-1, train_dataset.shape[1], 1))
Try this code:
train_dataset = numpy.loadtxt("train.csv", delimiter=",")
# Add a feature dimension: (samples, 11) -> (samples, 11, 1)
train_dataset = numpy.reshape(train_dataset, (-1, train_dataset.shape[1], 1))
X_train = train_dataset[:, 0:10]  # (samples, 10, 1): 10 timesteps, 1 feature
y_train = train_dataset[:, 10]

test_dataset = numpy.loadtxt("test.csv", delimiter=",")
test_dataset = numpy.reshape(test_dataset, (-1, test_dataset.shape[1], 1))
X_test = test_dataset[:, 0:10]
y_test = test_dataset[:, 10]
model = Sequential()
model.add(LSTM(32, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(32, return_sequences=True))
model.add(LSTM(32))
model.add(Dense(1, activation='softmax'))
model.compile(loss='mean_squared_error', optimizer='sgd', metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=10, epochs=1)
score, acc = model.evaluate(X_test, y_test, batch_size=10)
print('Test score:', score)
print('Test accuracy:', acc * 100)
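One more thing worth checking: with 40 classes, Dense(1, activation='softmax') will always output 1, so the accuracy above is not meaningful. Here is a sketch of a classification head matching the 40-class labels (my addition, assuming the labels are integers 0-39; it would replace the last Dense and compile lines above):

# 40-way softmax head; sparse_categorical_crossentropy accepts integer labels
model.add(Dense(40, activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd',
              metrics=['accuracy'])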