Incompatible shapes in output layer - Tensorflow - python

I am trying to build a bi-LSTM model in TensorFlow, in a Google Colab environment. During training, the model raises an issue: the last layer reports a shape incompatibility. I wonder if there is any way to reshape x_train and y_train to fix this problem.
Traceback
ValueError: Shapes (16, 11) and (16, 10) are incompatible
If I change the number of units in my output layer from 10 to 11, it does not give any error and the model can be trained. However, I want the output to be 10 and not 11.
# current output layer (runs without error)
tf.keras.layers.Dense(11, activation='softmax')
# expected output layer (shape incompatibility)
tf.keras.layers.Dense(10, activation='softmax')
BiLSTM Model
# note: rnn_units was undefined in the posted snippet; assumed here as a parameter
def build_model(vocab_size, embedding_dim=64, input_length=30, rnn_units=64):
    print('\nbuilding the model...\n')
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(input_dim=(vocab_size + 1), output_dim=embedding_dim, input_length=input_length),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(rnn_units, return_sequences=True, dropout=0.2)),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(rnn_units, return_sequences=True, dropout=0.2)),
        tf.keras.layers.GlobalMaxPool1D(),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(64, activation='tanh'),
        # softmax output layer
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    # optimizer & loss
    opt = 'RMSprop'  # tf.optimizers.Adam(learning_rate=1e-4)
    loss = 'categorical_crossentropy'
    # metrics
    metrics = ['accuracy', 'AUC', 'Precision', 'Recall']
    # compile model
    model.compile(optimizer=opt,
                  loss=loss,
                  metrics=metrics)
    model.summary()
    return model
BATCH_SIZE is set to 16, and the shapes of x_train and y_train are:
x_train.shape
(800, 30)
y_train.shape
(800,)
Training
def train(model, x_train, y_train, x_validation, y_validation,
          epochs, batch_size=32, patience=5,
          verbose=2, monitor_es='accuracy', mode_es='auto', restore=True,
          monitor_mc='val_accuracy', mode_mc='max'):
    print('\ntraining...\n')
    # callbacks
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor=monitor_es,
                                                      verbose=1, mode=mode_es, restore_best_weights=restore,
                                                      min_delta=1e-3, patience=patience)
    model_checkpoint = tf.keras.callbacks.ModelCheckpoint('tfjsmode.h5', monitor=monitor_mc, mode=mode_mc,
                                                          verbose=1, save_best_only=True)
    # define TensorBoard as a Keras callback
    tensorboard = TensorBoard(
        log_dir='./logs',
        histogram_freq=1,
        write_images=True
    )
    keras_callbacks = [tensorboard, early_stopping, model_checkpoint]
    # train model
    history = model.fit(x_train, y_train,
                        batch_size=batch_size, epochs=epochs, verbose=verbose,
                        validation_data=(x_validation, y_validation),
                        callbacks=keras_callbacks)
    return history
Preprocessing
def preprocess(x, padding_shape=30):
    return np.array([ord(i.lower()) - ord('a') + 1 if not i.isdigit() and i != ' ' else 0
                     for i in list(x)] + ([0] * (padding_shape - len(x))), dtype=int)

def prepare_dataset(labeldict: dict, test_size=.3, validation_size=.1):
    print('preparing the dataset...\n')
    from sklearn import preprocessing
    # load dataset
    # split dataset (as strings in pandas.core.series.Series objects)
    x, y = load_clean_dataset()
    x = np.array(list(map(preprocess, x)))
    y = np.array(list(map(lambda x: labeldict[x.replace(' ', '_')], y)))
    print(('y: {}').format(y))
    # create/split train, validation and test sets and shuffle the data
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test_size, shuffle=True)
    print(x.max(), x.min())
    x_train_val, x_validation, y_train_val, y_validation = train_test_split(x_train, y_train, test_size=test_size, shuffle=True)
    # pandas.core.series.Series to numpy array
    x_train, y_train = np.array(x_train), np.array(y_train)
    x_validation, y_validation = np.array(x_validation), np.array(y_validation)
    x_test, y_test = np.array(x_test), np.array(y_test)
    x_train_val, y_train_val = np.array(x_train_val), np.array(y_train_val)
    print(('\nx_train: \n{}\n\ny_train: \n{}').format(x_train_val, y_train_val))
    y_train = tf.keras.utils.to_categorical(y, num_classes=10)
    return (x_train, y_train), (x_validation, y_validation), (x_test, y_test), (x_train_val, y_train_val)

It seems you currently have labels as integers (i.e. not one-hot encoded vectors). For example, your y seems to be like:
[0, 1, 8, 9, ....] # a vector of 800 elements
There are two ways to train a model on such data.
Alternative 1 (easiest I guess)
Use sparse_categorical_crossentropy as the loss function of the model
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=metrics)
Alternative 2
Convert your labels to one-hot encoded using,
y_onehot = tf.keras.utils.to_categorical(y, num_classes=10)
and then keep the loss of the model as categorical_crossentropy
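Putting the two alternatives side by side, here is a minimal sketch with dummy integer labels standing in for the question's y (model is assumed to be the network returned by build_model above):
import numpy as np
import tensorflow as tf

# integer labels in [0, 9], like the question's y: shape (800,)
y = np.random.randint(0, 10, size=800)

# Alternative 1: keep the integer labels and only switch the loss
model.compile(optimizer='RMSprop', loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Alternative 2: one-hot encode the labels and keep categorical_crossentropy
y_onehot = tf.keras.utils.to_categorical(y, num_classes=10)
print(y_onehot.shape)  # (800, 10) -- matches the Dense(10) softmax output
Either way, the output layer can stay at 10 units.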

Related

Bitcoin Price Prediction using LSTM

So I wanted to predict the future price of Bitcoin using an LSTM, but as I am new to this domain I'm stuck on what to do to print just the future price. Let's say I want to predict the price 3 months ahead from now: what should I do to get just the closing price for BTC 3 months from now?
It would be really appreciated if you guys could help me with this problem.
# the snippet starts mid-function in the post; signature inferred from the call below
def prepare_data(df, target_col, window_len=10, zero_base=True, test_size=0.2):
    train_data, test_data = train_test_split(df, test_size=test_size)
    X_train = extract_window_data(train_data, window_len, zero_base)
    X_test = extract_window_data(test_data, window_len, zero_base)
    y_train = train_data[target_col][window_len:].values
    y_test = test_data[target_col][window_len:].values
    if zero_base:
        y_train = y_train / train_data[target_col][:-window_len].values - 1
        y_test = y_test / test_data[target_col][:-window_len].values - 1
    return train_data, test_data, X_train, X_test, y_train, y_test
def build_lstm_model(input_data, output_size, neurons=100, activ_func='linear',
                     dropout=0.2, loss='mse', optimizer='adam'):
    model = Sequential()
    model.add(LSTM(neurons, input_shape=(input_data.shape[1], input_data.shape[2])))
    model.add(Dropout(dropout))
    model.add(Dense(units=output_size))
    model.add(Activation(activ_func))
    model.compile(loss=loss, optimizer=optimizer)
    return model
np.random.seed(42)
window_len = 5
test_size = 0.2
zero_base = True
lstm_neurons = 100
epochs = 200
batch_size = 32
loss = 'mse'
dropout = 0.2
optimizer = 'adam'
train, test, X_train, X_test, y_train, y_test = prepare_data(
    hist, target_col, window_len=window_len, zero_base=zero_base, test_size=test_size)
print(X_test)
model = build_lstm_model(
    X_train, output_size=1, neurons=lstm_neurons, dropout=dropout, loss=loss,
    optimizer=optimizer)
history = model.fit(
    X_train, y_train, validation_data=(X_test, y_test), epochs=epochs,
    batch_size=batch_size, verbose=1, shuffle=True)
targets = test[target_col][window_len:]   # assumed: used below but missing from the posted snippet
preds = model.predict(X_test).squeeze()   # assumed: used below but missing from the posted snippet
preds = test[target_col].values[:-window_len] * (preds + 1)
preds = pd.Series(index=targets.index, data=preds)
print(test)
line_plot(targets, preds, 'actual', 'prediction', lw=3)

DNN model suddenly not converging (having low loss and not changing much)

This is my code below.
I worked with the exact same model and it converged well when I didn't scale the data.
The loss started somewhere around 1.0 and went down to 0.45.
However, after rewriting my code the model's loss is a bit weird. It starts at 0.4 and does not converge. I'm wondering if I introduced any errors while rewriting the code. Can someone please help me find where I went wrong?
label = np.array(df['label'])
features = np.array(df.iloc[:, 0:8])
X_train, X_rem, y_train, y_rem = train_test_split(features, label, test_size=0.3, random_state=seed)
X_val, X_test, y_val, y_test = train_test_split(X_rem, y_rem, test_size=0.5, random_state=seed)
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

def DNN():
    input_layer = Input(shape=(8,), name='input_layer')
    Dense_1 = Dense(64, activation='relu', kernel_initializer=HeUniform(seed=1))(input_layer)
    Dense_2 = Dense(32, activation='relu', kernel_initializer=HeUniform(seed=1))(Dense_1)
    output_layer = Dense(1, activation='sigmoid', kernel_initializer=GlorotUniform(seed=1))(Dense_2)
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['acc', 'Recall', 'AUC'])
    return model

model = DNN()
my_callbacks = [
    EarlyStopping(patience=10),
    ReduceLROnPlateau(monitor="val_loss", factor=0.3, patience=5, cooldown=3, mode="min", min_lr=0.0001, verbose=1),
    ModelCheckpoint(monitor="val_loss", save_best_only=True, filepath='./checkpoint'),
    TensorBoard(log_dir='./logs')
]
history = model.fit(X_train, y_train, batch_size=64, epochs=150, validation_data=(X_val, y_val), callbacks=my_callbacks)
This outputs a training log in which the loss starts around 0.4 and barely changes.

Why does passing a list of metrics when compiling Tensorflow models result in an InvalidArgumentError?

I'm learning Tensorflow and I'm trying to pass a list of metrics to be evaluated when I compile classification models in a loop, like the example in the documentation.
However, when I pass a list like:
METRICS = [
    keras.metrics.AUC(name='auc'),
    keras.metrics.AUC(name='prc', curve='PR')
]
def compile_model(model, X_train, y_train, X_val=None, y_val=None, callbacks=None,
                  batch_size=1000, epochs=10, optimizer='adam',
                  loss_func=keras.losses.BinaryCrossentropy(), metrics=['accuracy']):
    # copy the model
    mdl = model
    mdl.compile(optimizer=optimizer, loss=loss_func, metrics=metrics)
    # fit model
    if (X_val is not None) and (y_val is not None):
        mdl.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, callbacks=callbacks,
                validation_data=(X_val, y_val))
    else:
        mdl.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, callbacks=callbacks)
    return mdl

# compile model
model = compile_model(model, X_train, y_train, X_test, y_test, callbacks=callbacks,
                      batch_size=1000, epochs=10, metrics=METRICS)
I get the error:
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'metrics/tp/AssignAddVariableOp/resource_1' with dtype resource
[[node metrics/tp/AssignAddVariableOp/resource_1 (defined at /opt/anaconda3/envs/tf/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1751) ]] [Op:__inference_keras_scratch_graph_6163]
If I try to compile the model with the list of metrics written inline, like
model = compile_model(model, X_train, y_train, X_test, y_test, callbacks=callbacks,
                      batch_size=1000, epochs=10,
                      metrics=[keras.metrics.AUC(name='auc'), keras.metrics.AUC(name='prc', curve='PR')])
the models are able to compile without any issues. None of the related questions I've seen about these placeholder tensors involve Keras' metrics. Why does the error only occur when I try to pass the metrics that way?

Error when checking target: expected dense_192 to have 3 dimensions, but got array with shape (37118, 1)

Dear all: I'm very new to deep learning. I was trying to add a for loop to test all the possible combinations to get the best result. Currently what I have is the following.
def coeff_determination(y_true, y_pred):
    SS_res = K.sum(K.square(y_true - y_pred))
    SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
    return (1 - SS_res / (SS_tot + K.epsilon()))

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
x_train = x_train.to_numpy()
x_test = x_test.to_numpy()
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)
which prints:
(37118, 105)
(37118,)
(15908, 105)
(15908,)
timesteps = 3
features = 35  # this is the number of features
x_train = x_train.reshape((x_train.shape[0], timesteps, features))
x_test = x_test.reshape((x_test.shape[0], timesteps, features))

dense_layers = [0, 1, 2]
layer_sizes = [32, 64, 128]
LSTM_layers = [1, 2, 3]

for dense_layer in dense_layers:
    for layer_size in layer_sizes:
        for LSTM_layer in LSTM_layers:
            NAME = "{}-lstm-{}-nodes-{}-dense-{}".format(LSTM_layer, layer_size, dense_layer, int(time.time()))
            tensorboard = TensorBoard(log_dir=f"LSTM_logs\\{NAME}")
            print(NAME)

            model = Sequential()
            model.add(LSTM(layer_size, input_shape=(x_train.shape[1], x_train.shape[2]), return_sequences=True))
            for i in range(LSTM_layer - 1):
                model.add(LSTM(layer_size, input_shape=(x_train.shape[1], x_train.shape[2]), return_sequences=True))
            for i in range(dense_layer):
                model.add(Dense(layer_size))
            model.add(Dense(1))
            model.compile(loss='mae', optimizer='adam', metrics=[coeff_determination])

            epochs = 10
            result = model.fit(x_train, y_train, epochs=epochs, batch_size=72, validation_data=(x_test, y_test), verbose=2, shuffle=False)
However, I got a traceback that says the following:
ValueError: Error when checking target: expected dense_192 to have 3 dimensions, but got array with shape (37118, 1)
and the error occurs in the following line:
---> 19 result = model.fit(x_train, y_train, epochs=epochs, batch_size=72, validation_data=(x_test, y_test), verbose=2, shuffle=False)
Could anyone please kindly give me a hint on how to solve the problem? Thanks a lot for your time and support.
Sincerely
Wilson
Use return_sequences=False for your last LSTM layer so it only returns a vector with the last hidden state.
Sincerely,
Alexander
more details: How to use return_sequences option and TimeDistributed layer in Keras?
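For example, the question's model-building loop could be adjusted like this (a sketch reusing the names layer_size, LSTM_layer, dense_layer and the shapes from the post above):
model = Sequential()
# only return full sequences if another LSTM follows
model.add(LSTM(layer_size, input_shape=(x_train.shape[1], x_train.shape[2]),
               return_sequences=(LSTM_layer > 1)))
for i in range(LSTM_layer - 1):
    # the last stacked LSTM returns only the final hidden state,
    # so the Dense layers see (batch, layer_size) instead of (batch, timesteps, layer_size)
    model.add(LSTM(layer_size, return_sequences=(i < LSTM_layer - 2)))
for i in range(dense_layer):
    model.add(Dense(layer_size))
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam', metrics=[coeff_determination])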

Unable to train LSTM model (ValueError: Data cardinality is ambiguous:)

I am getting this error for an LSTM model.
The data has three columns:
Sentence (input)
Value (output)
Label (output)
I am using tensorflow 2.3.0. I tried 2.0.0 as suggested, but I ended up with dependency errors.
Please help me resolve the error below in quotes:
"ValueError: Data cardinality is ambiguous:
x sizes: 720
y sizes: 89
Please provide data which shares the same first dimension."
### create sequences
vocab_size = 20000
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(df['Sentence'])
sequences = tokenizer.texts_to_sequences(df['Sentence'])
data = pad_sequences(sequences, maxlen=100)

le = LabelEncoder()
df['label'] = le.fit_transform(df['label'])
X = df['Sentence']
y = df[['value', 'label']]
X_train, y_train, X_test, y_test = train_test_split(X, y, test_size=0.1)

tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X_train)
X_train = tokenizer.texts_to_sequences(X_train)
X_test = tokenizer.texts_to_sequences(X_test)
vocab_size = len(tokenizer.word_index) + 1
maxlen = 200
X_train = pad_sequences(X_train, padding='post', maxlen=maxlen)
X_test = pad_sequences(X_test, padding='post', maxlen=maxlen)
#print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

model = Sequential()
model.add(Embedding(vocab_size, 128))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Flatten())
model.add(Dense(2, activation='sigmoid'))

# try using different optimizers and different optimizer configs
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
print(model.summary())

model.fit(X_train, y_train, epochs=3, batch_size=8, validation_split=0.1)
accr = model.evaluate(X_test, y_test)
print('Test set\n Loss: {:0.3f}\n Accuracy: {:0.3f}'.format(accr[0], accr[1]))
Your data has two outputs (the Value and Label columns). But your model has only one output.
This code works:
X_train = tf.random.uniform([100, 100], 0, 100, dtype=tf.int32)
y_train = tf.random.uniform([100, 2])
model.fit(X_train, y_train, epochs=3, batch_size=8, validation_split=0.1)
Check the shape of y_train. It should be [batch_size, 2].
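As a quick sanity check against the question's variables (a sketch; it only verifies that the first dimensions agree before fit is called):
import numpy as np

# both arrays must share the same number of samples N
print(X_train.shape)              # expect (N, maxlen)
print(np.asarray(y_train).shape)  # expect (N, 2) to match the Dense(2) output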
