I am building a 5-layer autoencoder with Keras. I made the model that maps from input to output which was fine. I built another model that maps from input to the latent coded vector which worked fine. However, I then tried to make a decoding model that maps from the latent coded vector to the output which did not work.
I know that first I should be making an input layer for the decoded model that makes that shape but I can't figure out how to get my coded layers data as an input for the decoded model and to let it map from the coded vector to the final layer.
from keras.layers import Input, Dense
from keras.models import Model
from keras.datasets import mnist
import numpy as np
(x_train, _), (x_test, _) = mnist.load_data()
# Prepare data and normalize
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
x_train = x_train.reshape(len(x_train), -1)
x_test = x_test.reshape(len(x_test), -1)
input_size = 784
hidden_size = 128
coded_size = 64
x = Input(shape=(input_size,))
hidden_1 = Dense(hidden_size, activation='relu')(x)
coded =Dense(coded_size, activation='relu')(hidden_1)
hidden_2 = Dense(hidden_size, activation='relu')(coded)
r = Dense(input_size, activation='sigmoid')(hidden_2)
autoencoder = Model(inputs=x, outputs=r)
encoder = Model(inputs=x, outputs=coded)
decoder_input = Input(shape=(coded_size,)) # should do this, but don't know how to connect it below
decoder = Model(inputs=coded, output=r)
You can do it like this:
decoder_input = Input(shape=(coded_size,))
next_input = decoder_input
# get the decoder layers and apply them consecutively
for layer in autoencoder.layers[-2:]:
next_input = layer(next_input)
decoder = Model(inputs=decoder_input, outputs=next_input)
As a side note, there is no h in your model. I think it must be replaced by coded.
Related
i'm a deep learning enthusiasts, and i want finetune the keras.application deep model to train a new model, but something wrong. And the code is :
import keras.applications.resnet_v2
from keras.applications.resnet_v2 import ResNet50V2
from keras.applications.resnet_v2 import preprocess_input, decode_predictions
import numpy as np
import keras
import tensorflow as tf
from keras.layers import Dense, Input
from keras.models import Model
import os
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()#train_szie :(60000, 28, 28)
print(x_train.shape)#(60000, 28, 28)
print(y_train.shape)#(60000,)
x_train = [cv2.cvtColor(cv2.resize(img,(28,28)),cv2.COLOR_GRAY2BGR) for img in x_train]
x_train = np.asarray(x_train)
x_test = [cv2.cvtColor(cv2.resize(img,(28,28)),cv2.COLOR_GRAY2BGR) for img in x_test]
x_test = np.asarray(x_test)
x_train, x_test = x_train / 255.0, x_test / 255.0
base_model = ResNet50V2( weights='imagenet', include_top = False)
x = base_model.get_layer("conv2_block1_preact_bn").output
x = Dense(1024, activation='relu')(x)
predictions = Dense(200, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
model.fit(x_train, y_train, epochs=1)
when i run it, the code is error,named
"res = tf.nn.sparse_softmax_cross_entropy_with_logits(
Node: 'sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits'
logits and labels must have the same first dimension, got logits shape [1568,10] and labels shape [32]
[[{{node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_train_function_4157]"
but when i write the code, is now running fine.
x = base_model.output
I don't know why,i need your help, thanks
Quite a few changes required in your code and has multiple issues.
You're missing a Flatten layer in the middle. Because of that, when computing loss (with a 1D vector of labels), tensorflow squash all of the dimensions except the last together giving [7 x 7 x32 = 1568, 200] sized set of logits. Here 7x7 is the output width and height for a 28x28 sized input by your last conv layer. 32 is default batch size when using model.fit(). This is not compatible with the labels which would simply be a 32 item 1d vector in one iteration.
base_model = ResNet50V2(weights='imagenet', include_top = False)
# Input the way you've defined has undefined width and heigh
# dimension. This is an important requirement for the Flatten layer downstream
input = Input(shape=(28,28,3))
x = input
# Getting only the layers we care about
for layer in base_model.layers:
x = layer(x)
if layer.name == "conv2_block1_preact_bn":
break
# Before feeding data into the dense layer, you need to flatten
x = Flatten()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(200, activation='softmax')(x)
model = Model(inputs=input, outputs=predictions)
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
One concern I have is that mnist has 10 classes, so your last layer should only need 10 not 200
I am used to working in PyTorch but now have to learn Tensorflow for my job. I am trying to get up to speed by creating a simple dense network and training it on the MNIST dataset, but I cannot get it to train. My super simple code:
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
# Load mnist data from keras
(train_data, train_label), (test_data, test_label) = tf.keras.datasets.mnist.load_data(path="mnist.npz")
train_label, test_label = to_categorical(train_label), to_categorical(test_label)
train_data, train_label, test_data, test_label = Flatten()(train_data), Flatten()(train_label), Flatten()(test_data), Flatten()(test_label)
# Create generic SGD optimizer (no learning schedule)
optimizer = SGD(learning_rate = 0.01)
# Define function to build and compile model
def build_mnist_model(input_shape, batch_size = 30):
input_img = Input(shape = input_shape, batch_size = batch_size)
# Pass through dense layer
x = Dense(200, activation = 'relu', use_bias = True)(input_img)
x = Dense(400, activation = 'relu', use_bias = True)(x)
scores = Dense(10, activation = 'softmax', use_bias = True)(x)
# Create and compile tf model
mnist_model = Model(input_img, scores)
mnist_model.compile(optimizer = optimizer, loss = 'categorical_crossentropy')
return mnist_model
# Build the model
mnist_model = build_mnist_model(train_data[0].shape)
# Train the model
mnist_model.fit(
x = train_data,
y = train_label,
batch_size = 30,
epochs = 20,
verbose = 2,
shuffle = True,
# steps_per_epoch = 200
)
When I run this I get
ValueError: When using data tensors as input to a model, you should specify the `steps_per_epoch` argument.
This does not really make sense to me because my train_data and train_label are just regular tensors and per the Tensorflow documentation in this case it should default to the number of samples in the dataset divided by the batch size (which would be 200 in my case).
At any rate, I tried specifying steps_per_epoch = 200 when I call mnist_model.fit() but then I get a different error:
InvalidArgumentError: Incompatible shapes: [60000,10] vs. [30,1]
[[{{node training_4/SGD/gradients/gradients/loss_5/dense_17_loss/softmax_cross_entropy_with_logits_grad/mul}}]]
I can't seem to discern where a size mismatch would come from. In PyTorch, I am used to manually creating batches (by subindexing my data and label tensors) but in Tensorflow this seems to happen automatically. As such, this leaves me quite confused about what batch has the wrong size, how it got the wrong size, etc. I hope this simple model is way easier than I am making it and I just do not know the Tensorflow tricks yet.
Thanks for the help.
This is my simple reproducible code:
from keras.callbacks import ModelCheckpoint
from keras.models import Model
from keras.models import load_model
import keras
import numpy as np
SEQUENCE_LEN = 45
LATENT_SIZE = 20
VOCAB_SIZE = 100
inputs = keras.layers.Input(shape=(SEQUENCE_LEN, VOCAB_SIZE), name="input")
encoded = keras.layers.Bidirectional(keras.layers.LSTM(LATENT_SIZE), merge_mode="sum", name="encoder_lstm")(inputs)
decoded = keras.layers.RepeatVector(SEQUENCE_LEN, name="repeater")(encoded)
decoded = keras.layers.Bidirectional(keras.layers.LSTM(VOCAB_SIZE, return_sequences=True), merge_mode="sum", name="decoder_lstm")(decoded)
autoencoder = keras.models.Model(inputs, decoded)
autoencoder.compile(optimizer="sgd", loss='mse')
autoencoder.summary()
x = np.random.randint(0, 90, size=(10, SEQUENCE_LEN,VOCAB_SIZE))
y = np.random.normal(size=(10, SEQUENCE_LEN, VOCAB_SIZE))
NUM_EPOCHS = 1
checkpoint = ModelCheckpoint(filepath='checkpoint/{epoch}.hdf5')
history = autoencoder.fit(x, y, epochs=NUM_EPOCHS,callbacks=[checkpoint])
and here is my code to have a look at the weights in the encoder layer:
for epoch in range(1, NUM_EPOCHS + 1):
file_name = "checkpoint/" + str(epoch) + ".hdf5"
lstm_autoencoder = load_model(file_name)
encoder = Model(lstm_autoencoder.input, lstm_autoencoder.get_layer('encoder_lstm').output)
print(encoder.output_shape[1])
weights = encoder.get_weights()[0]
print(weights.shape)
for idx in range(encoder.output_shape[1]):
token_idx = np.argsort(weights[:, idx])[::-1]
here print(encoder.output_shape) is (None,20) and print(weights.shape) is (100, 80).
I understand that get_weight will print the weight transition after the layer.
The part I did not get based on this architecture is 80. what is it?
And, are the weights here the weight that connect the encoder layer to the decoder? I meant the connection between encoder and the decoder.
I had a look at this question here. as it is only simple dense layers I could not connect the concept to the seq2seq model.
Update1
What is the difference between:
encoder.get_weights()[0] and encoder.get_weights()[1]?
the first one is (100,80) and the second one is (20,80) like conceptually?
any help is appreciated:)
The encoder as you have defined it is a model, and it consists of two layers: an input layer and the 'encoder_lstm' layer which is the bidirectional LSTM layer in the autoencoder. So its output shape would be the output shape of 'encoder_lstm' layer which is (None, 20) (because you have set LATENT_SIZE = 20 and merge_mode="sum"). So the output shape is correct and clear.
However, since encoder is a model, when you run encoder.get_weights() it would return the weights of all the layers in the model as a list. The bidirectional LSTM consists of two separate LSTM layers. Each of those LSTM layers has 3 weights: the kernel, the recurrent kernel and the biases. So encoder.get_weights() would return a list of 6 arrays, 3 for each of the LSTM layers. The first element of this list, as you have stored in weights and is subject of your question, is the kernel of one of the LSTM layers. The kernel of an LSTM layer has a shape of (input_dim, 4 * lstm_units). The input dimension of 'encoder_lstm' layer is VOCAB_SIZE and its number of units is LATENT_SIZE. Therefore, we have (VOCAB_SIZE, 4 * LATENT_SIZE) = (100, 80) as the shape of kernel.
I am trying to build an LSTM Autoencoder to predict Time Series data. Since I am new to Python I have mistakes in the decoding part. I tried to build it up like here and Keras. I could not understand the difference between the given examples at all. The code that I have right now looks like:
Question 1: is how to choose the batch_size and input_dimension when each sample has 2000 values?
Question 2: How to get this LSTM Autoencoder working (the model and the prediction) ? This ist just the model, but how to predict? That it is predicting from the lets say starting from sample 10 on till the end of the data?
Mydata has in total 1500 samples, I would go with 10 time steps (or more if better), and each sample has 2000 Values. If you need more information I would include them as well later.
trainX = np.reshape(data, (1500, 10,2000))
from keras.layers import *
from keras.models import Model
from keras.layers import Input, LSTM, RepeatVector
parameter
timesteps=10
input_dim=2000
units=100 #choosen unit number randomly
batch_size=2000
epochs=20
Model
inpE = Input((timesteps,input_dim))
outE = LSTM(units = units, return_sequences=False)(inpE)
encoder = Model(inpE,outE)
inpD = RepeatVector(timesteps)(outE)
outD1 = LSTM(input_dim, return_sequences=True)(outD
decoder = Model(inpD,outD)
autoencoder = Model(inpE, outD)
autoencoder.compile(loss='mean_squared_error',
optimizer='rmsprop',
metrics=['accuracy'])
autoencoder.fit(trainX, trainX,
batch_size=batch_size,
epochs=epochs)
encoderPredictions = encoder.predict(trainX)
The LSTM model that I use is this one:
def get_model(n_dimensions):
inputs = Input(shape=(timesteps, input_dim))
encoded = LSTM(n_dimensions, return_sequences=False, name="encoder")(inputs)
decoded = RepeatVector(timesteps)(encoded)
decoded = LSTM(input_dim, return_sequences=True, name='decoder')(decoded)
autoencoder = Model(inputs, decoded)
encoder = Model(inputs, encoded)
return autoencoder, encoder
autoencoder, encoder = get_model(n_dimensions)
autoencoder.compile(optimizer='rmsprop', loss='mse',
metrics=['acc', 'cosine_proximity'])
history = autoencoder.fit(x, x, batch_size=100, epochs=100)
encoded = encoder.predict(x)
It works with the data that have, x is of size (3000, 180, 40), that is 3000 samples, timesteps=180 and input_dim=40.
I'm using this tutorial about autoencoders: https://blog.keras.io/building-autoencoders-in-keras.html
All the code is working, however the performance is very bad (the results are blurred) when I set 10e-5 for the regularization parameter, which is the parameter defined in the tutorial code. In fact, I need to decrease the regularization to 10e-8 to have a correct output.
My question is as follows: Why the result is so different from the tutorial? Data is the same and parameters are the same, I didn't expect a large difference.
I suspect that the default behavior of the Keras functions has been changed (automatic batch normalization performed in all cases?) from May 14th, 2016.
Outputs
With 10e-5 regularization (blurred); val_loss of 0.2967 after 50 epochs and 0.2774 after 100 epochs.
With 10e-8 regularization: val_loss of 0.1080 after 50 epochs and 0.1009 after 100 epochs.
With no regularization: val_loss of 0.1018 after 50 epochs and 0.0944 after 100 epochs.
Complete code (for reference)
# Source: https://blog.keras.io/building-autoencoders-in-keras.html
import numpy as np
np.random.seed(2713)
from keras.layers import Input, Dense
from keras.models import Model
from keras import regularizers
encoding_dim = 32
input_img = Input(shape=(784,))
# add a Dense layer with a L1 activity regularizer
encoded = Dense(encoding_dim, activation='relu',
activity_regularizer=regularizers.l1(10e-5))(input_img)
decoded = Dense(784, activation='sigmoid')(encoded)
autoencoder = Model(input_img, decoded)
# this model maps an input to its encoded representation
encoder = Model(input_img, encoded)
# create a placeholder for an encoded (32-dimensional) input
encoded_input = Input(shape=(encoding_dim,))
# retrieve the last layer of the autoencoder model
decoder_layer = autoencoder.layers[-1]
# create the decoder model
decoder = Model(encoded_input, decoder_layer(encoded_input))
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
from keras.datasets import mnist
(x_train, _), (x_test, _) = mnist.load_data()
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
print(x_train.shape)
print(x_test.shape)
autoencoder.fit(x_train, x_train,
epochs=100,
batch_size=256,
shuffle=True,
validation_data=(x_test, x_test))
# encode and decode some digits
# note that we take them from the *test* set
encoded_imgs = encoder.predict(x_test)
decoded_imgs = decoder.predict(encoded_imgs)
# use Matplotlib (don't ask)
import matplotlib.pyplot as plt
n = 10 # how many digits we will display
plt.figure(figsize=(20, 4))
for i in range(n):
# display original
ax = plt.subplot(2, n, i + 1)
plt.imshow(x_test[i].reshape(28, 28))
plt.gray()
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
# display reconstruction
ax = plt.subplot(2, n, i + 1 + n)
plt.imshow(decoded_imgs[i].reshape(28, 28))
plt.gray()
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
plt.show()
I have the same issue. And it is on GitHub here https://github.com/keras-team/keras/issues/5414
It seems like you were correct in just changing the constant.