I'm currently playing around with a basic LSTM-based Autoencoder using the Tensorflow library. The goal is for the Autoencoder to reconstruct multivariate time-series. I'm interested in moving the feature-wise normalization of the data from the data pipeline to inside the model.
Currently I normalize data the following way:
normalizer = Normalization(axis=-1)
normalizer.adapt(data_train)
data_train = normalizer(data_train)
inputs = Input(shape=[None, n_inputs])
x = LSTM(4, return_sequences=True)(inputs)
x = LSTM(2, return_sequences=True)(x)
x = LSTM(2, return_sequences=True)(x)
x = LSTM(4, return_sequences=True)(x)
x = TimeDistributed((Dense(n_inputs)))(x)
model = Model(inputs, x)
Which works as intended, leads to respectable loss (~1e-2) but is outside the model.
According to the documentation (under "Preprocessing data before the model or inside the model"), the following code should be equivalent to the above snippet, except that it runs inside the model:
normalizer = Normalization(axis=-1)
normalizer.adapt(data_train)
inputs = Input(shape=[None, n_inputs])
x = normalizer(inputs)
x = LSTM(4, return_sequences=True)(x)
x = LSTM(2, return_sequences=True)(x)
x = LSTM(2, return_sequences=True)(x)
x = LSTM(4, return_sequences=True)(x)
x = TimeDistributed((Dense(n_inputs)))(x)
model = Model(inputs, x)
However, running the latter variant leads to astronomical loss values (~1e3) and also worse results in testing. Hence my question is: what am I doing wrong? Could it be that I'm misunderstanding the documentation?
Any advice greatly appreciated!
The two methods seem to give consistent results as long as the normalizer is applied only to the inputs (i.e. to the feature matrix) when used outside the model:
import numpy as np
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense, LSTM, TimeDistributed
from tensorflow.keras.models import Model
from tensorflow.keras.layers.experimental.preprocessing import Normalization
np.random.seed(42)
# define the input parameters
num_samples = 100
time_steps = 10
train_size = 0.8
# generate the data
X = np.random.normal(loc=10, scale=5, size=(num_samples, time_steps, 1))
y = np.mean(X, axis=1) + np.random.normal(loc=0, scale=1, size=(num_samples, 1))
# split the data
X_train, X_test = X[:np.int(train_size * X.shape[0]), :], X[np.int(train_size * X.shape[0]):, :]
y_train, y_test = y[:np.int(train_size * y.shape[0]), :], y[np.int(train_size * y.shape[0]):, :]
# normalize the inputs inside the model
normalizer = Normalization()
normalizer.adapt(X_train)
inputs = Input(shape=[None, 1])
x = normalizer(inputs)
x = LSTM(4, return_sequences=True)(x)
x = LSTM(2, return_sequences=True)(x)
x = LSTM(2, return_sequences=True)(x)
x = LSTM(4, return_sequences=True)(x)
x = TimeDistributed((Dense(1)))(x)
model = Model(inputs, x)
model.compile(loss='mae', optimizer='adam')
model.fit(X_train, y_train, batch_size=32, epochs=10, verbose=0)
print(model.evaluate(X_test, y_test))
# 10.704551696777344
# normalize the inputs outside the model
normalizer = Normalization()
normalizer.adapt(X_train)
X_train_normalized = normalizer(X_train)
X_test_normalized = normalizer(X_test)
inputs = Input(shape=[None, 1])
x = LSTM(4, return_sequences=True)(inputs)
x = LSTM(2, return_sequences=True)(x)
x = LSTM(2, return_sequences=True)(x)
x = LSTM(4, return_sequences=True)(x)
x = TimeDistributed((Dense(1)))(x)
model = Model(inputs, x)
model.compile(loss='mae', optimizer='adam')
model.fit(X_train_normalized, y_train, batch_size=32, epochs=10, verbose=0)
print(model.evaluate(X_test_normalized, y_test))
# 10.748750686645508
Related
I wanna use predictions in real time, but I want to test that the results are the same as in the case of giving all the data as input only once. This is the code:
import numpy as np
from keras.layers import Input, Dense, SimpleRNN
from keras.models import Model
#MODEL 1, for training
x = Input(shape=(None, 1))
h = SimpleRNN(30, return_sequences=True)(x)
out = Dense(1)(h)
model = Model(x, out)
model.compile(loss='mse', optimizer='adam')
X_train = np.random.rand(100, 50, 1)
y_train = np.random.rand(100, 50, 1)
model.fit(X_train, y_train, verbose = False)
#MODEL 1, for predictions in real time
x = Input(batch_shape=(1, None, 1))
h = SimpleRNN(30, stateful=True, return_sequences=True)(x)
out = Dense(1)(h)
predict_model = Model(x, out)
predict_model.set_weights(model.get_weights())
X = np.random.rand(2, 2, 1)
predictions = model.predict(X, verbose = False)
for sim in range(len(predictions)):
for i in range(len(predictions[0])):
pred = predict_model.predict(X[sim:(sim+1), i:(i + 1), :], verbose = False)
print(pred[0][0]) #Predictions in real time
print(predictions[sim][i]) #Predictions with MODEL 1
print()
predict_model.reset_states()
It prints this:
[0.09156141]
[0.09156139]
[-0.38076958]
[-0.38076955]
[0.12214336]
[0.12214339]
[-0.52013564]
[-0.5201356]
The results must be exactly the same because both have the same weights. What is happening?
This is my first time creating a multi-input keras model, and I am not sure if I am doing something wrong as my training accuracy is not rising above 50 percent. If I use a single-input model, I can get to 90 percent plus pretty easily. This model uses the same input but different sizes of the input and then takes the max of the softmax prediction as the final prediction. So, I would assume it would be easier to train the model.
Here is the code:
def full_model_2(model_key):
base_model, preprocess = basemodel(model_key) #base model is just an imported imagenet model
base_model2, preprocess = basemodel(model_key)
for layer in base_model.layers:
layer.name = layer.name + "dup"
#first model
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(2048, activation='relu')(x)
x = BatchNormalization()(x)
x = Dense(512, activation='relu')(x)
x = BatchNormalization()(x)
predictions_x = Dense(classes, activation='softmax')(x)
#second model
y = base_model2.output
y = GlobalAveragePooling2D()(y)
y = Dense(2048, activation='relu')(y)
y = BatchNormalization()(y)
y = Dense(512, activation='relu')(y)
y = BatchNormalization()(y)
predictions_y = Dense(classes, activation='softmax')(y)
#predictions = Average()([predictions_x, predictions_y])
predictions = Maximum()([predictions_x, predictions_y])
model = Model(inputs= [base_model.input,base_model2.input], outputs=predictions)
for layer in model.layers[:-10]:
layer.trainable = False
return model, preprocess
My generator looks something like this (take two generators and creates two inputs)
def train_generator():
train_gen, _ = create_generators(batch_size, preprocess, img_size)
train_gen2, _ = create_generators(batch_size, preprocess, img_size2)
print(train_gen.seed)
print(train_gen2.seed)
while True:
X1i = train_gen.next()
X2i = train_gen2.next()
yield [X1i[0], X2i[0]], X1i[1]
Here is the original generator function:
train_datagen = ImageDataGenerator(preprocessing_function = preprocess)
train_generator = train_datagen.flow_from_directory(
train_data_dir,
shuffle = True,
seed = seed,
target_size = (img_size, img_size),
batch_size = batch_size,
class_mode = 'categorical')
return train_generator
model.compile(optimizer= r_optimizer,
loss='categorical_crossentropy',
metrics = ['accuracy'])
I have plotted the inputs from the generators, and I am indeed getting the same images but different sizes, so the input from generators seems to be okay.
I am thinking my model setup is off.
Thank you for your suggestions.
I am attempting a CNN for classification of biological data (EEG data). However, after importing and splitting the data into train/test/dev sets and building the CNN, I cannot get the input array shape to match the expected array shape.
Note: Data contains 5 trials/samples for each participant(ID's) in the study, so GSS was used to ensure data from each participant was not mixed train and test sets.
Code and error as follows:
#Load Data
def load_all_data(filename):
import numpy as np
a = np.load(filename)
d = (dict(zip(("data1{}".format(k) for k in a), (a[k] for k in a))))
return d
filename = ("dataname.npz")
X = load_all_data(filename)['array_0']
y = load_all_data(filename)['array_1']
IDs = load_all_data(filename)['array_2']
#Split Test Data with Groupshuffle Split
from sklearn.model_selection import GroupShuffleSplit
import numpy as np
test_size = 0.2
gss = GroupShuffleSplit(n_splits = 1, test_size = 0.2)
for train,test in gss.split(X, y, IDs):
X_train = X[train]
y_train = y[train]
IDs_train = IDs[train]
X_test = X[test]
y_test = y[test]
IDs_test = IDs[test]
fileoutname = 'train_test_data'
np.savez(fileoutname,X_train, y_train, X_test, y_test,IDs_train,IDs_test)
#Split Train, Test Data
gss = GroupShuffleSplit(1, test_size)
for train,test in gss.split(X, y, IDs):
X_train2 = X[train]
y_train2 = y[train]
IDs_train = IDs[train]
X_dev = X[test]
y_dev = y[test]
IDs_test = IDs[test]
#Add dimension to X and Convert y to Categorical
X_train2 = np.expand_dims(X_train2,axis=0)
y_train2 = keras.utils.to_categorical(y_train2,num_classes=2)
X_dev = np.expand_dims(X_test,axis=0)
y_dev = keras.utils.to_categorical(y_test,num_classes=2)
X = np.expand_dims(X,axis=0)
#Build the CNN
def simpleCNN(self, units = 10):
import keras
from keras.layers import Dense
from keras.layers import Conv2D
from keras.layers import Flatten
from keras.models import Model, Input
inp = Input(shape = self.shape[1:], name='inp')
#layer 1
x = Conv2D(units, kernel_size=(1,1), strides = (1,1), activation='relu', data_format='channels_last')(inp)
#layer 2
x = Conv2D(units, kernel_size=(2,2), strides = (1,1), activation='relu', data_format='channels_last')(x)
#layer 3
x = Flatten()(x)
#layer4
out = Dense(2, activation='softmax',name='out')(x)
model = Model(inputs = inp, outputs = out)
return model
#Fit the Data
model = simpleCNN(X)
from keras.optimizers import Adamax
adamax = Adamax(lr=3e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0); #3e-4; 2e-3 is a default.
model.compile(optimizer=adamax, loss='categorical_crossentropy', metrics=['acc'])
model.fit(X_train2, y_train2, epochs=20, batch_size=32, verbose = 1, validation_data = (X_dev, y_dev))
ValueError: Error when checking input: expected inp to have shape (11459, 26, 60) but got array with shape (9065, 26, 60)
After spending days failing to use neural network for Q learning, I decided to go back to the basics and do a simple function approximation to see if everything was working correctly and see how some parameters affected the learning process.
Here is the code that I came up with
from keras.models import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt
import random
import numpy
from sklearn.preprocessing import MinMaxScaler
regressor = Sequential()
regressor.add(Dense(units=20, activation='sigmoid', kernel_initializer='uniform', input_dim=1))
regressor.add(Dense(units=20, activation='sigmoid', kernel_initializer='uniform'))
regressor.add(Dense(units=20, activation='sigmoid', kernel_initializer='uniform'))
regressor.add(Dense(units=1))
regressor.compile(loss='mean_squared_error', optimizer='sgd')
#regressor = ExtraTreesRegressor()
N = 5000
X = numpy.empty((N,))
Y = numpy.empty((N,))
for i in range(N):
X[i] = random.uniform(-10, 10)
X = numpy.sort(X).reshape(-1, 1)
for i in range(N):
Y[i] = numpy.sin(X[i])
Y = Y.reshape(-1, 1)
X_scaler = MinMaxScaler()
Y_scaler = MinMaxScaler()
X = X_scaler.fit_transform(X)
Y = Y_scaler.fit_transform(Y)
regressor.fit(X, Y, epochs=2, verbose=1, batch_size=32)
#regressor.fit(X, Y.reshape(5000,))
x = numpy.mgrid[-10:10:100*1j]
x = x.reshape(-1, 1)
y = numpy.mgrid[-10:10:100*1j]
y = y.reshape(-1, 1)
x = X_scaler.fit_transform(x)
for i in range(len(x)):
y[i] = regressor.predict(numpy.array([x[i]]))
plt.figure()
plt.plot(X_scaler.inverse_transform(x), Y_scaler.inverse_transform(y))
plt.plot(X_scaler.inverse_transform(X), Y_scaler.inverse_transform(Y))
The problem is that all my predictions are around 0 in value. As you can see I used an ExtraTreesRegressor from sklearn (commented lines) to check that the protocol is actually correct. So what is wrong with my neural network ? Why is it not working ?
(The actual problem that I'm trying to solve is to compute the Q function for the mountain car problem using neural network. How is it different from this function approximator ?)
With these changes:
Activations to relu
Remove kernel_initializer (i.e. leave the default 'glorot_uniform')
Adam optimizer
100 epochs
i.e.
regressor = Sequential()
regressor.add(Dense(units=20, activation='relu', input_dim=1))
regressor.add(Dense(units=20, activation='relu'))
regressor.add(Dense(units=20, activation='relu'))
regressor.add(Dense(units=1))
regressor.compile(loss='mean_squared_error', optimizer='adam')
regressor.fit(X, Y, epochs=100, verbose=1, batch_size=32)
and the rest of your code unchanged, here is the result:
Tinker, again and again...
A more concise version of your code that works:
def data_gen():
while True:
x = (np.random.random([1024])-0.5) * 10
y = np.sin(x)
yield (x,y)
regressor = Sequential()
regressor.add(Dense(units=20, activation='tanh', input_dim=1))
regressor.add(Dense(units=20, activation='tanh'))
regressor.add(Dense(units=20, activation='tanh'))
regressor.add(Dense(units=1, activation='linear'))
regressor.compile(loss='mse', optimizer='adam')
regressor.fit_generator(data_gen(), epochs=3, steps_per_epoch=128)
x = (np.random.random([1024])-0.5)*10
x = np.sort(x)
y = np.sin(x)
plt.plot(x, y)
plt.plot(x, regressor.predict(x))
plt.show()
Changes made: replacing low layer activations with hyperbolic tangents, replacing the static dataset with a random generator, replacing sgd with adam. That said, there still are problems with other parts of your code that I haven't been able to locate yet (most likely your scaler and random process).
I managed to get a good approximation by changing the architecture and the training as in the following code. It's a bit of an overkill but at least I know where the problem was coming from.
from keras.models import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt
import random
import numpy
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import ExtraTreesRegressor
from keras import optimizers
regressor = Sequential()
regressor.add(Dense(units=500, activation='sigmoid', kernel_initializer='uniform', input_dim=1))
regressor.add(Dense(units=500, activation='sigmoid', kernel_initializer='uniform'))
regressor.add(Dense(units=1, activation='sigmoid'))
regressor.compile(loss='mean_squared_error', optimizer='adam')
#regressor = ExtraTreesRegressor()
N = 5000
X = numpy.empty((N,))
Y = numpy.empty((N,))
for i in range(N):
X[i] = random.uniform(-10, 10)
X = numpy.sort(X).reshape(-1, 1)
for i in range(N):
Y[i] = numpy.sin(X[i])
Y = Y.reshape(-1, 1)
X_scaler = MinMaxScaler()
Y_scaler = MinMaxScaler()
X = X_scaler.fit_transform(X)
Y = Y_scaler.fit_transform(Y)
regressor.fit(X, Y, epochs=50, verbose=1, batch_size=2)
#regressor.fit(X, Y.reshape(5000,))
x = numpy.mgrid[-10:10:100*1j]
x = x.reshape(-1, 1)
y = numpy.mgrid[-10:10:100*1j]
y = y.reshape(-1, 1)
x = X_scaler.fit_transform(x)
for i in range(len(x)):
y[i] = regressor.predict(numpy.array([x[i]]))
plt.figure()
plt.plot(X_scaler.inverse_transform(x), Y_scaler.inverse_transform(y))
plt.plot(X_scaler.inverse_transform(X), Y_scaler.inverse_transform(Y))
However I'm still baffled that I found papers saying that they were using only two hidden layers of five neurons to approximate the Q function of the mountain car problem and training their network for only a few minutes and get good results. I will try changing my batch size in my original problem to see what results I can get but I'm not very optimistic
I have a problem which deals with predicting two outputs when given a vector of predictors.
Assume that a predictor vector looks like x1, y1, att1, att2, ..., attn, which says x1, y1 are coordinates and att's are the other attributes attached to the occurrence of x1, y1 coordinates. Based on this predictor set I want to predict x2, y2. This is a time series problem, which I am trying to solve using multiple regresssion.
My question is how do I setup keras, which can give me 2 outputs in the final layer.
from keras.models import Model
from keras.layers import *
#inp is a "tensor", that can be passed when calling other layers to produce an output
inp = Input((10,)) #supposing you have ten numeric values as input
#here, SomeLayer() is defining a layer,
#and calling it with (inp) produces the output tensor x
x = SomeLayer(blablabla)(inp)
x = SomeOtherLayer(blablabla)(x) #here, I just replace x, because this intermediate output is not interesting to keep
#here, I want to keep the two different outputs for defining the model
#notice that both left and right are called with the same input x, creating a fork
out1 = LeftSideLastLayer(balbalba)(x)
out2 = RightSideLastLayer(banblabala)(x)
#here, you define which path you will follow in the graph you've drawn with layers
#notice the two outputs passed in a list, telling the model I want it to have two outputs.
model = Model(inp, [out1,out2])
model.compile(optimizer = ...., loss = ....) #loss can be one for both sides or a list with different loss functions for out1 and out2
model.fit(inputData,[outputYLeft, outputYRight], epochs=..., batch_size=...)
You can make a model with multiple output with
the Functional API
by subclassing tf.keras.Model.
Here's an example of dual outputs (regression and classification) on the Iris Dataset, using the Functional API:
from sklearn.datasets import load_iris
from tensorflow.keras.layers import Dense
from tensorflow.keras import Input, Model
import tensorflow as tf
data, target = load_iris(return_X_y=True)
X = data[:, (0, 1, 2)]
Y = data[:, 3]
Z = target
inputs = Input(shape=(3,), name='input')
x = Dense(16, activation='relu', name='16')(inputs)
x = Dense(32, activation='relu', name='32')(x)
output1 = Dense(1, name='cont_out')(x)
output2 = Dense(3, activation='softmax', name='cat_out')(x)
model = Model(inputs=inputs, outputs=[output1, output2])
model.compile(loss={'cont_out': 'mean_absolute_error',
'cat_out': 'sparse_categorical_crossentropy'},
optimizer='adam',
metrics={'cat_out': tf.metrics.SparseCategoricalAccuracy(name='acc')})
history = model.fit(X, {'cont_out': Y, 'cat_out': Z}, epochs=10, batch_size=8)
Here's a simplified version:
from sklearn.datasets import load_iris
from tensorflow.keras.layers import Dense
from tensorflow.keras import Input, Model
data, target = load_iris(return_X_y=True)
X = data[:, (0, 1, 2)]
Y = data[:, 3]
Z = target
inputs = Input(shape=(3,))
x = Dense(16, activation='relu')(inputs)
x = Dense(32, activation='relu')(x)
output1 = Dense(1)(x)
output2 = Dense(3, activation='softmax')(x)
model = Model(inputs=inputs, outputs=[output1, output2])
model.compile(loss=['mae', 'sparse_categorical_crossentropy'], optimizer='adam')
history = model.fit(X, [Y, Z], epochs=10, batch_size=8)
Here's the same example, subclassing tf.keras.Model and with a custom training loop:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras import Model
from sklearn.datasets import load_iris
tf.keras.backend.set_floatx('float64')
iris, target = load_iris(return_X_y=True)
X = iris[:, :3]
y = iris[:, 3]
z = target
ds = tf.data.Dataset.from_tensor_slices((X, y, z)).shuffle(150).batch(8)
class MyModel(Model):
def __init__(self):
super(MyModel, self).__init__()
self.d0 = Dense(16, activation='relu')
self.d1 = Dense(32, activation='relu')
self.d2 = Dense(1)
self.d3 = Dense(3, activation='softmax')
def call(self, x, training=None, **kwargs):
x = self.d0(x)
x = self.d1(x)
a = self.d2(x)
b = self.d3(x)
return a, b
model = MyModel()
loss_obj_reg = tf.keras.losses.MeanAbsoluteError()
loss_obj_cat = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
loss_reg = tf.keras.metrics.Mean(name='regression loss')
loss_cat = tf.keras.metrics.Mean(name='categorical loss')
error_reg = tf.keras.metrics.MeanAbsoluteError()
error_cat = tf.keras.metrics.SparseCategoricalAccuracy()
#tf.function
def train_step(inputs, y_reg, y_cat):
with tf.GradientTape() as tape:
pred_reg, pred_cat = model(inputs)
reg_loss = loss_obj_reg(y_reg, pred_reg)
cat_loss = loss_obj_cat(y_cat, pred_cat)
gradients = tape.gradient([reg_loss, cat_loss], model.trainable_variables)
optimizer.apply_gradients(zip(gradients, model.trainable_variables))
loss_reg(reg_loss)
loss_cat(cat_loss)
error_reg(y_reg, pred_reg)
error_cat(y_cat, pred_cat)
for epoch in range(50):
for xx, yy, zz in ds:
train_step(xx, yy, zz)
template = 'Epoch {:>2}, SCCE: {:>5.2f},' \
' MAE: {:>4.2f}, SAcc: {:>5.1%}'
print(template.format(epoch+1,
loss_cat.result(),
error_reg.result(),
error_cat.result()))
loss_reg.reset_states()
loss_cat.reset_states()
error_reg.reset_states()
error_cat.reset_states()