After getting a response to this question, I realized that I have a different question.
I would like to have a different objective component based on the batch that I am passing during a training step. Suppose my batch size is one and I associate each training data with two supporter vectors that are not part of the training step. So I need to figure out which part of the input vector is currently being processed.
import numpy as np
import keras.backend as K
from keras.layers import Dense, Input
from keras.models import Model
# Toy data: 100 training points, each paired with two "supporter" vectors
# that are not themselves part of the training set.
features = np.random.rand(100, 5)
labels = np.random.rand(100, 2)
holder = np.random.rand(200, 5)  # each feature gets two supporters

# Key every supporter row by a 1-based id so that ids (2k + 1, 2k + 2) are
# the two supporters of features[k]. Enumerating `holder` itself guarantees
# all 200 rows are kept; deriving the ids from the number of features would
# silently truncate the mapping when zipped against the longer array.
supporters = {i: row for i, row in enumerate(holder, start=1)}
For instance, the first two rows of supporters are for the first point in features.
features[0] [0.71444629 0.77256729 0.95375736 0.18759234 0.8207317 ]
has the following two supporters.
1: array([0.76281692, 0.18698215, 0.11687052, 0.78084761, 0.10293403]),
2: array([0.98229912, 0.08784577, 0.08109571, 0.23665783, 0.52587238])
Now, I create a simple model.
# Simple neural net with three outputs
input_layer = Input((5,))
hidden_layer = Dense(16)(input_layer)
output_layer = Dense(2)(hidden_layer)
# Model
model = Model(inputs=input_layer, outputs=output_layer)
My goal is to create a loss function as
def custom_loss(y_true, y_pred):
# Normal MSE loss
mse = K.mean(K.square(y_true-y_pred), axis=-1)
#Assume that I properly pass model object into the method use the predict method
#to use the current network weights
new_constraint = K.sum(y_pred - model.predict(supporters))
return(mse+new_constraint)
Then, I go ahead and compile my model.
# Compile with the custom loss, then train one sample per step so that each
# loss evaluation corresponds to exactly one training point.
model.compile(loss=custom_loss, optimizer='sgd')
model.fit(features, labels, epochs=1, batch_size=1)
The problem is that since the batch size is one, I want to make sure that the loss function only considers the supporter of the current training input. For example, if I am training the third point in features, then I want to use the fifth and sixth vectors while creating new_constraint. How can I accomplish this?
You can implement it like this (I have used the TensorFlow based Keras api but it shouldn't matter)
import numpy as np
import tensorflow as tf
from tensorflow.keras import Input, layers, Model
from tensorflow.keras import backend as K
features = np.random.rand(100, 5)
labels = np.random.rand(100, 2)
supporters = np.random.rand(200, 5) # each feature gets two supporter.
# I will get both support vectors to iterate over
supporters_1 = supporters[::2, :]
supporters_2 = supporters[1::2, :]
print(supporters_1.shape, supporters_2.shape)
# Result -> ((100, 5), (100, 5))
# Create a tf dataset to use in training
dataset = tf.data.Dataset.from_tensor_slices(((features, supporters_1, supporters_2), labels)).batch(1)
# A look at what it returns
for i in dataset:
print(i)
break
'''
Result:
((<tf.Tensor: shape=(1, 5), dtype=float64, numpy=array([[0.42834492, 0.01041871, 0.53058175, 0.69453215, 0.83901092]])>,
<tf.Tensor: shape=(1, 5), dtype=float64, numpy=array([[0.1724601 , 0.14386688, 0.49018201, 0.13565471, 0.35159235]])>,
<tf.Tensor: shape=(1, 5), dtype=float64, numpy=array([[0.87243349, 0.98779049, 0.98405784, 0.74069913, 0.25763667]])>),
<tf.Tensor: shape=(1, 2), dtype=float64, numpy=array([[0.20993531, 0.70153453]])>)
'''
#=========================================================
# Creating the model (Input size is 5 and not 2 in your sample so I changed it)
# Same for the label shape
input_layer = Input((5,))
hidden_layer = layers.Dense(16)(input_layer)
output_layer = layers.Dense(2)(hidden_layer)
# Model
model = Model(inputs=input_layer, outputs=output_layer)
#=========================================================
# Implementing the custom loss
# Without the `K.abs` the result can be negative and hence the `K.abs`
def custom_loss(y_true, y_pred, support_pred_1, support_pred_2):
    """Return the MSE plus a penalty tying y_pred to both supporter predictions.

    The penalty is the absolute value of the summed differences between the
    prediction and each supporter prediction; taking the absolute value keeps
    the extra term from going negative.
    """
    # Stack the two supporter predictions so y_pred broadcasts against both.
    supporter_preds = tf.stack([support_pred_1, support_pred_2])
    penalty = K.abs(K.sum(y_pred - supporter_preds))
    return tf.keras.losses.mse(y_true, y_pred) + penalty
# Instantiate an optimizer.
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)
'''
Now we create a custom training loop. In this we will get the logits
of all the inputs and then compute loss using the custom loss
function and then optimize on that loss.
'''
epochs = 10
for epoch in range(epochs):
print("Start of epoch %d" % (epoch,))
for step, ((features, support_1, support_2), labels) in enumerate(dataset):
with tf.GradientTape() as tape:
logits = model(features, training=True)
logits_1 = model(support_1, training=True)
logits_2 = model(support_2, training=True)
loss_value = custom_loss(labels, logits, logits_1, logits_2)
grads = tape.gradient(loss_value, model.trainable_weights)
optimizer.apply_gradients(zip(grads, model.trainable_weights))
print('loss_value: ', loss_value)
EDIT: There is another way to do this. As below:
# Everthing same till the supporters_1, supporters_2
def combine(inputs, targets):
    """Pack one sample and its two supporters into a single 3-row model input.

    Args:
        inputs: the (features, supporters_1, supporters_2) tuple emitted by
            the dataset for one batch.
        targets: the labels for that batch; returned unchanged.

    Returns:
        A (final, targets) pair where row 0 of `final` is the sample and
        rows 1-2 are its supporters.
    """
    features, supports_1, supports_2 = inputs[0], inputs[1], inputs[2]
    # Stack the inputs as a batch. Only the local names unpacked above are
    # used here, so the result never depends on module-level variables.
    final = tf.stack((features, supports_1, supports_2))
    # The dataset is batched with .batch(1), so each element carries a leading
    # axis of size 1; flatten the stacked (3, 1, 5) tensor to (3, 5).
    final = tf.reshape(final, (3, 5))
    return final, targets
# Creating the dataset
dataset = tf.data.Dataset.from_tensor_slices(((features, supporters_1, supporters_2), labels)).batch(1)
dataset = dataset.map(combine, num_parallel_calls=-1)
# Check the output
for i in dataset:
print(i)
break
'''
(<tf.Tensor: shape=(3, 5), dtype=float64, numpy=
array([[0.35641985, 0.93025517, 0.72874829, 0.81810538, 0.46682277],
[0.95497516, 0.71722253, 0.10608685, 0.37267656, 0.94748968],
[0.04822454, 0.00480376, 0.08479184, 0.51133809, 0.38242403]])>, <tf.Tensor: shape=(1, 2), dtype=float64, numpy=array([[0.21399956, 0.97149716]])>)
'''
#================MODEL=================
input_layer = Input((5,))
hidden_layer = layers.Dense(16)(input_layer)
output_layer = layers.Dense(2)(hidden_layer)
# Model
model = Model(inputs=input_layer, outputs=output_layer)
#=======================================
# change the loss function accordingly
'''
The first row in the y_pred will be the prediction corresponding to
actual features and the rest will be predictions corresponding to
supports and hence you can change the loss function as below.
'''
def custom_loss(y_true, y_pred):
    """Return MSE on the sample row plus its summed gap to the supporter rows.

    Row 0 of y_pred is the prediction for the actual features; the remaining
    rows are the predictions for the supporters.
    """
    sample_pred = y_pred[0, :]
    supporter_preds = y_pred[1:, :]
    penalty = K.abs(K.sum(sample_pred - supporter_preds))
    return tf.keras.losses.mse(y_true, sample_pred) + penalty
# Compile
model.compile(loss=custom_loss, optimizer='adam')
# train
model.fit(dataset, epochs=5)
Related
I'm trying to reproduce the architecture of the network proposed in this publication in tensorFlow. Being a total beginner to this, I've been using this tutorial as a base to work on, using tensorflow==2.3.2.
To train this network, they use a loss that involves outputs from two branches of the network at the same time, which made me look into custom loss functions in Keras. I understand that you can define your own, as long as the definition of the function looks like the following:
def custom_loss(y_true, y_pred):
I also understood that you could give other arguments like so:
def loss_function(margin=0.3):
def custom_loss(y_true, y_pred):
# And now you can use margin
You then just have to call these while compiling your model. When it comes to using multiple outputs, the most common approach seem to be the one proposed here, where you would give several losses functions, one being called for each of your output.
However, I could not find a solution to give several outputs to a loss function, which is what I need here.
To further explain it, here is a minimal working example showing what I've tried, which you can try for yourself in this collab.
import os
import tensorflow as tf
import keras.backend as K
from tensorflow.keras import datasets, layers, models, applications, losses
from tensorflow.keras.preprocessing import image_dataset_from_directory
_URL = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip'
path_to_zip = tf.keras.utils.get_file('cats_and_dogs.zip', origin=_URL, extract=True)
PATH = os.path.join(os.path.dirname(path_to_zip), 'cats_and_dogs_filtered')
train_dir = os.path.join(PATH, 'train')
validation_dir = os.path.join(PATH, 'validation')
BATCH_SIZE = 32
IMG_SIZE = (160, 160)
IMG_SHAPE = IMG_SIZE + (3,)
train_dataset = image_dataset_from_directory(train_dir,
shuffle=True,
batch_size=BATCH_SIZE,
image_size=IMG_SIZE)
validation_dataset = image_dataset_from_directory(validation_dir,
shuffle=True,
batch_size=BATCH_SIZE,
image_size=IMG_SIZE)
data_augmentation = tf.keras.Sequential([
layers.experimental.preprocessing.RandomFlip('horizontal'),
layers.experimental.preprocessing.RandomRotation(0.2),
])
preprocess_input = applications.resnet50.preprocess_input
base_model = applications.ResNet50(input_shape=IMG_SHAPE,
include_top=False,
weights='imagenet')
base_model.trainable = True
conv = layers.Conv2D(filters=128, kernel_size=(1,1))
global_pooling = layers.GlobalAveragePooling2D()
horizontal_pooling = layers.AveragePooling2D(pool_size=(1, 5))
reshape = layers.Reshape((-1, 128))
def custom_loss(y_true, y_pred):
print(y_pred.shape)
# Do some stuffs involving both outputs
# Returning something trivial here for correct behavior
return K.mean(y_pred)
inputs = tf.keras.Input(shape=IMG_SHAPE)
x = data_augmentation(inputs)
x = preprocess_input(x)
x = base_model(x, training=True)
first_branch = global_pooling(x)
second_branch = conv(x)
second_branch = horizontal_pooling(second_branch)
second_branch = reshape(second_branch)
model = tf.keras.Model(inputs, [first_branch, second_branch])
base_learning_rate = 0.0001
model.compile(optimizer=tf.keras.optimizers.Adam(lr=base_learning_rate),
loss=custom_loss,
metrics=['accuracy'])
model.summary()
initial_epochs = 10
history = model.fit(train_dataset,
epochs=initial_epochs,
validation_data=validation_dataset)
while doing so, I thought that the y_pred given to loss function would be a list, containing both outputs. However, while running it, what I've got in stdout was this:
Epoch 1/10
(None, 2048)
(None, 5, 128)
What I understand from this is that the loss function is called with every output, one by one, instead of being called once with all the outputs, which means I can't define a loss that would use both the outputs at the same time. Is there any way to achieve this?
Please let me know if I'm unclear, or if you need further details.
I had the same problem trying to implement Triplet_Loss function.
I referred to Keras's implementation of a Siamese Network with Triplet Loss Function, but something didn't work out and I had to implement the network myself.
def get_siamese_model(input_shape, conv2d_filters):
# Define the tensors for the input images
anchor_input = Input(input_shape, name="Anchor_Input")
positive_input = Input(input_shape, name="Positive_Input")
negative_input = Input(input_shape, name="Negative_Input")
body = build_body(input_shape, conv2d_filters)
# Generate the feature vectors for the images
encoded_a = body(anchor_input)
encoded_p = body(positive_input)
encoded_n = body(negative_input)
distance = DistanceLayer()(encoded_a, encoded_p, encoded_n)
# Connect the inputs with the outputs
siamese_net = Model(inputs=[anchor_input, positive_input, negative_input],
outputs=distance)
return siamese_net
and the "bug" was in DistanceLayer Implementation Keras posted (also in the same link above).
class DistanceLayer(tf.keras.layers.Layer):
"""
This layer is responsible for computing the distance between the anchor
embedding and the positive embedding, and the anchor embedding and the
negative embedding.
"""
def __init__(self, **kwargs):
super().__init__(**kwargs)
def call(self, anchor, positive, negative):
ap_distance = tf.math.reduce_sum(tf.math.square(anchor - positive), axis=1, keepdims=True, name='ap_distance')
an_distance = tf.math.reduce_sum(tf.math.square(anchor - negative), axis=1, keepdims=True, name='an_distance')
return (ap_distance, an_distance)
When I was training the model, the loss function took only one of the vectors ap_distance or an_distance.
FINALLY, THE FIX WAS to concatenate the vectors together (along axis=1 this case) and on the loss function, take them apart:
def call(self, anchor, positive, negative):
ap_distance = tf.math.reduce_sum(tf.math.square(anchor - positive), axis=1, keepdims=True, name='ap_distance')
an_distance = tf.math.reduce_sum(tf.math.square(anchor - negative), axis=1, keepdims=True, name='an_distance')
return tf.concat([ap_distance, an_distance], axis=1)
on my custom loss:
def get_loss(margin=1.0):
    """Build a triplet-loss function that uses the given margin."""

    def triplet_loss(y_true, y_pred):
        # The network output is NOT a tuple but one (batch_size, 2) matrix:
        # column 0 holds the anchor-positive distance, column 1 the
        # anchor-negative distance.
        positive_dist, negative_dist = y_pred[:, 0], y_pred[:, 1]
        # Hinge at zero so the loss never becomes negative.
        return tf.math.maximum(positive_dist - negative_dist + margin, 0.0)

    return triplet_loss
Ok, here is an easy way to achieve this. We can achieve this by using the loss_weights parameter. We can weigh multiple outputs exactly the same so that we can get the combined loss results. So, for two output we can do
loss_weights = 1*output1 + 1*output2
In your case, your network has two outputs, by the name they are reshape, and global_average_pooling2d. You can do now as follows
# calculation of loss for one output, i.e. reshape
def reshape_loss(y_true, y_pred):
# do some math with these two
return K.mean(y_pred)
# calculation of loss for another output, i.e. global_average_pooling2d
def gap_loss(y_true, y_pred):
# do some math with these two
return K.mean(y_pred)
And while compiling now you need to do as this
model.compile(
optimizer=tf.keras.optimizers.Adam(lr=base_learning_rate),
loss = {
'reshape':reshape_loss,
'global_average_pooling2d':gap_loss
},
loss_weights = {
'reshape':1.,
'global_average_pooling2d':1.
}
)
Now, the loss is the result of 1.*reshape + 1.*global_average_pooling2d.
My neural network in Keras learns a representation of my original data. In order to see exactly how it learns I thought it would be interesting to plot the data for every training batch (or epoch alternatively) and convert the plots into a video.
I'm stuck on how to get the outputs of my model during the training phase.
I thought about doing something like this (pseudo code):
epochs = 200
plt_outputs = []
for i in range(epochs):
model.fit(x_train,y_train, epochs = 1)
plt_outputs.append(output_layer(x_test))
where output_layer is the layer in my neural network I'm interested in. Afterwards I would use plot_data to generate each plot and turn it into a video. (That part I'm not concerned about yet..)
But that doesn't strike me as a good solution, plus I don't know how get the output for every batch. Any thoughts on this?
You can customize what happens in the test step, much like this official tutorial:
import tensorflow as tf
import numpy as np
class CustomModel(tf.keras.Model):
def test_step(self, data):
# Unpack the data
x, y = data
# Compute predictions
y_pred = self(x, training=False)
test_outputs.append(y_pred) # ADD THIS HERE
# Updates the metrics tracking the loss
self.compiled_loss(y, y_pred, regularization_losses=self.losses)
# Update the metrics.
self.compiled_metrics.update_state(y, y_pred)
# Return a dict mapping metric names to current value.
# Note that it will include the loss (tracked in self.metrics).
return {m.name: m.result() for m in self.metrics}
# Construct an instance of CustomModel
inputs = tf.keras.Input(shape=(8,))
x = tf.keras.layers.Dense(8, activation='relu')(inputs)
outputs = tf.keras.layers.Dense(1)(x)
model = CustomModel(inputs, outputs)
model.compile(loss="mse", metrics=["mae"], run_eagerly=True)
test_outputs = list() # ADD THIS HERE
# Evaluate with our custom test_step
x = np.random.random((1000, 8))
y = np.random.random((1000, 1))
model.evaluate(x, y)
I added a list, and now in the test step, it will append this list with the output. You will need to add run_eagerly=True in model.compile() for this to work. This will output a list of such outputs:
<tf.Tensor: shape=(32, 1), dtype=float32, numpy=
array([[ 0.10866462],
[ 0.2749035 ],
[ 0.08196291],
[ 0.25862294],
[ 0.30985728],
[ 0.20230596],
...
[ 0.17108777],
[ 0.29692617],
[-0.03684975],
[ 0.03525433],
[ 0.26774448],
[ 0.21728781],
[ 0.0840873 ]], dtype=float32)>
I am trying to implement WGAN with GP in TensorFlow 2.0. To calculate the gradient penalty it requires you to calculate the gradients of the predictions with respect to input images.
Now, to make it a bit more tractable, instead of computing the gradients of the predictions with respect to all the input images, it computes interpolated data points along the lines of original and fake data points and uses these as the inputs.
To implement this, I am first developing the compute_gradients function which would take some predictions and return the gradients of those with respect to some input images. First, I thought of doing this with tf.keras.backend.gradients but it won't work in eager mode. So, I am trying to do this now using GradientTape.
Here's the code I am using to test things out:
from tensorflow.keras import backend as K
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
import tensorflow as tf
import numpy as np
# Comes from Generative Deep Learning by David Foster
class RandomWeightedAverage(tf.keras.layers.Layer):
    """Provides a (random) weighted average between real and generated image samples"""

    def __init__(self, batch_size):
        super().__init__()
        # Number of samples per call; fixes the leading axis of the random
        # mixing coefficients drawn in call().
        self.batch_size = batch_size

    def call(self, inputs):
        # Draw one coefficient per sample with shape (batch_size, 1, 1, 1) so
        # it broadcasts across the remaining axes of each input.
        alpha = K.random_uniform((self.batch_size, 1, 1, 1))
        # inputs[0] is weighted by alpha and inputs[1] by (1 - alpha).
        return (alpha * inputs[0]) + ((1 - alpha) * inputs[1])
# Dummy critic
def make_critic():
    """Build the dummy critic: two strided conv stages followed by a dense head."""
    image_shape = (28, 28, 1)
    stages = [
        Conv2D(32, (5, 5), padding="same", strides=(2, 2),
               input_shape=image_shape),
        LeakyReLU(alpha=0.2),
        Conv2D(64, (5, 5), padding="same", strides=(2, 2)),
        LeakyReLU(alpha=0.2),
        Flatten(),
        Dense(512),
        LeakyReLU(alpha=0.2),
        Dropout(0.3),
        Dense(1),  # single unbounded score per image
    ]
    critic = Sequential()
    for stage in stages:
        critic.add(stage)
    return critic
# Gather dataset
((X_train, _), (X_test, _)) = tf.keras.datasets.fashion_mnist.load_data()
X_train = X_train.reshape(-1, 28, 28, 1)
X_test = X_test.reshape(-1, 28, 28, 1)
# Note that I am using test images as fake images for testing purposes
interpolated_img = RandomWeightedAverage(32)([X_train[0:32].astype("float"), X_test[32:64].astype("float")])
# Compute gradients of the predictions with respect to the interpolated images
critic = make_critic()
with tf.GradientTape() as tape:
y_pred = critic(interpolated_img)
gradients = tape.gradient(y_pred, interpolated_img)
The gradients are coming to be None. Am I missing something here?
Gradients of predictions with respect to some tensors ... Am I missing something here?
Yes. You need a tape.watch(interpolated_img):
with tf.GradientTape() as tape:
tape.watch(interpolated_img)
y_pred = critic(interpolated_img)
GradientTape needs to store the intermediate values of the forward pass to calculate the gradients. Usually, you want gradients WRT variables. So it doesn't keep the trace of calculations starting from tensors, probably to save memory.
If you want a gradient WRT a tensor, you need to explicitly tell the tape.
I am doing a slight modification of a standard neural network by defining a custom loss function. The custom loss function depends not only on y_true and y_pred, but also on the training data. I implemented it using the wrapping solution described here.
Specifically, I wanted to define a custom loss function that is the standard mse plus the mse between the input and the square of y_pred:
def custom_loss(x_true):
    """Return a Keras loss that adds an x_true-dependent term to the squared error.

    Args:
        x_true: tensor/array captured by the returned closure. It is bound
            once, when the loss is created, and is not re-sliced per batch.

    Returns:
        A `loss(y_true, y_pred)` function suitable for `model.compile`.
    """
    def loss(y_true, y_pred):
        # Mean of the squared prediction error plus the squared difference
        # between the targets and the captured x_true.
        return K.mean(K.square(y_pred - y_true) + K.square(y_true - x_true))
    return loss
Then I compile the model using
model_custom.compile(loss = custom_loss( x_true=training_data ), optimizer='adam')
fit the model using
model_custom.fit(training_data, training_label, epochs=100, batch_size = training_data.shape[0])
All of the above works fine, because the batch size is actually the number of all the training samples.
But if I set a different batch_size (e.g., 10) when I have 1000 training samples, there will be an error
Incompatible shapes: [1000] vs. [10].
It seems that Keras is able to automatically adjust the size of the inputs to its own loss function base on the batch size, but cannot do so for the custom loss function.
Do you know how to solve this issue?
Thank you!
==========================================================================
* Update: the batch size issue is solved, but another issue occurred
Thank you, Ori, for the suggestion of concatenating the input and output layers! It "worked", in the sense that the codes can run under any batch size. However, it seems that the result from training the new model is wrong... Below is a simplified version of the codes to demonstrate the problem:
import numpy as np
import scipy.io
import keras
from keras import backend as K
from keras.models import Model
from keras.layers import Input, Dense, Activation
from numpy.random import seed
from tensorflow import set_random_seed
def custom_loss(y_true, y_pred): # this is essentially the mean_square_error
mse = K.mean( K.square( y_pred[:,2] - y_true ) )
return mse
# set the seeds so that we get the same initialization across different trials
seed_numpy = 0
seed_tensorflow = 0
# generate data of x = [ y^3 y^2 ]
y = np.random.rand(5000+1000,1) * 2 # generate 5000 training and 1000 testing samples
x = np.concatenate( ( np.power(y, 3) , np.power(y, 2) ) , axis=1 )
training_data = x[0:5000:1,:]
training_label = y[0:5000:1]
testing_data = x[5000:6000:1,:]
testing_label = y[5000:6000:1]
# build the standard neural network with one hidden layer
seed(seed_numpy)
set_random_seed(seed_tensorflow)
input_standard = Input(shape=(2,)) # input
hidden_standard = Dense(10, activation='relu', input_shape=(2,))(input_standard) # hidden layer
output_standard = Dense(1, activation='linear')(hidden_standard) # output layer
model_standard = Model(inputs=[input_standard], outputs=[output_standard]) # build the model
model_standard.compile(loss='mean_squared_error', optimizer='adam') # compile the model
model_standard.fit(training_data, training_label, epochs=50, batch_size = 500) # train the model
testing_label_pred_standard = model_standard.predict(testing_data) # make prediction
# get the mean squared error
mse_standard = np.sum( np.power( testing_label_pred_standard - testing_label , 2 ) ) / 1000
# build the neural network with the custom loss
seed(seed_numpy)
set_random_seed(seed_tensorflow)
input_custom = Input(shape=(2,)) # input
hidden_custom = Dense(10, activation='relu', input_shape=(2,))(input_custom) # hidden layer
output_custom_temp = Dense(1, activation='linear')(hidden_custom) # output layer
output_custom = keras.layers.concatenate([input_custom, output_custom_temp])
model_custom = Model(inputs=[input_custom], outputs=[output_custom]) # build the model
model_custom.compile(loss = custom_loss, optimizer='adam') # compile the model
model_custom.fit(training_data, training_label, epochs=50, batch_size = 500) # train the model
testing_label_pred_custom = model_custom.predict(testing_data) # make prediction
# get the mean squared error
mse_custom = np.sum( np.power( testing_label_pred_custom[:,2:3:1] - testing_label , 2 ) ) / 1000
# compare the result
print( [ mse_standard , mse_custom ] )
Basically, I have a standard one-hidden-layer neural network, and a custom one-hidden-layer neural network whose output layer is concatenated with the input layer. For testing purpose, I did not use the concatenated input layer in the custom loss function, because I wanted to see if the custom network can reproduce the standard neural network. Since the custom loss function is equivalent to the standard 'mean_squared_error' loss, both networks should have the same training results (I also reset the random seeds to make sure that they have the same initialization).
However, the training results are very different. It seems that the concatenation makes the training process different? Any ideas?
Thank you again for all your help!
Final update: Ori's approach of concatenating input and output layers works, and is verified by using the generator. Thanks!!
The problem is that when compiling the model, you set x_true to be a static tensor, in the size of all the samples. While the input for keras loss functions are the y_true and y_pred, where each of them is of size [batch_size, :].
As I see it there are 2 options you can solve this, the first one is using a generator for creating the batches, in such a way that you will have control over which indices are evaluated each time, and at the loss function you could slice the x_true tensor to fit the samples being evaluated:
def custom_loss(x_true):
    """Return a Keras loss that compares y_true with the matching rows of x_true.

    Args:
        x_true: the full input tensor/array, captured by the closure.

    Returns:
        A `loss(y_true, y_pred)` function suitable for `model.compile`.
    """
    def loss(y_true, y_pred):
        # relevant_samples is a placeholder for caller-supplied logic that
        # picks the rows of x_true belonging to the batch being evaluated.
        x_true_samples = relevant_samples(x_true)
        return K.mean(K.square(y_pred - y_true) + K.square(y_true - x_true_samples))
    return loss
This solution can be complicated, what I would suggest is a simpler workaround -
Concatenate the input layer with the output layer, such that your new output will be of the form original_output , input.
Now you can use a new modified loss function:
def loss(y_true, y_pred):
    """Loss for a model whose output is the original output concatenated with its input.

    The first `output_shape` columns of y_pred are the original network
    output; the remaining columns are the input that was concatenated on.
    """
    predicted = y_pred[:, :output_shape]
    echoed_input = y_pred[:, output_shape:]
    target = y_true[:, :output_shape]
    # Squared prediction error plus squared difference between the target
    # and the input carried through the concatenated output.
    return K.mean(K.square(predicted - target) +
                  K.square(target - echoed_input))
Now your new loss function will take in account both the input data, and the prediction.
Edit:
Note that although you set the seed, your models are not exactly the same, and since you did not use a generator, you let Keras choose the batches; for different models it might pick different samples.
As your model does not converge, different samples can lead to different results.
I added a generator to your code, to verify the samples we pick for training, now you can see both results are the same:
def custom_loss(y_true, y_pred):  # this is essentially the mean_square_error
    """Mean squared error between the targets and the network-output column.

    y_pred is the concatenation [input (2 columns), network output (1 column)],
    so the actual prediction lives in column index 2.
    """
    # Slice with 2:3 rather than indexing with 2 so the prediction keeps its
    # trailing axis and has shape (batch, 1), matching y_true. A (batch,)
    # prediction against a (batch, 1) target would broadcast to
    # (batch, batch) and compare every prediction with every target.
    mse = keras.losses.mean_squared_error(y_true, y_pred[:, 2:3])
    return mse
def generator(x, y, batch_size):
    """Yield consecutive (batch_x, batch_y) mini-batches forever.

    Samples are served in order, and the position wraps back to the start
    once the end of the data is reached, so a batch may straddle the boundary.

    Args:
        x: array of inputs, indexed along its first axis.
        y: array of targets with the same first-axis length as x.
        batch_size: number of samples per yielded batch.

    Yields:
        Tuples (batch_x, batch_y), each a freshly allocated array holding
        `batch_size` rows of x and y respectively.

    Raises:
        ValueError: on first iteration, if x contains no samples.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    n_samples = len(x)
    if n_samples == 0:
        raise ValueError("generator() requires at least one sample")

    cursor = 0
    offsets = np.arange(batch_size)
    while True:
        # Row indices for this batch, wrapped around the end of the data.
        rows = (cursor + offsets) % n_samples
        # Fancy indexing copies, so a consumer holding an earlier batch never
        # sees it overwritten by a later one.
        yield x[rows], y[rows]
        # Advance the read position; without this every batch would repeat
        # the same sample.
        cursor = (cursor + batch_size) % n_samples
# set the seeds so that we get the same initialization across different trials
seed_numpy = 0
seed_tensorflow = 0
# generate data of x = [ y^3 y^2 ]
y = np.random.rand(5000+1000,1) * 2 # generate 5000 training and 1000 testing samples
x = np.concatenate( ( np.power(y, 3) , np.power(y, 2) ) , axis=1 )
training_data = x[0:5000:1,:]
training_label = y[0:5000:1]
testing_data = x[5000:6000:1,:]
testing_label = y[5000:6000:1]
batch_size = 32
# build the standard neural network with one hidden layer
seed(seed_numpy)
set_random_seed(seed_tensorflow)
input_standard = Input(shape=(2,)) # input
hidden_standard = Dense(10, activation='relu', input_shape=(2,))(input_standard) # hidden layer
output_standard = Dense(1, activation='linear')(hidden_standard) # output layer
model_standard = Model(inputs=[input_standard], outputs=[output_standard]) # build the model
model_standard.compile(loss='mse', optimizer='adam') # compile the model
#model_standard.fit(training_data, training_label, epochs=50, batch_size = 10) # train the model
model_standard.fit_generator(generator(training_data,training_label,batch_size), steps_per_epoch= 32, epochs= 100)
testing_label_pred_standard = model_standard.predict(testing_data) # make prediction
# get the mean squared error
mse_standard = np.sum( np.power( testing_label_pred_standard - testing_label , 2 ) ) / 1000
# build the neural network with the custom loss
seed(seed_numpy)
set_random_seed(seed_tensorflow)
input_custom = Input(shape=(2,)) # input
hidden_custom = Dense(10, activation='relu', input_shape=(2,))(input_custom) # hidden layer
output_custom_temp = Dense(1, activation='linear')(hidden_custom) # output layer
output_custom = keras.layers.concatenate([input_custom, output_custom_temp])
model_custom = Model(inputs=input_custom, outputs=output_custom) # build the model
model_custom.compile(loss = custom_loss, optimizer='adam') # compile the model
#model_custom.fit(training_data, training_label, epochs=50, batch_size = 10) # train the model
model_custom.fit_generator(generator(training_data,training_label,batch_size), steps_per_epoch= 32, epochs= 100)
testing_label_pred_custom = model_custom.predict(testing_data)
# get the mean squared error
mse_custom = np.sum( np.power( testing_label_pred_custom[:,2:3:1] - testing_label , 2 ) ) / 1000
# compare the result
print( [ mse_standard , mse_custom ] )
I am trying to make hourly predictions using a recurrent neural network using TensorFlow and Keras in Python. I have assigned my inputs of the neural network to be (None, None, 5), as shown below.
However, I am getting the error:
ValueError: Error when checking input: expected gru_3_input to have shape (None, None, 10) but got array with shape (1, 4, 1) My MVCE code is:
%matplotlib inline
#!pip uninstall keras
#!pip install keras==2.1.2
import tensorflow as tf
import pandas as pd
from pandas import DataFrame
import math
#####Create the Recurrent Neural Network###
model = Sequential()
model.add(GRU(units=5,
return_sequences=True,
input_shape=(None, num_x_signals)))
## This line is going to map the above 512 values to just 1 (num_y_signal)
model.add(Dense(num_y_signals, activation='sigmoid'))
if False:
from tensorflow.python.keras.initializers import RandomUniform
# Maybe use lower init-ranges.##### I may have to change these during debugging####
init = RandomUniform(minval=-0.05, maxval=0.05)
model.add(Dense(num_y_signals,
activation='linear',
kernel_initializer=init))
warmup_steps = 5
def loss_mse_warmup(y_true, y_pred):
#
# Ignore the "warmup" parts of the sequences
# by taking slices of the tensors.
y_true_slice = y_true[:, warmup_steps:, :]
y_pred_slice = y_pred[:, warmup_steps:, :]
# These sliced tensors both have this shape:
# [batch_size, sequence_length - warmup_steps, num_y_signals]
# Calculate the MSE loss for each value in these tensors.
# This outputs a 3-rank tensor of the same shape.
loss = tf.losses.mean_squared_error(labels=y_true_slice,
predictions=y_pred_slice)
loss_mean = tf.reduce_mean(loss)
return loss_mean
optimizer = RMSprop(lr=1e-3) ### This is somthing related to debugging
model.compile(loss=loss_mse_warmup, optimizer=optimizer)#### I may have to make the output a singnal rather than the whole data set
print(model.summary())
model.fit_generator(generator=generator,
epochs=20,
steps_per_epoch=100,
validation_data=validation_data)
I am not sure why this could be, but I believe it could have something to do with reshaping my training and testing data. I have also attached my full error message to my code to make the problem reproducible.
I'm unsure about the correctness but here it is:
%matplotlib inline
#!pip uninstall keras
#!pip install keras==2.1.2
import datetime
import math

import numpy
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from keras.layers import Input, Dense, GRU, Embedding
from keras.models import Sequential
from keras.optimizers import RMSprop
from pandas import DataFrame
from sklearn.preprocessing import MinMaxScaler
# Ten hourly timestamps starting at 2012-01-01 01:00. Stored under its own
# name so that the list does not overwrite the imported `datetime` module.
timestamps = [datetime.datetime(2012, 1, 1, 1, 0, 0) + datetime.timedelta(hours=i)
              for i in range(10)]

# Raw input (X) and output (y) samples; only elements 1..10 are used.
X = np.array([2.25226244, 1.44078451, 0.99174488, 0.71179491, 0.92824542,
              1.67776948, 2.96399534, 5.06257161, 7.06504245, 7.77817664,
              0.92824542, 1.67776948, 2.96399534, 5.06257161, 7.06504245,
              7.77817664])
y = np.array([0.02062136, 0.00186715, 0.01517354, 0.0129046, 0.02231125,
              0.01492537, 0.09646542, 0.28444476, 0.46289928, 0.77817664,
              0.02231125, 0.01492537, 0.09646542, 0.28444476, 0.46289928,
              0.77817664])
X = X[1:11]
y = y[1:11]

df = pd.DataFrame({'date': timestamps, 'y': y, 'X': X})
df['t'] = list(range(len(df)))
# shift(-1) moves every value one row up, leaving a gap in the last row.
df['X-1'] = df['X'].shift(-1)

# Work on plain NumPy arrays from here on: a pandas Series cannot be
# reshaped directly, which is what the slicing below relies on.
x_data = df['X-1'].fillna(0).values
y_data = y
num_data = len(x_data)

# Training / test split: the first 60% of the rows are used for training.
train_split = 0.6
num_train = int(train_split * num_data)
num_test = num_data - num_train  # number of observations in the test set

# Inputs as column vectors of shape (rows, 1).
x_train = x_data[:num_train].reshape(-1, 1)
x_test = x_data[num_train:].reshape(-1, 1)

# Outputs as column vectors of shape (rows, 1).
y_train = y_data[:num_train].reshape(-1, 1)
y_test = y_data[num_train:].reshape(-1, 1)

# Number of input / output signals is the number of feature columns, not the
# number of observations (shape[0] would give the row count instead).
num_x_signals = x_train.shape[1]
num_y_signals = y_train.shape[1]

# Scale to [0, 1]. The scalers are fitted on the training data only and then
# re-used for the test data so both sets share one and the same mapping;
# test values outside the training range therefore land outside [0, 1].
x_scaler = MinMaxScaler(feature_range=(0, 1))
x_train_scaled = x_scaler.fit_transform(x_train)
x_test_scaled = x_scaler.transform(x_test)

y_scaler = MinMaxScaler()
y_train_scaled = y_scaler.fit_transform(y_train)
y_test_scaled = y_scaler.transform(y_test)
def batch_generator(batch_size, sequence_length):
    """
    Yield an endless stream of randomly sampled training batches.

    Every batch is a tuple (inputs, targets) of float16 arrays with shapes
    (batch_size, sequence_length, num_x_signals) and
    (batch_size, sequence_length, num_y_signals). Each entry of a batch is a
    window of `sequence_length` consecutive rows cut from x_train_scaled and
    y_train_scaled at the same random offset.
    """
    # Never terminates: the consumer decides how many batches to draw.
    while True:
        # Fresh buffers for this batch of input and output signals.
        inputs = np.zeros(shape=(batch_size, sequence_length, num_x_signals),
                          dtype=np.float16)
        targets = np.zeros(shape=(batch_size, sequence_length, num_y_signals),
                           dtype=np.float16)

        for row in range(batch_size):
            # Random offset into the training data for this window.
            start = np.random.randint(num_train - sequence_length)
            stop = start + sequence_length

            inputs[row] = x_train_scaled[start:stop]
            targets[row] = y_train_scaled[start:stop]

        yield (inputs, targets)
# Number of sequences per batch and number of time-steps per sequence.
batch_size = 20
sequence_length = 2

# Build the training generator and draw one batch from it.
generator = batch_generator(batch_size, sequence_length)
x_batch, y_batch = next(generator)
#########Validation Set Start########
def batch_generator(batch_size, sequence_length):
    """
    Generator function for creating random batches of validation-data.

    Yields, without end, tuples (x_batch, y_batch) of float16 arrays with
    shapes (batch_size, sequence_length, num_x_signals) and
    (batch_size, sequence_length, num_y_signals). Each entry of a batch is a
    window of `sequence_length` consecutive rows cut from x_test_scaled and
    y_test_scaled at the same random offset.

    Raises ValueError (on the first batch) if the test data holds fewer than
    `sequence_length` rows.
    """
    # The windows are cut from the TEST arrays, so the start index has to be
    # bounded by their length. Bounding it by num_train allowed windows that
    # ran past the end of the test data; the short slices were then silently
    # broadcast into the batch.
    last_start = len(x_test_scaled) - sequence_length
    if last_start < 0:
        raise ValueError(
            "sequence_length ({}) exceeds the number of test rows ({})".format(
                sequence_length, len(x_test_scaled)))

    # Infinite loop: keeps providing random windows of x and y.
    while True:
        # Allocate a new array for the batch of input-signals.
        x_shape = (batch_size, sequence_length, num_x_signals)
        x_batch = np.zeros(shape=x_shape, dtype=np.float16)

        # Allocate a new array for the batch of output-signals.
        y_shape = (batch_size, sequence_length, num_y_signals)
        y_batch = np.zeros(shape=y_shape, dtype=np.float16)

        # Fill the batch with random sequences of data.
        for i in range(batch_size):
            # Random start index; the upper bound is exclusive, hence the +1
            # so that the last complete window can be selected as well.
            idx = np.random.randint(last_start + 1)

            # Copy the sequences of data starting at this index.
            x_batch[i] = x_test_scaled[idx:idx + sequence_length]
            y_batch[i] = y_test_scaled[idx:idx + sequence_length]

        yield (x_batch, y_batch)
# Draw one batch from a fresh generator; it serves as the fixed validation set.
validation_data = next(batch_generator(batch_size=batch_size,
                                       sequence_length=sequence_length))
# validation_data = (np.expand_dims(x_test_scaled, axis=0),
# np.expand_dims(y_test_scaled, axis=0))
#Validation set end
#####Create the Recurrent Neural Network###
model = Sequential()

# Recurrent layer: a GRU with 5 units that returns its output at every
# time-step. Sequences may have any length; each step has num_x_signals inputs.
model.add(GRU(units=5,
              return_sequences=True,
              input_shape=(None, num_x_signals)))

# Map the 5 GRU outputs of each time-step to num_y_signals values in (0, 1).
model.add(Dense(num_y_signals, activation='sigmoid'))

# Disabled alternative output layer: linear activation with small initial
# weights. Left switched off; the init range may need tuning while debugging.
if False:
    from tensorflow.python.keras.initializers import RandomUniform

    init = RandomUniform(minval=-0.05, maxval=0.05)
    model.add(Dense(num_y_signals,
                    activation='linear',
                    kernel_initializer=init))

# Number of leading time-steps that loss_mse_warmup leaves out of the loss.
# It has to stay below sequence_length: a fixed value of 5 with 2-step
# sequences sliced away every time-step and left nothing to compute a loss on.
warmup_steps = max(0, min(5, sequence_length - 1))
def loss_mse_warmup(y_true, y_pred):
    """
    Mean squared error between y_true and y_pred, ignoring warm-up steps.

    The first `warmup_steps` time-steps (module-level setting) of every
    sequence are dropped before the error is computed, so the model is not
    penalised for its predictions at the very start of a sequence.

    y_true: desired output, indexed as [batch, time, signal].
    y_pred: model output, indexed the same way.

    Returns a scalar tensor holding the mean of the squared differences.
    `warmup_steps` must be smaller than the sequence length; otherwise the
    slices below contain no elements.
    """
    # Both slices have shape
    # [batch_size, sequence_length - warmup_steps, num_y_signals].
    y_true_slice = y_true[:, warmup_steps:, :]
    y_pred_slice = y_pred[:, warmup_steps:, :]

    # Use elementary ops rather than
    # tf.losses.mean_squared_error(labels=..., predictions=...): that keyword
    # form is specific to TensorFlow 1.x, and it already returns a scalar, so
    # nothing was gained by reducing its result a second time.
    y_true_slice = tf.cast(y_true_slice, y_pred_slice.dtype)
    squared_error = tf.square(y_true_slice - y_pred_slice)
    return tf.reduce_mean(squared_error)
# RMSprop with a learning rate of 1e-3 (a candidate for tuning while debugging).
optimizer = RMSprop(lr=1e-3)

# Train against the warm-up-aware MSE loss.
# TODO: the output may have to become a single signal rather than the whole
# data set.
model.compile(loss=loss_mse_warmup, optimizer=optimizer)

# summary() prints the layer table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()

# Train from the batch generator: 20 epochs of 100 batches each, evaluated
# against the validation data after every epoch.
model.fit_generator(generator=generator,
                    epochs=20,
                    steps_per_epoch=100,
                    validation_data=validation_data)
I've only changed the part of the code between the "Validation Set Start" and "Validation set end" markers.