I am trying to implement a weighted average between two tensors in TensorFlow, where the weight can be learned automatically. Following the advice on how to design a custom layer for a keras model here, my attempt is the following:
class WeightedAverage(tf.keras.layers.Layer):
def __init__(self):
super(WeightedAverage, self).__init__()
init_value = tf.keras.initializers.Constant(value=0.5)
self.w = self.add_weight(name="weight",
initializer=init_value,
trainable=True)
def call(self, inputs):
return tf.keras.layers.average([inputs[0] * self.w,
inputs[1] * (1 - self.w)])
Now the problem is that after training the model, saving, and loading it again, the value for w remains 0.5. Is it possible that the parameter does not receive any gradient updates? When printing the trainable variables of my model, the parameter is listed and should therefore be included when calling model.fit.
Here is a possibility to implement a weighted average between two tensors, where the weight can be learned automatically. I also introduce the constrain that the weights must sum up to 1. To grant this we have to simply apply a softmax on our weights. In the dummy example below I combine with this method the output of two fully-connected branches but you can manage it in every other scenario
here the custom layer:
class WeightedAverage(Layer):
def __init__(self):
super(WeightedAverage, self).__init__()
def build(self, input_shape):
self.W = self.add_weight(
shape=(1,1,len(input_shape)),
initializer='uniform',
dtype=tf.float32,
trainable=True)
def call(self, inputs):
# inputs is a list of tensor of shape [(n_batch, n_feat), ..., (n_batch, n_feat)]
# expand last dim of each input passed [(n_batch, n_feat, 1), ..., (n_batch, n_feat, 1)]
inputs = [tf.expand_dims(i, -1) for i in inputs]
inputs = Concatenate(axis=-1)(inputs) # (n_batch, n_feat, n_inputs)
weights = tf.nn.softmax(self.W, axis=-1) # (1,1,n_inputs)
# weights sum up to one on last dim
return tf.reduce_sum(weights*inputs, axis=-1) # (n_batch, n_feat)
here the full example in a regression problem:
inp1 = Input((100,))
inp2 = Input((100,))
x1 = Dense(32, activation='relu')(inp1)
x2 = Dense(32, activation='relu')(inp2)
W_Avg = WeightedAverage()([x1,x2])
out = Dense(1)(W_Avg)
m = Model([inp1,inp2], out)
m.compile('adam','mse')
n_sample = 1000
X1 = np.random.uniform(0,1, (n_sample,100))
X2 = np.random.uniform(0,1, (n_sample,100))
y = np.random.uniform(0,1, (n_sample,1))
m.fit([X1,X2], y, epochs=10)
in the end, you can also visualize the value of the weights in this way:
tf.nn.softmax(m.get_weights()[-3]).numpy()
Related
I'm working on developing a neural network from scratch. The issue seems to maybe be with my relu back-propagation. When I train the model it sometimes outputs -0 and sometimes outputs good predictions (relatively). Can someone tell me if I'm doing my back propagation incorrectly or if there's a reason why my relu would be predicting -0?
--
[edit]
Fixed the issue of predicting -0, but now it just predicts 0 for all inputs for the XOR. Can someone look over my backpropagation?
import numpy as np
# Each layer in our neural network
class NeuralLayer:
def __init__(self, input_neurons, output_neurons):
self.weights = np.random.randn(input_neurons, output_neurons)* np.sqrt(2. / input_neurons)
self.bias = np.ones((1,output_neurons)) * 0.5
# Two different activations, sigmoid by default
def sigmoid(self, neurons):
self.act = 1.0/(1.0 + np.exp(-neurons))
return self.act
def sigmoidBackward(self, grad):
return grad * self.act * (1 - self.act)
def relu(self, neurons):
self.act = (neurons > 0)
return neurons * self.act
def reluBackward(self, grad):
return grad * self.act
# Forward pass for this layer
def forward(self, input, activation):
self.input = np.atleast_2d(input)
if activation == 'sigmoid':
return self.sigmoid(input # self.weights + self.bias)
else:
return self.relu(input # self.weights + self.bias)
# backward pass for this layer
def backward(self, grad, activation):
if activation == 'sigmoid':
grad = self.sigmoidBackward(np.atleast_2d(grad))
else:
grad = self.reluBackward(np.atleast_2d(grad))
self.grad_weights = np.matmul(self.input.T, grad)
self.grad_bias = grad.sum()
return grad # self.weights.T
def step(self, step_size):
self.weights -= step_size*self.grad_weights
self.bias -= step_size*self.grad_bias
# Our neural net
class NeuralNetwork:
# Dynamically create all layers
def __init__(self, input_neurons, hidden_neurons, layer_count, activation, output_neurons = 1):
self.activation = activation
# Used to ensure input neurons match inputted data
self.neuron_safety = input_neurons
assert layer_count >= 2 and output_neurons >= 1
# Input layer
self.layers = [NeuralLayer(input_neurons, hidden_neurons)]
# Hidden Layers
for i in range(layer_count - 2):
self.layers.append(NeuralLayer(hidden_neurons, hidden_neurons))
# Output layer
self.layers.append(NeuralLayer(hidden_neurons, output_neurons))
# Forward pass for each layer
def forward(self, inp):
assert inp.shape[0] == self.neuron_safety
for layer in self.layers:
inp = layer.forward(inp, self.activation)
return inp
def backward(self, grad):
for layer in reversed(self.layers):
grad = layer.backward(grad, self.activation)
def step(self, step_size = 0.01):
for layer in self.layers:
layer.step(step_size)
# loss function - only 1 output neuron
def meanSquaredError(self, preds, labels):
self.labels = labels
self.preds = preds
return (self.preds - self.labels)**2
def meanSquaredErrorGrad(self):
return 2 * (self.preds - self.labels)
# Create a neural network with 2 inputs, 2 hidden neurons in each layer, and 2 layers
net = NeuralNetwork(2,16,4, 'relu')
epochs = 5000
# Input data (A,B) for XOR
X = np.array([[0,0],[1,1], [1,0],[0,1]])
# Expected output data
Y = np.array([[0],[0],[1],[1]])
for i in range(epochs):
preds = []
for idx, x in enumerate(X):
predictions = net.forward(x)
preds.append(predictions)
loss = net.meanSquaredError(predictions, Y[idx])
loss_grad = net.meanSquaredErrorGrad()
net.backward(loss_grad)
net.step()
print("Model predicted: {}\nactual values: {} ".format(preds, Y.T))
Output:
Model predicted: [array([[-0.]]), array([[-0.]]), array([[1.]]), array([[-0.]])]
actual values: [[0 0 1 1]]
Sometimes the predictions are perfect, but most of the time at least one prediction will be -0
The bias gradient is incorrect. You are using self.grad_bias = grad.sum(). This will compute the sum of the entire matrix. It needs to be self.grad_bias = grad.sum(axis=0, keepdims=True) to compute a 1 x output_neurons array that will properly update the bias vector. Otherwise, grad.sum() provides a single number that you are using to update all of your biases, which is not correct.
Also, make sure you update your forward pass for your ReLU to np.maximum(neurons, 0) as described in the comments.
def relu(self, neurons):
self.act = (neurons > 0)
return np.maximum(neurons, 0)
The gradient of the activations will be 0 or 1 depending on which parts of the inputs were positive.
Finally, for the XOR problem you typically do not use ReLU as the activation for the output layer because it is not bounded between [0-1] as per the XOR problem. The reason why you got good results with the sigmoid activation function is that the dynamic range of that activation function suits the XOR problem well. As an experiment, you can modify the output layer to be sigmoid, and the hidden layers to be ReLU. If you do this, you should get just as good a performance as using sigmoid all the way.
I am trying to implement a weighted average between two tensors in TensorFlow, where the weight can be learned automatically. Following the advice on how to design a custom layer for a keras model here, my attempt is the following:
class WeightedAverage(tf.keras.layers.Layer):
def __init__(self):
super(WeightedAverage, self).__init__()
init_value = tf.keras.initializers.Constant(value=0.5)
self.w = self.add_weight(name="weight",
initializer=init_value,
trainable=True)
def call(self, inputs):
return tf.keras.layers.average([inputs[0] * self.w,
inputs[1] * (1 - self.w)])
Now the problem is that after training the model, saving, and loading it again, the value for w remains 0.5. Is it possible that the parameter does not receive any gradient updates? When printing the trainable variables of my model, the parameter is listed and should therefore be included when calling model.fit.
Here is a possibility to implement a weighted average between two tensors, where the weight can be learned automatically. I also introduce the constrain that the weights must sum up to 1. To grant this we have to simply apply a softmax on our weights. In the dummy example below I combine with this method the output of two fully-connected branches but you can manage it in every other scenario
here the custom layer:
class WeightedAverage(Layer):
def __init__(self):
super(WeightedAverage, self).__init__()
def build(self, input_shape):
self.W = self.add_weight(
shape=(1,1,len(input_shape)),
initializer='uniform',
dtype=tf.float32,
trainable=True)
def call(self, inputs):
# inputs is a list of tensor of shape [(n_batch, n_feat), ..., (n_batch, n_feat)]
# expand last dim of each input passed [(n_batch, n_feat, 1), ..., (n_batch, n_feat, 1)]
inputs = [tf.expand_dims(i, -1) for i in inputs]
inputs = Concatenate(axis=-1)(inputs) # (n_batch, n_feat, n_inputs)
weights = tf.nn.softmax(self.W, axis=-1) # (1,1,n_inputs)
# weights sum up to one on last dim
return tf.reduce_sum(weights*inputs, axis=-1) # (n_batch, n_feat)
here the full example in a regression problem:
inp1 = Input((100,))
inp2 = Input((100,))
x1 = Dense(32, activation='relu')(inp1)
x2 = Dense(32, activation='relu')(inp2)
W_Avg = WeightedAverage()([x1,x2])
out = Dense(1)(W_Avg)
m = Model([inp1,inp2], out)
m.compile('adam','mse')
n_sample = 1000
X1 = np.random.uniform(0,1, (n_sample,100))
X2 = np.random.uniform(0,1, (n_sample,100))
y = np.random.uniform(0,1, (n_sample,1))
m.fit([X1,X2], y, epochs=10)
in the end, you can also visualize the value of the weights in this way:
tf.nn.softmax(m.get_weights()[-3]).numpy()
I'm trying to implement an unsupervised ANN using Hebbian updating in Keras. I found a custom Hebbian layer made by Dan Saunders here - https://github.com/djsaunde/rinns_python/blob/master/hebbian/hebbian.py
(I hope it is not poor form to ask questions about another person's code here)
In the examples I found using this layer in the repo, this layer is used as an intermediate layer between Dense/Conv layers, but I would like to construct a network using only Hebbian layers.
Two critical things are confusing me in this implementation:
It seems as though input dims and output dims must be the same for this layer to work. Why would this be the case and what can I do to make it so they can be different?
Why is the diagonal of the weight matrix set to zero? It says this is to "ensure that no neuron is laterally connected to itself", but I thought the connection weights were between the previous layer and the current layer, not the current layer and itself.
Here is the code for the Hebbian Layer Implementation:
from keras import backend as K
from keras.engine.topology import Layer
import numpy as np
import tensorflow as tf
np.set_printoptions(threshold=np.nan)
sess = tf.Session()
class Hebbian(Layer):
def __init__(self, output_dim, lmbda=1.0, eta=0.0005, connectivity='random', connectivity_prob=0.25, **kwargs):
'''
Constructor for the Hebbian learning layer.
args:
output_dim - The shape of the output / activations computed by the layer.
lambda - A floating-point valued parameter governing the strength of the Hebbian learning activation.
eta - A floating-point valued parameter governing the Hebbian learning rate.
connectivity - A string which determines the way in which the neurons in this layer are connected to
the neurons in the previous layer.
'''
self.output_dim = output_dim
self.lmbda = lmbda
self.eta = eta
self.connectivity = connectivity
self.connectivity_prob = connectivity_prob
if self.connectivity == 'random':
self.B = np.random.random(self.output_dim) < self.connectivity_prob
elif self.connectivity == 'zero':
self.B = np.zeros(self.output_dim)
super(Hebbian, self).__init__(**kwargs)
def random_conn_init(self, shape, dtype=None):
A = np.random.normal(0, 1, shape)
A[self.B] = 0
return tf.constant(A, dtype=tf.float32)
def zero_init(self, shape, dtype=None):
return np.zeros(shape)
def build(self, input_shape):
# create weight variable for this layer according to user-specified initialization
if self.connectivity == 'all':
self.kernel = self.add_weight(name='kernel', shape=(np.prod(input_shape[1:]), \
np.prod(self.output_dim)), initializer='uniform', trainable=False)
elif self.connectivity == 'random':
self.kernel = self.add_weight(name='kernel', shape=(np.prod(input_shape[1:]), \
np.prod(self.output_dim)), initializer=self.random_conn_init, trainable=False)
elif self.connectivity == 'zero':
self.kernel = self.add_weight(name='kernel', shape=(np.prod(input_shape[1:]), \
np.prod(self.output_dim)), initializer=self.zero_init, trainable=False)
else:
raise NotImplementedError
# ensure that no neuron is laterally connected to itself
self.kernel = self.kernel * tf.diag(tf.zeros(self.output_dim))
# call superclass "build" function
super(Hebbian, self).build(input_shape)
def call(self, x):
x_shape = tf.shape(x)
batch_size = tf.shape(x)[0]
# reshape to (batch_size, product of other dimensions) shape
x = tf.reshape(x, (tf.reduce_prod(x_shape[1:]), batch_size))
# compute activations using Hebbian-like update rule
activations = x + self.lmbda * tf.matmul(self.kernel, x)
# compute outer product of activations matrix with itself
outer_product = tf.matmul(tf.expand_dims(x, 1), tf.expand_dims(x, 0))
# update the weight matrix of this layer
self.kernel = self.kernel + tf.multiply(self.eta, tf.reduce_mean(outer_product, axis=2))
self.kernel = tf.multiply(self.kernel, self.B)
self.kernel = self.kernel * tf.diag(tf.zeros(self.output_dim))
return K.reshape(activations, x_shape)
At first inspection I expected this layer to be able to take inputs from a previous layer, perform a simple activation calculation (input * weight), update the weights according to Hebbian updating (something like - if activation is high b/t nodes, increase weight), then pass the activations to the next layer.
I also expected that it would be able to deal with decreasing/increasing the number of nodes from one layer to the next.
Instead, I cannot seem to figure out why the input and output dims must be the same and why the diagonals of the weight matrix are set to zero.
Where in the code (implicitly or explicitly) is the specification that the layers need to be the same dims?
Where in the code (implicitly or explicitly) is the specification that this layer's weight matrix is connecting the current layer to itself?
Apologies if this Q should have been separated into 2, but it seems like they may be related to e/o so I kept them as 1.
Happy to provide more details if needed.
Edit: Realized I forgot to add the error message I get when I try to create a layer with different output dims than the input dims:
model = Sequential()
model.add(Hebbian(input_shape = (256,1), output_dim = 256))
This compiles w/o error ^
model = Sequential()
model.add(Hebbian(input_shape = (256,1), output_dim = 24))
This ^ throws the error:
IndexError: boolean index did not match indexed array along dimension 0; dimension is 256 but corresponding boolean dimension is 24
Okay I think I maybe figured it out, sort of. There were many small problems but the biggest thing was I needed to add the compute_output_shape function which makes the layer able to modify the shape of its input as explained here:
https://keras.io/layers/writing-your-own-keras-layers/
So here is the code with all the changes I made. It will compile and modify the input shape just fine. Note that this layer computes weight changes inside the layer itself and there may be some issues with that if you try to actually use the layer (I'm still ironing these out), but this is a separate issue.
class Hebbian(Layer):
def __init__(self, output_dim, lmbda=1.0, eta=0.0005, connectivity='random', connectivity_prob=0.25, **kwargs):
'''
Constructor for the Hebbian learning layer.
args:
output_dim - The shape of the output / activations computed by the layer.
lambda - A floating-point valued parameter governing the strength of the Hebbian learning activation.
eta - A floating-point valued parameter governing the Hebbian learning rate.
connectivity - A string which determines the way in which the neurons in this layer are connected to
the neurons in the previous layer.
'''
self.output_dim = output_dim
self.lmbda = lmbda
self.eta = eta
self.connectivity = connectivity
self.connectivity_prob = connectivity_prob
super(Hebbian, self).__init__(**kwargs)
def random_conn_init(self, shape, dtype=None):
A = np.random.normal(0, 1, shape)
A[self.B] = 0
return tf.constant(A, dtype=tf.float32)
def zero_init(self, shape, dtype=None):
return np.zeros(shape)
def build(self, input_shape):
# create weight variable for this layer according to user-specified initialization
if self.connectivity == 'random':
self.B = np.random.random(input_shape[0]) < self.connectivity_prob
elif self.connectivity == 'zero':
self.B = np.zeros(self.output_dim)
if self.connectivity == 'all':
self.kernel = self.add_weight(name='kernel', shape=(np.prod(input_shape[1:]), \
np.prod(self.output_dim)), initializer='uniform', trainable=False)
elif self.connectivity == 'random':
self.kernel = self.add_weight(name='kernel', shape=(np.prod(input_shape[1:]), \
np.prod(self.output_dim)), initializer=self.random_conn_init, trainable=False)
elif self.connectivity == 'zero':
self.kernel = self.add_weight(name='kernel', shape=(np.prod(input_shape[1:]), \
np.prod(self.output_dim)), initializer=self.zero_init, trainable=False)
else:
raise NotImplementedError
# call superclass "build" function
super(Hebbian, self).build(input_shape)
def call(self, x): # x is the input to the network
x_shape = tf.shape(x)
batch_size = tf.shape(x)[0]
# reshape to (batch_size, product of other dimensions) shape
x = tf.reshape(x, (tf.reduce_prod(x_shape[1:]), batch_size))
# compute activations using Hebbian-like update rule
activations = x + self.lmbda * tf.matmul(self.kernel, x)
# compute outer product of activations matrix with itself
outer_product = tf.matmul(tf.expand_dims(x, 1), tf.expand_dims(x, 0))
# update the weight matrix of this layer
self.kernel = self.kernel + tf.multiply(self.eta, tf.reduce_mean(outer_product, axis=2))
self.kernel = tf.multiply(self.kernel, self.B)
return K.reshape(activations, x_shape)
def compute_output_shape(self, input_shape):
return (input_shape[0], self.output_dim)
If anyone comes here from Google (like me; repeatedly) trying to make a layer that learns online when called on new input, I just found this other question and I think it's relevant:
Persistent Variable in keras Custom Layer
Self.call is only called when you are defining the graph, for learning to happen on every new input you need to add self.add_update to the call function.
I have used Keras and TensorFlow to classify the Fashion MNIST following this tutorial .
It uses the AdamOptimizer to find the value for model parameters that minimize the loss function of the network. The input for the network is a 2-D tensor with shape [28, 28], and output is a 1-D tensor with shape [10] which is the result of a softmax function.
Once the network has been trained, I want to use the optimizer for another task: find an input that maximizes one of the elements of the output tensor. How can this be done? Is it possible to do so using Keras or one have to use a lower level API?
Since the input is not unique for a given output, it would be even better if we could impose some constraints on the values the input can take.
The trained model has the following format
model = keras.Sequential([
keras.layers.Flatten(input_shape=(28, 28)),
keras.layers.Dense(128, activation=tf.nn.relu),
keras.layers.Dense(10, activation=tf.nn.softmax)
])
I feel you would want to backprop with respect to the input freezing all the weights to your model. What you could do is:
Add a dense layer after the input layer with the same dimensions as input and set it as trainable
Freeze all the other layers of your model. (except the one you added)
As an input, feed an identity matrix and train your model based on whatever output you desire.
This article and this post might be able to help you if you want to backprop based on the input instead. It's a bit like what you are aiming for but you can get the intuition.
It would be very similar to the way that filters of a Convolutional Network is visualized: we would do gradient ascent optimization in input space to maximize the response of a particular filter.
Here is how to do it: after training is finished, first we need to specify the output and define a loss function that we want to maximize:
from keras import backend as K
output_class = 0 # the index of the output class we want to maximize
output = model.layers[-1].output
loss = K.mean(output[:,output_class]) # get the average activation of our desired class over the batch
Next, we need to take the gradient of the loss we have defined above with respect to the input layer:
grads = K.gradients(loss, model.input)[0] # the output of `gradients` is a list, just take the first (and only) element
grads = K.l2_normalize(grads) # normalize the gradients to help having an smooth optimization process
Next, we need to define a backend function that takes the initial input image and gives the values of loss and gradients as outputs, so that we can use it in the next step to implement the optimization process:
func = K.function([model.input], [loss, grads])
Finally, we implement the gradient ascent optimization process:
import numpy as np
input_img = np.random.random((1, 28, 28)) # define an initial random image
lr = 1. # learning rate used for gradient updates
max_iter = 50 # number of gradient updates iterations
for i in range(max_iter):
loss_val, grads_val = func([input_img])
input_img += grads_val * lr # update the image based on gradients
Note that, after this process is finished, to display the image you may need to make sure that all the values in the image are in the range [0, 255] (or [0,1]).
After the hints Saket Kumar Singh gave in his answer, I wrote the following that seems to solve the question.
I create two custom layers. Maybe Keras offers already some classes that are equivalent to them.
The first on is a trainable input:
class MyInputLayer(keras.layers.Layer):
def __init__(self, output_dim, **kwargs):
self.output_dim = output_dim
super(MyInputLayer, self).__init__(**kwargs)
def build(self, input_shape):
self.kernel = self.add_weight(name='kernel',
shape=self.output_dim,
initializer='uniform',
trainable=True)
super(MyInputLayer, self).build(input_shape)
def call(self, x):
return self.kernel
def compute_output_shape(self, input_shape):
return self.output_dim
The second one gets the probability of the label of interest:
class MySelectionLayer(keras.layers.Layer):
def __init__(self, position, **kwargs):
self.position = position
self.output_dim = 1
super(MySelectionLayer, self).__init__(**kwargs)
def build(self, input_shape):
super(MySelectionLayer, self).build(input_shape)
def call(self, x):
mask = np.array([False]*x.shape[-1])
mask[self.position] = True
return tf.boolean_mask(x, mask,axis=1)
def compute_output_shape(self, input_shape):
return self.output_dim
I used them in this way:
# Build the model
layer_flatten = keras.layers.Flatten(input_shape=(28, 28))
layerDense1 = keras.layers.Dense(128, activation=tf.nn.relu)
layerDense2 = keras.layers.Dense(10, activation=tf.nn.softmax)
model = keras.Sequential([
layer_flatten,
layerDense1,
layerDense2
])
# Compile the model
model.compile(optimizer=tf.train.AdamOptimizer(),
loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
# Train the model
# ...
# Freeze the model
layerDense1.trainable = False
layerDense2.trainable = False
# Build another model
class_index = 7
layerInput = MyInputLayer((1,784))
layerSelection = MySelectionLayer(class_index)
model_extended = keras.Sequential([
layerInput,
layerDense1,
layerDense2,
layerSelection
])
# Compile it
model_extended.compile(optimizer=tf.train.AdamOptimizer(),
loss='mean_absolute_error')
# Train it
dummyInput = np.ones((1,1))
target = np.ones((1,1))
model_extended.fit(dummyInput, target,epochs=300)
# Retrieve the weights of layerInput
layerInput.get_weights()[0]
Interesting. Maybe a solution would be to feed all your data to the network and for each sample save the output_layer after softmax.
This way, for 3 classes, where you want to find the best input for class 1, you are looking for outputs where the first component is high. For example: [1 0 0]
Indeed the output means the probability, or the confidence of the network, for the sample being one of the classes.
Funny coincident I was just working on the same "problem". I'm interested in the direction of adversarial training etc. What I did was to insert a LocallyConnected2D Layer after the input and then train with data which is all one and has as targets the class of interest.
As model I use
batch_size = 64
num_classes = 10
epochs = 20
input_shape = (28, 28, 1)
inp = tf.keras.layers.Input(shape=input_shape)
conv1 = tf.keras.layers.Conv2D(32, kernel_size=(3, 3),activation='relu',kernel_initializer='he_normal')(inp)
pool1 = tf.keras.layers.MaxPool2D((2, 2))(conv1)
drop1 = tf.keras.layers.Dropout(0.20)(pool1)
flat = tf.keras.layers.Flatten()(drop1)
fc1 = tf.keras.layers.Dense(128, activation='relu')(flat)
norm1 = tf.keras.layers.BatchNormalization()(fc1)
dropfc1 = tf.keras.layers.Dropout(0.25)(norm1)
out = tf.keras.layers.Dense(num_classes, activation='softmax')(dropfc1)
model = tf.keras.models.Model(inputs = inp , outputs = out)
model.compile(loss=tf.keras.losses.categorical_crossentropy,
optimizer=tf.keras.optimizers.RMSprop(),
metrics=['accuracy'])
model.summary()
after training I insert the new layer
def insert_intermediate_layer_in_keras(model,position, before_layer_id):
layers = [l for l in model.layers]
if(before_layer_id==0) :
x = new_layer
else:
x = layers[0].output
for i in range(1, len(layers)):
if i == before_layer_id:
x = new_layer(x)
x = layers[i](x)
else:
x = layers[i](x)
new_model = tf.keras.models.Model(inputs=layers[0].input, outputs=x)
return new_model
def fix_model(model):
for l in model.layers:
l.trainable=False
fix_model(model)
new_layer = tf.keras.layers.LocallyConnected2D(1, kernel_size=(1, 1),
activation='linear',
kernel_initializer='he_normal',
use_bias=False)
new_model = insert_intermediate_layer_in_keras(model,new_layer,1)
new_model.compile(loss=tf.keras.losses.categorical_crossentropy,
optimizer=tf.keras.optimizers.RMSprop(),
metrics=['accuracy'])
and finally rerun training with my fake data.
X_fake = np.ones((60000,28,28,1))
print(Y_test.shape)
y_fake = np.ones((60000))
Y_fake = tf.keras.utils.to_categorical(y_fake, num_classes)
new_model.fit(X_fake, Y_fake, epochs=100)
weights = new_layer.get_weights()[0]
imshow(weights.reshape(28,28))
plt.show()
Results are not yet satisfying but I'm confident of the approach and guess I need to play around with the optimiser.
I am trying to implement a variational autoencoder design in tensorflow. This particular approach alters the KL divergence part of the loss to remove any dependence on x, (the input data that is). Instead they want to compare (q_phi(z) || p(z)) rather than (q_phi(z|x) || p(z)). I am taking it that I need to run a random normal or uniform vector of the right shape through the trained network, but am having a hard time figuring out how to input it. Here is what I have so far:
class EncoderRNN(object):
def __init__(self, x, n_steps, input_size, output_size,
cell_size, num_layers, default_batch_size):
# ### Data Input ###
self.x = x
# self.init_state = init_state
self.n_steps = n_steps
self.input_size = input_size
self.output_size = output_size
self.cell_size = cell_size
self.num_layers = num_layers
self.default_batch_size = default_batch_size
...
class VAE(object):
def __init__(self, time_steps, features, latent_dim, encoder_hidden,
decoder_hidden, num_layers, default_batch_size,
learning_rate):
self.time_steps = time_steps
self.features = features
self.latent_dim = latent_dim
self.encoder_hidden = encoder_hidden
self.decoder_hidden = decoder_hidden
self.num_layers = num_layers
self.default_batch_size = default_batch_size
self.LR = learning_rate
with tf.name_scope('train_inputs'):
# ### Multi-D timeseries input ###
self.x = tf.placeholder("float", [None, self.time_steps,
self.features])
### 1 D timeseries output ###
self.x_target = tf.placeholder("float", [None, self.time_steps, 1])
self.batch_size = tf.placeholder(dtype=tf.int32)
...
Here is what I am trying to figure out:
def build_codec(self):
self.encoder = EncoderRNN(self.x,
self.time_steps,
self.features,
self.latent_dim,
self.encoder_hidden,
self.num_layers,
self.default_batch_size)
# ### Output tensors from encoder ###
self.mu = self.encoder.mu
self.logvar = self.encoder.logvar
epsilon = tf.random_normal(tf.shape(self.logvar), name='epsilon')
std_encoder = tf.exp(0.5 * self.logvar)
self.z = self.mu + tf.multiply(std_encoder, epsilon)
# Now I want to do this:
x_rand = tf.random_normal(tf.shape(self.x))
q_z = self.encoder(x_rand)
I am not quite sure how to get x_rand through the encoder graph.
Hoping that I understand the difficulty correctly, there are a couple of way:
First is to realize that TensorFlow allows you to feed any tensor, not just a placeholder. So, you can just do sess.run(..., feed_dict={self.x: x_rand}) to feed your random value into the self.x tensor.
Second, if you want to produce your random data with a Dataset pipeline. This can be useful if creating the input takes a while and you want to do it in parallel with sess.run() call. Then, you would instantiate your model twice. Once with regular self.x input and once with input tensor from Dataset iterator. Both models would share the variables (you would need to create them in a variable scope set to share variables) but the operations (nodes in the computation graph) would be different. You can then call sess.run() on any of these two models whenever you want.