I'm very new to Keras and I'm writing a custom layer which implements Gaussian function [exp(-(w*x-mean)^2/sigma^2) where W, mean, sigma are all randomly generated].
Below is code for the custom layer:
class Gaussian(Layer):
def __init__(self,**kwargs):
super(Gaussian, self).__init__(**kwargs)
def build(self, input_shape):
# Create trainable weights for this layer.
self.W_init = np.random.rand(1,input_shape[1])
self.W = K.variable(self.W_init, name="W")
# Create trainable means for this layer.
self.mean_init = np.random.rand(1,input_shape[1])
self.mean = K.variable(self.mean_init, name="mean")
# Create trainable sigmas for this layer.
self.sigma_init = np.random.rand(1,input_shape[1])
self.sigma = K.variable(self.sigma_init, name="sigma")
self.trainable_weights = [self.mean, self.sigma]
super(Gaussian, self).build(input_shape) # Be sure to call this somewhere!
def call(self, x):
result = tf.multiply(x, self.W)
result = tf.subtract(x, self.mean)
result = tf.multiply(tf.square(result),-1)
result = tf.divide(result, tf.square(self.sigma))
return result
def compute_output_shape(self, input_shape):
return input_shape
After putting it as the first layer in a Keras mnist tutorial(just wanted to make sure it runs without producing errors, didn't care for accuracy) and training the model, it appeared that the loss stopped decreasing after around 4 epochs and only the numbers of "mean" and "sigma" changed after training while the numbers of "W" remains the same. However, this doesn't happen if I put it as the second layer.
I ran the Keras mnist tutorial again without the custom layer and found out that the weights of the first layer didn't change either.
Is not updating the weights of first layer(more specifically the very first parameter) a Keras thing or am I missing something? Can I force it to update?
Thank you!
You are not implementing your layer correctly, Keras is not aware of your weights, that means they are not being trained by gradient descent. Take a look at this example:
from keras import backend as K
from keras.engine.topology import Layer
import numpy as np
class MyLayer(Layer):
def __init__(self, output_dim, **kwargs):
self.output_dim = output_dim
super(MyLayer, self).__init__(**kwargs)
def build(self, input_shape):
# Create a trainable weight variable for this layer.
self.kernel = self.add_weight(name='kernel',
shape=(input_shape[1], self.output_dim),
initializer='uniform',
trainable=True)
super(MyLayer, self).build(input_shape) # Be sure to call this at the end
def call(self, x):
return K.dot(x, self.kernel)
def compute_output_shape(self, input_shape):
return (input_shape[0], self.output_dim)
Here you have to use add_weight to obtain a trainable weight, not just use K.variable as you are currently doing. This way your weights will be registered with Keras and they will be trained properly. You should do this for all trainable parameters in your layer.
Related
I want to get the weights of my custom layer, but I couldn't get them by model.layer().get_weights()[X].
So I checked the layers of the model, it seems that the custom layer is decomposed into several operations and no weights can be found in these layers.
Here is the custom layer code
class PixelBaseConv(Layer):
def __init__(self, output_dim, **kwargs):
self.output_dim = output_dim
super(PixelBaseConv, self).__init__(**kwargs)
def build(self, input_shape):
# kernel_shape: w*h*c*output_dim
kernel_size = input_shape[1:]
kernel_shape = (1,) + kernel_size + (self.output_dim, )
self.kernel = self.add_weight(name='kernel',
shape=kernel_shape,
initializer='uniform',
trainable=True)
super(PixelBaseConv, self).build(input_shape)
def call(self, inputs):
# output_shape: w*h*output_dim
outputs = []
inputs = K.cast(inputs, dtype="float32")
for i in range(self.output_dim):
#output = tf.keras.layers.Multiply()([inputs, self.kernel[..., i]])
output = inputs*self.kernel[...,i]
output = K.sum(output, axis=-1)
if len(outputs) != 0:
outputs = np.dstack([outputs, output])
else:
outputs = output[..., np.newaxis]
return tf.convert_to_tensor(outputs)
def compute_output_shape(self, input_shape):
return input_shape + (self.output_dim, )
Here is part of the model structure
enter image description here
I tried different ways to obtain the weights but due to the strange layers, failed.
Expected: the first five layers are replaced with single layer which has a trainable kernel. Weights can be get directly by get_weights()
I listed weight list length of the first 10 layers and printed weight of layer 1 by following codes
for i in range(len(model.layers)):
print("layer " + str(i), len(model.layers[i].get_weights()))
print(model.layers[1].get_weights()[0])
and got the result and error
enter image description here
enter image description here
I found why this problem occurred.
I wrote the custom layer by
import tensorflow.python.keras
while using other keras layers and creating the model by
import tensorflow.keras
I think these two libraries may not be compatible, so my custom layer was splitted into several operation layers. Thus, weights cannot be obtained and updated.
I changed all imports to tensorflow.keras, now everything goes well.
I was learning making custom layers in tensor flow but could not find out how to add trainable weights for example
class Linear(layers.Layer):
def __init__(self, units = 32, **kwargs):
super().__init__(kwargs)
self.units = units
def build(self, input_shape):
self.layer = layers.Dense(self.units, trainable= True)
super().build(input_shape)
def call(self, inputs):
return self.layer(inputs)
Now if I do
linear_layer = Linear(8)
x = tf.ones(shape =(4,3))
y = linear_layer(x)
print(linear_layer.trainable_variables)
I get an empty matrix and thus during gradient calculation I get no gradients, my question is how to create custom layers in a way that default keras layers are also trainable in that. One more thing if I do linear_layer.weights then it give me the weights, it means there is some problem with trainable weights.
My mind is stuck on that
To get trainable variables you have to access the "layer" attribute of your custom layer:
linear_layer = Linear(8)
x = tf.ones(shape =(4,3))
y = linear_layer(x)
print(linear_layer.layer.trainable_variables)
note that you just create a pre_built layer (Dense) in the build method instead of create the weights of your custom layer. look at link https://www.tensorflow.org/tutorials/customization/custom_layers
So I'm implementing Center loss: https://ydwen.github.io/papers/WenECCV16.pdf and I am having problem with updating weights in my layer, which here means updating centers in Center loss. When I print my class_centers like this tf.print(self.class_centers, summarize=-1, output_stream='file:///tensors.txt') than they never change. When I print other Variables they seem fine, so the only problem I can think of is that add_update() doesn't do what it should do.
The custom layer:
class CenterLossLayer(Layer):
def __init__(self, alpha=0.5, **kwargs):
self.alpha = alpha
super(CenterLossLayer, self).__init__(**kwargs)
def build(self, input_shape):
print('Center loss input 1 (feature_size): ', input_shape[0][1])
print('Center loss input 2 (num_classes): ', input_shape[1][1])
self.class_centers = self.add_weight(name='class_centers',
shape=(input_shape[1][1], input_shape[0][1]),
initializer='uniform',
trainable=False)
super(CenterLossLayer, self).build(input_shape)
def call(self, x, mask=None):
embeddings, one_hots = x
tf.print(self.class_centers, summarize=-1, output_stream='file:///tensors.txt')
batch_centers = K.dot(one_hots, self.class_centers)
batch_delta = batch_centers - embeddings
class_delta = K.dot(K.transpose(one_hots), batch_delta)
counts = K.sum(K.transpose(one_hots), axis=1, keepdims=True) + 1
class_delta = class_delta / counts
class_delta = K.in_train_phase(self.alpha * class_delta, 0 * class_delta)
updated_class_centers = self.class_centers - class_delta
self.add_update((self.class_centers, updated_class_centers), x[0])
losses = K.sum(K.square(embeddings - batch_centers), axis=1, keepdims=True)
return losses
def compute_output_shape(self, input_shape):
return (input_shape[1][0], )
and the final loss is:
def batch_mean_loss(y_true, y_pred):
return K.mean(y_pred, axis=0)
where y_pred is losses from CenterLossLayer.
The weird thing is that even thought the centers are not updating, the center loss is going down with each epoch and the final model is better that the one trained only with Softmax loss.
So I checked out how add_update() is used in BatchNormalization layer:
self.add_update([K.moving_average_update(self.moving_mean,
mean,
self.momentum),
K.moving_average_update(self.moving_variance,
variance,
self.momentum)],
inputs)
The thing is that the first argument of method add_update() is "updates: Update op" and moving_average_update() returns "An operation to update the variable.". So I guess that add_update() requires some sort of operation and moving_average_update() returns that. I don't know how to create this operation, so instead I did:
self.add_update(K.moving_average_update(self.class_centers, updated_class_centers, 0.0), x)
so it functions as just replacing self.class_centers with updated_class_centers and it works.
Even thought it works, I would appreciate if anyone knows how to do this properly.
Looks like you should do something like this:
class ComputeSum(keras.layers.Layer):
def __init__(self, input_dim):
super(ComputeSum, self).__init__()
self.total = tf.Variable(initial_value=tf.zeros((input_dim,)), trainable=False)
def call(self, inputs):
self.total.assign_add(tf.reduce_sum(inputs, axis=0))
return self.total
Snippet got from https://keras.io/guides/making_new_layers_and_models_via_subclassing/#layers-can-have-nontrainable-weighto
Note: I posted about this issue already here. I'm creating a new question because:
1. I think the issue specifically relates to reshaping my mask within my custom layer, but I'm not sure enough of that to completely ignore the other error I wrote about in the original post.
2. There are many posts about reshaping Keras layers or adding Masking layers, but I couldn't find any about reshaping a mask within a layer, so I hope this post can be useful more generally.
The issue:
I have a custom Keras layer that takes 2D input and returns 3D output (batch_size, max_length, 1024), which is passed on to a BiLSTM followed by a CRF.
The custom Keras layer is copied from this repository. The difference is I take the 'elmo' instead of 'default' outputs from the Elmo model, so that the output is 3D as required by the BiLSTM:
result = self.elmo(K.squeeze(K.cast(x, tf.string), axis=1),
as_dict=True,
signature='default',
)['elmo'] # The original code used 'default'
However the compute_mask function isn't appropriate for my architecture, as it's output is 2D. Thus I get the error:
InvalidArgumentError: Incompatible shapes: [32,47] vs. [32,0] [[{{node loss/crf_1_loss/mul_6}}]]
where 32 is batch size and 47 is one less than my specified max_length.
I'm sure I need to reshape the mask, but I couldn't find out anywhere how.
Happy to make a git repo with the whole thing and/or full stack trace if need be.
Custom ELMo Layer:
class ElmoEmbeddingLayer(Layer):
def __init__(self, **kwargs):
self.dimensions = 1024
self.trainable = True
super(ElmoEmbeddingLayer, self).__init__(**kwargs)
def build(self, input_shape):
self.elmo = hub.Module('https://tfhub.dev/google/elmo/2', trainable=self.trainable, name="{}_module".format(self.name))
self.trainable_weights += K.tf.trainable_variables(scope="^{}_module/.*".format(self.name))
super(ElmoEmbeddingLayer, self).build(input_shape)
def call(self, x, mask=None):
result = self.elmo(K.squeeze(K.cast(x, tf.string), axis=1),
as_dict=True, signature='default',)['elmo']
return result
# Original compute_mask function. Raises;
# InvalidArgumentError: Incompatible shapes: [32,47] vs. [32,0] [[{{node loss/crf_1_loss/mul_6}}]]
def compute_mask(self, inputs, mask=None):
return K.not_equal(inputs, '__PAD__')
def compute_output_shape(self, input_shape):
return input_shape[0], 48, self.dimensions
The model is built as follows:
def build_model(): # uses crf from keras_contrib
input = layers.Input(shape=(1,), dtype=tf.string)
model = ElmoEmbeddingLayer(name='ElmoEmbeddingLayer')(input)
model = Bidirectional(LSTM(units=512, return_sequences=True))(model)
crf = CRF(num_tags)
out = crf(model)
model = Model(input, out)
model.compile(optimizer="rmsprop", loss=crf_loss, metrics=[crf_accuracy, categorical_accuracy, mean_squared_error])
model.summary()
return model
I'm trying to implement an unsupervised ANN using Hebbian updating in Keras. I found a custom Hebbian layer made by Dan Saunders here - https://github.com/djsaunde/rinns_python/blob/master/hebbian/hebbian.py
(I hope it is not poor form to ask questions about another person's code here)
In the examples I found using this layer in the repo, this layer is used as an intermediate layer between Dense/Conv layers, but I would like to construct a network using only Hebbian layers.
Two critical things are confusing me in this implementation:
It seems as though input dims and output dims must be the same for this layer to work. Why would this be the case and what can I do to make it so they can be different?
Why is the diagonal of the weight matrix set to zero? It says this is to "ensure that no neuron is laterally connected to itself", but I thought the connection weights were between the previous layer and the current layer, not the current layer and itself.
Here is the code for the Hebbian Layer Implementation:
from keras import backend as K
from keras.engine.topology import Layer
import numpy as np
import tensorflow as tf
np.set_printoptions(threshold=np.nan)
sess = tf.Session()
class Hebbian(Layer):
def __init__(self, output_dim, lmbda=1.0, eta=0.0005, connectivity='random', connectivity_prob=0.25, **kwargs):
'''
Constructor for the Hebbian learning layer.
args:
output_dim - The shape of the output / activations computed by the layer.
lambda - A floating-point valued parameter governing the strength of the Hebbian learning activation.
eta - A floating-point valued parameter governing the Hebbian learning rate.
connectivity - A string which determines the way in which the neurons in this layer are connected to
the neurons in the previous layer.
'''
self.output_dim = output_dim
self.lmbda = lmbda
self.eta = eta
self.connectivity = connectivity
self.connectivity_prob = connectivity_prob
if self.connectivity == 'random':
self.B = np.random.random(self.output_dim) < self.connectivity_prob
elif self.connectivity == 'zero':
self.B = np.zeros(self.output_dim)
super(Hebbian, self).__init__(**kwargs)
def random_conn_init(self, shape, dtype=None):
A = np.random.normal(0, 1, shape)
A[self.B] = 0
return tf.constant(A, dtype=tf.float32)
def zero_init(self, shape, dtype=None):
return np.zeros(shape)
def build(self, input_shape):
# create weight variable for this layer according to user-specified initialization
if self.connectivity == 'all':
self.kernel = self.add_weight(name='kernel', shape=(np.prod(input_shape[1:]), \
np.prod(self.output_dim)), initializer='uniform', trainable=False)
elif self.connectivity == 'random':
self.kernel = self.add_weight(name='kernel', shape=(np.prod(input_shape[1:]), \
np.prod(self.output_dim)), initializer=self.random_conn_init, trainable=False)
elif self.connectivity == 'zero':
self.kernel = self.add_weight(name='kernel', shape=(np.prod(input_shape[1:]), \
np.prod(self.output_dim)), initializer=self.zero_init, trainable=False)
else:
raise NotImplementedError
# ensure that no neuron is laterally connected to itself
self.kernel = self.kernel * tf.diag(tf.zeros(self.output_dim))
# call superclass "build" function
super(Hebbian, self).build(input_shape)
def call(self, x):
x_shape = tf.shape(x)
batch_size = tf.shape(x)[0]
# reshape to (batch_size, product of other dimensions) shape
x = tf.reshape(x, (tf.reduce_prod(x_shape[1:]), batch_size))
# compute activations using Hebbian-like update rule
activations = x + self.lmbda * tf.matmul(self.kernel, x)
# compute outer product of activations matrix with itself
outer_product = tf.matmul(tf.expand_dims(x, 1), tf.expand_dims(x, 0))
# update the weight matrix of this layer
self.kernel = self.kernel + tf.multiply(self.eta, tf.reduce_mean(outer_product, axis=2))
self.kernel = tf.multiply(self.kernel, self.B)
self.kernel = self.kernel * tf.diag(tf.zeros(self.output_dim))
return K.reshape(activations, x_shape)
At first inspection I expected this layer to be able to take inputs from a previous layer, perform a simple activation calculation (input * weight), update the weights according to Hebbian updating (something like - if activation is high b/t nodes, increase weight), then pass the activations to the next layer.
I also expected that it would be able to deal with decreasing/increasing the number of nodes from one layer to the next.
Instead, I cannot seem to figure out why the input and output dims must be the same and why the diagonals of the weight matrix are set to zero.
Where in the code (implicitly or explicitly) is the specification that the layers need to be the same dims?
Where in the code (implicitly or explicitly) is the specification that this layer's weight matrix is connecting the current layer to itself?
Apologies if this Q should have been separated into 2, but it seems like they may be related to e/o so I kept them as 1.
Happy to provide more details if needed.
Edit: Realized I forgot to add the error message I get when I try to create a layer with different output dims than the input dims:
model = Sequential()
model.add(Hebbian(input_shape = (256,1), output_dim = 256))
This compiles w/o error ^
model = Sequential()
model.add(Hebbian(input_shape = (256,1), output_dim = 24))
This ^ throws the error:
IndexError: boolean index did not match indexed array along dimension 0; dimension is 256 but corresponding boolean dimension is 24
Okay I think I maybe figured it out, sort of. There were many small problems but the biggest thing was I needed to add the compute_output_shape function which makes the layer able to modify the shape of its input as explained here:
https://keras.io/layers/writing-your-own-keras-layers/
So here is the code with all the changes I made. It will compile and modify the input shape just fine. Note that this layer computes weight changes inside the layer itself and there may be some issues with that if you try to actually use the layer (I'm still ironing these out), but this is a separate issue.
class Hebbian(Layer):
def __init__(self, output_dim, lmbda=1.0, eta=0.0005, connectivity='random', connectivity_prob=0.25, **kwargs):
'''
Constructor for the Hebbian learning layer.
args:
output_dim - The shape of the output / activations computed by the layer.
lambda - A floating-point valued parameter governing the strength of the Hebbian learning activation.
eta - A floating-point valued parameter governing the Hebbian learning rate.
connectivity - A string which determines the way in which the neurons in this layer are connected to
the neurons in the previous layer.
'''
self.output_dim = output_dim
self.lmbda = lmbda
self.eta = eta
self.connectivity = connectivity
self.connectivity_prob = connectivity_prob
super(Hebbian, self).__init__(**kwargs)
def random_conn_init(self, shape, dtype=None):
A = np.random.normal(0, 1, shape)
A[self.B] = 0
return tf.constant(A, dtype=tf.float32)
def zero_init(self, shape, dtype=None):
return np.zeros(shape)
def build(self, input_shape):
# create weight variable for this layer according to user-specified initialization
if self.connectivity == 'random':
self.B = np.random.random(input_shape[0]) < self.connectivity_prob
elif self.connectivity == 'zero':
self.B = np.zeros(self.output_dim)
if self.connectivity == 'all':
self.kernel = self.add_weight(name='kernel', shape=(np.prod(input_shape[1:]), \
np.prod(self.output_dim)), initializer='uniform', trainable=False)
elif self.connectivity == 'random':
self.kernel = self.add_weight(name='kernel', shape=(np.prod(input_shape[1:]), \
np.prod(self.output_dim)), initializer=self.random_conn_init, trainable=False)
elif self.connectivity == 'zero':
self.kernel = self.add_weight(name='kernel', shape=(np.prod(input_shape[1:]), \
np.prod(self.output_dim)), initializer=self.zero_init, trainable=False)
else:
raise NotImplementedError
# call superclass "build" function
super(Hebbian, self).build(input_shape)
def call(self, x): # x is the input to the network
x_shape = tf.shape(x)
batch_size = tf.shape(x)[0]
# reshape to (batch_size, product of other dimensions) shape
x = tf.reshape(x, (tf.reduce_prod(x_shape[1:]), batch_size))
# compute activations using Hebbian-like update rule
activations = x + self.lmbda * tf.matmul(self.kernel, x)
# compute outer product of activations matrix with itself
outer_product = tf.matmul(tf.expand_dims(x, 1), tf.expand_dims(x, 0))
# update the weight matrix of this layer
self.kernel = self.kernel + tf.multiply(self.eta, tf.reduce_mean(outer_product, axis=2))
self.kernel = tf.multiply(self.kernel, self.B)
return K.reshape(activations, x_shape)
def compute_output_shape(self, input_shape):
return (input_shape[0], self.output_dim)
If anyone comes here from Google (like me; repeatedly) trying to make a layer that learns online when called on new input, I just found this other question and I think it's relevant:
Persistent Variable in keras Custom Layer
Self.call is only called when you are defining the graph, for learning to happen on every new input you need to add self.add_update to the call function.