I am trying to access the weights of a Keras layer and use the weight values themselves as the input to a different layer.
Here's a rough outline of what I'm hoping to achieve:
def generate_myModel(SEQUENCE_LENGTH, FILT_NUM, FILT_SIZE):
ip = keras.layers.Input(shape = (SEQUENCE_LENGTH,1))
conv_layer = keras.layers.Conv1D(filters = FILT_NUM, kernel_size = FILT_SIZE)
y = conv_layer(ip)
y = keras.layers.GlobalMaxPooling1D()(y)
out_y = keras.layers.Dense(units = 1, activation = 'linear')(y)
# Acquire the actual weights from the previous convolution layers
w1 = <WEIGHTS FROM conv_layer - THIS IS THE PART IN QUESTION>
out_w1 = keras.layers.Lambda( lambda x: K.std(x)/K.abs(K.mean(x)) )(w1)
myModel = keras.models.Model(inputs = ip, outputs = [out_y, out_w1])
return myModel
I'm aware that, once you have an instantiated model, you can use model.layers[i].get_weights(), but I'd like to be able to do this inside the actual architecture of the model.
Is this possible?
EDIT------------------------------------
Attempting the solution from the comments, I add layer.get_weights() into the model's architecture like so:
def generate_myModel(SEQUENCE_LENGTH, FILT_NUM, FILT_SIZE):
ip = keras.layers.Input(shape = (SEQUENCE_LENGTH,1))
conv_layer = keras.layers.Conv1D(filters = FILT_NUM, kernel_size = FILT_SIZE)
y = conv_layer(ip)
# Acquire the actual weights from the previous convolution layers
w1 = K.constant(conv_layer.get_weights()) # layer.get_weights returns a Numpy
# array, I need a Keras Tensor -
# so I use K.constant()
y = keras.layers.GlobalMaxPooling1D()(y)
out_y = keras.layers.Dense(units = 1, activation = 'linear')(y)
out_w1 = keras.layers.Lambda( lambda x: K.std(x)/K.abs(K.mean(x)) )(w1)
myModel = keras.models.Model(inputs = ip, outputs = [out_y, out_w1])
return myModel
but this leaves me with the following error:
AttributeError: 'NoneType' object has no attribute '_inbound_nodes'
Any guidance would be greatly appreciated!
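One pattern that can keep such a statistic inside the model graph (a sketch of my own, untested across every Keras version, with K being keras.backend as in the snippet above and KernelCV just an illustrative name) is a small custom layer that reads the convolution's kernel variable directly while taking an ordinary graph tensor as input, so its output has proper inbound nodes:
from keras import backend as K

class KernelCV(keras.layers.Layer):
    """Outputs std(kernel) / |mean(kernel)| of another layer, one value per sample."""
    def __init__(self, source_layer, **kwargs):
        super(KernelCV, self).__init__(**kwargs)
        self.source_layer = source_layer

    def call(self, inputs):
        k = self.source_layer.kernel              # symbolic weight tensor, not a NumPy copy
        cv = K.std(k) / K.abs(K.mean(k))          # scalar statistic
        # Multiplying by ones derived from `inputs` keeps the output connected to the graph.
        return K.ones_like(inputs[:, :1]) * cv

    def compute_output_shape(self, input_shape):
        return (input_shape[0], 1)

# Inside generate_myModel, after the pooling layer:
# out_w1 = KernelCV(conv_layer)(y)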
I have a neural network in PyTorch and build each layer automatically via the following structure:
class FCN(nn.Module):
##Neural Network
def __init__(self,layers):
super().__init__() #call __init__ from parent class
self.activation = nn.Tanh()
self.loss_function = nn.MSELoss(reduction ='mean')
'Initialise neural network as a list using nn.ModuleList'
self.linears = nn.ModuleList([nn.Linear(layers[i], layers[i+1]) for i in range(len(layers)-1)])
self.iter = 0
'Xavier Normal Initialization'
for i in range(len(layers)-1):
nn.init.xavier_normal_(self.linears[i].weight.data, gain=1.0)
nn.init.zeros_(self.linears[i].bias.data)
'forward pass'
def forward(self, x):
if not torch.is_tensor(x):
    x = torch.from_numpy(x)
a = x.float()
for i in range(len(self.linears) - 1):  # all layers except the last
z = self.linears[i](a)
a = self.activation(z)
a = self.linears[-1](a)
return a
The network is then created with the following code:
layers = np.array([2, 50, 50, 1])
model = FCN(layers)
Now, I am wondering how I can automatically add dropout layers to the network. I tried the following change in the network structure but it only gives me one dropout layer at the end:
self.linears = nn.ModuleList([nn.Linear(layers[i], layers[i+1]) for i in range(len(layers)-1) + nn.Dropout(p=0.5)]
I very much appreciate any help in this regard.
If you can add a dropout layer by "adding it" with + as you do (I haven't seen that, but if it works, that is dope!), you should just move the + nn.Dropout before the range, I assume, i.e.
self.linears = nn.ModuleList([nn.Linear(layers[i], layers[i+1])+ nn.Dropout(p=0.5) for i in range(len(layers)-1) ]
EDIT
As expected, you can't add it like that.
What you would do instead is create a list of dropout layers, in the same way you create the linear layers, and then use them in your forward pass.
Below is an example; it might need to be tweaked to match your inputs, etc.
class FCN(nn.Module):
## Neural Network
def __init__(self,layers):
super().__init__()
self.activation = nn.Tanh()
self.loss_function = nn.MSELoss(reduction ='mean')
'Initialise neural network as a list using nn.ModuleList'
self.linears = nn.ModuleList([nn.Linear(layers[i], layers[i+1]) for i in range(len(layers)-1)])
self.dropout_layers = nn.ModuleList([nn.Dropout(p=0.5) for _ in range(len(layers)-1)])  # ModuleList so model.train()/eval() reach them
self.iter = 0
'Xavier Normal Initialization'
for i in range(len(layers)-1):
nn.init.xavier_normal_(self.linears[i].weight.data, gain=1.0)
nn.init.zeros_(self.linears[i].bias.data)
def forward(self, x):
    # Note: activations are omitted here and dropout also follows the last layer;
    # tweak this to match the original architecture as needed.
    for layer, dropout in zip(self.linears, self.dropout_layers):
        x = layer(x)
        x = dropout(x)
    return x
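A quick sanity check (my own addition) that the dropout layers registered in the nn.ModuleList above really get switched off by model.eval(); layers kept in a plain Python list would not be:
model = FCN([2, 50, 50, 1])
model.eval()                                  # registered nn.Dropout modules switch to eval mode
x = torch.rand(4, 2)
assert torch.equal(model(x), model(x))        # deterministic once dropout is disabled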
I think you should rewrite your code; nn.Sequential() might be the best tool to use here.
class FCN(nn.Module):
"""Neural Network"""
def __init__(self, layers, drop_p=0.5):
super().__init__() # call __init__ from parent class
self.loss_function = nn.MSELoss(reduction='mean')
# Use nn.Sequential to create the neural network
# Here you need a list of Module that you want to use
module_list = [[nn.Linear(layers[i], layers[i + 1]), nn.Dropout(p=drop_p), nn.Tanh()] for i in range(len(layers) - 2)]
self.linears = nn.Sequential(*[item for sublist in module_list for item in sublist])
self.last_layer = nn.Linear(layers[-2], layers[-1])
self.iter = 0
# Xavier Normal Initialization
self.linears.apply(self.weights_init)
self.last_layer.apply(self.weights_init)
@staticmethod
def weights_init(m):
# Xavier Normal Initialization
if isinstance(m, nn.Linear):          # the model only contains nn.Linear layers
    torch.nn.init.xavier_normal_(m.weight)
    torch.nn.init.zeros_(m.bias)
# Forward pass
def forward(self, x):
if not torch.is_tensor(x):
x = torch.from_numpy(x)
x = x.float()
x = self.linears(x)
x = self.last_layer(x)
return x
Something like this works fine; you can test it out with a dummy tensor like:
if __name__ == "__main__":
layers_size = np.array([2, 50, 50, 1])
model = FCN(layers_size)
t = torch.rand((2,))
output = model(t)
print(output)
>>> tensor([-0.0045], grad_fn=<AddBackward0>)
For the flattening operation of the module_list variable you can check here.
I have already trained a model with this structure using first-order optimization. The code is below:
def build_model():
input_layer1 = Input((dataX.shape[1],))
input_layer2 = Input((dataT1.shape[1],))
input_layer3 = Input((dataT2.shape[1],))
input_layer4 = Input((dataT3.shape[1],))
input_layer5 = Input((dataT4.shape[1],))
#continuous layers
hidden_1 = K.layers.Dense(64, activation='tanh')(input_layer1)
hidden_2 = K.layers.Dense(64, activation='tanh')(hidden_1)
hidden_3 = K.layers.Dense(64, activation='tanh')(hidden_2)
hidden_4 = K.layers.Dense(64, activation='tanh')(hidden_3)
#categorical layers and merging
hidden_5 = K.layers.Dense(64, activation='tanh')(input_layer2)
hidden_6 = K.layers.Dense(64, activation='tanh')(input_layer3)
hidden_7 = K.layers.Dense(64, activation='tanh')(input_layer4)
hidden_8 = K.layers.Dense(64, activation='tanh')(input_layer5)
merged1 = merge.concatenate([hidden_5, hidden_6])
merged2 = merge.concatenate([hidden_7, hidden_8])
merged3 = merge.concatenate([merged1, merged2])
hidden_9 = K.layers.Dense(64, activation='tanh')(merged3)
hidden_10 = K.layers.Dense(32, activation='tanh')(hidden_9)
hidden_11 = K.layers.Dense(16, activation='tanh')(hidden_10)
hidden_12 = K.layers.Dense(8, activation='tanh')(hidden_11)
hidden_13 = K.layers.Dense(4, activation='tanh')(hidden_12)
hidden_14 = K.layers.Dense(2, activation='tanh',name='2D')(hidden_13)
#completely merged layers
merged_layers = merge.concatenate([hidden_4, hidden_14])
merged=K.layers.Dense(64, activation='tanh')(merged_layers)
merged1=K.layers.Dense(64, activation='tanh')(merged)
merged2=K.layers.Dense(64, activation='tanh')(merged1)
merged3=K.layers.Dense(64, activation='tanh')(merged2)
output=K.layers.Dense(1, activation='tanh')(merged3)
model = Model(inputs=[input_layer1,input_layer2,input_layer3,input_layer4,input_layer5], outputs=output)
model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
return model
The optimizers offered by TensorFlow are all first-order. I want to use a second-order method such as L-BFGS and am wondering how to do this. All the examples I have found apply second-order optimization to Sequential models, not to functional ones.
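One common workaround (a sketch of my own, not from this thread): there is nothing Sequential-specific about treating the model's weights as a single flat vector and handing the loss and gradient to an external optimizer such as SciPy's L-BFGS-B; it works for functional models too. Assumptions: TF2 eager mode, a model whose weights are all trainable, and `inputs` being the list of five arrays your functional model expects; fit_lbfgs and flat_to_weights are hypothetical helper names.
import numpy as np
import tensorflow as tf
from scipy.optimize import minimize

def fit_lbfgs(model, inputs, targets, maxiter=100):
    shapes = [w.shape for w in model.get_weights()]
    sizes = [int(np.prod(s)) for s in shapes]

    def flat_to_weights(flat):
        # split the flat parameter vector back into per-tensor arrays
        out, start = [], 0
        for shape, size in zip(shapes, sizes):
            out.append(flat[start:start + size].reshape(shape))
            start += size
        return out

    def loss_and_grad(flat):
        model.set_weights(flat_to_weights(flat))
        with tf.GradientTape() as tape:
            pred = model(inputs, training=True)
            loss = tf.reduce_mean(tf.keras.losses.mean_squared_error(targets, pred))
        grads = tape.gradient(loss, model.trainable_variables)
        flat_grad = np.concatenate([g.numpy().ravel() for g in grads])
        return float(loss.numpy()), flat_grad.astype(np.float64)

    x0 = np.concatenate([w.ravel() for w in model.get_weights()])
    res = minimize(loss_and_grad, x0, jac=True, method='L-BFGS-B',
                   options={'maxiter': maxiter})
    model.set_weights(flat_to_weights(res.x))   # load the optimized weights back
    return res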
I am currently implementing the model from the following paper: https://openaccess.thecvf.com/content_cvpr_2018/papers/Oh_Fast_Video_Object_CVPR_2018_paper.pdf
As the model diagram shows, they use two ResNet50 backbones inside the model, labeled as the Siamese encoders.
I used the ResNet50 model provided by Keras with the following code:
input_shape = (480,854,4)
inputlayer_Q = Input(shape=input_shape, name="inputlayer_Q")
convlayer_Q = Conv2D(filters= 3,kernel_size = (3,3),padding = 'same')(inputlayer_Q)
model_Q = tf.keras.applications.resnet50.ResNet50(
input_shape=(
convlayer_Q.shape[1],convlayer_Q.shape[2],convlayer_Q.shape[3]),
include_top=False,
weights='imagenet'
)
They then take 3 skip connections from layers inside the ResNet model. I tried to take the skip connections using the following lines:
res2_skip = model_Q.layers[38].output
res3_skip = model_Q.layers[80].output
res4_skip = model_Q.layers[142].output
But when I use them later in the model and try to run it, I get a "Graph disconnected" error.
So is there any way to take skip connections from, or otherwise modify, the models provided by Keras?
Try this:
input_shape = (480,854,4)
# Target Stream = Q
inputlayer_Q = Input(shape=input_shape, name="inputlayer_Q")
# Reference Stream = M
inputlayer_M = Input(shape=input_shape,name="inputlayer_M")
convlayer_Q = Conv2D(filters= 3,kernel_size = (3,3),padding = 'same')(inputlayer_Q)
convlayer_M = Conv2D(filters= 3,kernel_size = (3,3),padding = 'same')(inputlayer_M)
model_Q = tf.keras.applications.resnet50.ResNet50(
input_shape=(convlayer_Q.shape[1],convlayer_Q.shape[2],convlayer_Q.shape[3]), include_top=False, weights='imagenet'
)
model_Q._name ="resnet50_Q"
model_M = tf.keras.applications.resnet50.ResNet50(
input_shape=(convlayer_M.shape[1],convlayer_M.shape[2],convlayer_M.shape[3]), include_top=False, weights='imagenet'
)
model_M._name ="resnet50_M"
for model in [model_Q, model_M]:
for layer in model.layers:
old_name = layer.name
layer._name = f"{model.name}_{old_name}"
print(layer._name)
encoder_Q = tf.keras.Model(inputs=model_Q.inputs, outputs=model_Q.output,name ="encoder_Q" )
encoder_M = tf.keras.Model(inputs=model_M.inputs, outputs=model_M.output,name ="encoder_M" )
concatenate = Concatenate(axis=0,name ="Concatenate")([encoder_Q.output, encoder_M.output])
global_layer = GlobalConvBlock(concatenate)
res2_skip = encoder_Q.layers[38].output
res2_skip = ZeroPadding2D(padding=(0,1), data_format=None)(res2_skip)
res3_skip = encoder_Q.layers[80].output
res3_skip = ZeroPadding2D(padding=((0,0),(0,1)), data_format=None)(res3_skip)
res4_skip = encoder_Q.layers[142].output
ref1_16 = refineblock(res4_skip,global_layer,"ref1_16")
ref1_8 = refineblock(res3_skip,ref1_16,"ref1_8")
ref1_4 = refineblock(res2_skip,ref1_8,"ref1_4")
outconv = Conv2D(filters= 2,kernel_size = (3,3)) (ref1_4)
outconv1 = ZeroPadding2D(padding=((1,1),(0,0)), data_format=None)(outconv)
output = Softmax()(outconv1)
main_model = tf.keras.Model(inputs=encoder_Q.inputs + encoder_M.inputs, outputs=output, name="main_model")
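For reference, a common way to avoid the "Graph disconnected" error is to build the pretrained backbone once, expose the intermediate activations as extra outputs of a sub-model, and then call that sub-model on your own tensor so that everything lives in one graph. A minimal sketch of that pattern (my own, reusing the layer indices from the question; the variable names are illustrative and this is not the paper's full architecture):
backbone = tf.keras.applications.resnet50.ResNet50(
    input_shape=(480, 854, 3), include_top=False, weights='imagenet')
skip_outputs = [backbone.layers[i].output for i in (38, 80, 142)]
feature_extractor = tf.keras.Model(inputs=backbone.input,
                                   outputs=[backbone.output] + skip_outputs,
                                   name='resnet50_feature_extractor')

inp_Q = Input(shape=(480, 854, 4), name='inp_Q')
x_Q = Conv2D(filters=3, kernel_size=(3, 3), padding='same')(inp_Q)
# Calling the sub-model on x_Q wires it into this graph and returns all four tensors.
feat_Q, res2_skip, res3_skip, res4_skip = feature_extractor(x_Q)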
I am trying to concatenate all the inputs, but for some reason I always get this error: TypeError: 'module' object is not callable. Can you please help me fix it? I tried to replace merge with keras.layers.concatenate but it didn't work out.
def stack_latent_layers(n):
#Stack n bidi LSTMs
return lambda x: stack(x, [lambda : Bidirectional(LSTM(hidden_units,
return_sequences = True))] * n )
def predict_classes():
#Predict to the number of classes
#Named arguments are passed to the keras function
return lambda x: stack(x,
[lambda : TimeDistributed(Dense(output_dim = num_of_classes(),
activation = "softmax"))] +
[lambda : TimeDistributed(Dense(hidden_units,
activation='relu'))] * 3)
word_embedding_layer = emb.get_keras_embedding(
trainable = True,
input_length = sent_maxlen, name='word_embedding_layer')
pos_embedding_layer = Embedding(output_dim = pos_tag_embedding_size,
input_dim = len(SPACY_POS_TAGS),
input_length = sent_maxlen,
name='pos_embedding_layer')
latent_layers = stack_latent_layers(num_of_latent_layers)
dropout = Dropout(0.1)
predict_layer = predict_classes()
## --------> 8] Prepare input features, and indicate how to embed them
inputs_and_embeddings = [(Input(shape = (sent_maxlen,),
dtype="int32",
name = "word_inputs"),
word_embedding_layer),
(Input(shape = (sent_maxlen,),
dtype="int32",
name = "predicate_inputs"),
word_embedding_layer),
(Input(shape = (sent_maxlen,),
dtype="int32",
name = "postags_inputs"),
pos_embedding_layer),
]
print('inputs_and_embeddings',inputs_and_embeddings)
## --------> 9] Concat all inputs and run on deep network
output = predict_layer(dropout(latent_layers(merge([embed(inp)
for inp, embed in inputs_and_embeddings],
mode = "concat",
concat_axis = -1
))))
Replaced merge with keras.layers.concatenate.
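In Keras 2, merge is a module rather than a function, which is most likely where the TypeError: 'module' object is not callable comes from. A sketch of what the replacement looks like, assuming the rest of the pipeline above stays the same:
from keras.layers import concatenate

# Keras 1.x:  merge([...], mode="concat", concat_axis=-1)
# Keras 2.x:  concatenate([...], axis=-1)
concatenated = concatenate([embed(inp) for inp, embed in inputs_and_embeddings],
                           axis=-1)
output = predict_layer(dropout(latent_layers(concatenated)))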
This is part of my current Python code for NN training using the CNTK module:
batch_axis = C.Axis.default_batch_axis()
input_seq_axis = C.Axis.default_dynamic_axis()
input_dynamic_axes = [batch_axis, input_seq_axis]
input_dynamic_axes2 = [batch_axis, input_seq_axis]
input = C.input_variable(n_ins, dynamic_axes=input_dynamic_axes, dtype=numpy.float32)
output = C.input_variable(n_outs, dynamic_axes=input_dynamic_axes2, dtype=numpy.float32)
dnn_model = cntk_model.create_model(input, hidden_layer_type, hidden_layer_size, n_outs)
loss = C.squared_error(dnn_model, output)
error = C.squared_error(dnn_model, output)
lr_schedule = C.learning_rate_schedule(current_finetune_lr, C.UnitType.minibatch)
momentum_schedule = C.momentum_schedule(current_momentum)
learner = C.adam(dnn_model.parameters, lr_schedule, momentum_schedule, unit_gain = False, l1_regularization_weight=l1_reg, l2_regularization_weight= l2_reg)
trainer = C.Trainer(dnn_model, (loss, error), [learner])
And here is the code for creating the NN model:
def create_model(features, hidden_layer_type, hidden_layer_size, n_out):
logger.debug('Creating cntk model')
assert len(hidden_layer_size) == len(hidden_layer_type)
n_layers = len(hidden_layer_size)
my_layers = list()
for i in xrange(n_layers):
if(hidden_layer_type[i] == 'TANH'):
my_layers.append(C.layers.Dense(hidden_layer_size[i], activation=C.tanh, init=C.layers.glorot_uniform()))
elif (hidden_layer_type[i] == 'LSTM'):
my_layers.append(C.layers.Recurrence(C.layers.LSTM(hidden_layer_size[i])))
else:
raise Exception('Unknown hidden layer type')
my_layers.append(C.layers.Dense(n_out, activation=None))
my_model = C.layers.Sequential([my_layers])
my_model = my_model(features)
return my_model
Now I would like to change the backpropagation so that the error is calculated not on the direct network output, but on the output after some additional computation. I tried to define something like this:
def create_error_function(self, prediction, target):
prediction_denorm = C.element_times(prediction, self.std_vector)
prediction_denorm = C.plus(prediction_denorm, self.mean_vector)
prediction_denorm_rounded = C.round(C.element_times(prediction_denorm[0:5], C.round(prediction_denorm[5])))
prediction_denorm_rounded = C.element_divide(prediction_denorm_rounded, C.round(prediction_denorm[5]))
prediction_norm = C.minus(prediction_denorm_rounded, self.mean_vector[0:5])
prediction_norm = C.element_divide(prediction_norm, self.std_vector[0:5])
first = C.squared_error(prediction_norm, target[0:5])
second = C.minus(C.round(prediction_denorm[5]), self.mean_vector[5])
second = C.element_divide(second, self.std_vector[5])
return C.plus(first, C.squared_error(second, target[5]))
and use it instead of the standard squared_error.
And here is the part for NN training:
dnn_model = cntk_model.create_model(input, hidden_layer_type, hidden_layer_size, n_outs)
error_function = cntk_model.ErrorFunction(cmp_mean_vector, cmp_std_vector)
loss = error_function.create_error_function(dnn_model, output)
error = error_function.create_error_function(dnn_model, output)
lr_schedule = C.learning_rate_schedule(current_finetune_lr, C.UnitType.minibatch)
momentum_schedule = C.momentum_schedule(current_momentum)
learner = C.adam(dnn_model.parameters, lr_schedule, momentum_schedule, unit_gain = False, l1_regularization_weight=l1_reg,
l2_regularization_weight= l2_reg)
trainer = C.Trainer(dnn_model, (loss, error), [learner])
trainer.train_minibatch({input: temp_train_x, output: temp_train_y})
But after two epochs I keep getting the same average loss, as if my network is not learning.
Every time you want to change how backprop works, you need to use stop_gradient. It is the only function whose gradient is different from the gradient of its forward operation: in the forward pass stop_gradient acts as the identity, while in the backward pass it blocks the gradient from propagating.
To apply an operation f(x) to some x in the forward pass and pretend it never happened in the backward pass, you need to do something like:
C.stop_gradient(f(x) - x) + x. In your case that would be
norm_features = C.stop_gradient(features/normalization - features) + features
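Applied to the rounding in the error function above, the same straight-through pattern would look something like this (my own sketch, untested):
# The forward pass uses the rounded value; the backward pass sees the identity,
# so gradients flow as if no rounding had happened.
rounded = C.stop_gradient(C.round(prediction_denorm) - prediction_denorm) + prediction_denorm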