I'm building my own layer in TensorFlow 2.1 and using it in a custom model. However, when I try to train it, the layer tries to build itself on its first call, and it needs input_shape to do so. As far as I know, it should be able to compute the shape from the actual input it receives, but it seems that input_shape is None.
My question is: what did I do wrong, and how can I correct it?
Below I'm attaching an example to reproduce the problem.
My code (MinimalRNNCell is copied from the TensorFlow docs, https://www.tensorflow.org/api_docs/python/tf/keras/layers/RNN):
import tensorflow as tf
from tensorflow.keras.layers import Layer
from tensorflow.keras import Model
from tensorflow.keras import backend as K
import numpy as np

class MinimalRNNCell(Layer):
    def __init__(self, units, **kwargs):
        self.units = units
        self.state_size = units
        super(MinimalRNNCell, self).__init__(**kwargs)

    def build(self, input_shape):
        self.kernel = self.add_weight(shape=(input_shape[-1], self.units),
                                      initializer='uniform',
                                      name='kernel')
        self.recurrent_kernel = self.add_weight(
            shape=(self.units, self.units),
            initializer='uniform',
            name='recurrent_kernel')
        self.built = True

    def call(self, inputs, states):
        prev_output = states[0]
        h = K.dot(inputs, self.kernel)
        output = h + K.dot(prev_output, self.recurrent_kernel)
        return output, [output]

class RNNXModel(Model):
    def __init__(self, size):
        super(RNNXModel, self).__init__()
        self.minimalrnn = MinimalRNNCell(size)

    def call(self, inputs):
        out = self.minimalrnn(input)
        return out
x=np.array([[[1,2,3],[4,5,6],[7,8,9]],[[10,11,12],[13,14,15],[16,17,18]]])
y=np.array([[1,2,3],[10,11,12]])
model=RNNXModel(3)
model.compile(optimizer='sgd', loss='mse')
model.fit(x,y,epochs=10, batch_size=1)
Error I'm getting:
Traceback (most recent call last):
File "/home/.../test.py", line 64, in <module>
model.fit(x,y,epochs=10, batch_size=1)
File "/home/.../.venv/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py", line 819, in fit
use_multiprocessing=use_multiprocessing)
File "/home/.../.venv/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 235, in fit
use_multiprocessing=use_multiprocessing)
File "/home/.../.venv/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 593, in _process_training_inputs
use_multiprocessing=use_multiprocessing)
File "/home/.../.venv/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 646, in _process_inputs
x, y, sample_weight=sample_weights)
File "/home/.../.venv/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py", line 2346, in _standardize_user_data
all_inputs, y_input, dict_inputs = self._build_model_with_inputs(x, y)
File "/home/.../.venv/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py", line 2572, in _build_model_with_inputs
self._set_inputs(cast_inputs)
File "/home/.../.venv/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/training.py", line 2659, in _set_inputs
outputs = self(inputs, **kwargs)
File "/home/.../.venv/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py", line 773, in __call__
outputs = call_fn(cast_inputs, *args, **kwargs)
File "/home/.../.venv/lib/python3.6/site-packages/tensorflow_core/python/autograph/impl/api.py", line 237, in wrapper
raise e.ag_error_metadata.to_exception(e)
TypeError: in converted code:
/home/.../test.py:36 call *
out=self.minimalrnn(input)
/home/.../.venv/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py:818 __call__
self._maybe_build(inputs)
/home/.../.venv/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py:2116 _maybe_build
self.build(input_shapes)
/home/.../test.py:14 build
self.kernel = self.add_weight(shape=(input_shape[-1], self.units),
TypeError: 'NoneType' object is not subscriptable
There is a typo: input should be inputs. input is a Python built-in function, so the name still resolves; instead of a NameError you end up passing the function itself to the layer, which has no shape, so build receives input_shape=None and input_shape[-1] fails.
class RNNXModel(Model):
    def __init__(self, size):
        super(RNNXModel, self).__init__()
        self.minimalrnn = MinimalRNNCell(size)

    def call(self, inputs):
        out = self.minimalrnn(inputs)  # changed from `input`
        return out
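Note that MinimalRNNCell is a cell (its call takes states as well as inputs), which is why the TensorFlow page it comes from wraps it in tf.keras.layers.RNN rather than calling it directly. A minimal sketch of that pattern, with sizes matching the question:

import numpy as np
import tensorflow as tf

cell = MinimalRNNCell(3)
layer = tf.keras.layers.RNN(cell)   # drives the cell across the time axis

x = np.random.random((2, 3, 3)).astype('float32')   # (batch, time, features)
print(layer(x).shape)               # (2, 3)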
Related
self.model = DQNetwork(11, 256, 3)

# imports assumed for the snippet below
import torch.nn as nn
import torch.nn.functional as F

class DQNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = self.linear3(x)
        return x
Traceback (most recent call last):
File "E:/Work/Programming/PyArk/main.py", line 32, in <module>
agent.train()
File "E:\Work\Programming\PyArk\Agent\agent.py", line 31, in train
self.step(states, actions, rewards, next_states, dones)
File "E:\Work\Programming\PyArk\Agent\agent.py", line 20, in step
self._trainer.train(state, action, reward, next_state, done)
File "E:\Work\Programming\PyArk\Agent\DQN\dqn_trainer.py", line 32, in train
prediction = self.model(state)
File "E:\Work\Programming\PyArk\venv\lib\site-packages\torch\nn\modules\module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "E:\Work\Programming\PyArk\Agent\DQN\dqn_network.py", line 19, in forward
x = F.relu(self.linear1(x))
File "E:\Work\Programming\PyArk\venv\lib\site-packages\torch\nn\modules\module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "E:\Work\Programming\PyArk\venv\lib\site-packages\torch\nn\modules\linear.py", line 103, in forward
return F.linear(input, self.weight, self.bias)
File "E:\Work\Programming\PyArk\venv\lib\site-packages\torch\nn\functional.py", line 1848, in linear
return torch._C._nn.linear(input, weight, bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (11x5 and 11x256)
I don't understand why this error is popping up.
I use the same code in other projects... what is going on?
The input is transposed: the batch dimension and the feature dimension are swapped. nn.Linear(11, 256) multiplies along the last dimension, which must therefore equal input_size (11), so a tensor of shape (11, 5) has to be passed as (5, 11):

model( torch.zeros(11,5) ) --> model( torch.zeros(5,11) )
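To see the requirement in isolation, here is a minimal sketch with the sizes from the question:

import torch
import torch.nn as nn

linear = nn.Linear(11, 256)         # first layer of DQNetwork(11, 256, 3)

x_ok = torch.zeros(5, 11)           # (batch=5, features=11)
print(linear(x_ok).shape)           # torch.Size([5, 256])

x_bad = torch.zeros(11, 5)          # features on the wrong axis
print(linear(x_bad.t()).shape)      # transpose to (5, 11) and it works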
I was working on a model and I used this custom attention layer.
Note: here's a Colab notebook to reproduce a similar error:
https://colab.research.google.com/drive/1RDcJwpVbT6JR8_LA52r1nHPSK0w1HuY7?usp=sharing
# imports assumed; the question states tensorflow 2.1.0
from tensorflow.keras import initializers
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer, InputSpec

class AttentionWeightedAverage(Layer):
    def __init__(self, return_attention=False, **kwargs):
        self.init = initializers.get('uniform')
        self.supports_masking = True
        self.return_attention = return_attention
        super(AttentionWeightedAverage, self).__init__(**kwargs)

    def build(self, input_shape):
        self.input_spec = [InputSpec(ndim=3)]
        assert len(input_shape) == 3
        self.w = self.add_weight(shape=(input_shape[2], 1),
                                 name='{}_w'.format(self.name),
                                 initializer=self.init, trainable=True)
        super(AttentionWeightedAverage, self).build(input_shape)

    def call(self, h, mask=None):
        h_shape = K.shape(h)
        d_w, T = h_shape[0], h_shape[1]
        logits = K.dot(h, self.w)  # w^T h
        logits = K.reshape(logits, (d_w, T))
        alpha = K.exp(logits - K.max(logits, axis=-1, keepdims=True))  # exp
        # masked timesteps have zero weight
        if mask is not None:
            mask = K.cast(mask, K.floatx())
            alpha = alpha * mask
        alpha = alpha / (K.sum(alpha, axis=1, keepdims=True) + K.epsilon())  # softmax
        r = K.sum(h * K.expand_dims(alpha), axis=1)  # r = h*alpha^T
        h_star = K.tanh(r)  # h^* = tanh(r)
        if self.return_attention:
            return [h_star, alpha]
        return h_star

    def get_output_shape_for(self, input_shape):
        return self.compute_output_shape(input_shape)

    def compute_output_shape(self, input_shape):
        output_len = input_shape[2]
        if self.return_attention:
            return [(input_shape[0], output_len), (input_shape[0], input_shape[1])]
        return (input_shape[0], output_len)

    def compute_mask(self, input, input_mask=None):
        if isinstance(input_mask, list):
            return [None] * len(input_mask)
        return None
and my model architecture is something like this:
dense()(x)
Bidirectional(lstm(return_sequences=True))(x)
attentionweightedaverage()(x)
dense(1, 'softmax')
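For reference, a concrete version of that architecture could look like the sketch below (the input shape and layer sizes are assumptions, not taken from the question):

from tensorflow.keras.layers import Input, Dense, LSTM, Bidirectional
from tensorflow.keras.models import Model

inp = Input(shape=(100, 64))                         # assumed (timesteps, features)
x = Dense(64)(inp)
x = Bidirectional(LSTM(64, return_sequences=True))(x)
x = AttentionWeightedAverage()(x)                    # the custom layer above
out = Dense(1, activation='sigmoid')(x)              # the question used softmax
model = Model(inp, out)
model.compile(optimizer='adam', loss='binary_crossentropy')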
After training for a few epochs, when I try to save the model I get the error below, which I think is related to the custom attention layer I used. I couldn't figure it out; any help is appreciated.
The error only occurs when I try to save the entire model using model.save; model.save_weights() works fine.
I'm using tensorflow 2.1.0.
Here's the traceback:
Traceback (most recent call last):
File "classifiers/main.py", line 26, in <module>
main()
File "classifiers/main.py", line 18, in main
clf.model.save(f'./classifiers/saved_models/{args.model_name}')
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\keras\engine\network.p
signatures, options)
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\keras\saving\save.py",
signatures, options)
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\keras\saving\saved_mod
save_lib.save(model, filepath, signatures, options)
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\saved_model\save.py",
checkpoint_graph_view)
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\saved_model\signature_
functions = saveable_view.list_functions(saveable_view.root)
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\saved_model\save.py",
self._serialization_cache)
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\keras\engine\base_laye
.list_functions_for_serialization(serialization_cache))
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\keras\saving\saved_mod
fns = self.functions_to_serialize(serialization_cache)
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\keras\saving\saved_mod
serialization_cache).functions_to_serialize)
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\keras\saving\saved_mod
serialization_cache)
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\keras\saving\saved_mod
serialization_cache))
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\keras\saving\saved_mod
functions = save_impl.wrap_layer_functions(self.obj, serialization_cache)
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\keras\saving\saved_mod
original_fns = _replace_child_layer_functions(layer, serialization_cache)
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\keras\saving\saved_mod
serialization_cache).functions)
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\keras\saving\saved_mod
serialization_cache)
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\keras\saving\saved_mod
functions = save_impl.wrap_layer_functions(self.obj, serialization_cache)
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\keras\saving\saved_mod
'{}_layer_call_and_return_conditional_losses'.format(layer.name))
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\keras\saving\saved_mod
self.add_trace(*self._input_signature)
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\keras\saving\saved_mod
fn.get_concrete_function(*args, **kwargs)
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\keras\saving\saved_mod
return super(LayerCall, self).get_concrete_function(*args, **kwargs)
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\eager\def_function.py"
self._initialize(args, kwargs, add_initializers_to=initializers)
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\eager\def_function.py"
*args, **kwds))
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\eager\function.py", li
graph_function, _, _ = self._maybe_define_function(args, kwargs)
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\eager\function.py", li
graph_function = self._create_graph_function(args, kwargs)
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\eager\function.py", li
capture_by_value=self._capture_by_value),
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\framework\func_graph.p
func_outputs = python_func(*func_args, **func_kwargs)
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\eager\def_function.py"
return weak_wrapped_fn().__wrapped__(*args, **kwds)
return layer_call(inputs, *args, **kwargs), layer.get_losses_for(inputs)
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\classifiers\blstm_attention.py", line 43, in
call
logits = K.dot(h, self.w) # w^T h
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\keras\backend.py", line 1653, in dot
if ndim(x) is not None and (ndim(x) > 2 or ndim(y) > 2):
File "C:\Users\user\miniconda3\envs\user\lib\site-packages\tensorflow_core\python\keras\backend.py", line 1202, in ndim
dims = x.shape._dims
AttributeError: 'list' object has no attribute 'shape'
I'm trying to use DataParallel on a GRU network as explained in the docs, and I keep getting the same error.
"""Defines the neural network, losss function and metrics"""
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
def __init__(self, params, anchor_is_phrase):
"""
Simple LSTM, used to generate the LSTM for both the word and video
embeddings.
Args:
params: (Params) contains vocab_size, embedding_dim, lstm_hidden_dim
is_phrase: is word lstm or the vid lstm
"""
super(Net, self).__init__()
if anchor_is_phrase:
self.lstm = nn.DataParallel(nn.GRU(params.word_embedding_dim, params.hidden_dim, 1)).cuda()#, batch_first=True)
else:
self.lstm = nn.DataParallel(nn.GRU(params.vid_embedding_dim, params.hidden_dim, 1)).cuda() #, batch_first=True)
def forward(self, s, anchor_is_phrase = False):
"""
Forward prop.
"""
s, _ = self.lstm(s)
s.data.contiguous()
return s
The error happens at the line s, _ = self.lstm(s) in the code above. I get the following error message:
s, _ = self.lstm(s)
File "/home/pavelameen/miniconda3/envs/TD2/lib/python3.6/site-packages/torch/nn/modules/module.py", line 493, in __call__
result = self.forward(*input, **kwargs)
File "/home/pavelameen/miniconda3/envs/TD2/lib/python3.6/site-packages/torch/nn/parallel/data_parallel.py", line 152, in forward
outputs = self.parallel_apply(replicas, inputs, kwargs)
File "/home/pavelameen/miniconda3/envs/TD2/lib/python3.6/site-packages/torch/nn/parallel/data_parallel.py", line 162, in parallel_apply
return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
File "/home/pavelameen/miniconda3/envs/TD2/lib/python3.6/site-packages/torch/nn/parallel/parallel_apply.py", line 83, in parallel_apply
raise output
File "/home/pavelameen/miniconda3/envs/TD2/lib/python3.6/site-packages/torch/nn/parallel/parallel_apply.py", line 59, in _worker
output = module(*input, **kwargs)
File "/home/pavelameen/miniconda3/envs/TD2/lib/python3.6/site-packages/torch/nn/modules/module.py", line 493, in __call__
result = self.forward(*input, **kwargs)
File "/home/pavelameen/miniconda3/envs/TD2/lib/python3.6/site-packages/torch/nn/modules/rnn.py", line 193, in forward
max_batch_size = input.size(0) if self.batch_first else input.size(1)
AttributeError: 'tuple' object has no attribute 'size'
The interesting part is that when I print the type of s right before that call, I get PackedSequence. Why is it converted to a tuple inside the GRU's forward method?
nn.GRU expects (line 181 of torch/nn/modules/rnn.py) either a PackedSequence or a tensor as input. As the error says, you are passing a tuple object s instead. The culprit is nn.DataParallel: its scatter step splits every input across the devices, and because a PackedSequence is a tuple under the hood, it gets taken apart, so each replica's forward receives a plain tuple.
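A minimal sketch of the mismatch (the dimensions are made up; only the types matter here):

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence

gru = nn.GRU(input_size=300, hidden_size=128, num_layers=1)

# (seq_len=7, batch=4, features=300), lengths sorted in decreasing order
packed = pack_padded_sequence(torch.zeros(7, 4, 300), lengths=[7, 5, 3, 2])
out, h = gru(packed)  # fine: nn.GRU accepts a PackedSequence directly

# Wrapped in nn.DataParallel, the scatter step splits each input across
# GPUs; the PackedSequence is unpacked in the process, so every replica
# receives a plain tuple and nn.GRU raises the AttributeError above.

One workaround is to pass the padded tensor (plus the lengths) through the DataParallel module and call pack_padded_sequence inside forward, after the scatter has already happened.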
I have implemented a minimal example of WaveNet, closely following the steps from https://github.com/basveeling/wavenet.
The issue is that the model uses a custom layer, which works fine during training, but once the model is reloaded, Keras cannot find the CausalConv1D layer, even though I am using custom objects.
I am using tensorflow 1.13 and keras 2.2.4.
Here is an example of the first key/value pairs in objects:
objects = {'initial_causal_conv': <class 'wavenet_utils.CausalConv1D'>,
           'dilated_conv_1_tanh_s0': <class 'wavenet_utils.CausalConv1D'>,
           'dilated_conv_1_sigm_s0': <class 'wavenet_utils.CausalConv1D'>,
           '...': <class 'wavenet_utils.CausalConv1D'>,
           '...': <class 'wavenet_utils.CausalConv1D'>}
model.fit(x=[x_tr1, x_tr2],
          y=y_tr1,
          epochs=epochs,
          batch_size=batch_size,
          validation_data=([x_vl1, x_vl2], y_vl1),
          callbacks=[checkpoint, early_stopping],
          verbose=verbose,
          shuffle=True,
          class_weight=class_weight)

model = load_model('model.h5', custom_objects=objects)
Which then returns this error:
Traceback (most recent call last):
File "/home/xxx/PycharmProjects/WAVE/DATA_NN.py", line 48, in <module>
objects=objects)
File "/home/xxx/PycharmProjects/WAVE/functions.py", line 572, in run_neural_net
model = load_model('model_conv.h5', custom_objects=objects)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/engine/saving.py", line 419, in load_model
model = _deserialize_model(f, custom_objects, compile)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/engine/saving.py", line 225, in _deserialize_model
model = model_from_config(model_config, custom_objects=custom_objects)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/engine/saving.py", line 458, in model_from_config
return deserialize(config, custom_objects=custom_objects)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/layers/__init__.py", line 55, in deserialize
printable_module_name='layer')
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/utils/generic_utils.py", line 145, in deserialize_keras_object
list(custom_objects.items())))
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/engine/network.py", line 1022, in from_config
process_layer(layer_data)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/engine/network.py", line 1008, in process_layer
custom_objects=custom_objects)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/layers/__init__.py", line 55, in deserialize
printable_module_name='layer')
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/utils/generic_utils.py", line 138, in deserialize_keras_object
': ' + class_name)
ValueError: Unknown layer: CausalConv1D
When building the model, CausalConv1D must be imported from wavenet_utils.py.
Here is wavenet_utils.py, containing the class CausalConv1D:
from keras.layers import Conv1D
from keras.utils.conv_utils import conv_output_length
import tensorflow as tf

class CausalConv1D(Conv1D):
    def __init__(self, filters, kernel_size, init='glorot_uniform', activation=None,
                 padding='valid', strides=1, dilation_rate=1, bias_regularizer=None,
                 activity_regularizer=None, kernel_constraint=None, bias_constraint=None,
                 use_bias=True, causal=False, output_dim=1,
                 **kwargs):
        self.output_dim = output_dim
        super(CausalConv1D, self).__init__(filters,
                                           kernel_size=kernel_size,
                                           strides=strides,
                                           padding=padding,
                                           dilation_rate=dilation_rate,
                                           activation=activation,
                                           use_bias=use_bias,
                                           kernel_initializer=init,
                                           activity_regularizer=activity_regularizer,
                                           bias_regularizer=bias_regularizer,
                                           kernel_constraint=kernel_constraint,
                                           bias_constraint=bias_constraint,
                                           **kwargs)
        self.causal = causal
        if self.causal and padding != 'valid':
            raise ValueError("Causal mode dictates border_mode=valid.")

    def build(self, input_shape):
        super(CausalConv1D, self).build(input_shape)

    def call(self, x):
        if self.causal:
            def asymmetric_temporal_padding(x, left_pad=1, right_pad=1):
                pattern = [[0, 0], [left_pad, right_pad], [0, 0]]
                return tf.pad(x, pattern)

            x = asymmetric_temporal_padding(x, self.dilation_rate[0] * (self.kernel_size[0] - 1), 0)
        return super(CausalConv1D, self).call(x)

    def compute_output_shape(self, input_shape):
        input_length = input_shape[1]
        if self.causal:
            input_length += self.dilation_rate[0] * (self.kernel_size[0] - 1)
        length = conv_output_length(input_length,
                                    self.kernel_size[0],
                                    self.padding,
                                    self.strides[0],
                                    dilation=self.dilation_rate[0])
        shape = tf.TensorShape(input_shape).as_list()
        shape[-1] = self.output_dim
        return (input_shape[0], length, self.filters)

    def get_config(self):
        base_config = super(CausalConv1D, self).get_config()
        base_config['output_dim'] = self.output_dim
        return base_config
EDIT:
I have tried this approach before as well.
objects = {'CausalConv1D': <class 'wavenet_utils.CausalConv1D'>}

model.fit(x=[x_tr1, x_tr2],
          y=y_tr1,
          epochs=epochs,
          batch_size=batch_size,
          validation_data=([x_vl1, x_vl2], y_vl1),
          callbacks=[checkpoint, early_stopping],
          verbose=verbose,
          shuffle=True,
          class_weight=class_weight)

model = load_model('model.h5', custom_objects=objects)
Which then returns this error:
Traceback (most recent call last):
File "/home/xxx/PycharmProjects/WAVE/DATA_NN.py", line 47, in <module>
objects=objects)
File "/home/xxx/PycharmProjects/WAVE/functions.py", line 574, in run_neural_net
model = load_model('model.h5', custom_objects=objects)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/engine/saving.py", line 419, in load_model
model = _deserialize_model(f, custom_objects, compile)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/engine/saving.py", line 225, in _deserialize_model
model = model_from_config(model_config, custom_objects=custom_objects)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/engine/saving.py", line 458, in model_from_config
return deserialize(config, custom_objects=custom_objects)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/layers/__init__.py", line 55, in deserialize
printable_module_name='layer')
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/utils/generic_utils.py", line 145, in deserialize_keras_object
list(custom_objects.items())))
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/engine/network.py", line 1022, in from_config
process_layer(layer_data)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/engine/network.py", line 1008, in process_layer
custom_objects=custom_objects)
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/layers/__init__.py", line 55, in deserialize
printable_module_name='layer')
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/utils/generic_utils.py", line 147, in deserialize_keras_object
return cls.from_config(config['config'])
File "/home/xxx/PycharmProjects/WAVE/venv/lib/python3.6/site-packages/keras/engine/base_layer.py", line 1109, in from_config
return cls(**config)
File "/home/xxx/PycharmProjects/WAVE/wavenet_utils.py", line 26, in __init__
**kwargs)
TypeError: __init__() got multiple values for keyword argument 'kernel_initializer'
Could this be the issue mentioned here https://github.com/keras-team/keras/issues/12316?
And if so, is there any way around it?
There is only one custom object, which is CausalConv1D:

objects = {'CausalConv1D': wavenet_utils.CausalConv1D}

Now you must make sure your get_config method is correct and has everything the __init__ method of your layer needs. Currently it misses the causal property, and the base config carries a kernel_initializer entry that your __init__ method does not accept.
Let's list every property you need, and then check which ones are in the base config:

filters: in base
kernel_size: in base
init: not in base, but there is kernel_initializer in base! kernel_initializer is a config item that your __init__ method doesn't support, so rename the init parameter to kernel_initializer.
activation: in base
padding: in base
strides: in base
dilation_rate: in base
bias_regularizer: in base
activity_regularizer: in base
kernel_constraint: in base
bias_constraint: in base
use_bias: in base
causal: not in base! You must add this to your config, or the model will always use the default value.
output_dim: not in base! Your get_config already adds it.
**kwargs: in base
Layer's __init__:

def __init__(self, filters, kernel_size,
             ############## here:
             kernel_initializer='glorot_uniform',
             ##############
             activation=None,
             padding='valid', strides=1, dilation_rate=1, bias_regularizer=None,
             activity_regularizer=None, kernel_constraint=None, bias_constraint=None,
             use_bias=True, causal=False, output_dim=1,
             **kwargs):
Layer's get_config:
It must contain all __init__ params that are not in the base class:

def get_config(self):
    base_config = super(CausalConv1D, self).get_config()
    base_config['causal'] = self.causal
    base_config['output_dim'] = self.output_dim
    return base_config
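A quick way to verify that the fixed config round-trips is to rebuild the layer from its own config (a sketch; the argument values are arbitrary):

layer = CausalConv1D(filters=32, kernel_size=2, dilation_rate=2, causal=True)
config = layer.get_config()
rebuilt = CausalConv1D.from_config(config)   # must not raise TypeError
assert rebuilt.causal == layer.causal
assert rebuilt.output_dim == layer.output_dim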
Somehow, no approach I've tried so far has been able to load the model correctly with load_model. Below is a simple workaround: it saves only the weights (which do save correctly, even with custom layers present), then deletes the existing model, builds and compiles a new one, and loads the saved weights.
model = build_model()
checkpoint = ModelCheckpoint('model.h5', monitor='val_acc', verbose=1,
                             save_best_only=True, save_weights_only=True, mode='max')
model.fit(x, y, callbacks=[checkpoint])

del model
model = build_model()
model.load_weights('model.h5')
model.predict(x_test)
I have a custom layer which reshapes the input tensor, performs some dot products with kernels, and returns a tensor with the same number of dimensions.
The input to my network is images, say of size 61x80. When the number of training images is a multiple of batch_size, model.fit works fine, e.g. 2700 training images with batch_size=10. But with 2701 training images it doesn't work, throwing an error like this:
Epoch 1/5 2520/2701 [==========================>...] - ETA: 0s - loss: 2.7465 - acc: 0.2516
Traceback (most recent call last):
  File "", line 5, in <module>
    history = model.fit(x_train, y_train, batch_size=10, epochs=5)
  File "/home/eee/anaconda3/lib/python3.6/site-packages/keras/engine/training.py", line 1039, in fit
    validation_steps=validation_steps)
  File "/home/eee/anaconda3/lib/python3.6/site-packages/keras/engine/training_arrays.py", line 199, in fit_loop
    outs = f(ins_batch)
  File "/home/eee/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2715, in __call__
    return self._call(inputs)
  File "/home/eee/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 2675, in _call
    fetched = self._callable_fn(*array_vals)
  File "/home/eee/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1439, in __call__
    run_metadata_ptr)
  File "/home/eee/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 528, in __exit__
    c_api.TF_GetCode(self.status.status))
InvalidArgumentError: Input to reshape is a tensor with 4880 values, but the requested shape has 48800
  [[{{node my_layer_3/Reshape}} = Reshape[T=DT_FLOAT, Tshape=DT_INT32,
    _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_input_2_0_0, my_layer_3/stack)]]
Please help me get around this problem.
Edit: adding the code of the custom layer.
class MyLayer(Layer):
    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        print(len(input_shape))
        # Create a trainable weight variable for this layer.
        assert len(input_shape) >= 3
        input_dim = input_shape[1:]
        print(input_shape)
        self.kernel1 = self.add_weight(shape=(self.output_dim[0], input_dim[0]),
                                       name='kernel1',
                                       initializer='uniform',
                                       trainable=True)
        print(self.kernel1)
        self.kernel2 = self.add_weight(shape=(self.output_dim[1], input_dim[1]),
                                       name='kernel2',
                                       initializer='uniform',
                                       trainable=True)
        print(self.kernel2)
        super(MyLayer, self).build(input_shape)

    def call(self, x):
        print(x.shape)
        input_shape = x.shape
        mat1_shape = K.int_shape(self.kernel1)
        mat2_shape = K.int_shape(self.kernel2)
        output1 = Myoperation(x, self.kernel1, 1)
        output2 = Myoperation(output1, self.kernel2, 2)
        return output2

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim[0], self.output_dim[1])
The code for the Myoperation function is:

def Myoperation(x, mat, mode):
    shape1 = K.shape(x)
    mode_list = [0, 1, 2]
    mode_list.remove(mode)
    mode_shape = shape1[mode]
    new_shape = tf.stack([mode_shape, shape1[mode_list[0]] * shape1[mode_list[1]]])
    input_reshaped = K.reshape(x, new_shape)
    ten_mul = K.dot(mat, input_reshaped)
    out_mode = K.int_shape(mat)
    if mode == 1:
        out_shape = tf.stack([shape1[mode_list[0]], out_mode[0], shape1[mode_list[1]]])
    if mode == 2:
        out_shape = tf.stack([shape1[mode_list[0]], shape1[mode_list[1]], out_mode[0]])
    output_reshaped = K.reshape(ten_mul, out_shape)
    return output_reshaped
The problem occurs when reshaping the tensor whenever the training set size is not a multiple of the batch size.
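One simple workaround consistent with this symptom (only the partial final batch fails) is to trim the training set so that every batch is full. A sketch, assuming x_train and y_train are numpy arrays:

batch_size = 10
n_full = (len(x_train) // batch_size) * batch_size   # 2701 -> 2700
history = model.fit(x_train[:n_full], y_train[:n_full],
                    batch_size=batch_size, epochs=5)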