Custom keras model with custom loss function gives error - python

I'm building a custom three-layer neural network with a custom loss and activation function. However, it gives me the following error:
File "C:\Users\untitled1.py", line 196, in <module>
cModel.fit(X_train, y_train, batch_size=64, epochs=2)
File "C:\Users\\Anaconda3\lib\site-packages\keras\engine\training.py", line 1184, in fit
tmp_logs = self.train_function(iterator)
File "C:\Users\\Anaconda3\lib\site-packages\tensorflow\python\eager\def_function.py", line 885, in __call__
result = self._call(*args, **kwds)
File "C:\Users\\Anaconda3\lib\site-packages\tensorflow\python\eager\def_function.py", line 933, in _call
self._initialize(args, kwds, add_initializers_to=initializers)
File "C:\Users\\Anaconda3\lib\site-packages\tensorflow\python\eager\def_function.py", line 759, in _initialize
self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
File "C:\Users\\Anaconda3\lib\site-packages\tensorflow\python\eager\function.py", line 3066, in _get_concrete_function_internal_garbage_collected
graph_function, _ = self._maybe_define_function(args, kwargs)
File "C:\Users\\Anaconda3\lib\site-packages\tensorflow\python\eager\function.py", line 3463, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "C:\Users\\Anaconda3\lib\site-packages\tensorflow\python\eager\function.py", line 3298, in _create_graph_function
func_graph_module.func_graph_from_py_func(
File "C:\Users\\Anaconda3\lib\site-packages\tensorflow\python\framework\func_graph.py", line 1007, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "C:\Users\\Anaconda3\lib\site-packages\tensorflow\python\eager\def_function.py", line 668, in wrapped_fn
out = weak_wrapped_fn().__wrapped__(*args, **kwds)
File "C:\Users\\Anaconda3\lib\site-packages\tensorflow\python\framework\func_graph.py", line 994, in wrapper
raise e.ag_error_metadata.to_exception(e)
TypeError: in user code:
C:\Users\\Anaconda3\lib\site-packages\keras\engine\training.py:853 train_function *
return step_function(self, iterator)
TypeError: tf__compile() missing 1 required positional argument: 'loss'
The code I use is shown below. I tried it with my own custom loss and with a loss from Keras, and both gave the same error. If I just initialize a Keras Sequential model and incorporate my loss, it works fine, but with my custom model it does not. I use a custom model because I also want to customize the optimization method after this. How can this error be resolved?
#%% Functions
import numpy as np
import tensorflow as tf
from tensorflow import keras
import tensorflow.keras.backend as K
from numpy.random import seed

class CustomModel(keras.Model):
    # initialize the model with the beta needed and the dimensions
    def __init__(self, b, input_dim):
        # first I initialized it as self.model and without the call function,
        # but this gave me an error that said I needed a call function, thus I changed it to this
        super(CustomModel, self).__init__()
        self.dim = keras.Input(shape=(input_dim,))
        self.dense1 = keras.layers.Dense(20, name='hidden', kernel_initializer=initializer,
                                         bias_initializer=initializer, activation=lambda x: K.tanh(b*x))
        self.dense2 = keras.layers.Dense(2, activation='linear', name='output', use_bias=False, trainable=False,
                                         kernel_initializer=lambda shape, dtype: initializeOutputWeights(shape, dtype))

    def call(self):
        x1 = self.dense1(self.dim)
        return self.dense2(x1)

    def compile(self, optimizer, loss):
        # for the use of the custom loss function
        super(CustomModel, self).compile()
        self.optimizer = optimizer
        self.loss = loss

    def train_step(self, data):
        # Unpack the data. Its structure depends on your model and
        # on what you pass to `fit()`.
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  # Forward pass
            # Compute the loss value
            # (the loss function is configured in `compile()`)
            loss = self.loss(y, y_pred, regularization_losses=self.losses)
        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update metrics (includes the metric that tracks the loss)
        self.compiled_metrics.update_state(y, y_pred)
        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}

def initializeOutputWeights(shape, dtype=None):
    # output weights are initialized as 1 or -1 and not changed afterwards
    randoms = np.random.randint(low=2, size=shape)
    new = np.where(randoms == 0, -1, randoms)
    return K.variable(new, dtype=dtype)

class customLoss(keras.losses.Loss):
    # the custom loss function of the GVM
    def __init__(self, d=10, name="CustomLoss"):
        # need the margin d
        super().__init__(name=name)
        self.d = d

    def call(self, y_true, y_pred):
        # calculate the loss
        N = len(y_true)
        L = len(y_pred[0])
        y_dot = y_pred * y_true
        y_d = y_dot - self.d
        y_square = y_d * y_d
        index_replace = y_dot > self.d
        idx_replace = tf.where(index_replace == True)
        y_loss = tf.tensor_scatter_nd_update(y_square, idx_replace, tf.zeros(len(idx_replace)))
        return tf.divide(K.sum(K.sum(y_loss, axis=1)), tf.cast(N*L, tf.float32))

seed(1)
tf.random.set_seed(2)
acc_metric = keras.metrics.SparseCategoricalAccuracy(name="accuracy")
initializer = tf.keras.initializers.RandomUniform(minval=-1, maxval=1)
b = np.ones(20)
cModel = CustomModel(b, 9)
Losscustom = customLoss(d=16)
cModel.compile(optimizer='adam', loss=Losscustom)
cModel.fit(X_train, y_train, batch_size=64, epochs=2)

You seem to have mixed up a few constructs that don't fit together. I suggest you define your own custom training loop, which will give you the flexibility you need.
First, define your model:
import tensorflow as tf
import random
import numpy as np

def initializeOutputWeights(shape, dtype=None):
    # output weights are initialized as 1 or -1 and not changed afterwards
    randoms = np.random.randint(low=2, size=shape)
    new = np.where(randoms == 0, -1, randoms)
    return tf.keras.backend.variable(new, dtype=dtype)

class CustomModel(tf.keras.Model):
    def __init__(self, b, input_dim):
        super(CustomModel, self).__init__()
        initializer = tf.keras.initializers.RandomUniform(minval=-1, maxval=1)
        self.dense = tf.keras.layers.Dense(20, name='hidden', kernel_initializer=initializer,
                                           bias_initializer=initializer, activation=lambda x: tf.tanh(b*x),
                                           input_shape=(input_dim,))
        self._output = tf.keras.layers.Dense(2, activation='linear', name='output', use_bias=False, trainable=False,
                                             kernel_initializer=lambda shape, dtype: initializeOutputWeights(shape, dtype))

    def call(self, inputs):
        x = self.dense(inputs)
        return self._output(x)
Then define your loss function:
def compute_loss(d, y_pred, y_true):
    # calculate the loss
    N = len(y_true)
    L = len(y_pred[0])
    y_dot = y_pred * y_true
    y_d = y_dot - d
    y_square = y_d * y_d
    index_replace = y_dot > d
    idx_replace = tf.where(index_replace == True)
    y_loss = tf.tensor_scatter_nd_update(y_square, idx_replace, tf.zeros(len(idx_replace)))
    return tf.divide(tf.keras.backend.sum(tf.keras.backend.sum(y_loss, axis=1)), tf.cast(N*L, tf.float32))
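As an aside, the scatter update above can be written more compactly with a boolean mask; a minimal equivalent sketch (same shapes and semantics assumed):

def compute_loss_masked(d, y_pred, y_true):
    y_dot = y_pred * y_true
    y_square = tf.square(y_dot - d)
    # zero out the entries whose margin is already satisfied (y_dot > d)
    y_loss = tf.where(y_dot > d, tf.zeros_like(y_square), y_square)
    denom = tf.cast(tf.shape(y_true)[0] * tf.shape(y_pred)[1], tf.float32)
    return tf.reduce_sum(y_loss) / denom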
Afterwards, define your training loop:
@tf.function
def train_step(model, batch, optimizer):
    with tf.GradientTape() as tape:
        x, y = batch
        d = 16
        y_pred = model(x)
        loss = compute_loss(d, y_pred, y)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    # update your metrics here how you want.
    acc_metric.update_state(y, y_pred)
    tf.print("Training loss (for one batch): ", loss)
def train(dataset, optimizer, epochs=25):
    b = np.ones(20)
    custom_model = CustomModel(b, 9)
    for epoch in range(epochs):
        for batch in dataset:
            train_step(custom_model, batch, optimizer)
        train_acc = acc_metric.result()
        tf.print("Training acc over epoch: %.4f" % (float(train_acc),))
        # Reset training metrics at the end of each epoch
        acc_metric.reset_states()
And finally define your dataset and other important variables and train your model:
random.seed(1)
tf.random.set_seed(2)
acc_metric = tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy")
opt = tf.keras.optimizers.Adam()

#### Dummy data ####
y_train = tf.cast(tf.random.uniform((500, 1), minval=0, maxval=2, dtype=tf.int32), tf.float32)
X_train = tf.random.normal((500, 9))

BATCH_SIZE = 64
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(
    X_train.shape[0]).batch(BATCH_SIZE)

train(train_dataset, opt)
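If you would rather keep model.fit() instead of a manual loop, the original subclassed model can also be repaired rather than replaced: call() must accept an inputs argument, and compile() should not be overridden in a way that hides the built-in loss plumbing. A minimal sketch of that route, assuming TF 2.x (the layer sizes here are illustrative):

import tensorflow as tf

class MinimalCustomModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.dense1 = tf.keras.layers.Dense(20, activation="tanh")
        self.dense2 = tf.keras.layers.Dense(2)

    def call(self, inputs):  # fit() passes the batch in here
        return self.dense2(self.dense1(inputs))

    def train_step(self, data):
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = self.compiled_loss(y, y_pred)  # the loss handed to compile()
        grads = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
        self.compiled_metrics.update_state(y, y_pred)
        return {m.name: m.result() for m in self.metrics}

model = MinimalCustomModel()
model.compile(optimizer="adam", loss="mse")  # no compile() override needed
model.fit(tf.random.normal((64, 9)), tf.random.normal((64, 2)), epochs=1)

This keeps a custom train_step (and, later, a custom optimizer step) while letting Keras route the loss through compile() as usual.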

Related

Problem building an ANN Regressor model with Autoencoder in TensorFlow 2.11

My input is a 2D numpy array of dimensions (364660, 5052). The target is (364660, 1), a regression variable. I am trying to build a guided autoencoder + ANN regressor where the encoded layer of the autoencoder serves as input to the ANN regressor. I would like to train both models in one go. However, the loss for the autoencoder should be a combined autoencoder loss + ANN loss, whereas the ANN loss remains the same. Here is my sample code:
import tensorflow as tf

class AutoencoderRegressor(tf.keras.Model):
    def __init__(self, encoder_layers, decoder_layers, regressor_layers, autoencoder_loss_weights):
        super(AutoencoderRegressor, self).__init__()
        self.autoencoder = tf.keras.models.Sequential(encoder_layers + decoder_layers)
        self.regressor = tf.keras.models.Sequential(regressor_layers)
        self.autoencoder_loss_weights = autoencoder_loss_weights

    def call(self, inputs, training=None, mask=None):
        autoencoder_output = self.autoencoder(inputs)
        regressor_input = self.autoencoder.get_layer(index=2).output
        regressor_output = self.regressor(regressor_input)
        return autoencoder_output, regressor_output

    def autoencoder_loss(self, autoencoder_output, inputs):
        binary_crossentropy = tf.keras.losses.BinaryCrossentropy()
        mean_squared_error = tf.keras.losses.MeanSquaredError()
        autoencoder_reconstruction_loss = binary_crossentropy(inputs, autoencoder_output)
        autoencoder_regression_loss = mean_squared_error(inputs, autoencoder_output)
        #autoencoder_loss = self.autoencoder_loss_weights[0] * autoencoder_reconstruction_loss + self.autoencoder_loss_weights[1] * autoencoder_regression_loss
        autoencoder_loss = autoencoder_reconstruction_loss + autoencoder_regression_loss
        return autoencoder_loss

    def regressor_loss(self, regressor_output, targets):
        mean_squared_error = tf.keras.losses.MeanSquaredError()
        regressor_loss = mean_squared_error(targets, regressor_output)
        return regressor_loss

# define the encoder layers
encoder_layers = [
    tf.keras.layers.Dense(64, activation='relu', input_shape=(reduced_x_train2.shape[1],)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu')]

# define the decoder layers
decoder_layers = [
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(reduced_x_train2.shape[1], activation='sigmoid')]

# define the regressor layers
regressor_layers = [
    tf.keras.layers.Dense(8, activation='relu', input_shape=(16,)),
    tf.keras.layers.Dense(1, activation='linear')]

# define the loss weights
autoencoder_loss_weights = [0.8, 0.2]

autoencoder_regressor = AutoencoderRegressor(encoder_layers, decoder_layers, regressor_layers, autoencoder_loss_weights)
autoencoder_regressor.compile(optimizer='adam', loss=[autoencoder_regressor.autoencoder_loss, autoencoder_regressor.regressor_loss])
autoencoder_regressor.fit(reduced_x_train2, [reduced_x_train2, y_train], epochs=100,
                          batch_size=32, validation_split=0.9, shuffle=True,
                          verbose=2)
I get the following error:
TypeError Traceback (most recent call last)
Input In [14], in <cell line: 60>()
56 autoencoder_regressor = AutoencoderRegressor(encoder_layers, decoder_layers, regressor_layers, autoencoder_loss_weights)
58 autoencoder_regressor.compile(optimizer='adam', loss=[autoencoder_regressor.autoencoder_loss, autoencoder_regressor.regressor_loss])
---> 60 autoencoder_regressor.fit(reduced_x_train2, [reduced_x_train2, y_train], epochs=100,
61 batch_size=32, validation_split=0.9,shuffle =True,
62 verbose = 2)
TypeError: in user code:
File "/user/iibi/amudireddy/.conda/envs/tfni10_py38/lib/python3.8/site-packages/keras/engine/training.py", line 1051, in train_function *
return step_function(self, iterator)
File "/user/iibi/amudireddy/.conda/envs/tfni10_py38/lib/python3.8/site-packages/keras/engine/training.py", line 1040, in step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/user/iibi/amudireddy/.conda/envs/tfni10_py38/lib/python3.8/site-packages/keras/engine/training.py", line 1030, in run_step **
outputs = model.train_step(data)
File "/user/iibi/amudireddy/.conda/envs/tfni10_py38/lib/python3.8/site-packages/keras/engine/training.py", line 890, in train_step
loss = self.compute_loss(x, y, y_pred, sample_weight)
File "/user/iibi/amudireddy/.conda/envs/tfni10_py38/lib/python3.8/site-packages/keras/engine/training.py", line 948, in compute_loss
return self.compiled_loss(
File "/user/iibi/amudireddy/.conda/envs/tfni10_py38/lib/python3.8/site-packages/keras/engine/compile_utils.py", line 215, in __call__
metric_obj.update_state(loss_metric_value, sample_weight=batch_dim)
File "/user/iibi/amudireddy/.conda/envs/tfni10_py38/lib/python3.8/site-packages/keras/utils/metrics_utils.py", line 70, in decorated
update_op = update_state_fn(*args, **kwargs)
File "/user/iibi/amudireddy/.conda/envs/tfni10_py38/lib/python3.8/site-packages/keras/metrics/base_metric.py", line 140, in update_state_fn
return ag_update_state(*args, **kwargs)
File "/user/iibi/amudireddy/.conda/envs/tfni10_py38/lib/python3.8/site-packages/keras/metrics/base_metric.py", line 449, in update_state **
sample_weight = tf.__internal__.ops.broadcast_weights(
File "/user/iibi/amudireddy/.conda/envs/tfni10_py38/lib/python3.8/site-packages/keras/engine/keras_tensor.py", line 254, in __array__
raise TypeError(
TypeError: You are passing KerasTensor(type_spec=TensorSpec(shape=(), dtype=tf.float32, name=None), name='Placeholder:0', description="created by layer 'tf.cast_15'"), an intermediate Keras symbolic input/output, to a TF API that does not allow registering custom dispatchers, such as 'tf.cond, 'tf.function', gradient tapes, or 'tf.map_fn'. Keras Functional model construction only supports TF API calls that *do* support dispatching, such as 'tf.math.add' or 'tf.reshape'. Other APIs cannot be called directly on symbolic Kerasinputs/outputs. You can work around this limitation by putting the operation in a custom Keras layer 'call' and calling that layer on this symbolic input/output.
Where am I going WRONG?
### EDITED question: I missed an implementation requirement.
In autoencoder_loss, autoencoder_reconstruction_loss should take (inputs, autoencoder_output) as its input, and autoencoder_regression_loss should take (targets, regressor_output) as its input.
I am not too sure how to implement this. Please help.
It seems you can't access a submodel's output layer from within your model the way you did with the line regressor_input = self.autoencoder.get_layer(index=2).output. However, here is an alternative that works:
import numpy as np
import tensorflow as tf

class AutoEncoderRegressor(tf.keras.Model):
    def __init__(self, encoder, decoder, regressor, loss_weights):
        super(AutoEncoderRegressor, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.regressor = regressor
        self.loss_weights = loss_weights

    def call(self, inputs, training=None, mask=None):
        encoded = self.encoder(inputs)
        decoded = self.decoder(encoded)
        regression = self.regressor(encoded)
        return decoded, regression

    def autoencoder_loss(self, autoencoder_output, inputs):
        return tf.keras.losses.BinaryCrossentropy()(inputs, autoencoder_output)

    def regressor_loss(self, regressor_output, targets):
        return tf.keras.losses.MeanSquaredError()(targets, regressor_output)

def main():
    input_dim = 10
    # generate random training data
    rng = np.random.default_rng()
    X = rng.random((100, input_dim))
    y = [X, rng.random((100, 1))]
    encoder = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=(input_dim,)),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(16, activation='relu')
    ])
    decoder = tf.keras.Sequential([
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(input_dim, activation='sigmoid')
    ])
    regressor = tf.keras.Sequential([
        tf.keras.layers.Dense(8, activation='relu', input_shape=(16,)),
        tf.keras.layers.Dense(1, activation='linear')
    ])
    model = AutoEncoderRegressor(encoder, decoder, regressor, loss_weights=[0.8, 0.2])
    model.compile(
        optimizer='adam',
        loss=[model.autoencoder_loss, model.regressor_loss],
        loss_weights=model.loss_weights
    )
    model.fit(X, y,
              epochs=100,
              batch_size=32,
              validation_split=0.9,
              shuffle=True,
              verbose=2
              )

if __name__ == "__main__":
    main()
I used random training data with smaller dimensions for testing here; adjust this to your actual dataset.
EDIT: Just for clarification: your model has two loss functions for its two outputs. You don't have to add them together yourself, because TensorFlow does this automatically. With the loss_weights you've provided, the total loss of the model will be
total_loss = 0.8 * autoencoder_loss + 0.2 * regressor_loss
Here, autoencoder_loss is the binary cross-entropy between X_true and X_pred, and regressor_loss is the mean squared error between y_true and y_pred.
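In other words, with loss=[loss_a, loss_b] Keras pairs the i-th loss with the i-th model output and the i-th entry of y. A minimal sketch of what is effectively computed per batch under that pairing (the names here are illustrative, not part of the Keras API):

import tensorflow as tf

bce = tf.keras.losses.BinaryCrossentropy()
mse = tf.keras.losses.MeanSquaredError()

def combined_loss(X_true, y_true, decoded, regression, weights=(0.8, 0.2)):
    # output[0] (decoded) is scored against target[0] (X itself),
    # output[1] (regression) against target[1] (y)
    return weights[0] * bce(X_true, decoded) + weights[1] * mse(y_true, regression)

This is exactly the cross-wiring asked for in the edited question: the reconstruction loss sees (inputs, autoencoder_output) and the regression loss sees (targets, regressor_output).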

TensorFlow custom model fit

I want to customize a TensorFlow model. I need a custom training algorithm like the one below; I don't want my model to be inside the custom model, just the training algorithm.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.optimizers import Adam

class CustomModel(keras.Model):
    def __init__(self, inputs, outputs, echo=False):
        super().__init__()
        self.echo = echo

    def train_step(self, data):
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
            print(loss)
            if self.echo:
                print('*')
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        self.compiled_metrics.update_state(y, y_pred)
        return {m.name: m.result() for m in self.metrics}

inputs = keras.Input(shape=(224, 224, 3))
x = keras.layers.Conv2D(32, (3, 3))(inputs)
x = keras.layers.Conv2D(64, 3)(x)
x = keras.layers.Conv2D(64, 3)(x)
x = keras.layers.AveragePooling2D()(x)
x = keras.layers.Flatten()(x)
x = keras.layers.Dense(64, activation='relu')(x)
x = keras.layers.Dense(3, activation='softmax')(x)

model = CustomModel(inputs, x, echo=True)
model.compile(optimizer="adam", loss="mse", metrics=["mae"])

opt = Adam(learning_rate=0.0001)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

epochs = 5
history = model.fit_generator(train_generator,
                              validation_data=valid_generator, verbose=1, epochs=epochs)
error:
NotImplementedError: When subclassing the `Model` class, you should implement a `call` method.
You don't need to provide the (inputs, outputs) arguments in the __init__ function of your subclassed model. You can implement the call method in your subclassed model as follows:
class CustomModel(keras.Model):
    ...
    ...
    # A call function needs to be implemented
    def call(self, inputs, *args, **kwargs):
        return self(inputs)
Update:
Based on the comments, here's a possible workaround. You build the model with the provided inputs/outputs within __init__:
class CustomModel(keras.Model):
    def __init__(self, inputs, x, echo=False, **kwargs):
        super().__init__(**kwargs)
        self.model = keras.Model(inputs, x)
        self.echo = echo

    def call(self, inputs, *args, **kwargs):
        return self.model(inputs)

    def train_step(self, data):
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self.model(x, training=True)  # Forward pass
            ...
        # Compute gradients
        trainable_vars = self.model.trainable_variables
        gradients = ...
        # Update weights
        ...
        return {m.name: m.result() for m in self.metrics}
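For reference, a filled-in sketch of the wrapper above with the elided parts completed, assuming the standard compiled loss and metrics:

import tensorflow as tf
from tensorflow import keras

class CustomModel(keras.Model):
    def __init__(self, inputs, outputs, echo=False, **kwargs):
        super().__init__(**kwargs)
        self.model = keras.Model(inputs, outputs)
        self.echo = echo

    def call(self, inputs, *args, **kwargs):
        return self.model(inputs)

    def train_step(self, data):
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self.model(x, training=True)  # Forward pass
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
        if self.echo:
            tf.print('*')
        # Compute gradients and update weights
        trainable_vars = self.model.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        self.compiled_metrics.update_state(y, y_pred)
        return {m.name: m.result() for m in self.metrics}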

Torch Geometric - RuntimeError: mat1 and mat2 shapes cannot be multiplied (1479x1 and 1479x1024)

I am trying to create a GNN that models a protein. However, I am running into an error with GraphConv (I get the same error with GCNConv). I do not understand why I am getting this error when the shapes should be able to be multiplied. I think the error must have something to do with the custom dataset I created, but I am not 100% sure. Please let me know if you have had a similar issue or know how to fix this. Thank you.
EDIT: Even if I change embedding_size to 1479, I still get: RuntimeError: mat1 and mat2 shapes cannot be multiplied (1479x1 and 1479x1479).
Custom dataset:
import os
import numpy as np
import torch
from rdkit import Chem
import torch_geometric.data as geom_data

class ProteinDataset(geom_data.Dataset):
    def __init__(self, root, transform=None, pre_transform=None):
        # root = where data set is stored
        super(ProteinDataset, self).__init__(root, transform, pre_transform)
        self.root = root

    @property
    def raw_file_names(self):
        return os.listdir(f'{self.root}/raw')

    @property
    def processed_file_names(self):
        inxs = []
        for pdb in self.raw_paths:
            inxs.append(pdb.split('/')[-1].split('.p')[0])
        return [f'{i}.pt' for i in inxs]

    def download(self):
        pass

    def process(self):
        for pdb in self.raw_paths:
            try:
                mol_obj = Chem.rdmolfiles.MolFromPDBFile(pdb)
            except AttributeError:
                os.remove(pdb)
                continue
            # Get node features
            node_feats = self._get_node_features(mol_obj).reshape([-1, 1])
            # Get edge features
            edge_feats = self._get_edge_features(mol_obj).reshape([-1, 1])
            # Get adjacency info
            edge_index = self._get_adjacency_info(mol_obj)
            label = self._get_labels(pdb)
            # Create Data object
            data = geom_data.Data(x=node_feats,
                                  edge_index=edge_index,
                                  edge_attr=edge_feats,
                                  y=label)
            i = pdb.split('/')[-1].split('.p')[0]
            torch.save(data, os.path.join(self.processed_dir, f'{i}.pt'))

    def _get_node_features(self, mol):
        all_node_feats = []
        for atom in mol.GetAtoms():
            all_node_feats.append(atom.GetMass())
        all_node_feats = np.asarray(all_node_feats)
        return torch.tensor(all_node_feats, dtype=torch.float)

    def _get_edge_features(self, mol):
        all_edge_feats = []
        dists = Chem.rdmolops.Get3DDistanceMatrix(mol)
        # CA-CA Distances
        for bond in mol.GetBonds():
            begin = bond.GetBeginAtomIdx()
            end = bond.GetEndAtomIdx()
            all_edge_feats.append(dists[begin, end])
        all_edge_feats = np.asarray(all_edge_feats)
        return torch.tensor(all_edge_feats, dtype=torch.float)

    def _get_adjacency_info(self, mol):
        adj_matrix = Chem.rdmolops.GetAdjacencyMatrix(mol)
        row, col = np.where(adj_matrix)
        coo = np.array(list(zip(row, col)))
        coo = np.reshape(coo, (2, -1))
        return torch.tensor(coo, dtype=torch.long)

    def _get_labels(self, fn):
        with open(fn, 'r') as f:
            label = float(f.readline())
        label = np.asarray([label])
        return torch.tensor(label, dtype=torch.float)

    def len(self):
        return len(self.raw_paths)

    def get(self, inx):
        data = torch.load(self.processed_paths[inx])
        return data
Model:
import torch
from torch.nn import Linear
from torch_geometric.nn import GraphConv, TopKPooling
from torch_geometric.nn import global_max_pool as gmp, global_mean_pool as gap

class GNN(torch.nn.Module):
    def __init__(self, feature_size):
        super(GNN, self).__init__()
        embedding_size = 1024
        # GNN Layers
        self.conv1 = GraphConv(feature_size, embedding_size)
        self.head1 = Linear(embedding_size*3, embedding_size)
        self.pool1 = TopKPooling(embedding_size, ratio=0.8)
        self.conv2 = GraphConv(embedding_size, embedding_size)
        self.head2 = Linear(embedding_size*3, embedding_size)
        self.pool2 = TopKPooling(embedding_size, ratio=0.5)
        self.conv3 = GraphConv(embedding_size, embedding_size)
        self.head3 = Linear(embedding_size*3, embedding_size)
        self.pool3 = TopKPooling(embedding_size, ratio=0.2)
        # Linear Layers
        self.fc1 = Linear(embedding_size*2, 1024)
        self.fc2 = Linear(1024, 128)
        self.fc3 = Linear(128, 1)

    def forward(self, x, edge_attr, edge_index, batch_index):
        # First block
        x = self.conv1(x, edge_index).relu()
        x = self.head1(x)
        x, edge_index, edge_attr, batch_index, _, _ = self.pool1(x, edge_index, None, batch_index)
        x1 = torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1)
        # Second block
        x = self.conv2(x, edge_index).relu()
        x = self.head2(x)
        x, edge_index, edge_attr, batch_index, _, _ = self.pool2(x, edge_index, None, batch_index)
        x2 = torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1)
        # Third block
        x = self.conv3(x, edge_index).relu()
        x = self.head3(x)
        x, edge_index, edge_attr, batch_index, _, _ = self.pool3(x, edge_index, None, batch_index)
        x3 = torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1)
        # Concat pooled vectors
        x = x1 + x2 + x3
        # Apply Linear Layers
        x = self.fc1(x).relu()
        x = self.fc2(x).relu()
        x = self.fc3(x)
        return x
Training:
import torch
import torch.nn as nn
from torch_geometric.loader import DataLoader  # torch_geometric.data.DataLoader in older PyG versions

device = torch.device('cuda')

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

# Loading the dataset
train_set = ProteinDataset(root='data/lys50_2/train')
test_set = ProteinDataset(root='data/lys50_2/test')
print('Shape of input:', train_set[0].x.shape[0])

# Loading the model
model = GNN(feature_size=train_set[0].x.shape[0])
model = model.to(device)
print(f'Number of parameters: {count_parameters(model)}')
print(model)

# Loss and Optimizer
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.00005)
print(optimizer)

# Prepare for training
train_loader = DataLoader(train_set, batch_size=1, shuffle=True)
test_loader = DataLoader(test_set, batch_size=1, shuffle=False)

def train(m, opt):
    loss_sum = 0.0
    for _, batch in enumerate(train_loader):
        # Use GPU
        batch.to(device)
        # Reset grad
        opt.zero_grad()
        # Pass node features and connections
        pred = m(batch.x.float(),
                 batch.edge_attr.float(),
                 batch.edge_index,
                 batch.batch)
        # Calculate loss and gradients
        loss = loss_fn(pred, batch.y)
        loss.backward()
        loss_sum += loss.item()
        # Update using the gradients
        opt.step()
    return loss_sum / len(train_loader)

def validate(m):
    loss_sum = 0.0
    for _, batch in enumerate(test_loader):
        # Use GPU
        batch.to(device)
        # No grad
        with torch.no_grad():
            pred = m(batch.x.float(),
                     batch.edge_attr.float(),
                     batch.edge_index,
                     batch.batch)
            # Calculate loss
            loss = loss_fn(pred, batch.y)
        loss_sum += loss.item()
    return loss_sum / len(test_loader)

model.zero_grad()
optimizer.zero_grad()

# Loop for training
for i in range(101):
    loss = train(model, optimizer)
    if (i % 10 == 0):
        loss_v = validate(model)
        print(i, loss, loss_v)
    else:
        print(i, loss)
Error when running training:
Traceback (most recent call last):
File "/home/spencer/sh3/gnn/./train.py", line 79, in <module>
loss = train(model,optimizer)
File "/home/spencer/sh3/gnn/./train.py", line 44, in train
pred = m(batch.x.float(),
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/feig/s1/spencer/sh3/gnn/model2.py", line 32, in forward
x = self.conv1(x, edge_index).relu()
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch_geometric/nn/conv/graph_conv.py", line 71, in forward
out = self.lin_rel(out)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch_geometric/nn/dense/linear.py", line 109, in forward
return F.linear(x, self.weight, self.bias)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/functional.py", line 1848, in linear
return torch._C._nn.linear(input, weight, bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1479x1 and 1479x1024)
The error tells you that the input shapes don't match: conv1 receives a (1479 x 1) feature matrix, but its weight matrix is (feature_size x 1024). You can reshape the input in the forward method like this: x = x.view(1, 1479), but make sure that this is what you need - this error usually indicates a wrongly shaped dataset or passing the wrong input. Here the model was built with feature_size=train_set[0].x.shape[0], i.e. the number of nodes, while GraphConv expects the number of features per node.
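Concretely, since _get_node_features reshapes the features to [-1, 1], each node carries exactly one feature. A minimal sketch of the likely fix, assuming the dataset above:

sample = train_set[0]
print(sample.x.shape)  # e.g. torch.Size([1479, 1]): 1479 nodes, 1 feature per node

# feature_size must match x.shape[1] (features per node), not x.shape[0] (node count)
model = GNN(feature_size=sample.x.shape[1])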

"Got OperatorNotAllowedInGraphError: iterating over tf.Tensor" while not obviously iterating over tensor

OS: Manjaro Linux x64
CUDA: 11.0.3
TF/Keras: 2.4(.1)
Py: 3.8
Hello,
I'm trying to build some kind of W-VAE-GAN. I keep running into that very same error over and over again, and I already had that problem with Keras 2.3; interestingly, it did NOT occur with TF/K 2.2. Unfortunately I need to use Keras 2.4 because I'm supposed to run my code on our university server with these exact TF/K and CUDA versions. At this point I'm just trying to make sure my code works as intended.
The error I get is the following, with the exact lines commented in my code:
...
Epoch 1/20
Traceback (most recent call last):
File "/home/peer/Programmierkram/BA/Metal_GAN/wvaegan.py", line 282, in <module>
gan.fit(gen_trans, batch_size=batch_size, epochs=epochs, callbacks=[callback])
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py", line 1100, in fit
tmp_logs = self.train_function(iterator)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 828, in __call__
result = self._call(*args, **kwds)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 871, in _call
self._initialize(args, kwds, add_initializers_to=initializers)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 725, in _initialize
self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 2969, in _get_concrete_function_internal_garbage_collected
graph_function, _ = self._maybe_define_function(args, kwargs)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3361, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3196, in _create_graph_function
func_graph_module.func_graph_from_py_func(
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 990, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 634, in wrapped_fn
out = weak_wrapped_fn().__wrapped__(*args, **kwds)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 977, in wrapper
raise e.ag_error_metadata.to_exception(e)
tensorflow.python.framework.errors_impl.OperatorNotAllowedInGraphError: in user code:
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:805 train_function *
return step_function(self, iterator)
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:795 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
return fn(*args, **kwargs)
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:788 run_step **
outputs = model.train_step(data)
/home/peer/Programmierkram/BA/Metal_GAN/wvaegan.py:190 train_step
z_mean, z_log_var, z = self.encoder(clip_img) # <------ WHY NO ERROR HERE?
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:505 __iter__
self._disallow_iteration()
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:498 _disallow_iteration
self._disallow_when_autograph_enabled("iterating over `tf.Tensor`")
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:474 _disallow_when_autograph_enabled
raise errors.OperatorNotAllowedInGraphError(
OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed: AutoGraph did convert this function. This might indicate you are trying to use an unsupported feature.
What I do not understand is how this error comes about in the first place, as I successfully executed the VAE part using TF 2.2 without any error like this, and, more importantly, there is no iteration over any tensor obvious to me. Even if I comment out the for-loop in my train_step, the same error occurs a few lines later in the same context. I have also tried decorating train_step() with @tf.function, yet nothing changed.
The code I used is the following:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow.keras.backend as K
from keras.preprocessing.image import ImageDataGenerator
import itertools
import scipy.io
import matplotlib.pyplot as plt
import matplotlib.image as PIL

runOnGPU = 0

if runOnGPU == 1:
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
        except RuntimeError as e:
            print(e)
else:
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

path_clipped_train = os.path.join('./sinograms/clip')
path_transparent_train = os.path.join('./sinograms/transparent')

img_width, img_height = 512, 512
bottleneck = 1024 * 2
filters = (1024, 512, 256, 64)
filter_size = (3, 3, 3, 3)
batch_size = 4
epochs = 20
dsc_steps = 1
gp_w = 10.0
beta_v = 2
learning_rate = 125e-5
latent_dim = 2
input_shape = (1, img_width, img_height, 1)

dataset_gen1 = ImageDataGenerator(rescale=1 / 255, dtype="float32")
dataset_gen2 = ImageDataGenerator(rescale=1 / 255, dtype="float32")

gen_trans = dataset_gen1.flow_from_directory(path_transparent_train,
                                             target_size=(img_width, img_height),
                                             color_mode='grayscale',
                                             classes=[''],
                                             class_mode=None,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             )
gen_clip = dataset_gen2.flow_from_directory(path_clipped_train,
                                            target_size=(img_width, img_height),
                                            color_mode='grayscale',
                                            classes=[''],
                                            class_mode=None,
                                            batch_size=batch_size,
                                            shuffle=False,
                                            )

class Sampling(layers.Layer):
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon

def get_encoder():
    encoder_inputs = keras.Input(shape=input_shape[1:], name="encoder_input")
    enc = encoder_inputs
    for (numFilters, szFilters) in zip(filters, filter_size):
        enc = layers.Conv2D(numFilters, szFilters, activation='relu', strides=2, padding='same')(enc)
        enc = layers.BatchNormalization()(enc)
        enc = layers.Dropout(0.2)(enc)
    conv_shape = K.int_shape(enc)[1:]
    enc = layers.Flatten()(enc)
    enc = layers.Dense(bottleneck, activation='relu', name="bottleneck")(enc)
    enc = layers.BatchNormalization()(enc)
    z_mean = layers.Dense(latent_dim, name="z_mean")(enc)
    z_log_var = layers.Dense(latent_dim, name="z_log_var")(enc)
    latent_z = Sampling()([z_mean, z_log_var])
    encoder_model = keras.models.Model(encoder_inputs, latent_z, name="encoder")
    return encoder_model, conv_shape

enc_model, conv_shape = get_encoder()
enc_model.summary()

def get_decoder():
    latent_input = keras.Input(shape=(latent_dim,))
    dec = layers.Dense(conv_shape[0] * conv_shape[1] * conv_shape[2], activation='relu')(latent_input)
    dec = layers.Reshape(conv_shape)(dec)
    for (numFilters, szFilters) in zip(reversed(filters), reversed(filter_size)):
        dec = layers.Conv2DTranspose(numFilters, szFilters, activation='relu', strides=2, padding='same')(dec)
        dec = layers.BatchNormalization()(dec)
        dec = layers.Dropout(0.2)(dec)
    decoder_outputs = layers.Conv2DTranspose(1, 3, activation='relu', padding='same')(dec)
    decoder_model = keras.models.Model(latent_input, decoder_outputs, name="decoder")
    return decoder_model

dec_model = get_decoder()
dec_model.summary()

def get_discriminator():
    dscr_input = keras.Input(shape=input_shape[1:])
    dscr = dscr_input
    for numFilters in filters:
        dscr = layers.Conv2D(numFilters, kernel_size=5, activation='relu', strides=2, padding='same')(dscr)
    dscr = layers.Flatten()(dscr)
    dscr = layers.Dense(1, activation="relu", name="dsc_end")(dscr)
    discriminator_model = keras.models.Model(dscr_input, dscr, name="discriminator")
    return discriminator_model

dsc_model = get_discriminator()
dsc_model.summary()

class GAN(keras.Model):
    def __init__(self,
                 discriminator,
                 encoder,
                 decoder,
                 latent_dim,
                 dsc_steps=dsc_steps,
                 gp_w=gp_w,
                 ):
        super(GAN, self).__init__()
        self.discriminator = discriminator
        self.encoder = encoder
        self.decoder = decoder
        self.latent_dim = latent_dim
        self.dsc_steps = dsc_steps
        self.gp_w = gp_w

    def compile(self,
                dsc_optimizer, enc_optimizer, dec_optimizer,
                dsc_loss_fn, enc_loss_fn, dec_loss_fn):
        super(GAN, self).compile()
        self.dsc_optimizer = dsc_optimizer
        self.enc_optimizer = enc_optimizer
        self.dec_optimizer = dec_optimizer
        self.dsc_loss_fn = dsc_loss_fn
        self.enc_loss_fn = enc_loss_fn
        self.dec_loss_fn = dec_loss_fn

    def call(self, data):
        ds = self.discriminator(data)
        e = self.encoder(data)
        d = self.decoder(e)

    def gradient_penalty(self, batch_size, ref_img, gen_img):
        alpha = tf.random.normal([batch_size, 1, 1, 1], 0.0, 1.0)
        diff = gen_img - ref_img
        interpolated = ref_img + alpha * diff
        with tf.GradientTape() as gp_tape:
            gp_tape.watch(interpolated)
            pred = self.discriminator(interpolated, training=True)
        grads = gp_tape.gradient(pred, [interpolated])[0]
        norm = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1, 2, 3]))
        gp = tf.reduce_mean((norm - 1.0) ** 2)
        return gp

    @tf.function  # doesn't make any difference if decorating with that
    def train_step(self, data):
        trans_img = data
        clip_img = data
        batch_size = tf.shape(trans_img)[:1]
        for i in range(self.dsc_steps):
            with tf.GradientTape() as tape:
                z_mean, z_log_var, z = self.encoder(clip_img)  # <------ ERROR HERE
                gen_img = self.decoder(z)
                gen_logits = self.discriminator(gen_img)
                ref_logits = self.discriminator(trans_img)
                # positional args: dsc_loss_fn is discriminator_loss(real_img, fake_img)
                dsc_cost = self.dsc_loss_fn(ref_logits, gen_logits)
                gp = self.gradient_penalty(batch_size, trans_img, gen_img)
                dsc_loss = dsc_cost + gp * self.gp_w
            dsc_gradient = tape.gradient(dsc_loss, self.discriminator.trainable_variables)
            self.dsc_optimizer.apply_gradients(zip(dsc_gradient, self.discriminator.trainable_variables))
        # persistent=True: this tape is queried twice below (encoder and decoder gradients)
        with tf.GradientTape(persistent=True) as tape:
            z_mean, z_log_var, z = self.encoder(clip_img)  # <------ ERROR ALSO HERE IF dsc_steps = 0
            gen_img = self.decoder(z)
            gen_img_logits = self.discriminator(gen_img)
            dec_loss = self.dec_loss_fn(gen_img_logits)
            # the KL loss was passed to compile() as enc_loss_fn
            kl_loss = self.enc_loss_fn(z_mean, z_log_var)
        enc_gradient = tape.gradient(kl_loss, self.encoder.trainable_variables)
        self.enc_optimizer.apply_gradients(zip(enc_gradient, self.encoder.trainable_variables))
        dec_gradient = tape.gradient(dec_loss, self.decoder.trainable_variables)
        self.dec_optimizer.apply_gradients(zip(dec_gradient, self.decoder.trainable_variables))
        return {"dsc_loss": dsc_loss, "KL-Loss": kl_loss, "dec_loss": dec_loss}

class GANMonitor(keras.callbacks.Callback):
    def __init__(self, num_img=6, latent_dim=latent_dim):
        self.num_img = num_img
        self.latent_dim = latent_dim

    def on_epoch_end(self, epoch, logs=None):
        # sample latent vectors for the decoder (the original call passed no input)
        random_latent_vectors = tf.random.normal(shape=(self.num_img, self.latent_dim))
        generated_images = self.model.decoder(random_latent_vectors)
        generated_images = (generated_images * 127.5) + 127.5
        for i in range(self.num_img):
            img = generated_images[i].numpy()
            img = keras.preprocessing.image.array_to_img(img)
            img.save("generated_img_{i}_{epoch}.png".format(i=i, epoch=epoch))

encoder_optimizer = keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5, beta_2=0.9)
decoder_optimizer = keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5, beta_2=0.9)
discriminator_optimizer = keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5, beta_2=0.9)

def discriminator_loss(real_img, fake_img):
    real_loss = tf.reduce_mean(real_img)
    fake_loss = tf.reduce_mean(fake_img)
    return fake_loss - real_loss

def generator_loss(fake_img):
    return -tf.reduce_mean(fake_img)

def kl_loss(z_mean, z_log_var):
    kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
    kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
    return beta_v * kl_loss

def reconstruction_loss(data, reconstruction):
    rec_loss = tf.reduce_mean(
        tf.reduce_sum(keras.losses.mse(data, reconstruction), axis=(1, 2))
    )
    return rec_loss

callback = GANMonitor(num_img=3, latent_dim=latent_dim)

gan = GAN(
    discriminator=dsc_model,
    encoder=enc_model,
    decoder=dec_model,
    latent_dim=latent_dim,
    dsc_steps=dsc_steps,
)

gan.compile(
    dsc_optimizer=discriminator_optimizer,
    enc_optimizer=encoder_optimizer,
    dec_optimizer=decoder_optimizer,
    dsc_loss_fn=discriminator_loss,
    enc_loss_fn=kl_loss,
    dec_loss_fn=generator_loss,
)

gan.fit(gen_trans, batch_size=batch_size, epochs=epochs, callbacks=[callback])
I'd be very thankful for any help because I haven't been able to find or work any solution to this. I've read that in TF2.5 some errors like this shouldn't occur anymore, but using TF2.5 is not an option.
I found the problem.
In the original Keras example for a VAE, the encoder ends in three layers, namely z_mean, z_log_var and latent_z. While it was possible to access all terminal layers of a model in TF 2.2, as I did in my train_step with
z_mean, z_log_var, z = encoder(data)
only (latent_) z is returned here, as defined in the encoder model initialization.
By defining the model with
encoder_model = keras.models.Model(encoder_inputs, ([z_mean, z_log_var, latent_z]), name="encoder")
that is, with a list of output layers, all z* are accessible.
I assume that unpacking multiple variables from a model that returns a single tensor,
x1, x2, x3 = model(data)
is implemented as iteration over that tensor (tuple unpacking calls iter() on the return value), which is the only explanation for an iteration over a tensor I can think of.
However, reading code might be useful, I'll try to remember that.
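The mechanics can be reproduced in isolation; a minimal sketch, assuming TF 2.4 graph-mode behavior:

import tensorflow as tf

@tf.function
def unpack(t):
    a, b, c = t  # tuple unpacking calls iter() on the tensor
    return a

# unpack(tf.constant([1.0, 2.0, 3.0]))
# -> OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed

With the encoder defined to return the list [z_mean, z_log_var, latent_z], the unpacking happens on a Python list instead of a single tensor, so no iteration over a tensor is attempted.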

Keras import model AttributeError: 'NoneType' object has no attribute 'op'

While trying to load my trained Keras model for further inference with
self.model = tf.keras.models.load_model(filepath=weight_url, custom_objects={
    'dice_coef': dice_coef,
    'iou_coef': iou_coef,
    'bce_dice_coef': bce_dice_coef,
})
I get the following error. Those three functions are the same ones I used to train my model, and the weight URL is correct. The error message doesn't tell me anything.
file "/home/long/Desktop/bachelor-thesis/unet/pipeline.py", line 344, in <module>
binary.inference_blood_cell_unet()
File "/home/long/Desktop/bachelor-thesis/unet/pipeline.py", line 305, in inference_blood_cell_unet
self.blood_cell_unet = UNet(weight_url=self.blood_cell_final_name, class_weights=self.blut_koerperchen_classweights, num_classes=2)
File "/home/long/Desktop/bachelor-thesis/unet/model.py", line 39, in __init__
'bce_dice_coef': bce_dice_coef,
File "/home/long/Desktop/bachelor-thesis/venv/lib/python3.7/site-packages/tensorflow_core/python/keras/saving/save.py", line 146, in load_model
return hdf5_format.load_model_from_hdf5(filepath, custom_objects, compile)
File "/home/long/Desktop/bachelor-thesis/venv/lib/python3.7/site-packages/tensorflow_core/python/keras/saving/hdf5_format.py", line 193, in load_model_from_hdf5
model._make_train_function()
File "/home/long/Desktop/bachelor-thesis/venv/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py", line 2116, in _make_train_function
params=self._collected_trainable_weights, loss=self.total_loss)
File "/home/long/Desktop/bachelor-thesis/venv/lib/python3.7/site-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py", line 500, in get_updates
grads = self.get_gradients(loss, params)
File "/home/long/Desktop/bachelor-thesis/venv/lib/python3.7/site-packages/tensorflow_core/python/keras/optimizer_v2/optimizer_v2.py", line 391, in get_gradients
grads = gradients.gradients(loss, params)
File "/home/long/Desktop/bachelor-thesis/venv/lib/python3.7/site-packages/tensorflow_core/python/ops/gradients_impl.py", line 158, in gradients
unconnected_gradients)
File "/home/long/Desktop/bachelor-thesis/venv/lib/python3.7/site-packages/tensorflow_core/python/ops/gradients_util.py", line 550, in _GradientsHelper
gradient_uid)
File "/home/long/Desktop/bachelor-thesis/venv/lib/python3.7/site-packages/tensorflow_core/python/ops/gradients_util.py", line 166, in _DefaultGradYs
with _maybe_colocate_with(y.op, gradient_uid, colocate_gradients_with_ops):
AttributeError: 'NoneType' object has no attribute 'op'
Following is my code for how the model is built:
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, LeakyReLU, Dropout, MaxPooling2D, Conv2DTranspose
from tensorflow.keras.layers import concatenate
from tensorflow.keras.optimizers import Adam
from unet.metrics import dice_coef, iou_coef, cce_dice_coef, weighted_loss, bce_dice_coef

fil_coef = 4

class UNet(UNetBase, DataHandlingBase):
    def __init__(self,
                 class_weights,
                 weight_url=None,
                 width=256,
                 height=256,
                 channel=3,
                 learning_rate=0.0005,
                 num_classes=2,
                 alpha=0.01,
                 dropout_rate=0.25):
        super().__init__(weight_url, width, height, channel, learning_rate, num_classes, alpha, dropout_rate)
        self.class_weights = class_weights
        if self.num_classes > 2:
            self.loss_function = weighted_loss(cce_dice_coef, weights_list=self.class_weights)
        elif self.num_classes == 2:
            self.loss_function = bce_dice_coef
        else:
            raise Exception("Invalid num class: ", self.num_classes)
        if weight_url:
            self.model = tf.keras.models.load_model(filepath=weight_url, custom_objects={
                'dice_coef': dice_coef,
                'iou_coef': iou_coef,
                'bce_dice_coef': bce_dice_coef,
                'loss_function': self.loss_function
            })
        else:
            model_input = Input((self.height, self.width, self.channel))
            c1 = Conv2D(filters=16 * fil_coef, kernel_size=(3, 3), padding='same')(model_input)
            c1 = BatchNormalization()(c1)
            c1 = LeakyReLU(self.alpha)(c1)
            c1 = Dropout(self.dropout_rate)(c1)
            c1 = Conv2D(filters=16 * fil_coef, kernel_size=(3, 3), padding='same')(c1)
            ...
            c9 = BatchNormalization()(c9)
            c9 = LeakyReLU(self.alpha)(c9)
            c9 = Dropout(self.dropout_rate)(c9)
            if self.num_classes == 2:
                output = Conv2D(1, kernel_size=(1, 1), activation='sigmoid')(c9)
            else:
                output = Conv2D(self.num_classes, kernel_size=(1, 1), activation='softmax')(c9)
            self.model = tf.keras.Model(inputs=[model_input], outputs=[output])
            self.reinitialize()

    def reinitialize(self):
        if self.num_classes > 2:
            self.model.compile(optimizer=Adam(lr=self.learning_rate),
                               loss=self.loss_function,
                               metrics=[dice_coef, iou_coef])
        else:
            self.model.compile(optimizer=Adam(lr=self.learning_rate),
                               loss=self.loss_function,
                               metrics=[dice_coef, iou_coef],
                               class_weight=self.class_weights)

    def fit(self, X, Y, x=None, y=None,
            batch_size=params["batch_size"],
            epochs=params["epochs"],
            validation_split=params["validation_split"],
            checkpoint_path="temp/models/model-{epoch:02d}-{val_loss:.2f}.h5",
            patience=params["patience"],
            min_delta=params["min_delta"]):
        checkpoint = ModelCheckpoint(checkpoint_path,
                                     verbose=0,
                                     save_best_only=False,
                                     monitor='val_loss',
                                     mode='auto')
        if x is None and y is None:
            history = self.model.fit(X, Y,
                                     validation_split=validation_split,
                                     batch_size=batch_size,
                                     epochs=epochs,
                                     shuffle=True,
                                     callbacks=[checkpoint])
            return history
        elif x is not None and y is not None:
            history = self.model.fit(X, Y,
                                     verbose=0,
                                     steps_per_epoch=X.shape[0] // batch_size,
                                     validation_data=[x, y],
                                     validation_steps=x.shape[0] // batch_size,
                                     batch_size=batch_size,
                                     epochs=epochs,
                                     shuffle=True)
            return history

    def save_weights(self, url):
        self.model.save(url)

    def predict(self, X):
        return self.model.predict(X)

    def summary(self):
        log.info(f"Learning rate {self.learning_rate} Alpha {self.alpha} Dropout Rate {self.dropout_rate}")
        self.model.summary()
And here is how my loss functions are coded:
import tensorflow as tf
import tensorflow.keras.backend as K

def weighted_loss(original_loss_function, weights_list):
    """
    Author: https://stackoverflow.com/questions/51793737/custom-loss-function-for-u-net-in-keras-using-class-weights-class-weight-not
    """
    def loss_function(true, pred):
        axis = -1  # if channels last
        # axis = 1  # if channels first
        # argmax returns the index of the element with the greatest value
        # done in the class axis, it returns the class index
        class_selectors = tf.cast(K.argmax(true, axis=axis), tf.int32)
        # if your loss is sparse, use only true as classSelectors
        # considering weights are ordered by class, for each class
        # true(1) if the class index is equal to the weight index
        class_selectors = [K.equal(i, class_selectors) for i in range(len(weights_list))]
        # casting boolean to float for calculations
        # each tensor in the list contains 1 where ground true class is equal to its index
        # if you sum all these, you will get a tensor full of ones.
        class_selectors = [K.cast(x, K.floatx()) for x in class_selectors]
        # for each of the selections above, multiply their respective weight
        weights = [sel * w for sel, w in zip(class_selectors, weights_list)]
        # sums all the selections
        # result is a tensor with the respective weight for each element in predictions
        weight_multiplier = weights[0]
        for i in range(1, len(weights)):
            weight_multiplier = weight_multiplier + weights[i]
        # make sure your originalLossFunc only collapses the class axis
        # you need the other axes intact to multiply the weights tensor
        loss = original_loss_function(true, pred)
        loss = loss * weight_multiplier
        return loss
    return loss_function

@tf.function
def cce_iou_coef(y_true, y_pred, smooth=1, cat_weight=1, iou_weight=1):
    return cat_weight * K.categorical_crossentropy(y_true, y_pred) + iou_weight * log_iou_coef(y_true, y_pred, smooth)

@tf.function
def cce_dice_coef(y_true, y_pred, smooth=1, cat_weight=1, dice_weight=1):
    return cat_weight * K.categorical_crossentropy(y_true, y_pred) + dice_weight * log_dice_coef(y_true, y_pred, smooth)

@tf.function
def bce_iou_coef(y_true, y_pred, smooth=1, cat_weight=1, iou_weight=1):
    return cat_weight * K.binary_crossentropy(y_true, y_pred) + iou_weight * log_iou_coef(y_true, y_pred, smooth)

@tf.function
def bce_dice_coef(y_true, y_pred, smooth=1, cat_weight=1, dice_weight=1):
    return cat_weight * K.binary_crossentropy(y_true, y_pred) + dice_weight * log_dice_coef(y_true, y_pred, smooth)

@tf.function
def log_iou_coef(y_true, y_pred, smooth=1):
    return -K.log(iou_coef(y_true, y_pred, smooth))

@tf.function
def log_dice_coef(y_true, y_pred, smooth=1):
    return -K.log(dice_coef(y_true, y_pred, smooth))

@tf.function
def iou_coef(y_true, y_pred, smooth=1):
    intersection = K.sum(K.abs(y_true * y_pred), axis=-1)
    union = K.sum(y_true, axis=-1) + K.sum(y_pred, axis=-1) - intersection
    iou = K.mean((intersection + smooth) / (union + smooth), axis=0)
    return iou

@tf.function
def dice_coef(y_true, y_pred, smooth=1):
    intersection = K.sum(y_true * y_pred, axis=-1)
    union = K.sum(y_true, axis=-1) + K.sum(y_pred, axis=-1)
    dice = K.mean((2. * intersection + smooth) / (union + smooth), axis=0)
    return dice
The error happens after I trained a binary model, saved it, and loaded it again for inference. As you can see, the loss functions imported for inference are the same as in the training phase, so I don't really understand why I get the error, and I don't understand what the error means, either.
You can try the compile=False parameter in your load_model call. This will remove any metadata related to the optimizer and loss function, and since you have a reinitialize function and don't need to resume training exactly where you stopped last time, that won't be a problem, I guess.
As you said it works with compile=False, I think the problem might be in your custom functions for the loss/optimizer etc. I can't pinpoint what exactly the problem is; you can try tf. instead of tf.keras.backend. if you want.
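A minimal sketch of the suggested workaround, assuming the same custom_objects as in the question:

model = tf.keras.models.load_model(
    filepath=weight_url,
    compile=False,  # skip restoring the saved optimizer/loss state
    custom_objects={
        'dice_coef': dice_coef,
        'iou_coef': iou_coef,
        'bce_dice_coef': bce_dice_coef,
    })
# re-compile manually if metrics or further training are needed
model.compile(optimizer=Adam(lr=0.0005),
              loss=bce_dice_coef,
              metrics=[dice_coef, iou_coef])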
