Error in using model.fit in Keras with custom train_step

BATCH_SIZE = 32
dataset = tf.data.Dataset.from_tensor_slices((question, ans)).shuffle(1000)
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

class EncoderDecoder(tf.keras.Model):
    def __init__(self, vocab_input=1000, vocab_output=1000, BATCH_SIZE=32):
        super().__init__()
        # Encoder
        self.encoder_vector = tokens.TextVectorization(vocab_input, output_sequence_length=24)
        self.encoder_embedding = tf.keras.layers.Embedding(vocab_input, 256)
        self.encoder_lstm = tf.keras.layers.LSTM(512, return_sequences=True)
        self.attention = tf.keras.layers.Attention()
        # Decoder
        self.decoder_vector = tokens.TextVectorization(vocab_output, output_sequence_length=20)
        self.decoder_embedding = tf.keras.layers.Embedding(vocab_output, 256)
        self.fc = tf.keras.layers.Dense(vocab_output)
        self.decoder_lstm = tf.keras.layers.LSTM(512, return_sequences=True)

    def loss_function(self, real, pred):
        mask = tf.math.logical_not(tf.math.equal(real, 0))
        loss_ = loss_object(real, pred)
        mask = tf.cast(mask, dtype=loss_.dtype)
        loss_ *= mask
        return tf.reduce_mean(loss_)

    def train_step(self, data):
        loss = 0
        input = data[0]
        targ = data[1]
        input = self.encoder_vector(input)
        targ = self.decoder_vector(targ)
        with tf.GradientTape() as tape:
            input = self.encoder_embedding(input)
            enc_output, enc_h, enc_c = self.encoder_lstm(input)
            attn_output = self.attention([enc_output, enc_h])
            dec_h = tf.concat([tf.expand_dims(attn_output, 1), dec_h], axis=-1)
            dec_input = tf.expand_dims([self.decoder_vector(['SOS'])] * BATCH_SIZE, 1)
            predictions = []
            for t in range(1, targ.shape[1]):
                dec_input = self.decoder_embedding(dec_input)
                dec_output, dec_h, dec_c = self.decoder_lstm(dec_input, initial_state=[dec_h, enc_c])
                predictions = self.fc(dec_output)
                loss += self.loss_function(targ[:, t], predictions)
                dec_input = tf.expand_dims(targ[:, t], 1)
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        self.compiled_metrics.update_state(targ, predictions)
        return {m.name: m.result() for m in self.metrics}

model = EncoderDecoder()
model.compile(optimizer='adam', metrics=[tf.keras.metrics.SparseCategoricalCrossentropy()])
model.fit(dataset, epochs=10)
Epoch 1/10
---------------------------------------------------------------------------
StagingError Traceback (most recent call last)
<ipython-input-153-573fadf7e010> in <module>()
----> 1 model.fit(dataset,epochs=10)
10 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
966 except Exception as e: # pylint:disable=broad-except
967 if hasattr(e, "ag_error_metadata"):
--> 968 raise e.ag_error_metadata.to_exception(e)
969 else:
970 raise
StagingError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:571 train_function *
outputs = self.distribute_strategy.run(
<ipython-input-151-46787da81d42>:33 train_step *
input = self.encoder_vector(input)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py:897 __call__ **
self._maybe_build(inputs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py:2416 _maybe_build
self.build(input_shapes) # pylint:disable=not-callable
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/layers/preprocessing/text_vectorization.py:528 build
if self._split is not None and not input_shape[1] == 1: # pylint: disable=g-comparison-negation
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/tensor_shape.py:870 __getitem__
return self._dims[key].value
IndexError: list index out of range
I am trying to override the Keras model's train_step in my custom class.
question is a set of English text questions and ans holds their respective answers. They are of varying length and are not padded, since TextVectorization will add the padding. So dataset contains question/answer pairs. I then batch this and call model.fit.
The error occurs when assigning input = data[0] and targ = data[1] in train_step. I need help on how to assign the input and output for a batch in the train step.
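For the unpacking itself, the standard Keras pattern is x, y = data inside train_step. Below is a minimal, self-contained sketch of that pattern; it assumes (beyond the posted code) that the dataset yields (question, answer) string pairs as built above, and that the rank-1 string batch is what trips TextVectorization, whose build indexes input_shape[1] in this TF version (hence the IndexError), so the strings are expanded to shape (batch, 1) first. The Seq2SeqDemo model, vocabulary sizes, and toy corpus are illustrative only:

import numpy as np
import tensorflow as tf

# Illustrative vectorizer; adapt() must run on the corpus before training.
vectorizer = tf.keras.layers.experimental.preprocessing.TextVectorization(
    max_tokens=1000, output_sequence_length=24)
vectorizer.adapt(np.array(["a toy corpus", "for the sketch"]))

class Seq2SeqDemo(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.embed = tf.keras.layers.Embedding(1000, 8)
        self.out = tf.keras.layers.Dense(1000)

    def call(self, x):
        return self.out(self.embed(x))

    def train_step(self, data):
        # Keras hands each dataset element through unchanged, so a dataset
        # built from (question, ans) arrives here as an (inputs, targets) tuple.
        inp, targ = data
        # Expand the rank-1 string batch to (batch, 1) before vectorizing.
        inp = vectorizer(tf.expand_dims(inp, -1))
        targ = vectorizer(tf.expand_dims(targ, -1))
        with tf.GradientTape() as tape:
            pred = self(inp, training=True)
            loss = self.compiled_loss(targ, pred)
        grads = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
        return {"loss": loss}

model = Seq2SeqDemo()
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))

In practice it is usually cleaner to adapt the vectorizers on the corpus and map them over the tf.data pipeline before calling fit, rather than vectorizing inside train_step.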

Related

Layer "triplet_snn" expects 3 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor: shape=(1, 224, 224, 3)

I just need a little help with my project. I've already written the code, but I am facing an error in it. I am using a few-shot learning technique, the triplet neural network. The triplet neural network (TNN) is a horizontal concatenation of three identical Convolutional Neural Networks (with shared parameters) that is trained on triplets of inputs. The input triplet consists of an anchor instance, a positive instance (of the same class as the anchor), and a negative instance (of a different class from the anchor). The network is then trained to learn a triplet-loss embedding function. Computing the triplet loss requires three training examples; each triplet is formed by intentionally selecting training examples such that each triplet has:
• a reference image, called the anchor image
• an image having the same label as the anchor, called the positive image
• an image having a different label than the anchor, called the negative image.
The TNN learns to create a k-dimensional feature-vector representation of images such that similar images lie closer together in the k-dimensional embedding space.
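As a quick reference, the objective described here is the standard triplet loss; a minimal numeric sketch, matching the squared-distance hinge that the TripletLossLayer in the code below implements:

import numpy as np

def triplet_loss(ap_dist, an_dist, margin=0.1):
    # Pull anchor-positive distances below anchor-negative distances by at least `margin`.
    return np.sum(np.maximum(ap_dist ** 2 - an_dist ** 2 + margin, 0.0))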
embedding_dimension = 128

from tensorflow.keras.applications.vgg16 import VGG16

pre_trained_vgg16 = tf.keras.applications.VGG16(
    input_shape=(size, size, 3),
    include_top=False,
    weights="imagenet",
    input_tensor=None,
    pooling=None,
    classes=1000,
    classifier_activation=None
)
pre_trained_vgg16.save('vgg16.h5')
pre_trained_vgg16 = tf.keras.models.load_model('vgg16.h5')
def build_embedding_network(embedding_dimension):
    embedding_network = pre_trained_vgg16
    embedding_network.trainable = False
    x = embedding_network.output
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Flatten()(x)
    x = tf.keras.layers.Dropout(0.2)(x)
    x = tf.keras.layers.Dense(2 * embedding_dimension, activation='sigmoid')(x)
    x = tf.keras.layers.Dense(embedding_dimension, activation='sigmoid')(x)
    embedding_network = tf.keras.Model(embedding_network.input, x, name="embedding_network")
    return embedding_network

def build_metric_network(single_embedding_dim):
    input1 = tf.keras.layers.Input((single_embedding_dim), name="input1")
    input2 = tf.keras.layers.Input((single_embedding_dim), name="input2")
    embedded_distance = tf.keras.layers.Subtract(name='subtract_embeddings')([input1, input2])
    embedded_distance = tf.keras.layers.Lambda(
        lambda x: K.sqrt(K.sum(K.square(x), axis=-1, keepdims=True)),
        name='euclidean_distance')(embedded_distance)
    metric_network = tf.keras.Model(inputs=[input1, input2],
                                    outputs=[embedded_distance],
                                    name="metric_network")
    return metric_network
class TripletLossLayer(tf.keras.layers.Layer):
    def __init__(self, margin, **kwargs):
        self.margin = margin
        super(TripletLossLayer, self).__init__(**kwargs)

    def triplet_loss(self, inputs):
        ap_dist, an_dist = inputs
        # square
        ap_dist2 = K.square(ap_dist)
        an_dist2 = K.square(an_dist)
        return K.sum(K.maximum(ap_dist2 - an_dist2 + self.margin, 0))

    def call(self, inputs):
        loss = self.triplet_loss(inputs)
        self.add_loss(loss)
        return loss

    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'margin': self.margin
        })
        return config
def build_triplet_snn(input_shape, embedding_network, metric_network, margin=0.1):
    # Define the tensors for the three input images
    anchor_input = tf.keras.layers.Input(input_shape, name="anchor_input")
    positive_input = tf.keras.layers.Input(input_shape, name="positive_input")
    negative_input = tf.keras.layers.Input(input_shape, name="negative_input")
    # Generate the embeddings (feature vectors) for the three images
    embedding_a = embedding_network(anchor_input)
    embedding_p = embedding_network(positive_input)
    embedding_n = embedding_network(negative_input)
    ap_dist = metric_network([embedding_a, embedding_p])
    an_dist = metric_network([embedding_a, embedding_n])
    # Triplet loss layer
    loss_layer = TripletLossLayer(margin=margin, name='TripletLossLayer')([ap_dist, an_dist])
    # Compute the concatenated pairs
    all_concatenated = tf.keras.layers.Concatenate(axis=-1, name="All-Embeddings")([embedding_a, embedding_p, embedding_n])
    # Connect the inputs with the outputs
    triplet_snn = tf.keras.Model(inputs=[anchor_input, positive_input, negative_input],
                                 outputs=[loss_layer, all_concatenated],
                                 name="triplet_snn")
    # Return the model
    return triplet_snn

embedding_network = build_embedding_network(embedding_dimension)
metric_network = build_metric_network(embedding_dimension)

triplet_snn = build_triplet_snn(
    input_shape=(size, size, 3),
    embedding_network=embedding_network,
    metric_network=metric_network,
    margin=0.1
)
learning_rate = 0.0001
epochs = 5

class TripletDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, triplet_dataset, shuffle=False):
        self.triplet_dataset = triplet_dataset
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        return len(self.triplet_dataset)

    def __getitem__(self, index):
        return triplet_dataset[index][0]
        # return (np.array(triplet_dataset[index][0]).reshape(1,224,224,3))

    def on_epoch_end(self):
        if self.shuffle == True:
            random.shuffle(self.triplet_dataset)

data_gen = TripletDataGenerator(triplet_dataset)

filepath = 'C:\\Users\\Y540\\Desktop\\Retinal Disease\\TrainedSNN\\temp\\weights.{epoch}'

save_model_weights_at_every_epoch = tf.keras.callbacks.ModelCheckpoint(
    filepath,
    monitor="loss",
    verbose=1,
    save_best_only=False,
    save_weights_only=True,
    mode="auto",
    save_freq="epoch"
)

optimizer = tf.keras.optimizers.Adam(lr=learning_rate)
triplet_snn.compile(loss=None, optimizer=optimizer, run_eagerly=True)

%%time
history = triplet_snn.fit(data_gen, epochs=epochs, verbose=1, callbacks=[save_model_weights_at_every_epoch])
The error I am getting is:
Epoch 1/5
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<timed exec> in <module>
~\anaconda3\lib\site-packages\keras\utils\traceback_utils.py in error_handler(*args, **kwargs)
65 except Exception as e: # pylint: disable=broad-except
66 filtered_tb = _process_traceback_frames(e.__traceback__)
---> 67 raise e.with_traceback(filtered_tb) from None
68 finally:
69 del filtered_tb
~\anaconda3\lib\site-packages\keras\engine\input_spec.py in assert_input_compatibility(input_spec, inputs, layer_name)
198
199 if len(inputs) != len(input_spec):
--> 200 raise ValueError(f'Layer "{layer_name}" expects {len(input_spec)} input(s),'
201 f' but it received {len(inputs)} input tensors. '
202 f'Inputs received: {inputs}')
ValueError: Layer "triplet_snn" expects 3 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor: shape=(1, 224, 224, 3), dtype=float32, numpy=
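The error message itself points at the generator: fit is receiving one tensor per step where the model declares three inputs. A minimal sketch of a __getitem__ that feeds all three branches, assuming each triplet_dataset entry holds an (anchor, positive, negative) image triple (names illustrative):

import numpy as np
import tensorflow as tf

class TripletDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, triplet_dataset):
        self.triplet_dataset = triplet_dataset

    def __len__(self):
        return len(self.triplet_dataset)

    def __getitem__(self, index):
        anchor, positive, negative = self.triplet_dataset[index]
        # One array per declared model input, each with a batch dimension;
        # Keras matches them positionally to
        # [anchor_input, positive_input, negative_input]. No target array is
        # needed: the loss is added inside TripletLossLayer and the model
        # was compiled with loss=None.
        return [np.expand_dims(anchor, 0),
                np.expand_dims(positive, 0),
                np.expand_dims(negative, 0)]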

Torch Geometric - RuntimeError: mat1 and mat2 shapes cannot be multiplied (1479x1 and 1479x1024)

I am trying to create a GNN that models a protein. However, I am running into an error with GraphConv (I get the same error with GCNConv). I do not understand why I am getting this error when the shapes should be able to be multiplied. I think the error must have something to do with the custom dataset I created, but I am not 100% sure. Please let me know if you have had a similar issue or know how to fix this. Thank you.
EDIT: Even if I change embedding_size to 1479, I still get: RuntimeError: mat1 and mat2 shapes cannot be multiplied (1479x1 and 1479x1479).
Custom dataset:
class ProteinDataset(geom_data.Dataset):
    def __init__(self, root, transform=None, pre_transform=None):
        # root = where the data set is stored
        super(ProteinDataset, self).__init__(root, transform, pre_transform)
        self.root = root

    @property
    def raw_file_names(self):
        return os.listdir(f'{self.root}/raw')

    @property
    def processed_file_names(self):
        inxs = []
        for pdb in self.raw_paths:
            inxs.append(pdb.split('/')[-1].split('.p')[0])
        return [f'{i}.pt' for i in inxs]

    def download(self):
        pass

    def process(self):
        for pdb in self.raw_paths:
            try:
                mol_obj = Chem.rdmolfiles.MolFromPDBFile(pdb)
            except AttributeError:
                os.remove(pdb)
                continue
            # Get node features
            node_feats = self._get_node_features(mol_obj).reshape([-1, 1])
            # Get edge features
            edge_feats = self._get_edge_features(mol_obj).reshape([-1, 1])
            # Get adjacency info
            edge_index = self._get_adjacency_info(mol_obj)
            label = self._get_labels(pdb)
            # Create Data object
            data = geom_data.Data(x=node_feats,
                                  edge_index=edge_index,
                                  edge_attr=edge_feats,
                                  y=label)
            i = pdb.split('/')[-1].split('.p')[0]
            torch.save(data, os.path.join(self.processed_dir, f'{i}.pt'))

    def _get_node_features(self, mol):
        all_node_feats = []
        for atom in mol.GetAtoms():
            all_node_feats.append(atom.GetMass())
        all_node_feats = np.asarray(all_node_feats)
        return torch.tensor(all_node_feats, dtype=torch.float)

    def _get_edge_features(self, mol):
        all_edge_feats = []
        dists = Chem.rdmolops.Get3DDistanceMatrix(mol)
        # CA-CA Distances
        for bond in mol.GetBonds():
            begin = bond.GetBeginAtomIdx()
            end = bond.GetEndAtomIdx()
            all_edge_feats.append(dists[begin, end])
        all_edge_feats = np.asarray(all_edge_feats)
        return torch.tensor(all_edge_feats, dtype=torch.float)

    def _get_adjacency_info(self, mol):
        adj_matrix = Chem.rdmolops.GetAdjacencyMatrix(mol)
        row, col = np.where(adj_matrix)
        coo = np.array(list(zip(row, col)))
        coo = np.reshape(coo, (2, -1))
        return torch.tensor(coo, dtype=torch.long)

    def _get_labels(self, fn):
        with open(fn, 'r') as f:
            label = float(f.readline())
        label = np.asarray([label])
        return torch.tensor(label, dtype=torch.float)

    def len(self):
        return len(self.raw_paths)

    def get(self, inx):
        data = torch.load(self.processed_paths[inx])
        return data
Model:
class GNN(torch.nn.Module):
    def __init__(self, feature_size):
        super(GNN, self).__init__()
        embedding_size = 1024
        # GNN Layers
        self.conv1 = GraphConv(feature_size, embedding_size)
        self.head1 = Linear(embedding_size*3, embedding_size)
        self.pool1 = TopKPooling(embedding_size, ratio=0.8)
        self.conv2 = GraphConv(embedding_size, embedding_size)
        self.head2 = Linear(embedding_size*3, embedding_size)
        self.pool2 = TopKPooling(embedding_size, ratio=0.5)
        self.conv3 = GraphConv(embedding_size, embedding_size)
        self.head3 = Linear(embedding_size*3, embedding_size)
        self.pool3 = TopKPooling(embedding_size, ratio=0.2)
        # Linear Layers
        self.fc1 = Linear(embedding_size*2, 1024)
        self.fc2 = Linear(1024, 128)
        self.fc3 = Linear(128, 1)

    def forward(self, x, edge_attr, edge_index, batch_index):
        # First block
        x = self.conv1(x, edge_index).relu()
        x = self.head1(x)
        x, edge_index, edge_attr, batch_index, _, _ = self.pool1(x, edge_index, None, batch_index)
        x1 = torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1)
        # Second block
        x = self.conv2(x, edge_index).relu()
        x = self.head2(x)
        x, edge_index, edge_attr, batch_index, _, _ = self.pool2(x, edge_index, None, batch_index)
        x2 = torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1)
        # Third block
        x = self.conv3(x, edge_index).relu()
        x = self.head3(x)
        x, edge_index, edge_attr, batch_index, _, _ = self.pool3(x, edge_index, None, batch_index)
        x3 = torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1)
        # Combine pooled vectors (element-wise sum)
        x = x1 + x2 + x3
        # Apply Linear Layers
        x = self.fc1(x).relu()
        x = self.fc2(x).relu()
        x = self.fc3(x)
        return x
Training:
device = torch.device('cuda')

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

# Loading the dataset
train_set = ProteinDataset(root='data/lys50_2/train')
test_set = ProteinDataset(root='data/lys50_2/test')
print('Shape of input:', train_set[0].x.shape[0])

# Loading the model
model = GNN(feature_size=train_set[0].x.shape[0])
model = model.to(device)
print(f'Number of parameters: {count_parameters(model)}')
print(model)

# Loss and Optimizer
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.00005)
print(optimizer)

# Prepare for training
train_loader = DataLoader(train_set, batch_size=1, shuffle=True)
test_loader = DataLoader(test_set, batch_size=1, shuffle=False)

def train(m, opt):
    loss_sum = 0.0
    for _, batch in enumerate(train_loader):
        # Use GPU
        batch.to(device)
        # Reset grad
        opt.zero_grad()
        # Pass node features and connections
        pred = m(batch.x.float(),
                 batch.edge_attr.float(),
                 batch.edge_index,
                 batch.batch)
        # Calculate loss and gradients
        loss = loss_fn(pred, batch.y)
        loss.backward()
        loss_sum += loss.item()
        # Update using the gradients
        opt.step()
    return loss_sum / len(train_loader)

def validate(m):
    loss_sum = 0.0
    for _, batch in enumerate(test_loader):
        # Use GPU
        batch.to(device)
        # No grad
        with torch.no_grad():
            pred = m(batch.x.float(),
                     batch.edge_attr.float(),
                     batch.edge_index,
                     batch.batch)
        # Calculate loss
        loss = loss_fn(pred, batch.y)
        loss_sum += loss.item()
    return loss_sum / len(test_loader)

model.zero_grad()
optimizer.zero_grad()

# Loop for training
for i in range(101):
    loss = train(model, optimizer)
    if (i % 10 == 0):
        loss_v = validate(model)
        print(i, loss, loss_v)
    else:
        print(i, loss)
Error when running training:
Traceback (most recent call last):
File "/home/spencer/sh3/gnn/./train.py", line 79, in <module>
loss = train(model,optimizer)
File "/home/spencer/sh3/gnn/./train.py", line 44, in train
pred = m(batch.x.float(),
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/feig/s1/spencer/sh3/gnn/model2.py", line 32, in forward
x = self.conv1(x, edge_index).relu()
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch_geometric/nn/conv/graph_conv.py", line 71, in forward
out = self.lin_rel(out)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch_geometric/nn/dense/linear.py", line 109, in forward
return F.linear(x, self.weight, self.bias)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/functional.py", line 1848, in linear
return torch._C._nn.linear(input, weight, bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1479x1 and 1479x1024)
The error tells you that the input shapes don't match.
You could reshape the input in the forward method, e.g. x = x.view(1, 1479), but make sure that this is really what you need: this error usually indicates a wrongly shaped dataset or passing the wrong input.
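In this particular case the shapes in the message are telling: x arrives as (1479, 1), i.e. 1479 nodes with a single scalar feature each (the dataset reshapes the node features with .reshape([-1, 1])), while the model was constructed with feature_size=train_set[0].x.shape[0], which is the node count rather than the feature count. A sketch of the likely fix, assuming one mass feature per node as in the dataset above:

# feature_size should be the per-node feature count, not the number of nodes.
model = GNN(feature_size=train_set[0].x.shape[1])  # x is [num_nodes, 1], so this is 1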

"Got OperatorNotAllowedInGraphError: iterating over tf.Tensor" while not obviously iterating over tensor

OS: Manjaro Linux x64
CUDA: 11.0.3
TF/Keras: 2.4(.1)
Py: 3.8
Hello,
I'm trying to build some kind of W-VAE-GAN. I keep running into the very same error over and over again, and I already had this problem with Keras 2.3, though interestingly NOT with TF/K 2.2. Unfortunately I need to use Keras 2.4 because I'm supposed to run my code on our university server with these exact TF/K and CUDA versions. At this point I'm just trying to make sure my code works as intended.
The error I get is the following, with the exact lines commented in my code:
...
Epoch 1/20
Traceback (most recent call last):
File "/home/peer/Programmierkram/BA/Metal_GAN/wvaegan.py", line 282, in <module>
gan.fit(gen_trans, batch_size=batch_size, epochs=epochs, callbacks=[callback])
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py", line 1100, in fit
tmp_logs = self.train_function(iterator)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 828, in __call__
result = self._call(*args, **kwds)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 871, in _call
self._initialize(args, kwds, add_initializers_to=initializers)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 725, in _initialize
self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 2969, in _get_concrete_function_internal_garbage_collected
graph_function, _ = self._maybe_define_function(args, kwargs)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3361, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3196, in _create_graph_function
func_graph_module.func_graph_from_py_func(
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 990, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 634, in wrapped_fn
out = weak_wrapped_fn().__wrapped__(*args, **kwds)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 977, in wrapper
raise e.ag_error_metadata.to_exception(e)
tensorflow.python.framework.errors_impl.OperatorNotAllowedInGraphError: in user code:
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:805 train_function *
return step_function(self, iterator)
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:795 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
return fn(*args, **kwargs)
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:788 run_step **
outputs = model.train_step(data)
/home/peer/Programmierkram/BA/Metal_GAN/wvaegan.py:190 train_step
z_mean, z_log_var, z = self.encoder(clip_img) # <------ WHY NO ERROR HERE?
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:505 __iter__
self._disallow_iteration()
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:498 _disallow_iteration
self._disallow_when_autograph_enabled("iterating over `tf.Tensor`")
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:474 _disallow_when_autograph_enabled
raise errors.OperatorNotAllowedInGraphError(
OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed: AutoGraph did convert this function. This might indicate you are trying to use an unsupported feature.
What I do not understand is how this error comes about in the first place, as I successfully executed the VAE part using TF 2.2 without any error like this, and, more importantly, there is no iteration over any tensor obvious to me. Even if I comment out the for-loop in my train_step, the same error occurs a few lines later in the same context. I have also tried decorating def train_step() with @tf.function, yet nothing changed.
The code I used is the following:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow.keras.backend as K
from keras.preprocessing.image import ImageDataGenerator
import itertools
import scipy.io
import matplotlib.pyplot as plt
import matplotlib.image as PIL

runOnGPU = 0

if runOnGPU == 1:
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
        except RuntimeError as e:
            print(e)
else:
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

path_clipped_train = os.path.join('./sinograms/clip')
path_transparent_train = os.path.join('./sinograms/transparent')

img_width, img_height = 512, 512
bottleneck = 1024 * 2
filters = (1024, 512, 256, 64)
filter_size = (3, 3, 3, 3)
batch_size = 4
epochs = 20
dsc_steps = 1
gp_w = 10.0
beta_v = 2
learning_rate = 125e-5
latent_dim = 2
input_shape = (1, img_width, img_height, 1)

dataset_gen1 = ImageDataGenerator(rescale=1 / 255, dtype="float32")
dataset_gen2 = ImageDataGenerator(rescale=1 / 255, dtype="float32")

gen_trans = dataset_gen1.flow_from_directory(path_transparent_train,
                                             target_size=(img_width, img_height),
                                             color_mode='grayscale',
                                             classes=[''],
                                             class_mode=None,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             )
gen_clip = dataset_gen2.flow_from_directory(path_clipped_train,
                                            target_size=(img_width, img_height),
                                            color_mode='grayscale',
                                            classes=[''],
                                            class_mode=None,
                                            batch_size=batch_size,
                                            shuffle=False,
                                            )

class Sampling(layers.Layer):
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon

def get_encoder():
    encoder_inputs = keras.Input(shape=input_shape[1:], name="encoder_input")
    enc = encoder_inputs
    for (numFilters, szFilters) in zip(filters, filter_size):
        enc = layers.Conv2D(numFilters, szFilters, activation='relu', strides=2, padding='same')(enc)
        enc = layers.BatchNormalization()(enc)
        enc = layers.Dropout(0.2)(enc)
    conv_shape = K.int_shape(enc)[1:]
    enc = layers.Flatten()(enc)
    enc = layers.Dense(bottleneck, activation='relu', name="bottleneck")(enc)
    enc = layers.BatchNormalization()(enc)
    z_mean = layers.Dense(latent_dim, name="z_mean")(enc)
    z_log_var = layers.Dense(latent_dim, name="z_log_var")(enc)
    latent_z = Sampling()([z_mean, z_log_var])
    encoder_model = keras.models.Model(encoder_inputs, latent_z, name="encoder")
    return encoder_model, conv_shape

enc_model, conv_shape = get_encoder()
enc_model.summary()

def get_decoder():
    latent_input = keras.Input(shape=(latent_dim,))
    dec = layers.Dense(conv_shape[0] * conv_shape[1] * conv_shape[2], activation='relu')(latent_input)
    dec = layers.Reshape(conv_shape)(dec)
    for (numFilters, szFilters) in zip(reversed(filters), reversed(filter_size)):
        dec = layers.Conv2DTranspose(numFilters, szFilters, activation='relu', strides=2, padding='same')(dec)
        dec = layers.BatchNormalization()(dec)
        dec = layers.Dropout(0.2)(dec)
    decoder_outputs = layers.Conv2DTranspose(1, 3, activation='relu', padding='same')(dec)
    decoder_model = keras.models.Model(latent_input, decoder_outputs, name="decoder")
    return decoder_model

dec_model = get_decoder()
dec_model.summary()

def get_discriminator():
    dscr_input = keras.Input(shape=input_shape[1:])
    dscr = dscr_input
    for numFilters in filters:
        dscr = layers.Conv2D(numFilters, kernel_size=5, activation='relu', strides=2, padding='same')(dscr)
    dscr = layers.Flatten()(dscr)
    dscr = layers.Dense(1, activation="relu", name="dsc_end")(dscr)
    discriminator_model = keras.models.Model(dscr_input, dscr, name="discriminator")
    return discriminator_model

dsc_model = get_discriminator()
dsc_model.summary()

class GAN(keras.Model):
    def __init__(self,
                 discriminator,
                 encoder,
                 decoder,
                 latent_dim,
                 dsc_steps=dsc_steps,
                 gp_w=gp_w,
                 ):
        super(GAN, self).__init__()
        self.discriminator = discriminator
        self.encoder = encoder
        self.decoder = decoder
        self.latent_dim = latent_dim
        self.dsc_steps = dsc_steps
        self.gp_w = gp_w

    def compile(self,
                dsc_optimizer, enc_optimizer, dec_optimizer,
                dsc_loss_fn, enc_loss_fn, dec_loss_fn):
        super(GAN, self).compile()
        self.dsc_optimizer = dsc_optimizer
        self.enc_optimizer = enc_optimizer
        self.dec_optimizer = dec_optimizer
        self.dsc_loss_fn = dsc_loss_fn
        self.enc_loss_fn = enc_loss_fn
        self.dec_loss_fn = dec_loss_fn

    def call(self, data):
        ds = self.discriminator(data)
        e = self.encoder(data)
        d = self.decoder(e)

    def gradient_penalty(self, batch_size, ref_img, gen_img):
        alpha = tf.random.normal([batch_size, 1, 1, 1], 0.0, 1.0)
        diff = gen_img - ref_img
        interpolated = ref_img + alpha * diff
        with tf.GradientTape() as gp_tape:
            gp_tape.watch(interpolated)
            pred = self.discriminator(interpolated, training=True)
        grads = gp_tape.gradient(pred, [interpolated])[0]
        norm = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1, 2, 3]))
        gp = tf.reduce_mean((norm - 1.0) ** 2)
        return gp

    # @tf.function  # doesn't make any difference if decorating with that
    def train_step(self, data):
        trans_img = data
        clip_img = data
        batch_size = tf.shape(trans_img)[:1]
        for i in range(self.dsc_steps):
            with tf.GradientTape() as tape:
                z_mean, z_log_var, z = self.encoder(clip_img)  # <------ ERROR HERE
                gen_img = self.decoder(z)
                gen_logits = self.discriminator(gen_img)
                ref_logits = self.discriminator(trans_img)
                dsc_cost = self.dsc_loss_fn(ref_img=ref_logits, gen_img=gen_logits)
                gp = self.gradient_penalty(batch_size, trans_img, gen_img)
                dsc_loss = dsc_cost + gp * self.gp_w
            dsc_gradient = tape.gradient(dsc_loss, self.discriminator.trainable_variables)
            self.dsc_optimizer.apply_gradients(zip(dsc_gradient, self.discriminator.trainable_variables))
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(clip_img)  # <------ ERROR ALSO HERE IF dsc_steps = 0
            gen_img = self.decoder(z)
            gen_img_logits = self.discriminator(gen_img)
            dec_loss = self.dec_loss_fn(gen_img_logits)
            kl_loss = self.kl_loss(z_mean, z_log_var)
        enc_gradient = tape.gradient(kl_loss, self.encoder.trainable_variables)
        self.enc_optimizer.apply_gradients(zip(enc_gradient, self.encoder.trainable_variables))
        dec_gradient = tape.gradient(dec_loss, self.decoder.trainable_variables)
        self.dec_optimizer.apply_gradients(zip(dec_gradient, self.decoder.trainable_variables))
        return {"dsc_loss": dsc_loss, "KL-Loss": kl_loss, "dec_loss": dec_loss}

class GANMonitor(keras.callbacks.Callback):
    def __init__(self, num_img=6, latent_dim=latent_dim):
        self.num_img = num_img
        self.latent_dim = latent_dim

    def on_epoch_end(self, epoch, logs=None):
        generated_images = self.model.decoder()
        generated_images = (generated_images * 127.5) + 127.5
        for i in range(self.num_img):
            img = generated_images[i].numpy()
            img = keras.preprocessing.image.array_to_img(img)
            img.save("generated_img_{i}_{epoch}.png".format(i=i, epoch=epoch))

encoder_optimizer = keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5, beta_2=0.9)
decoder_optimizer = keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5, beta_2=0.9)
discriminator_optimizer = keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5, beta_2=0.9)

def discriminator_loss(real_img, fake_img):
    real_loss = tf.reduce_mean(real_img)
    fake_loss = tf.reduce_mean(fake_img)
    return fake_loss - real_loss

def generator_loss(fake_img):
    return -tf.reduce_mean(fake_img)

def kl_loss(z_mean, z_log_var):
    kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
    kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
    return beta_v * kl_loss

def reconstruction_loss(data, reconstruction):
    rec_loss = tf.reduce_mean(
        tf.reduce_sum(keras.losses.mse(data, reconstruction), axis=(1, 2))
    )
    return rec_loss

callback = GANMonitor(num_img=3, latent_dim=latent_dim)

gan = GAN(
    discriminator=dsc_model,
    encoder=enc_model,
    decoder=dec_model,
    latent_dim=latent_dim,
    dsc_steps=dsc_steps,
)

gan.compile(
    dsc_optimizer=discriminator_optimizer,
    enc_optimizer=encoder_optimizer,
    dec_optimizer=decoder_optimizer,
    dsc_loss_fn=discriminator_loss,
    enc_loss_fn=kl_loss,
    dec_loss_fn=generator_loss,
)

gan.fit(gen_trans, batch_size=batch_size, epochs=epochs, callbacks=[callback])
I'd be very thankful for any help, because I haven't been able to find or work out any solution to this. I've read that in TF 2.5 some errors like this shouldn't occur anymore, but using TF 2.5 is not an option.
I found the problem.
In the original Keras example for a VAE, the encoder ends in three layers, namely z_mean, z_log_var and latent_z. While it was possible to access all terminal layers of a model in TF 2.2, as I did in my train_step
z_mean, z_log_var, z = encoder(data)
only (latent_)z is emitted, as defined in the encoder model initialization.
By defining the model with
encoder_model = keras.models.Model(encoder_inputs, ([z_mean, z_log_var, latent_z]), name="encoder")
i.e. with a list of output layers, all z* are accessible.
I assume that unpacking multiple variables from a model that has a single output tensor, as in
x1, x2, x3 = model(data)
results in an iteration over that tensor, something like
for i in x:
    x{i} = model(data)
which is the only explanation for an iteration over a tensor I can think of.
However, reading the code might be useful; I'll try to remember that.
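A condensed, self-contained sketch of that fix (toy layer sizes and names, for illustration only; the point is the list of outputs):

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

inp = keras.Input(shape=(4,))
h = layers.Dense(8, activation="relu")(inp)
z_mean = layers.Dense(2, name="z_mean")(h)
z_log_var = layers.Dense(2, name="z_log_var")(h)
z = layers.Lambda(lambda p: p[0] + tf.exp(0.5 * p[1]) * tf.random.normal(tf.shape(p[0])))([z_mean, z_log_var])

# With a list of outputs, calling the model yields three tensors, so the
# unpacking in train_step no longer iterates over a single tf.Tensor.
encoder = keras.Model(inp, [z_mean, z_log_var, z], name="encoder")
m, lv, zz = encoder(tf.zeros((1, 4)))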

'ValueError : No gradients provided for any variable' in Tensorflow 2.2.0

I am fine-tuning a BERT model from Hugging Face transformers for a Named Entity Recognition task. The input to the model is a single word and the output is the tag of that word. I have created a custom generator function (data_generator) from which I get data while training. I have frozen the BERT layer in training mode and added some layers on top of it to predict the tag of the given word.
The code is this:
from numpy import array
from tensorflow.keras.layers import Input, Dense, Activation, Dropout, LSTM, GlobalMaxPool1D
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from transformers import BertTokenizer, TFBertModel, BertConfig

# Load the BERT tokenizer.
print('Loading BERT tokenizer...')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

bert = 'bert-base-uncased'
config = BertConfig(dropout=0.2, attention_dropout=0.2)
config.output_hidden_states = False
transformer_model = TFBertModel.from_pretrained(bert, config=config)

input_ids_in = Input(shape=(max_len,), name='input_token', dtype='int32')
input_masks_in = Input(shape=(max_len,), name='masked_token', dtype='int32')

embedding_layer = transformer_model(input_ids_in, attention_mask=input_masks_in)[0]
X = LSTM(50, return_sequences=True)(embedding_layer)
X = GlobalMaxPool1D()(X)
X = Dense(50, activation='relu')(X)
X = Dropout(0.2)(X)
X = Dense(num_labels, activation='softmax')(X)

model = Model(inputs=[input_ids_in, input_masks_in], outputs=X)

for layer in model.layers[:3]:
    layer.trainable = False

model.compile(loss='categorical_crossentropy', optimizer='adam')

train_gen = data_generator(sentences, tags, tag2ix, max_len, number_sent_per_batch)
model.fit(train_gen, epochs=1, steps_per_epoch=steps, verbose=1)
The generator function is this:
# data generator, intended to be used in a call to model.fit_generator()
def data_generator(sentences, tags, tag2ix, max_len, num_samples_per_batch):
    X1, X2, y = list(), list(), list()
    n = 0
    while 1:
        for i in range(len(sentences)):
            n += 1
            encoded_dict = tokenizer.encode_plus(
                sentences[i],                 # Sentence to encode.
                add_special_tokens=True,      # Add '[CLS]' and '[SEP]'
                max_length=max_len,           # Pad & truncate all sentences.
                pad_to_max_length=True,
                return_attention_mask=True,   # Construct attn. masks.
                # return_tensors='tf',
                truncation=True
            )
            for idx in encoded_dict['input_ids']:
                # idx = [encoded_dict['input_ids'][j]]
                # Add the encoded sentence to the list.
                idx = pad_sequences([[idx]], maxlen=max_len, padding='post')[0]
                X1.append(idx)
            for att_mask in encoded_dict['attention_mask']:
                # att_mask = [encoded_dict['attention_mask'][j]]
                # And its attention mask (simply differentiates padding from non-padding).
                att_mask = pad_sequences([[att_mask]], maxlen=max_len, padding='post')[0]
                X2.append(att_mask)
            for k in tags[i]:
                out = to_categorical([tag2ix[k]], num_classes=num_labels)[0]
                y.append(out)
            if n == num_samples_per_batch:
                yield [[array(X1), array(X2)], array(y)]
                X1, X2, y = list(), list(), list()
                n = 0
The error I am getting is this:
--------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-64-a19b18bb0230> in <module>()
----> 1 model.fit(train_gen, epochs=1, steps_per_epoch=steps, verbose=1)
2 #, validation_data=val_gen, validation_steps=val_steps
10 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
966 except Exception as e: # pylint:disable=broad-except
967 if hasattr(e, "ag_error_metadata"):
--> 968 raise e.ag_error_metadata.to_exception(e)
969 else:
970 raise
ValueError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:571 train_function *
outputs = self.distribute_strategy.run(
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:951 run **
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2290 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2649 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:541 train_step **
self.trainable_variables)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:1804 _minimize
trainable_variables))
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:521 _aggregate_gradients
filtered_grads_and_vars = _filter_grads(grads_and_vars)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:1219 _filter_grads
([v.name for _, v in grads_and_vars],))
ValueError: No gradients provided for any variable: ['lstm_2/lstm_cell_2/kernel:0', 'lstm_2/lstm_cell_2/recurrent_kernel:0', 'lstm_2/lstm_cell_2/bias:0', 'dense_8/kernel:0', 'dense_8/bias:0', 'dense_9/kernel:0', 'dense_9/bias:0'].
I have gone through many links like :
https://github.com/tensorflow/tensorflow/issues/1511
https://github.com/tensorflow/tensorflow/issues/27949
and many more.
There are many solutions provided in these GitHub issues, but I couldn't find the solution to my error. I think mine is an error in the code, not in the TensorFlow library; that's why I don't know where I am going wrong. Please help. Thanks in advance.
I also want to know the reason for this error, as I have seen people getting the same error on different problems (see the links above). I am a beginner in TensorFlow, so if you know this, please let me know.
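For what it's worth, one common cause of this exact error with custom generators, and a plausible one here, is yielding a list instead of a tuple: Keras splits a yielded tuple into (inputs, targets), but a yielded list is treated as a single input structure, so no targets ever reach the loss and no gradients can be computed. A sketch of that assumed fix to the yield line:

yield ([array(X1), array(X2)], array(y))   # tuple: (inputs, targets)

instead of

yield [[array(X1), array(X2)], array(y)]   # list: treated as inputs only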

During creating VAE model throws exception "you should implement a `call` method."

I want to create a VAE (variational autoencoder). During model creation it throws an exception:
When subclassing the Model class, you should implement a call method.
I am using TensorFlow 2.0.
def vae():
    models = {}

    def apply_bn_and_dropout(x):
        return l.Dropout(dropout_rate)(l.BatchNormalization()(x))

    input_image = l.Input(batch_shape=(batch_size, 28, 28, 1))
    x = l.Flatten()(input_image)
    x = l.Dense(256, activation="relu")(x)
    x = apply_bn_and_dropout(x)
    x = l.Dense(128, activation="relu")(x)
    x = apply_bn_and_dropout(x)

    z_mean = l.Dense(latent_dim)(x)
    z_log_var = l.Dense(latent_dim)(x)

    def sampling(args):
        z_mean, z_log_var = args
        epsilon = K.random_normal(shape=(batch_size, latent_dim), mean=0., stddev=1.0)
        return z_mean + K.exp(z_log_var / 2) * epsilon

    lambda_layer = l.Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])

    models["encoder"] = Model(input_image, lambda_layer, "Encoder")
    models["z_meaner"] = Model(input_image, z_mean, "Enc_z_mean")
    models["z_lvarer"] = Model(input_image, z_log_var, "Enc_z_log_var")

    z = l.Input(shape=(latent_dim,))
    x = l.Dense(128)(z)
    x = l.LeakyReLU()(x)
    x = apply_bn_and_dropout(x)
    x = l.Dense(256)(x)
    x = l.LeakyReLU()(x)
    x = apply_bn_and_dropout(x)
    x = l.Dense(28 * 28, activation="sigmoid")(x)
    decoded = l.Reshape((28, 28, 1))(x)

    models["decoder"] = Model(z, decoded, name="Decoder")
    models["vae"] = Model(input_image, models["decoder"](models["encoder"](input_image)), name="VAE")

    def vae_loss(x, decoded):
        x = K.reshape(x, shape=(batch_size, 28 * 28))
        decoded = K.reshape(decoded, shape=(batch_size, 28 * 28))
        xent_loss = 28 * 28 * binary_crossentropy(x, decoded)
        kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
        return (xent_loss + kl_loss) / 2 / 28 / 28

    return models, vae_loss
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
<ipython-input-34-186b31069dc3> in <module>
----> 1 models, vae_loss = vae()
2 vae = models["vae"]
<ipython-input-33-0fa06b39e41c> in vae()
36
37 models["decoder"] = Model(z,decoded,name="Decoder")
---> 38 models["vae"] = Model(input_image, models["decoder"](models["encoder"](input_image)), name="VAE")
39
40 def vae_loss(x,decoded):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\keras\engine\base_layer.py in __call__(self, inputs, *args, **kwargs)
610 base_layer_utils.AutoAddUpdates(self,
611 inputs)) as auto_updater:
--> 612 outputs = self.call(inputs, *args, **kwargs)
613 auto_updater.set_outputs(outputs)
614
~\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\keras\engine\network.py in call(self, inputs, training, mask)
865 """
866 if not self._is_graph_network:
--> 867 raise NotImplementedError('When subclassing the `Model` class, you should'
868 ' implement a `call` method.')
869
NotImplementedError: When subclassing the `Model` class, you should implement a `call` method.
Models with names:

def create_dense_ae():
    encoding_dim = 64

    input_img = layers.Input(shape=(28, 28, 1))
    flat_img = layers.Flatten()(input_img)
    encoded = layers.Dense(encoding_dim, activation='relu')(flat_img)

    input_encoded = layers.Input(shape=(encoding_dim,))
    flat_decoded = layers.Dense(28 * 28, activation='sigmoid')(input_encoded)
    decoded = layers.Reshape((28, 28, 1))(flat_decoded)

    encoder = tf.keras.Model(input_img, encoded, name="encoder")
    decoder = tf.keras.Model(input_encoded, decoded, name="decoder")
    autoencoder = tf.keras.Model(input_img, decoder(encoder(input_img)), name="autoencoder")
    return encoder, decoder, autoencoder
I want to get the model.
The problem is here:
models["encoder"] = Model(input_image, lambda_layer, "Encoder")
models["z_meaner"] = Model(input_image, z_mean, "Enc_z_mean")
models["z_lvarer"] = Model(input_image, z_log_var, "Enc_z_log_var")
You are passing three positional arguments to the constructor, where only two are expected (inputs and outputs). The name cannot be passed positionally; a third positional argument breaks the detection of a functional network versus a sub-classed model, as shown in the Keras source code.
So just replace the code with:
models["encoder"] = Model(input_image, lambda_layer)
models["z_meaner"] = Model(input_image, z_mean)
models["z_lvarer"] = Model(input_image, z_log_var)
