My code
import os
import math
import random
import librosa
import warnings
import numpy as np
import pandas as pd
from tqdm import tqdm
from glob import glob
import tensorflow as tf
from tensorflow import keras
from functools import partial
import IPython.display as ipd
import tensorflow_addons as tfa
import matplotlib.pyplot as plt
from tensorflow.keras import layers
from tensorflow.keras import mixed_precision
music_files = glob("gtzan/genres_original/jazz/*.wav")
def sinusoidal_embedding(x):
embedding_min_frequency = 1.0
embedding_max_frequency = 1000.0
embedding_dims = 32
frequencies = tf.exp(
tf.linspace(
tf.math.log(embedding_min_frequency),
tf.math.log(embedding_max_frequency),
embedding_dims // 2,
)
)
angular_speeds = 2.0 * math.pi * frequencies
embeddings = tf.concat(
[tf.sin(angular_speeds * x), tf.cos(angular_speeds * x)], axis=3
)
return embeddings
def ResidualBlock(width):
def apply(x):
input_width = x.shape[3]
if input_width == width:
residual = x
else:
residual = layers.Conv2D(width, kernel_size=1)(x)
x = layers.BatchNormalization(center=False, scale=False)(x)
x = layers.Conv2D(
width, kernel_size=3, padding="same", activation=keras.activations.swish
)(x)
x = layers.Conv2D(width, kernel_size=3, padding="same")(x)
x = layers.Add()([x, residual])
return x
return apply
def DownBlock(width, block_depth):
def apply(x):
x, skips = x
for _ in range(block_depth):
x = ResidualBlock(width)(x)
skips.append(x)
x = layers.AveragePooling2D(pool_size=2)(x)
return x
return apply
def UpBlock(width, block_depth, attention=False):
def apply(x):
x, skips = x
x = layers.UpSampling2D(size=2, interpolation="bilinear")(x)
for _ in range(block_depth):
skip = skips.pop()
x = layers.Concatenate()([x, skip] if not attention else [
x, skip, layers.MultiHeadAttention(
num_heads=4, key_dim=1, attention_axes=(1,2)
)(x, skip)
])
x = ResidualBlock(width)(x)
return x
return apply
def get_network(widths, block_depth, dim1=256, dim2=128, channels=1, attention=False):
noisy_input = keras.Input(shape=(dim1, dim2, channels))
noise_variances = keras.Input(shape=(1, 1, 1))
upsample_shape = (dim1, dim2)
e = layers.Lambda(sinusoidal_embedding)(noise_variances)
e = layers.UpSampling2D(size=upsample_shape, interpolation="nearest")(e)
x = layers.Conv2D(widths[0], kernel_size=1)(noisy_input)
x = layers.Concatenate()([x, e])
skips = []
for width in widths[:-1]:
x = DownBlock(width, block_depth)([x, skips])
for _ in range(block_depth):
x = ResidualBlock(widths[-1])(x)
for idx, width in enumerate(reversed(widths[:-1])):
x = UpBlock(width, block_depth, attention=attention and idx ==0)([x, skips])
x = layers.Conv2D(channels, kernel_size=1, kernel_initializer="zeros")(x)
return keras.Model([noisy_input, noise_variances], x, name="residual_unet")
min_signal_rate = 0.02
max_signal_rate = 0.95
ema = 0.999
def spectral_norm(pred, real):
norm_real = tf.norm(real, axis=(1,2)) + 1e-6
norm_pred = tf.norm(pred, axis=(1,2)) + 1e-6
return tf.reduce_mean(tf.abs(norm_real - norm_pred) / norm_real)
def time_derivative(pred, real, window=1):
real_derivative = real[:, :-window, :, :] - real[:, window:, :, :]
pred_derivative = pred[:, :-window, :, :] - pred[:, window:, :, :]
return tf.reduce_mean(tf.keras.losses.MSE(real_derivative, pred_derivative))
class DDIM(keras.Model):
def __init__(self, widths, block_depth, attention=False, dim1=256, dim2=128):
super().__init__()
self.normalizer = layers.Normalization(axis=(2,3))
self.network = get_network(widths, block_depth, attention=attention, dim1=dim1, dim2=dim2)
self.ema_network = keras.models.clone_model(self.network)
self.spec_mod = 0
self.dx_mod = 0
def compile(self, **kwargs):
super().compile(**kwargs)
self.noise_loss_tracker = keras.metrics.Mean(name="n_loss")
self.data_loss_tracker = keras.metrics.Mean(name="d_loss")
self.noise_spec_tracker = keras.metrics.Mean(name="n_spec")
self.data_spec_tracker = keras.metrics.Mean(name="d_spec")
self.noise_dx_tracker = keras.metrics.Mean(name="n_dx")
self.data_dx_tracker = keras.metrics.Mean(name="d_dx")
self.noise_total_tracker = keras.metrics.Mean(name="n_total")
self.data_total_tracker = keras.metrics.Mean(name="d_total")
#property
def metrics(self):
return [
self.noise_loss_tracker,
self.data_loss_tracker,
self.noise_spec_tracker,
self.data_spec_tracker,
self.noise_dx_tracker,
self.data_dx_tracker,
self.noise_total_tracker,
self.data_total_tracker
]
def update_trackers(self, n_l, n_s, n_d, d_l, d_s, d_d):
n_t = n_l + n_s + n_d
d_t = d_l + d_s + d_d
for loss, tracker in zip([n_l, n_s, n_d, n_t, d_l, d_s, d_d, d_t],
[self.noise_loss_tracker, self.noise_spec_tracker, self.noise_dx_tracker, self.noise_total_tracker,
self.data_loss_tracker, self.data_spec_tracker, self.data_dx_tracker, self.data_total_tracker]):
tracker.update_state(loss)
def get_losses(self, y_true, y_pred):
return (tf.reduce_mean(self.loss(y_pred, y_true)), spectral_norm(y_pred, y_true), time_derivative(y_pred, y_true))
def denormalize(self, data):
data = self.normalizer.mean + data * self.normalizer.variance**0.5
return tf.clip_by_value(data, -128.0, 128.0)
def diffusion_schedule(self, diffusion_times):
start_angle = tf.acos(max_signal_rate)
end_angle = tf.acos(min_signal_rate)
diffusion_angles = start_angle + diffusion_times * (end_angle - start_angle)
signal_rates = tf.cos(diffusion_angles)
noise_rates = tf.sin(diffusion_angles)
return noise_rates, signal_rates
def denoise(self, noisy_data, noise_rates, signal_rates, training):
if training:
network = self.network
else:
network = self.ema_network
pred_noises = network([noisy_data, noise_rates**2], training=training)
pred_data = (noisy_data - noise_rates * pred_noises) / signal_rates
return pred_noises, pred_data
def reverse_diffusion(self, initial_noise, diffusion_steps):
num_examples = tf.shape(initial_noise)[0]
step_size = 1.0 / diffusion_steps
next_noisy_data = initial_noise
for step in tqdm(range(diffusion_steps)):
noisy_data = next_noisy_data
diffusion_times = tf.ones((num_examples, 1, 1, 1)) - step * step_size
noise_rates, signal_rates = self.diffusion_schedule(diffusion_times)
pred_noises, pred_data = self.denoise(
noisy_data, noise_rates, signal_rates, training=False
)
next_diffusion_times = diffusion_times - step_size
next_noise_rates, next_signal_rates = self.diffusion_schedule(
next_diffusion_times
)
next_noisy_data = (
next_signal_rates * pred_data + next_noise_rates * pred_noises
)
return pred_data
def generate(self, num_examples, shape, diffusion_steps):
initial_noise = tf.random.normal(shape=(num_examples, shape[0], shape[1], shape[2]))
generated_data = self.reverse_diffusion(initial_noise, diffusion_steps)
generated_data = self.denormalize(generated_data)
return generated_data
def train_step(self, data):
batch_size = tf.shape(data)[0]
data = self.normalizer(data, training=True)
noises = tf.random.normal(shape=tf.shape(data))
diffusion_times = tf.random.uniform(
shape=(batch_size, 1, 1, 1), minval=0.0, maxval=1.0
)
noise_rates, signal_rates = self.diffusion_schedule(diffusion_times)
noise_rates = noise_rates
signal_rates = signal_rates
noisy_data = signal_rates * data + noise_rates * noises
with tf.GradientTape() as tape:
# train the network to separate noisy data to their components
pred_noises, pred_data = self.denoise(
noisy_data, noise_rates, signal_rates, training=True
)
noise_loss, noise_spec, noise_dx = self.get_losses(noises, pred_noises) #safe_reduce_mean(self.loss(noises, pred_noises)) # used for training
total_noise_loss = tf.reduce_sum([
noise_loss,
self.spec_mod*noise_spec,
self.dx_mod*noise_dx
])
data_loss, data_spec, data_dx = self.get_losses(data, pred_data) #safe_reduce_mean(self.loss(data, pred_data)) # only used as metric
gradients = tape.gradient(noise_loss, self.network.trainable_weights)
self.optimizer.apply_gradients(zip(gradients, self.network.trainable_weights))
self.update_trackers(
noise_loss, noise_spec, noise_dx,
data_loss, data_spec, data_dx
)
for weight, ema_weight in zip(self.network.weights, self.ema_network.weights):
ema_weight.assign(ema * ema_weight + (1 - ema) * weight)
return {m.name: m.result() for m in self.metrics}
def test_step(self, data):
batch_size = tf.shape(data)[0]
data = self.normalizer(data, training=False)
noises = tf.random.normal(shape=tf.shape(data))
diffusion_times = tf.random.uniform(
shape=(batch_size, 1, 1, 1), minval=0.0, maxval=1.0
)
noise_rates, signal_rates = self.diffusion_schedule(diffusion_times)
noisy_data = signal_rates * data + noise_rates * noises
pred_noises, pred_data = self.denoise(
noisy_data, noise_rates, signal_rates, training=False
)
noise_loss = self.loss(noises, pred_noises)
data_loss = self.loss(data, pred_data)
self.data_loss_tracker.update_state(data_loss)
self.noise_loss_tracker.update_state(noise_loss)
return {m.name: m.result() for m in self.metrics}
def load_at_interval(x, rate=10_000, feats=256, duration=3.3):
file = x[0].numpy().decode()
idx = x[1].numpy()
audio, sr = librosa.load(file, duration=duration, sr=rate, offset=idx)
audio_fill = np.zeros(int(rate*duration), dtype=np.float32)
audio_fill[:len(audio)] = audio
spec = tf.signal.mdct(audio_fill, feats)
return spec
def load_audio(x,y, rate=10_000, mdct_feats=256, duration=3.3):
out = tf.py_function(lambda x,y: load_at_interval((x,y), rate=rate, feats=mdct_feats, duration=duration), inp=[x,y], Tout=tf.float32)
return out
def get_files_dataset(
glob_location,
total_seconds=2,
out_len = 3.3,
hop_size=1,
max_feats = 2048,
batch_size=4,
shuffer_size=1000,
scale=1,
rate=10_000,
mdct_feats=256
):
files = glob(
glob_location,
recursive=True
)
def file_list_generator():
for _ in range(total_seconds):
for file in files:
yield file, _*hop_size
load_fn = partial(load_audio, duration=out_len, rate=rate, mdct_feats=mdct_feats)
dg =tf.data.Dataset.from_generator(file_list_generator, output_signature = (
tf.TensorSpec(shape=(), dtype=tf.string),
tf.TensorSpec(shape=(), dtype=tf.int32))).shuffle(shuffer_size).map(
load_fn, num_parallel_calls=tf.data.AUTOTUNE
).map(
lambda x: tf.expand_dims(x, -1)[:max_feats, :, :]*scale
).map(
lambda x: tf.ensure_shape(x, (max_feats, mdct_feats//2, 1))
).batch(batch_size).prefetch(tf.data.AUTOTUNE)
return dg
dataset = get_files_dataset(
"gtzan/genres_original/jazz/*.wav",
out_len=3.3,
max_feats=256,
total_seconds=26,
scale=1,
batch_size=16
)
for test_batch in dataset.take(1):
shape = test_batch.shape
print(shape)
num_total_examples = (len(music_files) * 26) // shape[0]
model = DDIM(widths = [128, 128, 128, 128], block_depth = 2,
attention=True, dim1=shape[1], dim2=shape[2])
model.normalizer.adapt(dataset, steps=10)
model.compile(
loss=tf.keras.losses.MSE,
optimizer= tfa.optimizers.AdamW(
learning_rate = 3e-4,
weight_decay = 1e-4
)
)
dataset = dataset.cache()
history = model.fit(dataset.repeat(), steps_per_epoch=num_total_examples, epochs=1)
model.save_weights('jazz_gan.h5')
model.save('jazz_gan',save_format='tf')
model.spec_mod = 1
model.dx_mod = 1
history = model.fit(dataset.repeat(), steps_per_epoch=num_total_examples, epochs=100)
specs = model.generate(8, shape[1:], 1000)
for i in range(4):
plt.pcolormesh(np.log(np.abs(test_batch[i, :, :, 0].numpy().T)))
plt.colorbar()
plt.title(f"Real example {i+1}")
plt.show()
ipd.display(ipd.Audio(tf.signal.inverse_mdct(test_batch[i, :, :, 0]), rate=10_000))
dataset - https://www.kaggle.com/datasets/andradaolteanu/gtzan-dataset-music-genre-classification
Error
WARNING:tensorflow:From /usr/local/lib/python3.8/dist-packages/tensorflow/python/autograph/pyct/static_analysis/liveness.py:83: Analyzer.lamba_check (from tensorflow.python.autograph.pyct.static_analysis.liveness) is deprecated and will be removed after 2023-09-23.
Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
(16, 256, 128, 1)
37/160 [=====>........................] - ETA: 7:24 - n_loss: 0.3691 - d_loss: 5.2594 - n_spec: 0.3038 - d_spec: 1.4172 - n_dx: 0.7294 - d_dx: 10.2440 - n_total: 1.4023 - d_total: 16.9206
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-4-f8f615ccb63e> in <module>
370 )
371 dataset = dataset.cache()
--> 372 history = model.fit(dataset.repeat(), steps_per_epoch=num_total_examples, epochs=1)
373
374 model.save_weights('jazz_gan.h5')
1 frames
/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
50 try:
51 ctx.ensure_initialized()
---> 52 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
53 inputs, attrs, num_outputs)
54 except core._NotOkStatusException as e:
InvalidArgumentError: Graph execution error:
2 root error(s) found.
(0) INVALID_ARGUMENT: ValueError: Tensor's shape (257, 256) is not compatible with supplied shape [256, 256].
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/ops/script_ops.py", line 269, in __call__
return func(device, token, args)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/ops/script_ops.py", line 147, in __call__
outputs = self._call(device, args)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/ops/script_ops.py", line 154, in _call
ret = self._func(*args)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/autograph/impl/api.py", line 642, in wrapper
return func(*args, **kwargs)
File "/tmp/__autograph_generated_file_0wo6uu2.py", line 10, in <lambda>
out = ag__.converted_call(ag__.ld(tf).py_function, (ag__.autograph_artifact((lambda x, y: ag__.converted_call(ag__.ld(load_at_interval), ((ag__.ld(x), ag__.ld(y)),), dict(rate=ag__.ld(rate), feats=ag__.ld(mdct_feats), duration=ag__.ld(duration)), fscope))),), dict(inp=[ag__.ld(x), ag__.ld(y)], Tout=ag__.ld(tf).float32), fscope)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/autograph/impl/api.py", line 335, in converted_call
return _call_unconverted(f, args, kwargs, options, False)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/autograph/impl/api.py", line 458, in _call_unconverted
return f(*args, **kwargs)
File "<ipython-input-4-f8f615ccb63e>", line 302, in load_at_interval
spec = tf.signal.mdct(audio_fill, feats)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/util/traceback_utils.py", line 153, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/ops.py", line 1299, in set_shape
raise ValueError(f"Tensor's shape {self.shape} is not compatible "
ValueError: Tensor's shape (257, 256) is not compatible with supplied shape [256, 256].
[[{{node EagerPyFunc}}]]
[[IteratorGetNext]]
[[Shape/_4]]
(1) INVALID_ARGUMENT: ValueError: Tensor's shape (257, 256) is not compatible with supplied shape [256, 256].
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/ops/script_ops.py", line 269, in __call__
return func(device, token, args)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/ops/script_ops.py", line 147, in __call__
outputs = self._call(device, args)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/ops/script_ops.py", line 154, in _call
ret = self._func(*args)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/autograph/impl/api.py", line 642, in wrapper
return func(*args, **kwargs)
File "/tmp/__autograph_generated_file_0wo6uu2.py", line 10, in <lambda>
out = ag__.converted_call(ag__.ld(tf).py_function, (ag__.autograph_artifact((lambda x, y: ag__.converted_call(ag__.ld(load_at_interval), ((ag__.ld(x), ag__.ld(y)),), dict(rate=ag__.ld(rate), feats=ag__.ld(mdct_feats), duration=ag__.ld(duration)), fscope))),), dict(inp=[ag__.ld(x), ag__.ld(y)], Tout=ag__.ld(tf).float32), fscope)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/autograph/impl/api.py", line 335, in converted_call
return _call_unconverted(f, args, kwargs, options, False)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/autograph/impl/api.py", line 458, in _call_unconverted
return f(*args, **kwargs)
File "<ipython-input-4-f8f615ccb63e>", line 302, in load_at_interval
spec = tf.signal.mdct(audio_fill, feats)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/util/traceback_utils.py", line 153, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/ops.py", line 1299, in set_shape
raise ValueError(f"Tensor's shape {self.shape} is not compatible "
ValueError: Tensor's shape (257, 256) is not compatible with supplied shape [256, 256].
[[{{node EagerPyFunc}}]]
[[IteratorGetNext]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_35945]
This problem appeared after it started showing this error. A few days ago this was not the case
WARNING:tensorflow:From /usr/local/lib/python3.8/dist-packages/tensorflow/python/autograph/pyct/static_analysis/liveness.py:83: Analyzer.lamba_check (from tensorflow.python.autograph.pyct.static_analysis.liveness) is deprecated and will be removed after 2023-09-23.
Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
I looked for a solution on many sites and did not find it
Related
I am studying by designing a model structure using Transformer encoder and decoder.
I trained the classification model as a result of the encoder and trained the generative model with the decoder result (the result of the encoder as an input).
Exports multiple results to output.
The following error occurred while learning:
I tracked the error using torch.autograd.set_detect_anomaly(True).
I saw an article about the same error on the PyTorch forum.
However, they were mostly using inplace operations such as += or x[:, 0]=0. So it was solved when I fixed.
But I didn't use any of these operations.
I tried to change unsqueeze() and squeeze() to view(), and also attach clone() to tensor maipulation. but error hasn't be fixed.
What is the problem?
model code
import torch
import torch.nn as nn
import random
from torch.nn.utils.rnn import pad_sequence
import math
from pytorch_pretrained_bert import BertTokenizer, BertForSequenceClassification, BertForQuestionAnswering
from tqdm import tqdm
import pandas as pd
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
class SelfAttention(nn.Module):
def __init__(self, embedding_dim, num_heads):
super(SelfAttention, self).__init__()
self.multihead_attn = nn.MultiheadAttention(embed_dim=embedding_dim, num_heads=num_heads,
batch_first=True)
def forward(self, x):
query = x
key = x
value = x
attn_output = self.multihead_attn(query, key, value, need_weights=False)
return attn_output
class Encoder(nn.Module):
def __init__(self, embedding_dim):
super(Encoder, self).__init__()
self.embedding_dim = embedding_dim
# self.pos_encoder = PositionalEncoding()
self.encoder_layer = nn.TransformerEncoderLayer(d_model=self.embedding_dim, nhead=8, batch_first=True)
self.encoder = nn.TransformerEncoder(encoder_layer=self.encoder_layer, num_layers=6)
self.feedforward = nn.Linear(self.embedding_dim, 1)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
out = self.encoder(x)
cls_out = torch.mean(out, dim=-2)
cls_out = self.feedforward(cls_out)
cls_out = self.sigmoid(cls_out)
return out, cls_out
class Decoder(nn.Module):
def __init__(self, embedding_dim):
super(Decoder, self).__init__()
# self.bert = BertForQuestionAnswering.from_pretrained('bert-base-uncased')
self.embedding_dim = embedding_dim
self.decoder_layer = nn.TransformerDecoderLayer(d_model=self.embedding_dim, nhead=8, batch_first=True)
self.decoder = nn.TransformerDecoder(decoder_layer=self.decoder_layer, num_layers=6)
def forward(self, tgt, memory):
out = self.decoder(tgt, memory)
return out
class AlzhBERT(nn.Module):
def __init__(self, embedding_dim):
super(AlzhBERT, self).__init__()
self.embedding_dim = embedding_dim
self.max_sent_length = 7
self.token_level_attn = nn.ModuleList([SelfAttention(self.embedding_dim, num_heads=8) for _ in range(10)])
self.token_level_attn_single = SelfAttention(self.embedding_dim, num_heads=8)
self.sentence_level_attn = SelfAttention(self.embedding_dim, num_heads=8)
self.encoder = Encoder(embedding_dim=embedding_dim)
self.decoder = Decoder(embedding_dim=embedding_dim)
def forward(self, X_batch):
i = 0
enc_outs = {}
dec_outs = {}
for datastruct in X_batch:
enc_outs[i] = []
dec_outs[i] = []
j=0
for section in datastruct.sections:
print(i, " + ", j)
inv = section.inv.requires_grad_(True).to(device)
y_dec = section.next_uttr.requires_grad_(True).to(device)
par = section.par
# print(par)
try:
tmp = par.dim()
except AttributeError:
print(par)
print("attr err")
j = j+1
continue
# par = par.permute(1,0,2) # (seq_len, sent_len, embed) => 한 번에 self attention
# 여러개 self_attention
# for p in par:
result = self.token_level_attn_single(par.to(device).requires_grad_(True))[0]
res = torch.mean(result, dim=-2).unsqueeze(0)
res_sent = self.sentence_level_attn(res.to(device))[0]
context = torch.mean(res_sent, dim=-3)
inv_input = torch.mean(inv, dim=-2)
# x_enc = torch.concat((inv_input, context))
# x_enc = x_enc.view([1, -1, self.embedding_dim])
enc_out, cls_out = self.encoder(torch.concat([inv_input, context]).unsqueeze(0))
# y_dec = torch.mean(y_dec, dim=-2).to(device)
# enc_out = torch.mean(enc_out, dim=-2).unsqueeze(0).to(device)
dec_out = self.decoder(y_dec, enc_out.to(device))
enc_outs[i].append(cls_out)
dec_outs[i].append(dec_out)
j = j+1
enc_outs[i] = torch.tensor(enc_outs[i], requires_grad=True)
i = i + 1
return enc_outs, dec_outs
train code
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print("device: ", device)
torch.autograd.set_detect_adnomaly(True)
def train_loop(dataloader, model, loss_fn, optimizer, epochs):
# dataloader = dataloader["train"]
size = len(dataloader.dataset)
writer = SummaryWriter()
enc_optimizer = optimizer[0]
dec_optimizer = optimizer[1]
for epoch in range(epochs):
enc_loss_hist = []
dec_loss_hist = []
accuracy = []
print("======== epoch ", epoch, "==========\n")
for i, (Xs, ys) in tqdm(enumerate(dataloader), desc="Train..."):
X_folds, y_folds = cross_validation(10, Xs, ys)
model.train()
for X, y in zip(X_folds['train'], y_folds['train']): # Xf는 DataStruct의 리스트임
# print("<Check Data>")
# print("X 0: ", X[0])
# print("label 0: ", y[0])
# Prediction and Loss
# X = batch_to_tensor(X)
# X = torch.tensor(X).to(device)
y = torch.tensor(y, dtype=torch.float32).to(device)
enc_preds, dec_preds = model(X)
for k in range(len(X)):
for t in range(len(enc_preds[k])):
enc_loss = loss_fn(y[k].to(device), enc_preds[k][t].to(device)).requires_grad_(True)
dec_loss = loss_fn(X[k].sections[t].next_uttr.to(device), dec_preds[k][t].to(device)).requires_grad_(True)
cls_out = torch.tensor(1 if enc_preds[k][t] >= 0.5 else 0)
cls_loss = torch.sum(cls_out == y[k])
accuracy.append(cls_loss)
# Backpropagation
enc_optimizer.zero_grad()
dec_optimizer.zero_grad()
enc_loss.backward(retain_graph=True)
enc_optimizer.step()
dec_loss.backward()
dec_optimizer.step()
enc_loss_hist.append(enc_loss)
dec_loss_hist.append(dec_loss)
cross_validation_loop(X_folds["valid"], y_folds["valid"], model, loss_fn, epoch)
enc_loss_save = torch.mean(torch.tensor(enc_loss_hist))
dec_loss_save = torch.mean(torch.tensor(dec_loss_hist))
accuracy_save = torch.mean(torch.tensor(accuracy, dtype=torch.float32))
writer.add_scalar("Avg Enc Loss/train", enc_loss_save, epoch)
writer.add_scalar("Avg Dec Loss/train", dec_loss_save, epoch)
writer.add_scalar("Avg Accuracy/train", accuracy_save)
if device == "cuda":
saved_model_dir = "/home/juny/AlzheimerModel/checkpoint"
else:
saved_model_dir = "./saved_model"
now = datetime.now()
torch.save({
'epoch': epoch,
'model_state_dict': model.state_dict(),
'optimizer_state_dict': enc_optimizer.state_dict(),
'loss': [enc_loss_save, dec_loss_save],
}, os.path.join('/home/juny/AlzheimerModel/checkpoint',
now.strftime("%Y-%m-%d-%H-%M") + "-e" + str(epoch) + ".pt"))
torch.save(model.state_dict(), os.path.join(saved_model_dir, "saved_model" + now.strftime("%Y-%m-%d-%H-%M") + ".pt"))
encloss, decloss, current = enc_loss_save, dec_loss_save.item(), i * len(X)
print(f"enc loss: {encloss:>7f} dec loss: {decloss:>7f} [{current:>5d}/{size:>5d}")
writer.flush()
writer.close()
error
C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\autograd\__init__.py:173: UserWarning: Error detected in NativeLayerNormBackward0. Traceback of forward call that caused the error:
File "C:/Users/usr/PycharmProjects/project/train.py", line 265, in <module>
train_loop(dataloader=train_dataloader, model=model, loss_fn=loss_fn, optimizer=(enc_optimizer, dec_optimizer), epochs=epochs)
File "C:/Users/usr/PycharmProjects/project/train.py", line 47, in train_loop
enc_preds, dec_preds = model(X)
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\usr\PycharmProjects\project\AlzhBERT.py", line 139, in forward
dec_out = self.decoder(y_dec, enc_out.to(device))
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\usr\PycharmProjects\project\AlzhBERT.py", line 84, in forward
out = self.decoder(tgt, memory)
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\transformer.py", line 291, in forward
output = mod(output, memory, tgt_mask=tgt_mask,
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\transformer.py", line 578, in forward
x = self.norm3(x + self._ff_block(x))
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\normalization.py", line 189, in forward
return F.layer_norm(
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\functional.py", line 2503, in layer_norm
return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
(Triggered internally at C:\cb\pytorch_1000000000000\work\torch\csrc\autograd\python_anomaly_mode.cpp:104.)
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
Train...: 0it [00:05, ?it/s]
Traceback (most recent call last):
File "C:/Users/usr/PycharmProjects/project/train.py", line 265, in <module>
train_loop(dataloader=train_dataloader, model=model, loss_fn=loss_fn, optimizer=(enc_optimizer, dec_optimizer), epochs=epochs)
File "C:/Users/usr/PycharmProjects/project/train.py", line 65, in train_loop
loss.backward(retain_graph=True)
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\_tensor.py", line 396, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\autograd\__init__.py", line 173, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [768]] is at version 1; expected version 0 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
Process finished with exit code 1
I am trying to run a Resnet model through Skorch for classification which I found in a research paper. I am still learning the ways of Torch and Skorch, and I'm unable to find what to fix to get this to work.
ResNet class:
class ResNet(nn.Module):
def __init__(
self,
*,
d_numerical: int,
categories: ty.Optional[ty.List[int]],
d_embedding: int,
d: int,
d_hidden_factor: float,
n_layers: int,
activation: str,
normalization: str,
hidden_dropout: float,
residual_dropout: float,
d_out: int,
regression: bool,
categorical_indicator
) -> None:
super().__init__()
#categories = None #TODO
def make_normalization():
return {'batchnorm': nn.BatchNorm1d, 'layernorm': nn.LayerNorm}[
normalization[0]
](d)
self.categorical_indicator = categorical_indicator #Added
self.regression = regression
self.main_activation = deep.get_activation_fn(activation)
self.last_activation = deep.get_nonglu_activation_fn(activation)
self.residual_dropout = residual_dropout
self.hidden_dropout = hidden_dropout
d_in = d_numerical
d_hidden = int(d * d_hidden_factor)
if categories is not None:
d_in += len(categories) * d_embedding
category_offsets = torch.tensor([0] + categories[:-1]).cumsum(0)
self.register_buffer('category_offsets', category_offsets)
self.category_embeddings = nn.Embedding(int(sum(categories)), d_embedding)
nn.init.kaiming_uniform_(self.category_embeddings.weight, a=math.sqrt(5))
print(f'{self.category_embeddings.weight.shape}')
self.first_layer = nn.Linear(d_in, d) # 1, 256
self.layers = nn.ModuleList(
[
nn.ModuleDict(
{
'norm': make_normalization(),
'linear0': nn.Linear(
d, d_hidden * (2 if activation.endswith('glu') else 1)
),
'linear1': nn.Linear(d_hidden, d),
}
)
for _ in range(n_layers)
]
)
self.last_normalization = make_normalization()
self.head = nn.Linear(d, d_out) # 256, 1
def forward(self, x) -> Tensor:
if not self.categorical_indicator is None:
x_num = x[:, ~self.categorical_indicator].float()
x_cat = x[:, self.categorical_indicator].long() #TODO
else:
x_num = x
x_cat = None
x = []
if x_num is not None:
x.append(x_num)
if x_cat is not None:
x.append(
self.category_embeddings(x_cat + self.category_offsets[None]).view(
x_cat.size(0), -1
)
)
x = torch.cat(x, dim=-1)
x = self.first_layer(x)
for layer in self.layers:
layer = ty.cast(ty.Dict[str, nn.Module], layer)
z = x
z = layer['norm'](z)
z = layer['linear0'](z)
z = self.main_activation(z)
if self.hidden_dropout:
z = F.dropout(z, self.hidden_dropout, self.training)
z = layer['linear1'](z)
if self.residual_dropout:
z = F.dropout(z, self.residual_dropout, self.training)
x = x + z
x = self.last_normalization(x)
x = self.last_activation(x)
x = self.head(x)
if not self.regression:
x = x.squeeze(-1)
return x
class InputShapeSetterResnet(skorch.callbacks.Callback):
def __init__(self, regression=False, batch_size=None,
categorical_indicator=None):
self.categorical_indicator = categorical_indicator
self.regression = regression
self.batch_size = batch_size
def on_train_begin(self, net, X, y):
print("categorical_indicator", self.categorical_indicator)
if self.categorical_indicator is None:
d_numerical = X.shape[1]
categories = None
else:
d_numerical = X.shape[1] - sum(self.categorical_indicator)
# categories = list((X[:, self.categorical_indicator].max(0) + 1).astype(int))
categories = [sum(self.categorical_indicator)]
net.set_params(module__d_numerical=d_numerical,
module__categories=categories, #FIXME #lib.get_categories(X_cat),
module__d_out=2 if self.regression == False else 1) #FIXME#D.info['n_classes'] if D.is_multiclass else 1,
print("Numerical features: {}".format(d_numerical))
print("Categories {}".format(categories))
Skorch Wrapper:
def create_resnet_skorch(id, wandb_run=None, use_checkpoints=True,
categorical_indicator=None, **kwargs):
print(kwargs)
if "verbose" not in kwargs:
verbose = 0
else:
verbose = kwargs.pop("verbose")
if "lr_scheduler" not in kwargs:
lr_scheduler = False
else:
lr_scheduler = kwargs.pop("lr_scheduler")
if "es_patience" not in kwargs.keys():
es_patience = 40
else:
es_patience = kwargs.pop('es_patience')
if "lr_patience" not in kwargs.keys():
lr_patience = 30
else:
lr_patience = kwargs.pop('lr_patience')
optimizer = kwargs.pop('optimizer')
if optimizer == "adam":
optimizer = Adam
elif optimizer == "adamw":
optimizer = AdamW
elif optimizer == "sgd":
optimizer = SGD
device = kwargs.pop('device')
if device == "cuda": # ! only for CPU training, is cuda by default
device = "cpu"
batch_size = kwargs.pop('batch_size')
callbacks = [InputShapeSetterResnet(categorical_indicator=categorical_indicator),
EarlyStopping(monitor="valid_loss",
patience=es_patience)]
callbacks.append(EpochScoring(scoring='accuracy', name='train_accuracy', on_train=True))
if lr_scheduler:
callbacks.append(LRScheduler(policy=ReduceLROnPlateau, patience=lr_patience, min_lr=2e-5,
factor=0.2)) # FIXME make customizable
if use_checkpoints:
callbacks.append(Checkpoint(dirname="skorch_cp", f_params=r"params_{}.pt".format(id), f_optimizer=None,
f_criterion=None))
if not wandb_run is None:
callbacks.append(WandbLogger(wandb_run, save_model=False))
callbacks.append(LearningRateLogger())
if not categorical_indicator is None:
categorical_indicator = torch.BoolTensor(categorical_indicator)
mlp_skorch = NeuralNetClassifier(
ResNet,
# Shuffle training data on each epoch
criterion=torch.nn.CrossEntropyLoss,
optimizer=optimizer,
batch_size=max(batch_size, 1), # if batch size is float, it will be reset during fit
iterator_train__shuffle=True,
module__d_numerical=1, # will be change when fitted
module__categories=None, # will be change when fitted
module__d_out=1, # idem
module__regression=False,
module__categorical_indicator=categorical_indicator,
verbose=verbose,
callbacks=callbacks,
**kwargs
)
return mlp_skorch
Skorch Model:
<class 'skorch.classifier.NeuralNetClassifier'>[uninitialized](
module=<class 'tabular.bin.resnet.ResNet'>,
module__activation=reglu,
module__categorical_indicator=tensor([False, True, False, False, False, False, False, False]),
module__categories=None,
module__d=256,
module__d_embedding=128,
module__d_hidden_factor=2,
module__d_numerical=1,
module__d_out=1,
module__hidden_dropout=0.2,
module__n_layers=8,
module__normalization=['batchnorm'],
module__regression=False,
module__residual_dropout=0.2,
)
I have 8 columns in X for training, 1 of which is a categorical column which is to be embedding through an embedding layer in the NN. From what I've found so far, that is the root of this error since it's coming across this categorical class in execution. But in the forward method, it's supposed to have an embedding layer for the same. Any idea what changes I might need to make for the same?
Error stack:
Traceback (most recent call last):
File "/test.py", line 639, in <module>
model.fit(X_train, y_train)
File "/anaconda3/lib/python3.9/site-packages/skorch/classifier.py", line 142, in fit
return super(NeuralNetClassifier, self).fit(X, y, **fit_params)
File "/anaconda3/lib/python3.9/site-packages/skorch/net.py", line 917, in fit
self.partial_fit(X, y, **fit_params)
File "/anaconda3/lib/python3.9/site-packages/skorch/net.py", line 876, in partial_fit
self.fit_loop(X, y, **fit_params)
File "/anaconda3/lib/python3.9/site-packages/skorch/net.py", line 789, in fit_loop
self.run_single_epoch(dataset_train, training=True, prefix="train",
File "/anaconda3/lib/python3.9/site-packages/skorch/net.py", line 822, in run_single_epoch
for data in self.get_iterator(dataset, training=training):
File "/anaconda3/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 521, in __next__
data = self._next_data()
File "/anaconda3/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 561, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
File "/anaconda3/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
return self.collate_fn(data)
File "/anaconda3/lib/python3.9/site-packages/torch/utils/data/_utils/collate.py", line 84, in default_collate
return [default_collate(samples) for samples in transposed]
File "/anaconda3/lib/python3.9/site-packages/torch/utils/data/_utils/collate.py", line 84, in <listcomp>
return [default_collate(samples) for samples in transposed]
File "/anaconda3/lib/python3.9/site-packages/torch/utils/data/_utils/collate.py", line 74, in default_collate
return {key: default_collate([d[key] for d in batch]) for key in elem}
File "/anaconda3/lib/python3.9/site-packages/torch/utils/data/_utils/collate.py", line 74, in <dictcomp>
return {key: default_collate([d[key] for d in batch]) for key in elem}
File "/anaconda3/lib/python3.9/site-packages/torch/utils/data/_utils/collate.py", line 86, in default_collate
raise TypeError(default_collate_err_msg_format.format(elem_type))
TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'pandas.core.arrays.categorical.Categorical'>
I am new to Tensorflow, this is a project I found on GitHub, but it was written in Tensorflow 1.x fashion, so I rewrote it into Tensorflow 2.x style following some examples. And here's the error I got when training the model:
Traceback (most recent call last):
File "train.py", line 78, in <module>
train(model)
File "train.py", line 24, in train
model.optimize()
File "/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 889, in __call__
result = self._call(*args, **kwds)
File "/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 933, in _call
self._initialize(args, kwds, add_initializers_to=initializers)
File "/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 763, in _initialize
self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
File "/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3050, in _get_concrete_function_internal_garbage_collected
graph_function, _ = self._maybe_define_function(args, kwargs)
File "/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3444, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3279, in _create_graph_function
func_graph_module.func_graph_from_py_func(
File "/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 999, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 672, in wrapped_fn
out = weak_wrapped_fn().__wrapped__(*args, **kwds)
File "/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3971, in bound_method_wrapper
return wrapped_fn(*args, **kwargs)
File "/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 986, in wrapper
raise e.ag_error_metadata.to_exception(e)
ValueError: in user code:
/root/autodl-tmp/HyperBox-main/script/model_v2/box_model.py:328 optimize *
self.create_optimizer()
/root/autodl-tmp/HyperBox-main/script/model_v2/box_model.py:139 create_optimizer *
self.optimizer = tf.keras.optimizers.Adam(
/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:530 minimize **
return self.apply_gradients(grads_and_vars, name=name)
/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:630 apply_gradients
grads_and_vars = optimizer_utils.filter_empty_gradients(grads_and_vars)
/root/miniconda3/lib/python3.8/site-packages/tensorflow/python/keras/optimizer_v2/utils.py:75 filter_empty_gradients
raise ValueError("No gradients provided for any variable: %s." %
ValueError: No gradients provided for any variable: ['rel_bases:0', 'rel_shapes:0', 'rel_multiples:0', 'base_weight_ent_emb:0', 'bump_weight_ent_emb:0', 'ent_emb:0', 'ent_emb_bmp:0'].
I checked on the Internet, this error occurred basically because the calculation of loss has nothing to do with the trainable variables in 'var_list', so that the gradients couldn't be passed along. But the fact is, as I reviewed the code, loss is calculated directly or indirectly using all the trainable variables in var_list for sure. Read the code below from top to bottom, and you'll see how loss is calculated using all the trainable variables step by step:
class BoxE:
def __init__(self, params, corpus_type, work_dir):
self.params = params
self.alpha = params.alpha
self.num_rel = 1
self.bounded_norm = params.bounded_norm
self.normed_bumps = params.normed_bumps
self.fixed_width = params.fixed_width
self.hard_size = params.hard_size
self.total_size = params.total_size
self.learnable_shape = params.learnable_shape
self.corpus_type = corpus_type
self.cwd = work_dir
self.word_vectors = np.load(
f"{self.cwd}/../../word_vectors_processed/{self.corpus_type}_word_vectors_processed.npy"
)
def create_optimizer(self):
self.loss = -1 * tf.math.reduce_mean(
input_tensor=tf.math.log_sigmoid(self.params.gamma - self.pos_dissims)
) - tf.math.reduce_mean(
input_tensor=tf.math.log_sigmoid(self.neg_dissims - self.params.gamma)
)
pprint("type of loss:{}".format(type(self.loss)))
self.optimizer = tf.keras.optimizers.Adam(
self.params.learning_rate
).minimize(self.loss, self.var_list, tape=tf.GradientTape())
def create_train_model(self):
self.pos_h_points = tf.expand_dims(self.ph_base_emb + self.pt_bump_emb, 1)
self.pos_t_points = tf.expand_dims(self.pt_base_emb + self.ph_bump_emb, 1)
self.neg_h_points = tf.expand_dims(self.nh_base_emb + self.nt_bump_emb, 1)
self.neg_t_points = tf.expand_dims(self.nt_base_emb + self.nh_bump_emb, 1)
self.pos_points = tf.math.tanh(
tf.concat([self.pos_h_points, self.pos_t_points], 1)
)
self.neg_points = tf.math.tanh(
tf.concat([self.neg_h_points, self.neg_t_points], 1)
)
#### concat dimension is batch*2*100 ####
self.pos_dissims = self.distance_function(self.pos_points)
self.neg_dissims = self.distance_function(self.neg_points)
def gather_train_embeddings(self, ph:tf.int32, pt:tf.int32, nh:tf.int32, nt:tf.int32, r:tf.int32):
temp = tf.matmul(self.ent_emb, self.base_weight_ent_emb)
self.ph_base_emb = tf.gather(temp, ph)
self.pt_base_emb = tf.gather(temp, pt)
self.nh_base_emb = tf.gather(temp, nh)
self.nt_base_emb = tf.gather(temp, nt)
temp1 = tf.matmul(self.ent_emb_bmp, self.bump_weight_ent_emb)
if self.normed_bumps: # Normalization of bumps option
temp1 = tf.math.l2_normalize(temp1, axis=1)
self.ph_bump_emb = tf.gather(temp1, ph)
self.pt_bump_emb = tf.gather(temp1, pt)
self.nh_bump_emb = tf.gather(temp1, nh)
self.nt_bump_emb = tf.gather(temp1, nt)
self.rel_bases_emb = tf.math.tanh(tf.gather(self.rel_bases, r))
self.rel_deltas_emb = tf.math.tanh(tf.gather(self.rel_deltas, r))
#tf.function
def optimize(self):
for itr in range(0, self.params.max_iterate + 1):
total_loss = 0.0
for b in range(self.num_batch):
ph, pt, nh, nt, r = self.reader.next_batch()
self.gather_train_embeddings(ph, pt, nh, nt, r)
self.create_train_model()
self.create_optimizer()
total_loss += self.loss
if math.isnan(total_loss):
break
print("Loss in iteration", itr, "=", total_loss)
if itr % self.params.save_each == 0 and itr >= self.params.save_after:
self.save_model(itr)
print("done saving model")
def setup_weights(self):
sqrt_size = 6.0 / math.sqrt(self.params.emb_size)
self.ent_emb = tf.Variable(self.word_vectors, dtype=tf.float32, name="ent_emb")
self.ent_emb_bmp = tf.Variable(
self.word_vectors, dtype=tf.float32, name="ent_emb_bmp"
)
self.base_weight_ent_emb = tf.Variable(
name="base_weight_ent_emb",
initial_value=tf.random.uniform(
# shape=[300, self.params.emb_size], minval=-sqrt_size, maxval=sqrt_size
shape=[400, self.params.emb_size], minval=-sqrt_size, maxval=sqrt_size
),
)
self.bump_weight_ent_emb = tf.Variable(
name="bump_weight_ent_emb",
initial_value=tf.random.uniform(
# shape=[300, self.params.emb_size], minval=-sqrt_size, maxval=sqrt_size
shape=[400, self.params.emb_size], minval=-sqrt_size, maxval=sqrt_size
),
)
if self.learnable_shape: # If shape is learnable, define variables accordingly
self.rel_shapes = tf.Variable(
name="rel_shapes",
initial_value=tf.random.uniform(
shape=[self.num_rel, 2, self.params.emb_size],
minval=-sqrt_size,
maxval=sqrt_size,
),
)
self.norm_rel_shapes = self.product_normalise(
self.rel_shapes, self.bounded_norm
)
else:
self.norm_rel_shapes = tf.ones(
[self.num_rel, 2, self.params.emb_size], name="norm_rel_shapes"
)
self.rel_bases = tf.Variable(
name="rel_bases",
initial_value=tf.random.uniform(
shape=[self.num_rel, 2, self.params.emb_size],
minval=-sqrt_size,
maxval=sqrt_size,
),
)
if self.fixed_width:
self.rel_multiples1 = tf.zeros([self.num_rel, 2, 1])
else:
self.rel_multiples1 = tf.Variable(
name="rel_multiples",
initial_value=tf.random.uniform(
shape=[self.num_rel, 2, 1], minval=-sqrt_size, maxval=sqrt_size
),
)
if self.hard_size:
self.rel_multiples = self.total_size * tf.nn.softmax(
self.rel_multiples1, axis=0
)
else:
self.rel_multiples = tf.nn.elu(self.rel_multiples1) + tf.constant(1.0)
self.rel_deltas = tf.multiply(
self.rel_multiples, self.norm_rel_shapes, name="rel_deltas"
)
self.var_list = [
self.rel_bases,
self.rel_shapes,
self.rel_multiples1,
self.base_weight_ent_emb,
self.bump_weight_ent_emb,
self.ent_emb,
self.ent_emb_bmp,
]
def setup_reader(self):
self.reader = Reader(self.corpus_type)
self.reader.read_triples()
self.reader.set_batch_size(self.params.batch_size)
self.reader.set_neg_samples(self.params.no_neg_samples)
self.num_batch = self.reader.num_batch()
self.num_ent = self.reader.num_ent()
def product_normalise(self, input_tensor, bounded_norm=True):
step1_tensor = tf.abs(input_tensor)
step2_tensor = step1_tensor + (10 ** -8)
log_norm_tensor = tf.math.log(step2_tensor)
step3_tensor = tf.reduce_mean(input_tensor=log_norm_tensor, axis=2, keepdims=True)
norm_volume = tf.math.exp(step3_tensor)
pre_norm_out = input_tensor / norm_volume
if not bounded_norm:
return pre_norm_out
else:
minsize_tensor = tf.minimum(
tf.reduce_min(input_tensor=log_norm_tensor, axis=2, keepdims=True), -1
)
maxsize_tensor = tf.maximum(
tf.reduce_max(input_tensor=log_norm_tensor, axis=2, keepdims=True), 1
)
minsize_ratio = -1 / minsize_tensor
maxsize_ratio = 1 / maxsize_tensor
size_norm_ratio = tf.minimum(minsize_ratio, maxsize_ratio)
normed_tensor = log_norm_tensor * size_norm_ratio
return tf.exp(normed_tensor)
def distance_function(self, points):
self.rel_bx_low, self.rel_bx_high = self.compute_box(
self.rel_bases_emb, self.rel_deltas_emb
)
lower_corner = self.rel_bx_low
upper_corner = self.rel_bx_high
centres = 1 / 2 * (lower_corner + upper_corner)
widths = upper_corner - lower_corner
widths_p1 = widths + tf.constant(1.0)
width_cond = tf.compat.v1.where(
tf.logical_and(lower_corner <= points, points <= upper_corner),
tf.abs(points - centres) / widths_p1,
widths_p1 * tf.abs(points - centres)
- (widths / 2) * (widths_p1 - 1 / widths_p1),
)
distance = tf.norm(
tensor=width_cond, axis=2, ord=self.params.p_norm
) ###batch*2*1 after norm
distance = tf.reduce_sum(input_tensor=distance, axis=1)
return distance
def save_model(self, itr):
# filename = (
# f"{self.cwd}/BoxModel_"
# + self.corpus_type
# + "_weights/"
# + str(itr)
# + ".ckpt"
# )
filename = "all_trained_models"
if not os.path.exists(os.path.dirname(filename)):
os.makedirs(os.path.dirname(filename))
tf.saved_model.save(self, filename)
tf.keras.models.save_model(filename, save_format='tf')
def compute_box(self, box_base, box_delta):
box_second = box_base + tf.constant(0.5) * box_delta
box_first = box_base - tf.constant(0.5) * box_delta
box_low = tf.minimum(box_first, box_second, "box_low")
box_high = tf.maximum(box_first, box_second, "box_high")
return box_low, box_high
OS: Manjaro Linux x64
CUDA: 11.0.3
TF/Keras: 2.4(.1)
Py: 3.8
Hello,
I'm trying to build some kind of W-VAE-GAN. I keep un running into that very same error over and over again and I already had that problem with Keras 2.3, interestingly NOT using TF/K 2.2. Unfortunately I need to use Keras 2.4 because I'm supposed to run my code on our university server with these exact TF/K and CUDA-Versions. At this point I'm just trying to make sure my code works as intended.
The error I get is the following, with the exact lines commented in my code:
...
Epoch 1/20
Traceback (most recent call last):
File "/home/peer/Programmierkram/BA/Metal_GAN/wvaegan.py", line 282, in <module>
gan.fit(gen_trans, batch_size=batch_size, epochs=epochs, callbacks=[callback])
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py", line 1100, in fit
tmp_logs = self.train_function(iterator)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 828, in __call__
result = self._call(*args, **kwds)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 871, in _call
self._initialize(args, kwds, add_initializers_to=initializers)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 725, in _initialize
self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 2969, in _get_concrete_function_internal_garbage_collected
graph_function, _ = self._maybe_define_function(args, kwargs)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3361, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3196, in _create_graph_function
func_graph_module.func_graph_from_py_func(
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 990, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 634, in wrapped_fn
out = weak_wrapped_fn().__wrapped__(*args, **kwds)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 977, in wrapper
raise e.ag_error_metadata.to_exception(e)
tensorflow.python.framework.errors_impl.OperatorNotAllowedInGraphError: in user code:
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:805 train_function *
return step_function(self, iterator)
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:795 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
return fn(*args, **kwargs)
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:788 run_step **
outputs = model.train_step(data)
/home/peer/Programmierkram/BA/Metal_GAN/wvaegan.py:190 train_step
z_mean, z_log_var, z = self.encoder(clip_img) # <------ WHY NO ERROR HERE?
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:505 __iter__
self._disallow_iteration()
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:498 _disallow_iteration
self._disallow_when_autograph_enabled("iterating over `tf.Tensor`")
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:474 _disallow_when_autograph_enabled
raise errors.OperatorNotAllowedInGraphError(
OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed: AutoGraph did convert this function. This might indicate you are trying to use an unsupported feature.
What I do not understand is how it comes to this error in the first place, as I successfully executed the VAE-part using TF2.2 without any error like this, and, more importantly, there is no iteration over any tensor obvious to me. Even if I commented out the for-loop in my train_step the same error occurs few lines later in the same context. I have also tried decorating the def train_step() with #tf.function, yet nothing changed.
The code I used is the following:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow.keras.backend as K
from keras.preprocessing.image import ImageDataGenerator
import itertools
import scipy.io
import matplotlib.pyplot as plt
import matplotlib.image as PIL
runOnGPU = 0
if runOnGPU==1:
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
try:
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
print(e)
else:
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
path_clipped_train = os.path.join('./sinograms/clip')
path_transparent_train = os.path.join('./sinograms/transparent')
img_width, img_height = 512, 512
bottleneck = 1024 * 2
filters = (1024, 512, 256, 64)
filter_size = (3, 3, 3, 3)
batch_size = 4
epochs = 20
dsc_steps = 1
gp_w = 10.0
beta_v = 2
learning_rate = 125e-5
latent_dim = 2
input_shape = (1, img_width, img_height, 1)
dataset_gen1 = ImageDataGenerator(rescale=1 / 255, dtype="float32")
dataset_gen2 = ImageDataGenerator(rescale=1 / 255, dtype="float32")
gen_trans = dataset_gen1.flow_from_directory(path_transparent_train,
target_size=(img_width, img_height),
color_mode='grayscale',
classes=[''],
class_mode=None,
batch_size=batch_size,
shuffle=False,
)
gen_clip = dataset_gen2.flow_from_directory(path_clipped_train,
target_size=(img_width, img_height),
color_mode='grayscale',
classes=[''],
class_mode=None,
batch_size=batch_size,
shuffle=False,
)
class Sampling(layers.Layer):
def call(self, inputs):
z_mean, z_log_var = inputs
batch = tf.shape(z_mean)[0]
dim = tf.shape(z_mean)[1]
epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
return z_mean + tf.exp(0.5 * z_log_var) * epsilon
def get_encoder():
encoder_inputs = keras.Input(shape=input_shape[1:], name="encoder_input")
enc = encoder_inputs
for (numFilters, szFilters) in zip(filters, filter_size):
enc = layers.Conv2D(numFilters, szFilters, activation='relu', strides=2, padding='same')(enc)
enc = layers.BatchNormalization()(enc)
enc = layers.Dropout(0.2)(enc)
conv_shape = K.int_shape(enc)[1:]
enc = layers.Flatten()(enc)
enc = layers.Dense(bottleneck, activation='relu', name="bottleneck")(enc)
enc = layers.BatchNormalization()(enc)
z_mean = layers.Dense(latent_dim, name="z_mean")(enc)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(enc)
latent_z = Sampling()([z_mean, z_log_var])
encoder_model = keras.models.Model(encoder_inputs, latent_z, name="encoder")
return encoder_model, conv_shape
enc_model, conv_shape = get_encoder()
enc_model.summary()
def get_decoder():
latent_input = keras.Input(shape=(latent_dim,))
dec = layers.Dense(conv_shape[0] * conv_shape[1] * conv_shape[2], activation='relu')(latent_input)
dec = layers.Reshape(conv_shape)(dec)
for (numFilters, szFilters) in zip(reversed(filters), reversed(filter_size)):
dec = layers.Conv2DTranspose(numFilters, szFilters, activation='relu', strides=2, padding='same')(dec)
dec = layers.BatchNormalization()(dec)
dec = layers.Dropout(0.2)(dec)
decoder_outputs = layers.Conv2DTranspose(1, 3, activation='relu', padding='same')(dec)
decoder_model = keras.models.Model(latent_input, decoder_outputs, name="decoder")
return decoder_model
dec_model = get_decoder()
dec_model.summary()
def get_discriminator():
dscr_input = keras.Input(shape=input_shape[1:])
dscr = dscr_input
for numFilters in filters:
dscr = layers.Conv2D(numFilters, kernel_size=5, activation='relu', strides=2, padding='same')(dscr)
dscr = layers.Flatten()(dscr)
dscr = layers.Dense(1, activation="relu", name="dsc_end")(dscr)
discriminator_model = keras.models.Model(dscr_input, dscr, name="discriminator")
return discriminator_model
dsc_model = get_discriminator()
dsc_model.summary()
class GAN(keras.Model):
def __init__(self,
discriminator,
encoder,
decoder,
latent_dim,
dsc_steps=dsc_steps,
gp_w=gp_w,
):
super(GAN, self).__init__()
self.discriminator = discriminator
self.encoder = encoder
self.decoder = decoder
self.latent_dim = latent_dim
self.dsc_steps = dsc_steps
self.gp_w = gp_w
def compile(self,
dsc_optimizer, enc_optimizer, dec_optimizer,
dsc_loss_fn, enc_loss_fn, dec_loss_fn):
super(GAN, self).compile()
self.dsc_optimizer = dsc_optimizer
self.enc_optimizer = enc_optimizer
self.dec_optimizer = dec_optimizer
self.dsc_loss_fn = dsc_loss_fn
self.enc_loss_fn = enc_loss_fn
self.dec_loss_fn = dec_loss_fn
def call(self, data):
ds = self.discriminator(data)
e = self.encoder(data)
d = self.decoder(e)
def gradient_penalty(self, batch_size, ref_img, gen_img):
alpha = tf.random_normal([batch_size, 1, 1, 1], 0.0, 1.0)
diff = gen_img - ref_img
interpolated = ref_img + alpha * diff
with tf.GradientTape() as gp_tape:
gp_tape.watch(interpolated)
pred = self.discriminator(interpolated, training=True)
grads = gp_tape.gradient(pred, [interpolated])[0]
norm = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1, 2, 3]))
gp = tf.reduce_mean((norm - 1.0) ** 2)
return gp
#tf.function # doesn't make any difference if decorating with that
def train_step(self, data):
trans_img = data
clip_img = data
batch_size = tf.shape(trans_img)[:1]
for i in range(self.dsc_steps):
with tf.GradientTape() as tape:
z_mean, z_log_var, z = self.encoder(clip_img) # <------ ERROR HERE
gen_img = self.decoder(z)
gen_logits = self.discriminator(gen_img)
ref_logits = self.discriminator(trans_img)
dsc_cost = self.dsc_loss_fn(ref_img=ref_logits, gen_img=gen_logits)
gp = self.gradient_penalty(batch_size, trans_img, gen_img)
dsc_loss = dsc_cost + gp * self.gp_w
dsc_gradient = tape.gradient(dsc_loss, self.discriminator.trainable_variables)
self.dsc_optimizer.apply_gradients(zip(dsc_gradient, self.discriminator.trainable_variables))
with tf.GradientTape() as tape:
z_mean, z_log_var, z = self.encoder(clip_img) # <------ ERROR ALSO HERE IF dsc_steps = 0
gen_img = self.decoder(z)
gen_img_logits = self.discriminator(gen_img)
dec_loss = self.dec_loss_fn(gen_img_logits)
kl_loss = self.kl_loss(z_mean, z_log_var)
enc_gradient = tape.gradient(kl_loss, self.encoder.trainable_variables)
self.enc_optimizer.apply_gradients(zip(enc_gradient, self.encoder.trainable_variables))
dec_gradient = tape.gradient(dec_loss, self.decoder.trainable_variables)
self.dec_optimizer.apply_gradients(zip(dec_gradient, self.decoder.trainable_variables))
return {"dsc_loss": dsc_loss, "KL-Loss": kl_loss, "dec_loss": dec_loss}
class GANMonitor(keras.callbacks.Callback):
def __init__(self, num_img=6, latent_dim=latent_dim):
self.num_img = num_img
self.latent_dim = latent_dim
def on_epoch_end(self, epoch, logs=None):
generated_images = self.model.decoder()
generated_images = (generated_images * 127.5) + 127.5
for i in range(self.num_img):
img = generated_images[i].np()
img = keras.preprocessing.image.array_to_img(img)
img.save("generated_img_{i}_{epoch}.png".format(i=i, epoch=epoch))
encoder_optimizer = keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5, beta_2=0.9)
decoder_optimizer = keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5, beta_2=0.9)
discriminator_optimizer = keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5, beta_2=0.9)
def discriminator_loss(real_img, fake_img):
real_loss = tf.reduce_mean(real_img)
fake_loss = tf.reduce_mean(fake_img)
return fake_loss - real_loss
def generator_loss(fake_img):
return -tf.reduce_mean(fake_img)
def kl_loss(z_mean, z_log_var):
kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
return beta_v * kl_loss
def reconstruction_loss(data, reconstruction):
rec_loss = tf.reduce_mean(
tf.reduce_sum(keras.losses.mse(data, reconstruction), axis=(1, 2))
)
return rec_loss
callback = GANMonitor(num_img=3, latent_dim=latent_dim)
gan = GAN(
discriminator=dsc_model,
encoder=enc_model,
decoder=dec_model,
latent_dim=latent_dim,
dsc_steps=dsc_steps,
)
gan.compile(
dsc_optimizer=discriminator_optimizer,
enc_optimizer=encoder_optimizer,
dec_optimizer=decoder_optimizer,
dsc_loss_fn=discriminator_loss,
enc_loss_fn=kl_loss,
dec_loss_fn=generator_loss,
)
gan.fit(gen_trans, batch_size=batch_size, epochs=epochs, callbacks=[callback])
I'd be very thankful for any help because I haven't been able to find or work any solution to this. I've read that in TF2.5 some errors like this shouldn't occur anymore, but using TF2.5 is not an option.
I found the problem.
In the original Keras example for an VAE, the encoder ends in three layers, namely z_mean, z_log_var and latent_z. While it was possible to access all terminal layers of a model in TF 2.2, as I did in my train_step
z_mean, z_log_var, z = encoder(data)
only (lantent_) z is committed as defined in the encoder model initialization.
By defining the model with
encoder_model = keras.models.Model(encoder_inputs, ([z_mean, z_log_var, latent_z]), name="encoder")
with a list of output layers, all z* are accessible.
I assume that getting multiple variables from a single tensor with a singular output
x1, x2, x3 = model(data)
results in a loop that might look something like:
for i in x:
x{i} = model(data)
which is the only explaination for an iteration over a tensor I can think of.
However, reading code might be useful, I'll try to remember that.
I am trying to train an unsupervised classification model for which i am using deep clustering with my model on Keras.
The code I am referring for clustering is this.
While running the code i am getting an error in the cutom layer while adding weights. Below you can see the Code and the error.
import metrics
import numpy as np
from tensorflow.keras.layers import Layer, InputSpec
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from sklearn.cluster import KMeans
class ClusteringLayer(Layer):
"""
Clustering layer converts input sample (feature) to soft label, i.e. a vector that represents the probability of the
sample belonging to each cluster. The probability is calculated with student's t-distribution.
# Example
```
model.add(ClusteringLayer(n_clusters=10))
```
# Arguments
n_clusters: number of clusters.
weights: list of Numpy array with shape `(n_clusters, n_features)` witch represents the initial cluster centers.
alpha: parameter in Student's t-distribution. Default to 1.0.
# Input shape
2D tensor with shape: `(n_samples, n_features)`.
# Output shape
2D tensor with shape: `(n_samples, n_clusters)`.
"""
def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
if 'input_shape' not in kwargs and 'input_dim' in kwargs:
kwargs['input_shape'] = (kwargs.pop('input_dim'),)
super(ClusteringLayer, self).__init__(**kwargs)
self.n_clusters = n_clusters
self.alpha = alpha
self.initial_weights = weights
self.input_spec = InputSpec(ndim=2)
def build(self, input_shape):
assert len(input_shape) == 2
input_dim = input_shape[1]
self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
self.clusters = self.add_weight(shape=(self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters')
if self.initial_weights is not None:
self.set_weights(self.initial_weights)
del self.initial_weights
self.built = True
def call(self, inputs, **kwargs):
""" student t-distribution, as same as used in t-SNE algorithm.
q_ij = 1/(1+dist(x_i, u_j)^2), then normalize it.
Arguments:
inputs: the variable containing data, shape=(n_samples, n_features)
Return:
q: student's t-distribution, or soft labels for each sample. shape=(n_samples, n_clusters)
"""
q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))
q **= (self.alpha + 1.0) / 2.0
q = K.transpose(K.transpose(q) / K.sum(q, axis=1))
return q
def compute_output_shape(self, input_shape):
assert input_shape and len(input_shape) == 2
return input_shape[0], self.n_clusters
def get_config(self):
config = {'n_clusters': self.n_clusters}
base_config = super(ClusteringLayer, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
class Inf:
def __init__(self, D1, D2, n_clusters):
from tensorflow.keras.models import model_from_json
self.n_clusters = n_clusters
json_file = open(D1, 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights(D2)
print("Loaded model from disk")
loaded_model.summary()
self.model = loaded_model
def create_model(self):
hidden = self.model.get_layer(name='encoded').output
self.encoder = Model(inputs = self.model.input, outputs = hidden)
clustering_layer = ClusteringLayer(n_clusters=self.n_clusters)(hidden)
self.model = Model(inputs = self.model.input, outputs = clustering_layer)
self.model = model
def compile(self, loss='kld', optimizer='adam'):
self.model.compile(loss=loss, optimizer=optimizer)
def fit(self, x, y=None, batch_size=16, maxiter=2e4, tol=1e-3, update_interval=140, save_dir='./results/temp'):
print('Update interval', update_interval)
save_interval = x.shape[0] / batch_size * 5
print('Save interval', save_interval)
print('Initializing cluster centers with k-means.')
kmeans = KMeans(n_clusters=self.n_clusters, n_init=20)
self.y_pred = kmeans.fit_predict(self.encoder.predict(x))
y_pred_last = np.copy(self.y_pred)
self.model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])
# Step : deep clustering
# logging file
import csv, os
if not os.path.exists(save_dir):
os.makedirs(save_dir)
logfile = open(save_dir + '/dcec_log.csv', 'w')
logwriter = csv.DictWriter(logfile, fieldnames=['iter', 'acc', 'nmi', 'ari', 'L', 'Lc', 'Lr'])
logwriter.writeheader()
loss = [0, 0, 0]
index = 0
for ite in range(int(maxiter)):
if ite % update_interval == 0:
q, _ = self.model.predict(x, verbose=0)
p = self.target_distribution(q) # update the auxiliary target distribution p
# evaluate the clustering performance
self.y_pred = q.argmax(1)
if y is not None:
acc = np.round(metrics.acc(y, self.y_pred), 5)
nmi = np.round(metrics.nmi(y, self.y_pred), 5)
ari = np.round(metrics.ari(y, self.y_pred), 5)
loss = np.round(loss, 5)
logdict = dict(iter=ite, acc=acc, nmi=nmi, ari=ari, L=loss[0], Lc=loss[1], Lr=loss[2])
logwriter.writerow(logdict)
print('Iter', ite, ': Acc', acc, ', nmi', nmi, ', ari', ari, '; loss=', loss)
# check stop criterion
delta_label = np.sum(self.y_pred != y_pred_last).astype(np.float32) / self.y_pred.shape[0]
y_pred_last = np.copy(self.y_pred)
if ite > 0 and delta_label < tol:
print('delta_label ', delta_label, '< tol ', tol)
print('Reached tolerance threshold. Stopping training.')
logfile.close()
break
# train on batch
if (index + 1) * batch_size > x.shape[0]:
loss = self.model.train_on_batch(x=x[index * batch_size::],
y=[p[index * batch_size::], x[index * batch_size::]])
index = 0
else:
loss = self.model.train_on_batch(x=x[index * batch_size:(index + 1) * batch_size],
y=[p[index * batch_size:(index + 1) * batch_size],
x[index * batch_size:(index + 1) * batch_size]])
index += 1
# save intermediate model
if ite % save_interval == 0:
# save DCEC model checkpoints
print('saving model to:', save_dir + '/dcec_model_' + str(ite) + '.h5')
self.model.save_weights(save_dir + '/dcec_model_' + str(ite) + '.h5')
ite += 1
# save the trained model
logfile.close()
print('saving model to:', save_dir + '/dcec_model_final.h5')
self.model.save_weights(save_dir + '/dcec_model_final.h5')
My Output layer is a dense layer with output dimension(?,128).
I am getting a following error in the clustering layer.
File "C:/Users/u/Desktop/trained/inference.py", line 45, in build
self.clusters = self.add_weight(shape=(self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters')
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 384, in add_weight
aggregation=aggregation)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\training\tracking\base.py", line 663, in _add_variable_with_custom_getter
**kwargs_for_getter)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\keras\engine\base_layer_utils.py", line 155, in make_variable
shape=variable_shape if variable_shape.rank else None)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\variables.py", line 259, in __call__
return cls._variable_v1_call(*args, **kwargs)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\variables.py", line 220, in _variable_v1_call
shape=shape)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\variables.py", line 198, in <lambda>
previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 2495, in default_variable_creator
shape=shape)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\variables.py", line 263, in __call__
return super(VariableMetaclass, cls).__call__(*args, **kwargs)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\resource_variable_ops.py", line 460, in __init__
shape=shape)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\resource_variable_ops.py", line 604, in _init_from_args
initial_value() if init_from_fn else initial_value,
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\keras\engine\base_layer_utils.py", line 135, in <lambda>
init_val = lambda: initializer(shape, dtype=dtype)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\init_ops.py", line 533, in __call__
shape, -limit, limit, dtype, seed=self.seed)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\random_ops.py", line 239, in random_uniform
shape = _ShapeTensor(shape)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\random_ops.py", line 44, in _ShapeTensor
return ops.convert_to_tensor(shape, dtype=dtype, name="shape")
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\framework\ops.py", line 1087, in convert_to_tensor
return convert_to_tensor_v2(value, dtype, preferred_dtype, name)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\framework\ops.py", line 1145, in convert_to_tensor_v2
as_ref=False)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\framework\ops.py", line 1224, in internal_convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\framework\constant_op.py", line 305, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\framework\constant_op.py", line 246, in constant
allow_broadcast=True)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\framework\constant_op.py", line 284, in _constant_impl
allow_broadcast=allow_broadcast))
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\framework\tensor_util.py", line 562, in make_tensor_proto
"supported type." % (type(values), values))
TypeError: Failed to convert object of type <class 'tuple'> to Tensor. Contents: (17, Dimension(128)). Consider casting elements to a supported type.
I have used an autoencoder's encoder past as an input. Following is the encoder part of the autoencoder.
ip = Input(shape=(256,256,1))
x = Conv2D(16, (3,3), padding='same')(ip)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Dropout(0.2)(x)
x = MaxPooling2D((2,2), padding='same')(x)
x = Flatten()(x)
x = Dense(128, name="encoded")(x)
Replace
input_dim = input_shape[1]
with
input_dim = input_shape[1].value
in the build() method of ClusteringLayer, so that input_dim will be 128 instead of Dimension(128).
Replace
input_dim = input_shape[1].value
With
input_dim = input_shape[1]
and also Replace
if (index + 1) * batch_size > x.shape[0]:
loss = self.model.train_on_batch(x=x[index * batch_size::], y=[p[index * batch_size::], x[index * batch_size::]])
index = 0
else:
loss = self.model.train_on_batch(x=x[index * batch_size:(index + 1) * batch_size], y=[p[index * batch_size:(index + 1) * batch_size], x[index * batch_size:(index + 1) * batch_size]])
index += 1
With
if (index + 1) * batch_size > x.shape[0]:
loss = self.model.train_on_batch(x=x[index * batch_size::], y=p[index * batch_size::])
index = 0
else:
loss = self.model.train_on_batch(x=x[index * batch_size:(index + 1) * batch_size], y=p[index * batch_size:(index + 1) * batch_size])
index += 1