I am trying to train an unsupervised classification model, for which I am using deep clustering with my model in Keras.
The code I am referring to for clustering is this.
While running the code I am getting an error in the custom layer while adding weights. Below you can see the code and the error.
import metrics
import numpy as np
from tensorflow.keras.layers import Layer, InputSpec
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from sklearn.cluster import KMeans

class ClusteringLayer(Layer):
    """
    Clustering layer converts an input sample (feature) to a soft label, i.e. a vector that represents the probability of the
    sample belonging to each cluster. The probability is calculated with Student's t-distribution.
    # Example
    ```
    model.add(ClusteringLayer(n_clusters=10))
    ```
    # Arguments
        n_clusters: number of clusters.
        weights: list of Numpy arrays with shape `(n_clusters, n_features)` which represents the initial cluster centers.
        alpha: parameter in Student's t-distribution. Defaults to 1.0.
    # Input shape
        2D tensor with shape: `(n_samples, n_features)`.
    # Output shape
        2D tensor with shape: `(n_samples, n_clusters)`.
    """
    def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super(ClusteringLayer, self).__init__(**kwargs)
        self.n_clusters = n_clusters
        self.alpha = alpha
        self.initial_weights = weights
        self.input_spec = InputSpec(ndim=2)

    def build(self, input_shape):
        assert len(input_shape) == 2
        input_dim = input_shape[1]
        self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
        self.clusters = self.add_weight(shape=(self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters')
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
        self.built = True

    def call(self, inputs, **kwargs):
        """ Student's t-distribution, the same as used in the t-SNE algorithm.
        q_ij = 1/(1+dist(x_i, u_j)^2), then normalize it.
        Arguments:
            inputs: the variable containing data, shape=(n_samples, n_features)
        Return:
            q: Student's t-distribution, or soft labels for each sample. shape=(n_samples, n_clusters)
        """
        q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))
        q **= (self.alpha + 1.0) / 2.0
        q = K.transpose(K.transpose(q) / K.sum(q, axis=1))
        return q

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) == 2
        return input_shape[0], self.n_clusters

    def get_config(self):
        config = {'n_clusters': self.n_clusters}
        base_config = super(ClusteringLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
class Inf:
    def __init__(self, D1, D2, n_clusters):
        from tensorflow.keras.models import model_from_json
        self.n_clusters = n_clusters
        json_file = open(D1, 'r')
        loaded_model_json = json_file.read()
        json_file.close()
        loaded_model = model_from_json(loaded_model_json)
        # load weights into new model
        loaded_model.load_weights(D2)
        print("Loaded model from disk")
        loaded_model.summary()
        self.model = loaded_model

    def create_model(self):
        hidden = self.model.get_layer(name='encoded').output
        self.encoder = Model(inputs=self.model.input, outputs=hidden)
        # name='clustering' so that fit() can look the layer up via get_layer(name='clustering')
        clustering_layer = ClusteringLayer(n_clusters=self.n_clusters, name='clustering')(hidden)
        self.model = Model(inputs=self.model.input, outputs=clustering_layer)

    def compile(self, loss='kld', optimizer='adam'):
        self.model.compile(loss=loss, optimizer=optimizer)

    def fit(self, x, y=None, batch_size=16, maxiter=2e4, tol=1e-3, update_interval=140, save_dir='./results/temp'):
        print('Update interval', update_interval)
        save_interval = x.shape[0] / batch_size * 5
        print('Save interval', save_interval)
        print('Initializing cluster centers with k-means.')
        kmeans = KMeans(n_clusters=self.n_clusters, n_init=20)
        self.y_pred = kmeans.fit_predict(self.encoder.predict(x))
        y_pred_last = np.copy(self.y_pred)
        self.model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])
        # Step: deep clustering
        # logging file
        import csv, os
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        logfile = open(save_dir + '/dcec_log.csv', 'w')
        logwriter = csv.DictWriter(logfile, fieldnames=['iter', 'acc', 'nmi', 'ari', 'L', 'Lc', 'Lr'])
        logwriter.writeheader()
        loss = [0, 0, 0]
        index = 0
        for ite in range(int(maxiter)):
            if ite % update_interval == 0:
                q, _ = self.model.predict(x, verbose=0)
                p = self.target_distribution(q)  # update the auxiliary target distribution p
                # evaluate the clustering performance
                self.y_pred = q.argmax(1)
                if y is not None:
                    acc = np.round(metrics.acc(y, self.y_pred), 5)
                    nmi = np.round(metrics.nmi(y, self.y_pred), 5)
                    ari = np.round(metrics.ari(y, self.y_pred), 5)
                    loss = np.round(loss, 5)
                    logdict = dict(iter=ite, acc=acc, nmi=nmi, ari=ari, L=loss[0], Lc=loss[1], Lr=loss[2])
                    logwriter.writerow(logdict)
                    print('Iter', ite, ': Acc', acc, ', nmi', nmi, ', ari', ari, '; loss=', loss)
                # check stop criterion
                delta_label = np.sum(self.y_pred != y_pred_last).astype(np.float32) / self.y_pred.shape[0]
                y_pred_last = np.copy(self.y_pred)
                if ite > 0 and delta_label < tol:
                    print('delta_label ', delta_label, '< tol ', tol)
                    print('Reached tolerance threshold. Stopping training.')
                    logfile.close()
                    break
            # train on batch
            if (index + 1) * batch_size > x.shape[0]:
                loss = self.model.train_on_batch(x=x[index * batch_size::],
                                                 y=[p[index * batch_size::], x[index * batch_size::]])
                index = 0
            else:
                loss = self.model.train_on_batch(x=x[index * batch_size:(index + 1) * batch_size],
                                                 y=[p[index * batch_size:(index + 1) * batch_size],
                                                    x[index * batch_size:(index + 1) * batch_size]])
                index += 1
            # save intermediate model
            if ite % save_interval == 0:
                # save DCEC model checkpoints
                print('saving model to:', save_dir + '/dcec_model_' + str(ite) + '.h5')
                self.model.save_weights(save_dir + '/dcec_model_' + str(ite) + '.h5')
            ite += 1
        # save the trained model
        logfile.close()
        print('saving model to:', save_dir + '/dcec_model_final.h5')
        self.model.save_weights(save_dir + '/dcec_model_final.h5')
My output layer is a Dense layer with output dimension (?, 128).
I am getting the following error in the clustering layer.
File "C:/Users/u/Desktop/trained/inference.py", line 45, in build
self.clusters = self.add_weight(shape=(self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters')
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 384, in add_weight
aggregation=aggregation)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\training\tracking\base.py", line 663, in _add_variable_with_custom_getter
**kwargs_for_getter)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\keras\engine\base_layer_utils.py", line 155, in make_variable
shape=variable_shape if variable_shape.rank else None)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\variables.py", line 259, in __call__
return cls._variable_v1_call(*args, **kwargs)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\variables.py", line 220, in _variable_v1_call
shape=shape)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\variables.py", line 198, in <lambda>
previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 2495, in default_variable_creator
shape=shape)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\variables.py", line 263, in __call__
return super(VariableMetaclass, cls).__call__(*args, **kwargs)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\resource_variable_ops.py", line 460, in __init__
shape=shape)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\resource_variable_ops.py", line 604, in _init_from_args
initial_value() if init_from_fn else initial_value,
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\keras\engine\base_layer_utils.py", line 135, in <lambda>
init_val = lambda: initializer(shape, dtype=dtype)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\init_ops.py", line 533, in __call__
shape, -limit, limit, dtype, seed=self.seed)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\random_ops.py", line 239, in random_uniform
shape = _ShapeTensor(shape)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\random_ops.py", line 44, in _ShapeTensor
return ops.convert_to_tensor(shape, dtype=dtype, name="shape")
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\framework\ops.py", line 1087, in convert_to_tensor
return convert_to_tensor_v2(value, dtype, preferred_dtype, name)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\framework\ops.py", line 1145, in convert_to_tensor_v2
as_ref=False)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\framework\ops.py", line 1224, in internal_convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\framework\constant_op.py", line 305, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\framework\constant_op.py", line 246, in constant
allow_broadcast=True)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\framework\constant_op.py", line 284, in _constant_impl
allow_broadcast=allow_broadcast))
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\framework\tensor_util.py", line 562, in make_tensor_proto
"supported type." % (type(values), values))
TypeError: Failed to convert object of type <class 'tuple'> to Tensor. Contents: (17, Dimension(128)). Consider casting elements to a supported type.
I have used the autoencoder's encoder part as an input. Following is the encoder part of the autoencoder.
ip = Input(shape=(256,256,1))
x = Conv2D(16, (3,3), padding='same')(ip)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Dropout(0.2)(x)
x = MaxPooling2D((2,2), padding='same')(x)
x = Flatten()(x)
x = Dense(128, name="encoded")(x)
Replace
input_dim = input_shape[1]
with
input_dim = input_shape[1].value
in the build() method of ClusteringLayer, so that input_dim will be 128 instead of Dimension(128).
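For clarity, a sketch of build() with that single change applied, based on the question's code:

def build(self, input_shape):
    assert len(input_shape) == 2
    # .value unwraps the TF 1.x Dimension object, giving a plain int (128)
    input_dim = input_shape[1].value
    self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
    self.clusters = self.add_weight(shape=(self.n_clusters, input_dim),
                                    initializer='glorot_uniform', name='clusters')
    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
    self.built = True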
Replace
input_dim = input_shape[1].value
with
input_dim = input_shape[1]
(in TF 2.x, input_shape[1] is already a plain int, so there is no .value attribute), and also replace
if (index + 1) * batch_size > x.shape[0]:
    loss = self.model.train_on_batch(x=x[index * batch_size::], y=[p[index * batch_size::], x[index * batch_size::]])
    index = 0
else:
    loss = self.model.train_on_batch(x=x[index * batch_size:(index + 1) * batch_size], y=[p[index * batch_size:(index + 1) * batch_size], x[index * batch_size:(index + 1) * batch_size]])
    index += 1
with
if (index + 1) * batch_size > x.shape[0]:
    loss = self.model.train_on_batch(x=x[index * batch_size::], y=p[index * batch_size::])
    index = 0
else:
    loss = self.model.train_on_batch(x=x[index * batch_size:(index + 1) * batch_size], y=p[index * batch_size:(index + 1) * batch_size])
    index += 1
since the model built in create_model() has a single output, so train_on_batch should receive a single target rather than the [p, x] pair used by the original two-output DCEC model.
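The two suggestions for input_dim pull in opposite directions because they target different TensorFlow versions. A small version-agnostic sketch (my addition, not from either answer) that works whether input_shape[1] is a tf.Dimension (TF 1.x) or a plain int (TF 2.x):

def build(self, input_shape):
    assert len(input_shape) == 2
    dim = input_shape[1]
    # Dimension objects expose .value; plain ints are already usable as-is
    input_dim = dim.value if hasattr(dim, 'value') else dim
    self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
    self.clusters = self.add_weight(shape=(self.n_clusters, input_dim),
                                    initializer='glorot_uniform', name='clusters')
    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
    self.built = True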
My code
import os
import math
import random
import librosa
import warnings
import numpy as np
import pandas as pd
from tqdm import tqdm
from glob import glob
import tensorflow as tf
from tensorflow import keras
from functools import partial
import IPython.display as ipd
import tensorflow_addons as tfa
import matplotlib.pyplot as plt
from tensorflow.keras import layers
from tensorflow.keras import mixed_precision

music_files = glob("gtzan/genres_original/jazz/*.wav")

def sinusoidal_embedding(x):
    embedding_min_frequency = 1.0
    embedding_max_frequency = 1000.0
    embedding_dims = 32
    frequencies = tf.exp(
        tf.linspace(
            tf.math.log(embedding_min_frequency),
            tf.math.log(embedding_max_frequency),
            embedding_dims // 2,
        )
    )
    angular_speeds = 2.0 * math.pi * frequencies
    embeddings = tf.concat(
        [tf.sin(angular_speeds * x), tf.cos(angular_speeds * x)], axis=3
    )
    return embeddings
def ResidualBlock(width):
    def apply(x):
        input_width = x.shape[3]
        if input_width == width:
            residual = x
        else:
            residual = layers.Conv2D(width, kernel_size=1)(x)
        x = layers.BatchNormalization(center=False, scale=False)(x)
        x = layers.Conv2D(
            width, kernel_size=3, padding="same", activation=keras.activations.swish
        )(x)
        x = layers.Conv2D(width, kernel_size=3, padding="same")(x)
        x = layers.Add()([x, residual])
        return x
    return apply

def DownBlock(width, block_depth):
    def apply(x):
        x, skips = x
        for _ in range(block_depth):
            x = ResidualBlock(width)(x)
            skips.append(x)
        x = layers.AveragePooling2D(pool_size=2)(x)
        return x
    return apply

def UpBlock(width, block_depth, attention=False):
    def apply(x):
        x, skips = x
        x = layers.UpSampling2D(size=2, interpolation="bilinear")(x)
        for _ in range(block_depth):
            skip = skips.pop()
            x = layers.Concatenate()([x, skip] if not attention else [
                x, skip, layers.MultiHeadAttention(
                    num_heads=4, key_dim=1, attention_axes=(1, 2)
                )(x, skip)
            ])
            x = ResidualBlock(width)(x)
        return x
    return apply

def get_network(widths, block_depth, dim1=256, dim2=128, channels=1, attention=False):
    noisy_input = keras.Input(shape=(dim1, dim2, channels))
    noise_variances = keras.Input(shape=(1, 1, 1))
    upsample_shape = (dim1, dim2)
    e = layers.Lambda(sinusoidal_embedding)(noise_variances)
    e = layers.UpSampling2D(size=upsample_shape, interpolation="nearest")(e)
    x = layers.Conv2D(widths[0], kernel_size=1)(noisy_input)
    x = layers.Concatenate()([x, e])
    skips = []
    for width in widths[:-1]:
        x = DownBlock(width, block_depth)([x, skips])
    for _ in range(block_depth):
        x = ResidualBlock(widths[-1])(x)
    for idx, width in enumerate(reversed(widths[:-1])):
        x = UpBlock(width, block_depth, attention=attention and idx == 0)([x, skips])
    x = layers.Conv2D(channels, kernel_size=1, kernel_initializer="zeros")(x)
    return keras.Model([noisy_input, noise_variances], x, name="residual_unet")
min_signal_rate = 0.02
max_signal_rate = 0.95
ema = 0.999

def spectral_norm(pred, real):
    norm_real = tf.norm(real, axis=(1, 2)) + 1e-6
    norm_pred = tf.norm(pred, axis=(1, 2)) + 1e-6
    return tf.reduce_mean(tf.abs(norm_real - norm_pred) / norm_real)

def time_derivative(pred, real, window=1):
    real_derivative = real[:, :-window, :, :] - real[:, window:, :, :]
    pred_derivative = pred[:, :-window, :, :] - pred[:, window:, :, :]
    return tf.reduce_mean(tf.keras.losses.MSE(real_derivative, pred_derivative))
class DDIM(keras.Model):
    def __init__(self, widths, block_depth, attention=False, dim1=256, dim2=128):
        super().__init__()
        self.normalizer = layers.Normalization(axis=(2, 3))
        self.network = get_network(widths, block_depth, attention=attention, dim1=dim1, dim2=dim2)
        self.ema_network = keras.models.clone_model(self.network)
        self.spec_mod = 0
        self.dx_mod = 0

    def compile(self, **kwargs):
        super().compile(**kwargs)
        self.noise_loss_tracker = keras.metrics.Mean(name="n_loss")
        self.data_loss_tracker = keras.metrics.Mean(name="d_loss")
        self.noise_spec_tracker = keras.metrics.Mean(name="n_spec")
        self.data_spec_tracker = keras.metrics.Mean(name="d_spec")
        self.noise_dx_tracker = keras.metrics.Mean(name="n_dx")
        self.data_dx_tracker = keras.metrics.Mean(name="d_dx")
        self.noise_total_tracker = keras.metrics.Mean(name="n_total")
        self.data_total_tracker = keras.metrics.Mean(name="d_total")

    @property
    def metrics(self):
        return [
            self.noise_loss_tracker,
            self.data_loss_tracker,
            self.noise_spec_tracker,
            self.data_spec_tracker,
            self.noise_dx_tracker,
            self.data_dx_tracker,
            self.noise_total_tracker,
            self.data_total_tracker
        ]

    def update_trackers(self, n_l, n_s, n_d, d_l, d_s, d_d):
        n_t = n_l + n_s + n_d
        d_t = d_l + d_s + d_d
        for loss, tracker in zip([n_l, n_s, n_d, n_t, d_l, d_s, d_d, d_t],
                                 [self.noise_loss_tracker, self.noise_spec_tracker, self.noise_dx_tracker, self.noise_total_tracker,
                                  self.data_loss_tracker, self.data_spec_tracker, self.data_dx_tracker, self.data_total_tracker]):
            tracker.update_state(loss)

    def get_losses(self, y_true, y_pred):
        return (tf.reduce_mean(self.loss(y_pred, y_true)), spectral_norm(y_pred, y_true), time_derivative(y_pred, y_true))

    def denormalize(self, data):
        data = self.normalizer.mean + data * self.normalizer.variance**0.5
        return tf.clip_by_value(data, -128.0, 128.0)

    def diffusion_schedule(self, diffusion_times):
        start_angle = tf.acos(max_signal_rate)
        end_angle = tf.acos(min_signal_rate)
        diffusion_angles = start_angle + diffusion_times * (end_angle - start_angle)
        signal_rates = tf.cos(diffusion_angles)
        noise_rates = tf.sin(diffusion_angles)
        return noise_rates, signal_rates

    def denoise(self, noisy_data, noise_rates, signal_rates, training):
        if training:
            network = self.network
        else:
            network = self.ema_network
        pred_noises = network([noisy_data, noise_rates**2], training=training)
        pred_data = (noisy_data - noise_rates * pred_noises) / signal_rates
        return pred_noises, pred_data

    def reverse_diffusion(self, initial_noise, diffusion_steps):
        num_examples = tf.shape(initial_noise)[0]
        step_size = 1.0 / diffusion_steps
        next_noisy_data = initial_noise
        for step in tqdm(range(diffusion_steps)):
            noisy_data = next_noisy_data
            diffusion_times = tf.ones((num_examples, 1, 1, 1)) - step * step_size
            noise_rates, signal_rates = self.diffusion_schedule(diffusion_times)
            pred_noises, pred_data = self.denoise(
                noisy_data, noise_rates, signal_rates, training=False
            )
            next_diffusion_times = diffusion_times - step_size
            next_noise_rates, next_signal_rates = self.diffusion_schedule(
                next_diffusion_times
            )
            next_noisy_data = (
                next_signal_rates * pred_data + next_noise_rates * pred_noises
            )
        return pred_data

    def generate(self, num_examples, shape, diffusion_steps):
        initial_noise = tf.random.normal(shape=(num_examples, shape[0], shape[1], shape[2]))
        generated_data = self.reverse_diffusion(initial_noise, diffusion_steps)
        generated_data = self.denormalize(generated_data)
        return generated_data

    def train_step(self, data):
        batch_size = tf.shape(data)[0]
        data = self.normalizer(data, training=True)
        noises = tf.random.normal(shape=tf.shape(data))
        diffusion_times = tf.random.uniform(
            shape=(batch_size, 1, 1, 1), minval=0.0, maxval=1.0
        )
        noise_rates, signal_rates = self.diffusion_schedule(diffusion_times)
        noisy_data = signal_rates * data + noise_rates * noises
        with tf.GradientTape() as tape:
            # train the network to separate noisy data into its components
            pred_noises, pred_data = self.denoise(
                noisy_data, noise_rates, signal_rates, training=True
            )
            noise_loss, noise_spec, noise_dx = self.get_losses(noises, pred_noises)  # safe_reduce_mean(self.loss(noises, pred_noises))  # used for training
            total_noise_loss = tf.reduce_sum([
                noise_loss,
                self.spec_mod * noise_spec,
                self.dx_mod * noise_dx
            ])
            data_loss, data_spec, data_dx = self.get_losses(data, pred_data)  # safe_reduce_mean(self.loss(data, pred_data))  # only used as metric
        gradients = tape.gradient(noise_loss, self.network.trainable_weights)
        self.optimizer.apply_gradients(zip(gradients, self.network.trainable_weights))
        self.update_trackers(
            noise_loss, noise_spec, noise_dx,
            data_loss, data_spec, data_dx
        )
        for weight, ema_weight in zip(self.network.weights, self.ema_network.weights):
            ema_weight.assign(ema * ema_weight + (1 - ema) * weight)
        return {m.name: m.result() for m in self.metrics}

    def test_step(self, data):
        batch_size = tf.shape(data)[0]
        data = self.normalizer(data, training=False)
        noises = tf.random.normal(shape=tf.shape(data))
        diffusion_times = tf.random.uniform(
            shape=(batch_size, 1, 1, 1), minval=0.0, maxval=1.0
        )
        noise_rates, signal_rates = self.diffusion_schedule(diffusion_times)
        noisy_data = signal_rates * data + noise_rates * noises
        pred_noises, pred_data = self.denoise(
            noisy_data, noise_rates, signal_rates, training=False
        )
        noise_loss = self.loss(noises, pred_noises)
        data_loss = self.loss(data, pred_data)
        self.data_loss_tracker.update_state(data_loss)
        self.noise_loss_tracker.update_state(noise_loss)
        return {m.name: m.result() for m in self.metrics}
def load_at_interval(x, rate=10_000, feats=256, duration=3.3):
    file = x[0].numpy().decode()
    idx = x[1].numpy()
    audio, sr = librosa.load(file, duration=duration, sr=rate, offset=idx)
    audio_fill = np.zeros(int(rate * duration), dtype=np.float32)
    audio_fill[:len(audio)] = audio
    spec = tf.signal.mdct(audio_fill, feats)
    return spec

def load_audio(x, y, rate=10_000, mdct_feats=256, duration=3.3):
    out = tf.py_function(lambda x, y: load_at_interval((x, y), rate=rate, feats=mdct_feats, duration=duration), inp=[x, y], Tout=tf.float32)
    return out

def get_files_dataset(
    glob_location,
    total_seconds=2,
    out_len=3.3,
    hop_size=1,
    max_feats=2048,
    batch_size=4,
    shuffer_size=1000,
    scale=1,
    rate=10_000,
    mdct_feats=256
):
    files = glob(
        glob_location,
        recursive=True
    )

    def file_list_generator():
        for _ in range(total_seconds):
            for file in files:
                yield file, _ * hop_size

    load_fn = partial(load_audio, duration=out_len, rate=rate, mdct_feats=mdct_feats)
    dg = tf.data.Dataset.from_generator(file_list_generator, output_signature=(
        tf.TensorSpec(shape=(), dtype=tf.string),
        tf.TensorSpec(shape=(), dtype=tf.int32))).shuffle(shuffer_size).map(
        load_fn, num_parallel_calls=tf.data.AUTOTUNE
    ).map(
        lambda x: tf.expand_dims(x, -1)[:max_feats, :, :] * scale
    ).map(
        lambda x: tf.ensure_shape(x, (max_feats, mdct_feats // 2, 1))
    ).batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return dg
dataset = get_files_dataset(
    "gtzan/genres_original/jazz/*.wav",
    out_len=3.3,
    max_feats=256,
    total_seconds=26,
    scale=1,
    batch_size=16
)

for test_batch in dataset.take(1):
    shape = test_batch.shape
    print(shape)

num_total_examples = (len(music_files) * 26) // shape[0]

model = DDIM(widths=[128, 128, 128, 128], block_depth=2,
             attention=True, dim1=shape[1], dim2=shape[2])
model.normalizer.adapt(dataset, steps=10)
model.compile(
    loss=tf.keras.losses.MSE,
    optimizer=tfa.optimizers.AdamW(
        learning_rate=3e-4,
        weight_decay=1e-4
    )
)

dataset = dataset.cache()
history = model.fit(dataset.repeat(), steps_per_epoch=num_total_examples, epochs=1)

model.save_weights('jazz_gan.h5')
model.save('jazz_gan', save_format='tf')

model.spec_mod = 1
model.dx_mod = 1
history = model.fit(dataset.repeat(), steps_per_epoch=num_total_examples, epochs=100)

specs = model.generate(8, shape[1:], 1000)

for i in range(4):
    plt.pcolormesh(np.log(np.abs(test_batch[i, :, :, 0].numpy().T)))
    plt.colorbar()
    plt.title(f"Real example {i+1}")
    plt.show()
    ipd.display(ipd.Audio(tf.signal.inverse_mdct(test_batch[i, :, :, 0]), rate=10_000))
dataset - https://www.kaggle.com/datasets/andradaolteanu/gtzan-dataset-music-genre-classification
Error
WARNING:tensorflow:From /usr/local/lib/python3.8/dist-packages/tensorflow/python/autograph/pyct/static_analysis/liveness.py:83: Analyzer.lamba_check (from tensorflow.python.autograph.pyct.static_analysis.liveness) is deprecated and will be removed after 2023-09-23.
Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
(16, 256, 128, 1)
37/160 [=====>........................] - ETA: 7:24 - n_loss: 0.3691 - d_loss: 5.2594 - n_spec: 0.3038 - d_spec: 1.4172 - n_dx: 0.7294 - d_dx: 10.2440 - n_total: 1.4023 - d_total: 16.9206
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-4-f8f615ccb63e> in <module>
370 )
371 dataset = dataset.cache()
--> 372 history = model.fit(dataset.repeat(), steps_per_epoch=num_total_examples, epochs=1)
373
374 model.save_weights('jazz_gan.h5')
1 frames
/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
50 try:
51 ctx.ensure_initialized()
---> 52 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
53 inputs, attrs, num_outputs)
54 except core._NotOkStatusException as e:
InvalidArgumentError: Graph execution error:
2 root error(s) found.
(0) INVALID_ARGUMENT: ValueError: Tensor's shape (257, 256) is not compatible with supplied shape [256, 256].
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/ops/script_ops.py", line 269, in __call__
return func(device, token, args)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/ops/script_ops.py", line 147, in __call__
outputs = self._call(device, args)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/ops/script_ops.py", line 154, in _call
ret = self._func(*args)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/autograph/impl/api.py", line 642, in wrapper
return func(*args, **kwargs)
File "/tmp/__autograph_generated_file_0wo6uu2.py", line 10, in <lambda>
out = ag__.converted_call(ag__.ld(tf).py_function, (ag__.autograph_artifact((lambda x, y: ag__.converted_call(ag__.ld(load_at_interval), ((ag__.ld(x), ag__.ld(y)),), dict(rate=ag__.ld(rate), feats=ag__.ld(mdct_feats), duration=ag__.ld(duration)), fscope))),), dict(inp=[ag__.ld(x), ag__.ld(y)], Tout=ag__.ld(tf).float32), fscope)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/autograph/impl/api.py", line 335, in converted_call
return _call_unconverted(f, args, kwargs, options, False)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/autograph/impl/api.py", line 458, in _call_unconverted
return f(*args, **kwargs)
File "<ipython-input-4-f8f615ccb63e>", line 302, in load_at_interval
spec = tf.signal.mdct(audio_fill, feats)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/util/traceback_utils.py", line 153, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/ops.py", line 1299, in set_shape
raise ValueError(f"Tensor's shape {self.shape} is not compatible "
ValueError: Tensor's shape (257, 256) is not compatible with supplied shape [256, 256].
[[{{node EagerPyFunc}}]]
[[IteratorGetNext]]
[[Shape/_4]]
(1) INVALID_ARGUMENT: ValueError: Tensor's shape (257, 256) is not compatible with supplied shape [256, 256].
Traceback (most recent call last):
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/ops/script_ops.py", line 269, in __call__
return func(device, token, args)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/ops/script_ops.py", line 147, in __call__
outputs = self._call(device, args)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/ops/script_ops.py", line 154, in _call
ret = self._func(*args)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/autograph/impl/api.py", line 642, in wrapper
return func(*args, **kwargs)
File "/tmp/__autograph_generated_file_0wo6uu2.py", line 10, in <lambda>
out = ag__.converted_call(ag__.ld(tf).py_function, (ag__.autograph_artifact((lambda x, y: ag__.converted_call(ag__.ld(load_at_interval), ((ag__.ld(x), ag__.ld(y)),), dict(rate=ag__.ld(rate), feats=ag__.ld(mdct_feats), duration=ag__.ld(duration)), fscope))),), dict(inp=[ag__.ld(x), ag__.ld(y)], Tout=ag__.ld(tf).float32), fscope)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/autograph/impl/api.py", line 335, in converted_call
return _call_unconverted(f, args, kwargs, options, False)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/autograph/impl/api.py", line 458, in _call_unconverted
return f(*args, **kwargs)
File "<ipython-input-4-f8f615ccb63e>", line 302, in load_at_interval
spec = tf.signal.mdct(audio_fill, feats)
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/util/traceback_utils.py", line 153, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/ops.py", line 1299, in set_shape
raise ValueError(f"Tensor's shape {self.shape} is not compatible "
ValueError: Tensor's shape (257, 256) is not compatible with supplied shape [256, 256].
[[{{node EagerPyFunc}}]]
[[IteratorGetNext]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_35945]
This problem appeared after the following warning started showing up; a few days ago this was not the case.
WARNING:tensorflow:From /usr/local/lib/python3.8/dist-packages/tensorflow/python/autograph/pyct/static_analysis/liveness.py:83: Analyzer.lamba_check (from tensorflow.python.autograph.pyct.static_analysis.liveness) is deprecated and will be removed after 2023-09-23.
Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
I looked for a solution on many sites but did not find one.
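A minimal standalone check (my addition, assuming the same rate, duration, and feats values as in get_files_dataset above) to see what shape tf.signal.mdct actually produces before tf.ensure_shape is applied:

import numpy as np
import tensorflow as tf

rate, duration, feats = 10_000, 3.3, 256
audio_fill = np.zeros(int(rate * duration), dtype=np.float32)  # 33000 samples, as in load_at_interval
spec = tf.signal.mdct(audio_fill, feats)
# Compare against the (max_feats, mdct_feats // 2) shape that tf.ensure_shape expects;
# a mismatch here reproduces the pipeline error outside of tf.data.
print(spec.shape)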
I am studying by designing a model structure using a Transformer encoder and decoder.
I trained the classification model on the encoder's result, and trained the generative model on the decoder's result (which takes the encoder's result as input).
It exports multiple results to the output.
The following error occurred while training.
I tracked the error using torch.autograd.set_detect_anomaly(True).
I saw an article about the same error on the PyTorch forum.
However, those posts were mostly using in-place operations such as += or x[:, 0] = 0, and in those cases the error was solved by fixing them (a minimal example of such a rewrite is shown below).
But I didn't use any of these operations.
I tried to change unsqueeze() and squeeze() to view(), and also to attach clone() to the tensor manipulations, but the error hasn't been fixed.
What is the problem?
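For reference, a minimal illustration (my addition) of the kind of in-place rewrite those forum posts describe:

import torch

x = torch.randn(3, requires_grad=True)

# Broken: exp's backward needs its own output, and += modifies that output
# in place, producing "one of the variables needed for gradient computation
# has been modified by an inplace operation" on backward.
# y = torch.exp(x)
# y += 1

# Fixed: the out-of-place addition leaves the tensor saved for backward untouched.
y = torch.exp(x)
y = y + 1
y.sum().backward()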
model code
import torch
import torch.nn as nn
import random
from torch.nn.utils.rnn import pad_sequence
import math
from pytorch_pretrained_bert import BertTokenizer, BertForSequenceClassification, BertForQuestionAnswering
from tqdm import tqdm
import pandas as pd

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

class SelfAttention(nn.Module):
    def __init__(self, embedding_dim, num_heads):
        super(SelfAttention, self).__init__()
        self.multihead_attn = nn.MultiheadAttention(embed_dim=embedding_dim, num_heads=num_heads,
                                                    batch_first=True)

    def forward(self, x):
        query = x
        key = x
        value = x
        attn_output = self.multihead_attn(query, key, value, need_weights=False)
        return attn_output

class Encoder(nn.Module):
    def __init__(self, embedding_dim):
        super(Encoder, self).__init__()
        self.embedding_dim = embedding_dim
        # self.pos_encoder = PositionalEncoding()
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=self.embedding_dim, nhead=8, batch_first=True)
        self.encoder = nn.TransformerEncoder(encoder_layer=self.encoder_layer, num_layers=6)
        self.feedforward = nn.Linear(self.embedding_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.encoder(x)
        cls_out = torch.mean(out, dim=-2)
        cls_out = self.feedforward(cls_out)
        cls_out = self.sigmoid(cls_out)
        return out, cls_out

class Decoder(nn.Module):
    def __init__(self, embedding_dim):
        super(Decoder, self).__init__()
        # self.bert = BertForQuestionAnswering.from_pretrained('bert-base-uncased')
        self.embedding_dim = embedding_dim
        self.decoder_layer = nn.TransformerDecoderLayer(d_model=self.embedding_dim, nhead=8, batch_first=True)
        self.decoder = nn.TransformerDecoder(decoder_layer=self.decoder_layer, num_layers=6)

    def forward(self, tgt, memory):
        out = self.decoder(tgt, memory)
        return out

class AlzhBERT(nn.Module):
    def __init__(self, embedding_dim):
        super(AlzhBERT, self).__init__()
        self.embedding_dim = embedding_dim
        self.max_sent_length = 7
        self.token_level_attn = nn.ModuleList([SelfAttention(self.embedding_dim, num_heads=8) for _ in range(10)])
        self.token_level_attn_single = SelfAttention(self.embedding_dim, num_heads=8)
        self.sentence_level_attn = SelfAttention(self.embedding_dim, num_heads=8)
        self.encoder = Encoder(embedding_dim=embedding_dim)
        self.decoder = Decoder(embedding_dim=embedding_dim)

    def forward(self, X_batch):
        i = 0
        enc_outs = {}
        dec_outs = {}
        for datastruct in X_batch:
            enc_outs[i] = []
            dec_outs[i] = []
            j = 0
            for section in datastruct.sections:
                print(i, " + ", j)
                inv = section.inv.requires_grad_(True).to(device)
                y_dec = section.next_uttr.requires_grad_(True).to(device)
                par = section.par
                # print(par)
                try:
                    tmp = par.dim()
                except AttributeError:
                    print(par)
                    print("attr err")
                    j = j + 1
                    continue
                # par = par.permute(1, 0, 2)  # (seq_len, sent_len, embed) => self-attention in one pass
                # multiple self-attentions
                # for p in par:
                result = self.token_level_attn_single(par.to(device).requires_grad_(True))[0]
                res = torch.mean(result, dim=-2).unsqueeze(0)
                res_sent = self.sentence_level_attn(res.to(device))[0]
                context = torch.mean(res_sent, dim=-3)
                inv_input = torch.mean(inv, dim=-2)
                # x_enc = torch.concat((inv_input, context))
                # x_enc = x_enc.view([1, -1, self.embedding_dim])
                enc_out, cls_out = self.encoder(torch.concat([inv_input, context]).unsqueeze(0))
                # y_dec = torch.mean(y_dec, dim=-2).to(device)
                # enc_out = torch.mean(enc_out, dim=-2).unsqueeze(0).to(device)
                dec_out = self.decoder(y_dec, enc_out.to(device))
                enc_outs[i].append(cls_out)
                dec_outs[i].append(dec_out)
                j = j + 1
            enc_outs[i] = torch.tensor(enc_outs[i], requires_grad=True)
            i = i + 1
        return enc_outs, dec_outs
train code
import os
from datetime import datetime
from torch.utils.tensorboard import SummaryWriter

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print("device: ", device)
torch.autograd.set_detect_anomaly(True)

def train_loop(dataloader, model, loss_fn, optimizer, epochs):
    # dataloader = dataloader["train"]
    size = len(dataloader.dataset)
    writer = SummaryWriter()
    enc_optimizer = optimizer[0]
    dec_optimizer = optimizer[1]
    for epoch in range(epochs):
        enc_loss_hist = []
        dec_loss_hist = []
        accuracy = []
        print("======== epoch ", epoch, "==========\n")
        for i, (Xs, ys) in tqdm(enumerate(dataloader), desc="Train..."):
            X_folds, y_folds = cross_validation(10, Xs, ys)
            model.train()
            for X, y in zip(X_folds['train'], y_folds['train']):  # X is a list of DataStructs
                # print("<Check Data>")
                # print("X 0: ", X[0])
                # print("label 0: ", y[0])
                # Prediction and loss
                # X = batch_to_tensor(X)
                # X = torch.tensor(X).to(device)
                y = torch.tensor(y, dtype=torch.float32).to(device)
                enc_preds, dec_preds = model(X)
                for k in range(len(X)):
                    for t in range(len(enc_preds[k])):
                        enc_loss = loss_fn(y[k].to(device), enc_preds[k][t].to(device)).requires_grad_(True)
                        dec_loss = loss_fn(X[k].sections[t].next_uttr.to(device), dec_preds[k][t].to(device)).requires_grad_(True)
                        cls_out = torch.tensor(1 if enc_preds[k][t] >= 0.5 else 0)
                        cls_loss = torch.sum(cls_out == y[k])
                        accuracy.append(cls_loss)
                        # Backpropagation
                        enc_optimizer.zero_grad()
                        dec_optimizer.zero_grad()
                        enc_loss.backward(retain_graph=True)
                        enc_optimizer.step()
                        dec_loss.backward()
                        dec_optimizer.step()
                        enc_loss_hist.append(enc_loss)
                        dec_loss_hist.append(dec_loss)
            cross_validation_loop(X_folds["valid"], y_folds["valid"], model, loss_fn, epoch)
        enc_loss_save = torch.mean(torch.tensor(enc_loss_hist))
        dec_loss_save = torch.mean(torch.tensor(dec_loss_hist))
        accuracy_save = torch.mean(torch.tensor(accuracy, dtype=torch.float32))
        writer.add_scalar("Avg Enc Loss/train", enc_loss_save, epoch)
        writer.add_scalar("Avg Dec Loss/train", dec_loss_save, epoch)
        writer.add_scalar("Avg Accuracy/train", accuracy_save)
        if device == "cuda":
            saved_model_dir = "/home/juny/AlzheimerModel/checkpoint"
        else:
            saved_model_dir = "./saved_model"
        now = datetime.now()
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': enc_optimizer.state_dict(),
            'loss': [enc_loss_save, dec_loss_save],
        }, os.path.join('/home/juny/AlzheimerModel/checkpoint',
                        now.strftime("%Y-%m-%d-%H-%M") + "-e" + str(epoch) + ".pt"))
        torch.save(model.state_dict(), os.path.join(saved_model_dir, "saved_model" + now.strftime("%Y-%m-%d-%H-%M") + ".pt"))
        encloss, decloss, current = enc_loss_save, dec_loss_save.item(), i * len(X)
        print(f"enc loss: {encloss:>7f} dec loss: {decloss:>7f} [{current:>5d}/{size:>5d}]")
    writer.flush()
    writer.close()
error
C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\autograd\__init__.py:173: UserWarning: Error detected in NativeLayerNormBackward0. Traceback of forward call that caused the error:
File "C:/Users/usr/PycharmProjects/project/train.py", line 265, in <module>
train_loop(dataloader=train_dataloader, model=model, loss_fn=loss_fn, optimizer=(enc_optimizer, dec_optimizer), epochs=epochs)
File "C:/Users/usr/PycharmProjects/project/train.py", line 47, in train_loop
enc_preds, dec_preds = model(X)
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\usr\PycharmProjects\project\AlzhBERT.py", line 139, in forward
dec_out = self.decoder(y_dec, enc_out.to(device))
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\usr\PycharmProjects\project\AlzhBERT.py", line 84, in forward
out = self.decoder(tgt, memory)
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\transformer.py", line 291, in forward
output = mod(output, memory, tgt_mask=tgt_mask,
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\transformer.py", line 578, in forward
x = self.norm3(x + self._ff_block(x))
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\modules\normalization.py", line 189, in forward
return F.layer_norm(
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\nn\functional.py", line 2503, in layer_norm
return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
(Triggered internally at C:\cb\pytorch_1000000000000\work\torch\csrc\autograd\python_anomaly_mode.cpp:104.)
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
Train...: 0it [00:05, ?it/s]
Traceback (most recent call last):
File "C:/Users/usr/PycharmProjects/project/train.py", line 265, in <module>
train_loop(dataloader=train_dataloader, model=model, loss_fn=loss_fn, optimizer=(enc_optimizer, dec_optimizer), epochs=epochs)
File "C:/Users/usr/PycharmProjects/project/train.py", line 65, in train_loop
loss.backward(retain_graph=True)
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\_tensor.py", line 396, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File "C:\Users\usr\anaconda3\envs\pytorch-python3.8\lib\site-packages\torch\autograd\__init__.py", line 173, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [768]] is at version 1; expected version 0 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!
Process finished with exit code 1
I am trying to create a GNN that models a protein. However, I am running into an error with GraphConv (I get the same error with GCNConv). I do not understand why I am getting this error when the shapes should be able to be multiplied. I think the error must have something to do with the custom dataset I created, but I am not 100% sure. Please let me know if you have had a similar issue or know how to fix this. Thank you.
EDIT: Even if I change embedding_size to 1479, I still get: RuntimeError: mat1 and mat2 shapes cannot be multiplied (1479x1 and 1479x1479).
Custom dataset:
import os
import numpy as np
import torch
import torch_geometric.data as geom_data
from rdkit import Chem

class ProteinDataset(geom_data.Dataset):
    def __init__(self, root, transform=None, pre_transform=None):
        # root = where the data set is stored
        super(ProteinDataset, self).__init__(root, transform, pre_transform)
        self.root = root

    @property
    def raw_file_names(self):
        return os.listdir(f'{self.root}/raw')

    @property
    def processed_file_names(self):
        inxs = []
        for pdb in self.raw_paths:
            inxs.append(pdb.split('/')[-1].split('.p')[0])
        return [f'{i}.pt' for i in inxs]

    def download(self):
        pass

    def process(self):
        for pdb in self.raw_paths:
            try:
                mol_obj = Chem.rdmolfiles.MolFromPDBFile(pdb)
            except AttributeError:
                os.remove(pdb)
                continue
            # Get node features
            node_feats = self._get_node_features(mol_obj).reshape([-1, 1])
            # Get edge features
            edge_feats = self._get_edge_features(mol_obj).reshape([-1, 1])
            # Get adjacency info
            edge_index = self._get_adjacency_info(mol_obj)
            label = self._get_labels(pdb)
            # Create Data object
            data = geom_data.Data(x=node_feats,
                                  edge_index=edge_index,
                                  edge_attr=edge_feats,
                                  y=label)
            i = pdb.split('/')[-1].split('.p')[0]
            torch.save(data, os.path.join(self.processed_dir, f'{i}.pt'))

    def _get_node_features(self, mol):
        all_node_feats = []
        for atom in mol.GetAtoms():
            all_node_feats.append(atom.GetMass())
        all_node_feats = np.asarray(all_node_feats)
        return torch.tensor(all_node_feats, dtype=torch.float)

    def _get_edge_features(self, mol):
        all_edge_feats = []
        dists = Chem.rdmolops.Get3DDistanceMatrix(mol)
        # CA-CA distances
        for bond in mol.GetBonds():
            begin = bond.GetBeginAtomIdx()
            end = bond.GetEndAtomIdx()
            all_edge_feats.append(dists[begin, end])
        all_edge_feats = np.asarray(all_edge_feats)
        return torch.tensor(all_edge_feats, dtype=torch.float)

    def _get_adjacency_info(self, mol):
        adj_matrix = Chem.rdmolops.GetAdjacencyMatrix(mol)
        row, col = np.where(adj_matrix)
        coo = np.array(list(zip(row, col)))
        coo = np.reshape(coo, (2, -1))
        return torch.tensor(coo, dtype=torch.long)

    def _get_labels(self, fn):
        with open(fn, 'r') as f:
            label = float(f.readline())
        label = np.asarray([label])
        return torch.tensor(label, dtype=torch.float)

    def len(self):
        return len(self.raw_paths)

    def get(self, inx):
        data = torch.load(self.processed_paths[inx])
        return data
Model:
import torch
from torch.nn import Linear
from torch_geometric.nn import GraphConv, TopKPooling
from torch_geometric.nn import global_max_pool as gmp, global_mean_pool as gap

class GNN(torch.nn.Module):
    def __init__(self, feature_size):
        super(GNN, self).__init__()
        embedding_size = 1024
        # GNN layers
        self.conv1 = GraphConv(feature_size, embedding_size)
        self.head1 = Linear(embedding_size*3, embedding_size)
        self.pool1 = TopKPooling(embedding_size, ratio=0.8)
        self.conv2 = GraphConv(embedding_size, embedding_size)
        self.head2 = Linear(embedding_size*3, embedding_size)
        self.pool2 = TopKPooling(embedding_size, ratio=0.5)
        self.conv3 = GraphConv(embedding_size, embedding_size)
        self.head3 = Linear(embedding_size*3, embedding_size)
        self.pool3 = TopKPooling(embedding_size, ratio=0.2)
        # Linear layers
        self.fc1 = Linear(embedding_size*2, 1024)
        self.fc2 = Linear(1024, 128)
        self.fc3 = Linear(128, 1)

    def forward(self, x, edge_attr, edge_index, batch_index):
        # First block
        x = self.conv1(x, edge_index).relu()
        x = self.head1(x)
        x, edge_index, edge_attr, batch_index, _, _ = self.pool1(x,
                                                                 edge_index,
                                                                 None,
                                                                 batch_index)
        x1 = torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1)
        # Second block
        x = self.conv2(x, edge_index).relu()
        x = self.head2(x)
        x, edge_index, edge_attr, batch_index, _, _ = self.pool2(x,
                                                                 edge_index,
                                                                 None,
                                                                 batch_index)
        x2 = torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1)
        # Third block
        x = self.conv3(x, edge_index).relu()
        x = self.head3(x)
        x, edge_index, edge_attr, batch_index, _, _ = self.pool3(x,
                                                                 edge_index,
                                                                 None,
                                                                 batch_index)
        x3 = torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1)
        # Combine pooled vectors
        x = x1 + x2 + x3
        # Apply linear layers
        x = self.fc1(x).relu()
        x = self.fc2(x).relu()
        x = self.fc3(x)
        return x
Training:
import torch
import torch.nn as nn
from torch_geometric.loader import DataLoader

device = torch.device('cuda')

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

# Loading the dataset
train_set = ProteinDataset(root='data/lys50_2/train')
test_set = ProteinDataset(root='data/lys50_2/test')
print('Shape of input:', train_set[0].x.shape[0])

# Loading the model
model = GNN(feature_size=train_set[0].x.shape[0])
model = model.to(device)
print(f'Number of parameters: {count_parameters(model)}')
print(model)

# Loss and optimizer
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.00005)
print(optimizer)

# Prepare for training
train_loader = DataLoader(train_set, batch_size=1, shuffle=True)
test_loader = DataLoader(test_set, batch_size=1, shuffle=False)

def train(m, opt):
    loss_sum = 0.0
    for _, batch in enumerate(train_loader):
        # Use GPU
        batch.to(device)
        # Reset grad
        opt.zero_grad()
        # Pass node features and connections
        pred = m(batch.x.float(),
                 batch.edge_attr.float(),
                 batch.edge_index,
                 batch.batch)
        # Calculate loss and gradients
        loss = loss_fn(pred, batch.y)
        loss.backward()
        loss_sum += loss.item()
        # Update using the gradients
        opt.step()
    return loss_sum / len(train_loader)

def validate(m):
    loss_sum = 0.0
    for _, batch in enumerate(test_loader):
        # Use GPU
        batch.to(device)
        # No grad
        with torch.no_grad():
            pred = m(batch.x.float(),
                     batch.edge_attr.float(),
                     batch.edge_index,
                     batch.batch)
        # Calculate loss
        loss = loss_fn(pred, batch.y)
        loss_sum += loss.item()
    return loss_sum / len(test_loader)

model.zero_grad()
optimizer.zero_grad()

# Loop for training
for i in range(101):
    loss = train(model, optimizer)
    if (i % 10 == 0):
        loss_v = validate(model)
        print(i, loss, loss_v)
    else:
        print(i, loss)
Error when running training:
Traceback (most recent call last):
File "/home/spencer/sh3/gnn/./train.py", line 79, in <module>
loss = train(model,optimizer)
File "/home/spencer/sh3/gnn/./train.py", line 44, in train
pred = m(batch.x.float(),
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/feig/s1/spencer/sh3/gnn/model2.py", line 32, in forward
x = self.conv1(x, edge_index).relu()
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch_geometric/nn/conv/graph_conv.py", line 71, in forward
out = self.lin_rel(out)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch_geometric/nn/dense/linear.py", line 109, in forward
return F.linear(x, self.weight, self.bias)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/functional.py", line 1848, in linear
return torch._C._nn.linear(input, weight, bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1479x1 and 1479x1024)
The error tells you that the input shapes don't match.
You can reshape the input in the forward method like this: x = x.view(1, 1479), but make sure that this is what you need - this error usually indicates a wrongly shaped dataset or passing the wrong input.
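In this particular case (my reading of the dataset code above, not part of the original answer), _get_node_features returns one scalar per atom and is reshaped to [-1, 1], so x has shape (num_nodes, 1): x.shape[0] == 1479 is the node count, while the per-node feature count is x.shape[1] == 1. A sketch of the corresponding change:

# x is (num_nodes, num_node_features) == (1479, 1), so build the model
# from the feature dimension rather than the node count
print('Shape of input:', train_set[0].x.shape)      # torch.Size([1479, 1])
model = GNN(feature_size=train_set[0].x.shape[1])   # feature_size = 1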
OS: Manjaro Linux x64
CUDA: 11.0.3
TF/Keras: 2.4(.1)
Py: 3.8
Hello,
I'm trying to build some kind of W-VAE-GAN. I keep running into that very same error over and over again, and I already had that problem with Keras 2.3, interestingly NOT with TF/K 2.2. Unfortunately I need to use Keras 2.4 because I'm supposed to run my code on our university server with these exact TF/K and CUDA versions. At this point I'm just trying to make sure my code works as intended.
The error I get is the following, with the exact lines commented in my code:
...
Epoch 1/20
Traceback (most recent call last):
File "/home/peer/Programmierkram/BA/Metal_GAN/wvaegan.py", line 282, in <module>
gan.fit(gen_trans, batch_size=batch_size, epochs=epochs, callbacks=[callback])
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py", line 1100, in fit
tmp_logs = self.train_function(iterator)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 828, in __call__
result = self._call(*args, **kwds)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 871, in _call
self._initialize(args, kwds, add_initializers_to=initializers)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 725, in _initialize
self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 2969, in _get_concrete_function_internal_garbage_collected
graph_function, _ = self._maybe_define_function(args, kwargs)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3361, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3196, in _create_graph_function
func_graph_module.func_graph_from_py_func(
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 990, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 634, in wrapped_fn
out = weak_wrapped_fn().__wrapped__(*args, **kwds)
File "/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 977, in wrapper
raise e.ag_error_metadata.to_exception(e)
tensorflow.python.framework.errors_impl.OperatorNotAllowedInGraphError: in user code:
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:805 train_function *
return step_function(self, iterator)
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:795 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
return fn(*args, **kwargs)
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:788 run_step **
outputs = model.train_step(data)
/home/peer/Programmierkram/BA/Metal_GAN/wvaegan.py:190 train_step
z_mean, z_log_var, z = self.encoder(clip_img) # <------ WHY NO ERROR HERE?
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:505 __iter__
self._disallow_iteration()
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:498 _disallow_iteration
self._disallow_when_autograph_enabled("iterating over `tf.Tensor`")
/home/peer/Programmierkram/BA/Metal_GAN/venv/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:474 _disallow_when_autograph_enabled
raise errors.OperatorNotAllowedInGraphError(
OperatorNotAllowedInGraphError: iterating over `tf.Tensor` is not allowed: AutoGraph did convert this function. This might indicate you are trying to use an unsupported feature.
What I do not understand is how this error comes about in the first place, as I successfully executed the VAE part using TF 2.2 without any error like this, and, more importantly, there is no iteration over any tensor obvious to me. Even if I comment out the for-loop in my train_step, the same error occurs a few lines later in the same context. I have also tried decorating train_step() with @tf.function, yet nothing changed.
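For what it's worth, a minimal sketch (my addition) of one way this exact error appears without any visible loop: Python tuple unpacking iterates over its right-hand side, so unpacking a single-output model's result iterates over a tf.Tensor. Note that get_encoder() below builds keras.models.Model(encoder_inputs, latent_z) with one output, while train_step unpacks three values from self.encoder(clip_img).

import tensorflow as tf
from tensorflow import keras

inp = keras.Input(shape=(4,))
out = keras.layers.Dense(2)(inp)   # a single output tensor
model = keras.Model(inp, out)

@tf.function
def step(x):
    # model(x) is one tensor; unpacking it into three names iterates over
    # the tensor and raises OperatorNotAllowedInGraphError in graph mode
    a, b, c = model(x)
    return a

step(tf.zeros((1, 4)))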
The code I used is the following:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow.keras.backend as K
from keras.preprocessing.image import ImageDataGenerator
import itertools
import scipy.io
import matplotlib.pyplot as plt
import matplotlib.image as PIL

runOnGPU = 0

if runOnGPU == 1:
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
        except RuntimeError as e:
            print(e)
else:
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

path_clipped_train = os.path.join('./sinograms/clip')
path_transparent_train = os.path.join('./sinograms/transparent')

img_width, img_height = 512, 512
bottleneck = 1024 * 2
filters = (1024, 512, 256, 64)
filter_size = (3, 3, 3, 3)
batch_size = 4
epochs = 20
dsc_steps = 1
gp_w = 10.0
beta_v = 2
learning_rate = 125e-5
latent_dim = 2
input_shape = (1, img_width, img_height, 1)

dataset_gen1 = ImageDataGenerator(rescale=1 / 255, dtype="float32")
dataset_gen2 = ImageDataGenerator(rescale=1 / 255, dtype="float32")

gen_trans = dataset_gen1.flow_from_directory(path_transparent_train,
                                             target_size=(img_width, img_height),
                                             color_mode='grayscale',
                                             classes=[''],
                                             class_mode=None,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             )

gen_clip = dataset_gen2.flow_from_directory(path_clipped_train,
                                            target_size=(img_width, img_height),
                                            color_mode='grayscale',
                                            classes=[''],
                                            class_mode=None,
                                            batch_size=batch_size,
                                            shuffle=False,
                                            )
class Sampling(layers.Layer):
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon

def get_encoder():
    encoder_inputs = keras.Input(shape=input_shape[1:], name="encoder_input")
    enc = encoder_inputs
    for (numFilters, szFilters) in zip(filters, filter_size):
        enc = layers.Conv2D(numFilters, szFilters, activation='relu', strides=2, padding='same')(enc)
        enc = layers.BatchNormalization()(enc)
        enc = layers.Dropout(0.2)(enc)
    conv_shape = K.int_shape(enc)[1:]
    enc = layers.Flatten()(enc)
    enc = layers.Dense(bottleneck, activation='relu', name="bottleneck")(enc)
    enc = layers.BatchNormalization()(enc)
    z_mean = layers.Dense(latent_dim, name="z_mean")(enc)
    z_log_var = layers.Dense(latent_dim, name="z_log_var")(enc)
    latent_z = Sampling()([z_mean, z_log_var])
    encoder_model = keras.models.Model(encoder_inputs, latent_z, name="encoder")
    return encoder_model, conv_shape

enc_model, conv_shape = get_encoder()
enc_model.summary()

def get_decoder():
    latent_input = keras.Input(shape=(latent_dim,))
    dec = layers.Dense(conv_shape[0] * conv_shape[1] * conv_shape[2], activation='relu')(latent_input)
    dec = layers.Reshape(conv_shape)(dec)
    for (numFilters, szFilters) in zip(reversed(filters), reversed(filter_size)):
        dec = layers.Conv2DTranspose(numFilters, szFilters, activation='relu', strides=2, padding='same')(dec)
        dec = layers.BatchNormalization()(dec)
        dec = layers.Dropout(0.2)(dec)
    decoder_outputs = layers.Conv2DTranspose(1, 3, activation='relu', padding='same')(dec)
    decoder_model = keras.models.Model(latent_input, decoder_outputs, name="decoder")
    return decoder_model

dec_model = get_decoder()
dec_model.summary()

def get_discriminator():
    dscr_input = keras.Input(shape=input_shape[1:])
    dscr = dscr_input
    for numFilters in filters:
        dscr = layers.Conv2D(numFilters, kernel_size=5, activation='relu', strides=2, padding='same')(dscr)
    dscr = layers.Flatten()(dscr)
    dscr = layers.Dense(1, activation="relu", name="dsc_end")(dscr)
    discriminator_model = keras.models.Model(dscr_input, dscr, name="discriminator")
    return discriminator_model

dsc_model = get_discriminator()
dsc_model.summary()
class GAN(keras.Model):
    def __init__(self,
                 discriminator,
                 encoder,
                 decoder,
                 latent_dim,
                 dsc_steps=dsc_steps,
                 gp_w=gp_w,
                 ):
        super(GAN, self).__init__()
        self.discriminator = discriminator
        self.encoder = encoder
        self.decoder = decoder
        self.latent_dim = latent_dim
        self.dsc_steps = dsc_steps
        self.gp_w = gp_w

    def compile(self,
                dsc_optimizer, enc_optimizer, dec_optimizer,
                dsc_loss_fn, enc_loss_fn, dec_loss_fn):
        super(GAN, self).compile()
        self.dsc_optimizer = dsc_optimizer
        self.enc_optimizer = enc_optimizer
        self.dec_optimizer = dec_optimizer
        self.dsc_loss_fn = dsc_loss_fn
        self.enc_loss_fn = enc_loss_fn
        self.dec_loss_fn = dec_loss_fn

    def call(self, data):
        ds = self.discriminator(data)
        e = self.encoder(data)
        d = self.decoder(e)
        return ds, d  # call() must return the outputs; the original returned None

    def gradient_penalty(self, batch_size, ref_img, gen_img):
        # WGAN-GP: penalize the discriminator's gradient norm deviating from 1
        # on random interpolations between reference and generated images.
        alpha = tf.random.normal([batch_size, 1, 1, 1], 0.0, 1.0)  # tf.random_normal is TF1 API
        diff = gen_img - ref_img
        interpolated = ref_img + alpha * diff
        with tf.GradientTape() as gp_tape:
            gp_tape.watch(interpolated)
            pred = self.discriminator(interpolated, training=True)
        grads = gp_tape.gradient(pred, [interpolated])[0]
        norm = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1, 2, 3]))
        gp = tf.reduce_mean((norm - 1.0) ** 2)
        return gp
    # @tf.function  # decorating with this doesn't make any difference
    def train_step(self, data):
        trans_img = data
        clip_img = data
        batch_size = tf.shape(trans_img)[0]  # a scalar, not a [:1] slice
        for _ in range(self.dsc_steps):
            with tf.GradientTape() as tape:
                z_mean, z_log_var, z = self.encoder(clip_img)  # <------ ERROR HERE
                gen_img = self.decoder(z)
                gen_logits = self.discriminator(gen_img)
                ref_logits = self.discriminator(trans_img)
                dsc_cost = self.dsc_loss_fn(real_img=ref_logits, fake_img=gen_logits)  # keywords match the loss signature
                gp = self.gradient_penalty(batch_size, trans_img, gen_img)
                dsc_loss = dsc_cost + gp * self.gp_w
            dsc_gradient = tape.gradient(dsc_loss, self.discriminator.trainable_variables)
            self.dsc_optimizer.apply_gradients(zip(dsc_gradient, self.discriminator.trainable_variables))
        with tf.GradientTape(persistent=True) as tape:  # persistent: the tape is queried twice below
            z_mean, z_log_var, z = self.encoder(clip_img)  # <------ ERROR ALSO HERE IF dsc_steps = 0
            gen_img = self.decoder(z)
            gen_img_logits = self.discriminator(gen_img)
            dec_loss = self.dec_loss_fn(gen_img_logits)
            kl_loss = self.enc_loss_fn(z_mean, z_log_var)  # the compiled loss; GAN has no kl_loss method
        enc_gradient = tape.gradient(kl_loss, self.encoder.trainable_variables)
        self.enc_optimizer.apply_gradients(zip(enc_gradient, self.encoder.trainable_variables))
        dec_gradient = tape.gradient(dec_loss, self.decoder.trainable_variables)
        self.dec_optimizer.apply_gradients(zip(dec_gradient, self.decoder.trainable_variables))
        del tape
        return {"dsc_loss": dsc_loss, "KL-Loss": kl_loss, "dec_loss": dec_loss}
class GANMonitor(keras.callbacks.Callback):
    def __init__(self, num_img=6, latent_dim=latent_dim):
        self.num_img = num_img
        self.latent_dim = latent_dim

    def on_epoch_end(self, epoch, logs=None):
        # Decode random latent vectors; the original called decoder() with no input.
        random_latent_vectors = tf.random.normal(shape=(self.num_img, self.latent_dim))
        generated_images = self.model.decoder(random_latent_vectors)
        generated_images = (generated_images * 127.5) + 127.5
        for i in range(self.num_img):
            img = generated_images[i].numpy()  # .np() is not a tensor method
            img = keras.preprocessing.image.array_to_img(img)
            img.save("generated_img_{i}_{epoch}.png".format(i=i, epoch=epoch))
encoder_optimizer = keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5, beta_2=0.9)
decoder_optimizer = keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5, beta_2=0.9)
discriminator_optimizer = keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5, beta_2=0.9)


def discriminator_loss(real_img, fake_img):
    real_loss = tf.reduce_mean(real_img)
    fake_loss = tf.reduce_mean(fake_img)
    return fake_loss - real_loss


def generator_loss(fake_img):
    return -tf.reduce_mean(fake_img)


def kl_loss(z_mean, z_log_var):
    kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
    kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
    return beta_v * kl_loss


def reconstruction_loss(data, reconstruction):
    rec_loss = tf.reduce_mean(
        tf.reduce_sum(keras.losses.mse(data, reconstruction), axis=(1, 2))
    )
    return rec_loss


callback = GANMonitor(num_img=3, latent_dim=latent_dim)

gan = GAN(
    discriminator=dsc_model,
    encoder=enc_model,
    decoder=dec_model,
    latent_dim=latent_dim,
    dsc_steps=dsc_steps,
)
gan.compile(
    dsc_optimizer=discriminator_optimizer,
    enc_optimizer=encoder_optimizer,
    dec_optimizer=decoder_optimizer,
    dsc_loss_fn=discriminator_loss,
    enc_loss_fn=kl_loss,
    dec_loss_fn=generator_loss,
)
gan.fit(gen_trans, batch_size=batch_size, epochs=epochs, callbacks=[callback])
I'd be very thankful for any help, because I haven't been able to find or work out a solution to this. I've read that in TF 2.5 some errors like this shouldn't occur anymore, but using TF 2.5 is not an option.
I found the problem.
In the original Keras example for a VAE, the encoder ends in three layers, namely z_mean, z_log_var and latent_z. While it was possible to access all terminal layers of a model in TF 2.2, as I did in my train_step
z_mean, z_log_var, z = encoder(data)
here only (latent_) z is returned, as defined in the encoder model initialization.
By defining the model with a list of output layers,
encoder_model = keras.models.Model(encoder_inputs, [z_mean, z_log_var, latent_z], name="encoder")
all z* become accessible.
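To make the difference concrete, here is a minimal, self-contained sketch (toy layer sizes, not the network above) of an encoder defined with a list of outputs, for which the three-way unpacking then works:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

inp = keras.Input(shape=(8,))
z_mean = layers.Dense(2, name="z_mean")(inp)
z_log_var = layers.Dense(2, name="z_log_var")(inp)
latent_z = layers.Dense(2, name="latent_z")(inp)  # stands in for the Sampling layer
encoder = keras.models.Model(inp, [z_mean, z_log_var, latent_z], name="encoder")

m, v, z = encoder(tf.zeros((4, 8)))  # three tensors, one per output layer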
I assume that getting multiple variables from a model with a single output tensor,
x1, x2, x3 = model(data)
results in a loop over that tensor, something like
for i, x_i in enumerate(model(data)): ...
which is the only explanation for an iteration over a tensor I can think of.
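This guess matches TensorFlow's actual behaviour: unpacking a model's single output tensor iterates over its first axis, so in eager mode you silently get batch slices rather than separate outputs, while inside a tf.function the iteration is rejected with an error. A small demonstration (eager execution assumed):
import tensorflow as tf

t = tf.zeros((3, 5))  # a single tensor whose first axis has length 3
a, b, c = t           # unpacking iterates over axis 0 ...
print(a.shape, b.shape, c.shape)  # ... each is a (5,) slice, not a model output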
However, reading the code first might have been useful; I'll try to remember that.
I keep getting this error and can't seem to solve it. The problem came up while upgrading Keras to the new version that is integrated with TensorFlow. Downgrading does work around it, but I don't consider that a long-term solution. The code below recreates the error using the Keras custom-layers examples; I chose it so the error can be reproduced without downloading data sets or importing more functions. If anyone can track down the origin of the problem, other issues may follow that I can hopefully solve myself.
Here are the code and the error:
from __future__ import print_function
import keras
from keras.models import Sequential
from keras import layers
from keras.datasets import mnist
from keras import backend as K
from keras.engine.topology import Layer
from keras.utils import conv_utils
from keras import initializers
import numpy as np
import math
class SincConv1D(Layer):
    def __init__(self, N_filt, Filt_dim, fs, **kwargs):
        self.N_filt = N_filt
        self.Filt_dim = Filt_dim
        self.fs = fs
        super(SincConv1D, self).__init__(**kwargs)

    def build(self, input_shape):
        # The filters are trainable parameters.
        self.filt_b1 = self.add_weight(name='filt_b1', shape=(self.N_filt,), initializer='uniform', trainable=True)
        self.filt_band = self.add_weight(name='filt_band', shape=(self.N_filt,), initializer='uniform', trainable=True)
        # Mel initialization of the filterbanks.
        low_freq_mel = 80
        high_freq_mel = (2595 * np.log10(1 + (self.fs / 2) / 700))  # Convert Hz to Mel
        mel_points = np.linspace(low_freq_mel, high_freq_mel, self.N_filt)  # Equally spaced in Mel scale
        f_cos = (700 * (10 ** (mel_points / 2595) - 1))  # Convert Mel to Hz
        b1 = np.roll(f_cos, 1)
        b2 = np.roll(f_cos, -1)
        b1[0] = 30
        b2[-1] = (self.fs / 2) - 100
        self.freq_scale = self.fs * 1.0
        self.set_weights([b1 / self.freq_scale, (b2 - b1) / self.freq_scale])
        super(SincConv1D, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        # Get beginning and end frequencies of the filters.
        min_freq = 50.0
        min_band = 50.0
        filt_beg_freq = K.abs(self.filt_b1) + min_freq / self.freq_scale
        filt_end_freq = filt_beg_freq + (K.abs(self.filt_band) + min_band / self.freq_scale)
        # Filter window (Hamming).
        n = np.linspace(0, self.Filt_dim, self.Filt_dim)
        window = 0.54 - 0.46 * K.cos(2 * math.pi * n / self.Filt_dim)
        window = K.cast(window, "float32")
        window = K.variable(window)
        # Time axis for the right half of the (symmetric) sinc filter.
        t_right_linspace = np.linspace(1, (self.Filt_dim - 1) / 2, int((self.Filt_dim - 1) / 2))
        t_right = K.variable(t_right_linspace / self.fs)
        # Compute the filters.
        output_list = []
        for i in range(self.N_filt):
            # Band-pass filter = difference of two low-pass sinc filters.
            low_pass1 = 2 * filt_beg_freq[i] * sinc(filt_beg_freq[i] * self.freq_scale, t_right)
            low_pass2 = 2 * filt_end_freq[i] * sinc(filt_end_freq[i] * self.freq_scale, t_right)
            band_pass = (low_pass2 - low_pass1)
            band_pass = band_pass / K.max(band_pass)
            output_list.append(band_pass * window)
        filters = K.stack(output_list)   # (80, 251)
        filters = K.transpose(filters)   # (251, 80)
        # (251, 1, 80): TF kernels are laid out as (filter_width, in_channels, out_channels);
        # PyTorch uses (out_channels, in_channels, filter_width).
        filters = K.reshape(filters, (self.Filt_dim, 1, self.N_filt))
        '''
        Given an input tensor of shape [batch, in_width, in_channels] if data_format is "NWC",
        or [batch, in_channels, in_width] if data_format is "NCW", and a filter / kernel tensor of shape
        [filter_width, in_channels, out_channels], this op reshapes the arguments to pass them to conv2d
        to perform the equivalent convolution operation.
        Internally, this op reshapes the input tensors and invokes tf.nn.conv2d. For example, if data_format
        does not start with "NC", a tensor of shape [batch, in_width, in_channels] is reshaped to
        [batch, 1, in_width, in_channels], and the filter is reshaped to [1, filter_width, in_channels, out_channels].
        The result is then reshaped back to [batch, out_width, out_channels] (where out_width is a function of
        the stride and padding as in conv2d) and returned to the caller.
        '''
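        # Shape walk-through for the convolution below (hypothetical sizes; anything
        # not fixed by the layer's arguments is just for illustration):
        #   x:       (batch, in_width, 1),        e.g. (32, 784, 1)
        #   filters: (Filt_dim, 1, N_filt),       e.g. (251, 1, 80)
        #   out:     (batch, out_width, N_filt),  out_width = in_width - Filt_dim + 1,
        #            because K.conv1d defaults to stride 1 and "valid" padding.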
        # Do the convolution.
        out = K.conv1d(x, kernel=filters)
        # out = K.reshape(out, shape=(None, 8618, 80,))
        return out

    def compute_output_shape(self, input_shape):
        new_size = conv_utils.conv_output_length(input_shape[1], self.Filt_dim, padding="valid", stride=1, dilation=1)
        return (input_shape[0],) + (new_size,) + (self.N_filt,)


def sinc(band, t_right):
    y_right = K.sin(2 * math.pi * band * t_right) / (2 * math.pi * band * t_right)
    y_left = K.reverse(y_right, 0)  # mirror the right half to build the symmetric filter
    y = K.concatenate([y_left, K.variable(K.ones(1)), y_right])
    return y
class LayerNorm(layers.Layer):
    """ Layer Normalization in the style of https://arxiv.org/abs/1607.06450 """

    def __init__(self, scale_initializer='ones', bias_initializer='zeros', **kwargs):
        super(LayerNorm, self).__init__(**kwargs)
        self.epsilon = 1e-6
        self.scale_initializer = initializers.get(scale_initializer)
        self.bias_initializer = initializers.get(bias_initializer)

    def build(self, input_shape):
        self.scale = self.add_weight(shape=(input_shape[-1],),
                                     initializer=self.scale_initializer,
                                     trainable=True,
                                     name='{}_scale'.format(self.name))
        self.bias = self.add_weight(shape=(input_shape[-1],),
                                    initializer=self.bias_initializer,
                                    trainable=True,
                                    name='{}_bias'.format(self.name))
        self.built = True

    def call(self, x, mask=None):
        mean = K.mean(x, axis=-1, keepdims=True)
        std = K.std(x, axis=-1, keepdims=True)
        norm = (x - mean) * (1 / (std + self.epsilon))
        return norm * self.scale + self.bias

    def compute_output_shape(self, input_shape):
        return input_shape
class Antirectifier(layers.Layer):
    '''This is the combination of a sample-wise
    L2 normalization with the concatenation of the
    positive part of the input with the negative part
    of the input. The result is a tensor of samples that are
    twice as large as the input samples.
    It can be used in place of a ReLU.
    # Input shape
        2D tensor of shape (samples, n)
    # Output shape
        2D tensor of shape (samples, 2*n)
    # Theoretical justification
        When applying ReLU, assuming that the distribution
        of the previous output is approximately centered around 0.,
        you are discarding half of your input. This is inefficient.
        Antirectifier returns all-positive outputs like ReLU,
        without discarding any data.
        Tests on MNIST show that Antirectifier allows training networks
        with half as many parameters, yet with comparable
        classification accuracy to an equivalent ReLU-based network.
    '''

    def compute_output_shape(self, input_shape):
        shape = list(input_shape)
        assert len(shape) == 2  # only valid for 2D tensors
        shape[-1] *= 2
        return tuple(shape)

    def call(self, inputs):
        inputs -= K.mean(inputs, axis=1, keepdims=True)
        inputs = K.l2_normalize(inputs, axis=1)
        pos = K.relu(inputs)
        neg = K.relu(-inputs)
        return K.concatenate([pos, neg], axis=1)
# global parameters
batch_size = 128
num_classes = 10
epochs = 40

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# build the model
model = Sequential()
model.add(layers.Dense(256, input_shape=(784,)))
model.add(LayerNorm())
model.add(SincConv1D(10, 10, 2000))
model.add(Antirectifier())
model.add(layers.Dropout(0.1))
model.add(layers.Dense(256))
model.add(Antirectifier())
model.add(layers.Dropout(0.1))
model.add(layers.Dense(num_classes))
model.add(layers.Activation('softmax'))

# compile the model
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# train the model
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))
# next, compare with an equivalent network
# with 2x bigger Dense layers and ReLU
The error obtained:
File "C:\Users\Sebastiaan\anaconda3\envs\environment_01\lib\site-packages\IPython\core\interactiveshell.py", line 3331, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-5-9ba6ad267bc1>", line 1, in <module>
runfile('C:/Users/Sebastiaan/Documents/Aerospace engineering/thesis/deep_learning_stuff/pycharm_code/own_layers_error.py', wdir='C:/Users/Sebastiaan/Documents/Aerospace engineering/thesis/deep_learning_stuff/pycharm_code')
File "C:\Program Files\JetBrains\PyCharm Community Edition 2018.3.2\helpers\pydev\_pydev_bundle\pydev_umd.py", line 197, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "C:\Program Files\JetBrains\PyCharm Community Edition 2018.3.2\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "C:/Users/Sebastiaan/Documents/Aerospace engineering/thesis/deep_learning_stuff/pycharm_code/own_layers_error.py", line 191, in <module>
model.add(SincConv1D(10, 10, 2000))
File "C:\Users\Sebastiaan\anaconda3\envs\environment_01\lib\site-packages\keras\engine\sequential.py", line 182, in add
output_tensor = layer(self.outputs[0])
File "C:\Users\Sebastiaan\anaconda3\envs\environment_01\lib\site-packages\keras\backend\tensorflow_backend.py", line 75, in symbolic_fn_wrapper
return func(*args, **kwargs)
File "C:\Users\Sebastiaan\anaconda3\envs\environment_01\lib\site-packages\keras\engine\base_layer.py", line 489, in __call__
output = self.call(inputs, **kwargs)
File "C:/Users/Sebastiaan/Documents/Aerospace engineering/thesis/deep_learning_stuff/pycharm_code/own_layers_error.py", line 54, in call
window = K.variable(window)
File "C:\Users\Sebastiaan\anaconda3\envs\environment_01\lib\site-packages\keras\backend\tensorflow_backend.py", line 620, in variable
value, dtype=dtype, name=name, constraint=constraint)
File "C:\Users\Sebastiaan\anaconda3\envs\environment_01\lib\site-packages\tensorflow_core\python\keras\backend.py", line 814, in variable
constraint=constraint)
File "C:\Users\Sebastiaan\anaconda3\envs\environment_01\lib\site-packages\tensorflow_core\python\ops\variables.py", line 260, in __call__
return cls._variable_v2_call(*args, **kwargs)
File "C:\Users\Sebastiaan\anaconda3\envs\environment_01\lib\site-packages\tensorflow_core\python\ops\variables.py", line 254, in _variable_v2_call
shape=shape)
File "C:\Users\Sebastiaan\anaconda3\envs\environment_01\lib\site-packages\tensorflow_core\python\ops\variables.py", line 235, in <lambda>
previous_getter = lambda **kws: default_variable_creator_v2(None, **kws)
File "C:\Users\Sebastiaan\anaconda3\envs\environment_01\lib\site-packages\tensorflow_core\python\ops\variable_scope.py", line 2645, in default_variable_creator_v2
shape=shape)
File "C:\Users\Sebastiaan\anaconda3\envs\environment_01\lib\site-packages\tensorflow_core\python\ops\variables.py", line 262, in __call__
return super(VariableMetaclass, cls).__call__(*args, **kwargs)
File "C:\Users\Sebastiaan\anaconda3\envs\environment_01\lib\site-packages\tensorflow_core\python\ops\resource_variable_ops.py", line 1411, in __init__
distribute_strategy=distribute_strategy)
File "C:\Users\Sebastiaan\anaconda3\envs\environment_01\lib\site-packages\tensorflow_core\python\ops\resource_variable_ops.py", line 1494, in _init_from_args
raise ValueError("Tensor-typed variable initializers must either be "
ValueError: Tensor-typed variable initializers must either be wrapped in an init_scope or callable (e.g., `tf.Variable(lambda : tf.truncated_normal([10, 40]))`) when building functions. Please file a feature request if this restriction inconveniences you. ```
Executing the following change solved the problem for me:
From:
window = K.variable(window)
To:
window = K.variable(lambda: window)
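This works because, exactly as the ValueError says, TF2 refuses tensor-typed variable initializers while a function graph is being built: the initializer tensor would be created inside the graph rather than in an init scope. Wrapping it in a lambda makes the initializer callable, so the tensor is only created when the variable is actually initialized. The same pattern applies to tf.Variable directly; a minimal sketch (hypothetical shape, traced a single time):
import tensorflow as tf

@tf.function
def build_once():
    # v = tf.Variable(tf.ones((3,)))        # raises the same ValueError
    v = tf.Variable(lambda: tf.ones((3,)))  # OK: callable initializer, creation is deferred
    return v.read_value()

build_once()  # tf.function only permits variable creation on the first trace
A cleaner long-term fix might be to precompute window and t_right with NumPy in build() and store them via K.constant, since they depend only on Filt_dim and fs; then call creates no variables at all.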