I want to train a VAE on a huge dataset, so I decided to start from VAE code written for Fashion-MNIST plus a popular modification for batch loading from filenames that I found on GitHub. My research Colab notebook is here, along with a sample section of the dataset.
But the way the VAE class is written, it does not have a call method, which should be there according to the Keras documentation, and I am getting the error NotImplementedError: When subclassing the Model class, you should implement a call method.
class VAE(tf.keras.Model):
    """a basic vae class for tensorflow
    Extends:
        tf.keras.Model
    """

    def __init__(self, **kwargs):
        super(VAE, self).__init__()
        self.__dict__.update(kwargs)

        self.enc = tf.keras.Sequential(self.enc)
        self.dec = tf.keras.Sequential(self.dec)

    def encode(self, x):
        mu, sigma = tf.split(self.enc(x), num_or_size_splits=2, axis=1)
        return ds.MultivariateNormalDiag(loc=mu, scale_diag=sigma)

    def reparameterize(self, mean, logvar):
        eps = tf.random.normal(shape=mean.shape)
        return eps * tf.exp(logvar * 0.5) + mean

    def reconstruct(self, x):
        mu, _ = tf.split(self.enc(x), num_or_size_splits=2, axis=1)
        return self.decode(mu)

    def decode(self, z):
        return self.dec(z)

    def compute_loss(self, x):
        q_z = self.encode(x)
        z = q_z.sample()
        x_recon = self.decode(z)
        p_z = ds.MultivariateNormalDiag(
            loc=[0.] * z.shape[-1], scale_diag=[1.] * z.shape[-1]
        )
        kl_div = ds.kl_divergence(q_z, p_z)
        latent_loss = tf.reduce_mean(tf.maximum(kl_div, 0))
        recon_loss = tf.reduce_mean(tf.reduce_sum(tf.math.square(x - x_recon), axis=0))
        return recon_loss, latent_loss

    def compute_gradients(self, x):
        with tf.GradientTape() as tape:
            loss = self.compute_loss(x)
        return tape.gradient(loss, self.trainable_variables)

    @tf.function
    def train(self, train_x):
        gradients = self.compute_gradients(train_x)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
The encoder and decoder are defined separately and compiled as:
N_Z = 8
filt_base = 32
DIMS = (128,128,3)
encoder = [
    tf.keras.layers.InputLayer(input_shape=DIMS),
    tf.keras.layers.Conv2D(
        filters=filt_base, kernel_size=3, strides=(1, 1), activation="relu", padding="same"
    ),
    tf.keras.layers.Conv2D(
        filters=filt_base, kernel_size=3, strides=(2, 2), activation="relu", padding="same"
    ),
    tf.keras.layers.Conv2D(
        filters=filt_base*2, kernel_size=3, strides=(1, 1), activation="relu", padding="same"
    ),
    tf.keras.layers.Conv2D(
        filters=filt_base*2, kernel_size=3, strides=(2, 2), activation="relu", padding="same"
    ),
    tf.keras.layers.Conv2D(
        filters=filt_base*3, kernel_size=3, strides=(1, 1), activation="relu", padding="same"
    ),
    tf.keras.layers.Conv2D(
        filters=filt_base*3, kernel_size=3, strides=(2, 2), activation="relu", padding="same"
    ),
    tf.keras.layers.Conv2D(
        filters=filt_base*4, kernel_size=3, strides=(1, 1), activation="relu", padding="same"
    ),
    tf.keras.layers.Conv2D(
        filters=filt_base*4, kernel_size=3, strides=(2, 2), activation="relu", padding="same"
    ),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=N_Z*2),
]

decoder = [
    tf.keras.layers.Dense(units=8 * 8 * 128, activation="relu"),
    tf.keras.layers.Reshape(target_shape=(8, 8, 128)),
    tf.keras.layers.Conv2DTranspose(
        filters=filt_base*4, kernel_size=3, strides=(2, 2), padding="SAME", activation="relu"
    ),
    tf.keras.layers.Conv2DTranspose(
        filters=filt_base*4, kernel_size=3, strides=(1, 1), padding="SAME", activation="relu"
    ),
    tf.keras.layers.Conv2DTranspose(
        filters=filt_base*3, kernel_size=3, strides=(2, 2), padding="SAME", activation="relu"
    ),
    tf.keras.layers.Conv2DTranspose(
        filters=filt_base*3, kernel_size=3, strides=(1, 1), padding="SAME", activation="relu"
    ),
    tf.keras.layers.Conv2DTranspose(
        filters=filt_base*2, kernel_size=3, strides=(2, 2), padding="SAME", activation="relu"
    ),
    tf.keras.layers.Conv2DTranspose(
        filters=filt_base*2, kernel_size=3, strides=(1, 1), padding="SAME", activation="relu"
    ),
    tf.keras.layers.Conv2DTranspose(
        filters=filt_base, kernel_size=3, strides=(2, 2), padding="SAME", activation="relu"
    ),
    tf.keras.layers.Conv2DTranspose(
        filters=1, kernel_size=3, strides=(1, 1), padding="SAME", activation="sigmoid"
    ),
]
optimizer = tf.keras.optimizers.Adam(1e-3)
model = VAE(
    enc = encoder,
    dec = decoder,
    optimizer = optimizer,
)
model.compile(optimizer=optimizer)
and I am trying to train the model using the fit_generator function:
num_epochs = 50

model.fit_generator(generator=my_training_batch_generator,
                    steps_per_epoch=(num_training_samples // batch_size),
                    epochs=num_epochs,
                    verbose=1,
                    validation_data=my_validation_batch_generator,
                    validation_steps=(num_validation_samples // batch_size),
                    use_multiprocessing=True,
                    workers=16,
                    max_queue_size=32)
I am new to machine learning, so any help to resolve the issue would be appreciated. I think the issue is with the def train line in the VAE class.
As an optional request, it would also be appreciated if training could be set up so that I can see the reconstruction after each epoch. I already have a plot_reconstruction function in the Colab notebook for this purpose that just needs to be called.
APaul31,
Specifically, in your code I suggest adding a call() method to the VAE class:

def call(self, x):
    q_z = self.encode(x)
    z = q_z.sample()
    x_recon = self.decode(z)
    return x_recon
I also suggest using a more standard approach to your task, especially as a beginner:
use tf.keras.preprocessing.image_dataset_from_directory() for image loading. Tutorial here.
use a custom Model.train_step() to calculate the VAE losses instead of the multiple loss functions in your VAE class. Example here, and a minimal sketch below.
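A minimal sketch of that train_step approach, assuming the same encoder/decoder layer lists and the optimizer passed through compile(). It uses the standard log-variance reparameterization rather than the distribution objects above, and the class name, example_batch, and the plot_reconstruction signature are assumptions, not code from the notebook:

class VAE2(tf.keras.Model):
    def __init__(self, enc, dec, **kwargs):
        super().__init__(**kwargs)
        self.enc = tf.keras.Sequential(enc)
        self.dec = tf.keras.Sequential(dec)

    def call(self, x):
        # Reconstruct from the posterior mean; used by predict()/evaluate().
        mu, _ = tf.split(self.enc(x), num_or_size_splits=2, axis=1)
        return self.dec(mu)

    def train_step(self, data):
        x = data[0] if isinstance(data, tuple) else data
        with tf.GradientTape() as tape:
            mu, logvar = tf.split(self.enc(x), num_or_size_splits=2, axis=1)
            z = mu + tf.random.normal(shape=tf.shape(mu)) * tf.exp(0.5 * logvar)
            x_recon = self.dec(z)
            # Squared-error reconstruction term plus analytic KL to a unit Gaussian.
            recon_loss = tf.reduce_mean(
                tf.reduce_sum(tf.math.square(x - x_recon), axis=[1, 2, 3]))
            kl_loss = tf.reduce_mean(-0.5 * tf.reduce_sum(
                1 + logvar - tf.square(mu) - tf.exp(logvar), axis=1))
            loss = recon_loss + kl_loss
        grads = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
        return {"loss": loss, "recon_loss": recon_loss, "kl_loss": kl_loss}

model = VAE2(enc=encoder, dec=decoder)
model.compile(optimizer=tf.keras.optimizers.Adam(1e-3))

# Per-epoch reconstructions: call the notebook's plot_reconstruction from a callback
# (example_batch is a hypothetical batch of validation images held out for plotting).
plot_cb = tf.keras.callbacks.LambdaCallback(
    on_epoch_end=lambda epoch, logs: plot_reconstruction(model, example_batch))
model.fit(my_training_batch_generator, epochs=num_epochs, callbacks=[plot_cb])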
Currently I am getting the error TypeError: 'NoneType' object is not callable.
The problem is with the fit method: when you are passing a data generator, use fit_generator instead of fit. In the Colab it's calling fit.
Also, note that you can use the flow_from_directory method instead of image_dataset_from_directory to lazily generate batches, so it won't load the whole dataset into memory:
https://keras.io/api/preprocessing/image/#flowfromdirectory-method
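For what it's worth, a minimal sketch of that lazy loading; the directory path and target size are illustrative assumptions (flow_from_directory expects the images inside at least one subfolder of the given directory):

datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1.0 / 255)
train_gen = datagen.flow_from_directory(
    "data/train",            # hypothetical path containing one subfolder of images
    target_size=(128, 128),  # matches DIMS above
    batch_size=batch_size,
    class_mode=None,         # yield images only, no labels, as a VAE needs
    shuffle=True,
)
model.fit(train_gen,
          steps_per_epoch=num_training_samples // batch_size,
          epochs=num_epochs)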
Related
encoder_input = Input((1,125,16))
en_conv = Conv2D(16, (1, 64), activation='elu', padding="same",
                 kernel_constraint=max_norm(2., axis=(0, 1, 2)))(encoder_input)
en_conv = BatchNormalization(axis=3, epsilon=1e-05, momentum=0.1)(en_conv)
en_conv = AveragePooling2D(pool_size=(1, 125//125))(en_conv)
en_conv = Conv2D(10, (1, 32), activation='elu', padding="same",
                 kernel_constraint=max_norm(2., axis=(0, 1, 2)))(en_conv)
en_conv = BatchNormalization(axis=3, epsilon=1e-05, momentum=0.1)(en_conv)
en_conv = AveragePooling2D((1, 125//125))(en_conv)
en_conv = Flatten()(en_conv)
encoder_output = Dense(64, kernel_constraint=max_norm(0.5))(en_conv)
I would like to convert the above TensorFlow model to the PyTorch code below, but I am stuck because some of the functions used don't exist in PyTorch. How can I convert it?
class Conv2D_Norm_Constrained(nn.Conv2d):
    def __init__(self, max_norm_val, norm_dim, **kwargs):
        super().__init__(**kwargs)
        self.max_norm_val = max_norm_val
        self.norm_dim = norm_dim

    def get_constrained_weights(self, epsilon=1e-8):
        norm = self.weight.norm(2, dim=self.norm_dim, keepdim=True)
        return self.weight * (torch.clamp(norm, 0, self.max_norm_val) / (norm + epsilon))

    def forward(self, input):
        return F.conv2d(input, self.get_constrained_weights(), self.bias, self.stride, self.padding, self.dilation, self.groups)
self.en_conv = nn.Sequential(
    Conv2D_Norm_Constrained(in_channels=1, out_channels=16, kernel_size=(1, 64), padding="same", max_norm_val=2.0, norm_dim=(0, 1, 2)),
    nn.ELU(),
    nn.BatchNorm2d(16, eps=1e-05, momentum=0.1),
    nn.AvgPool2d(pool_size=(1, 125//125)),
    nn.Flatten(),
    nn.Linear(,64),
    nn.ELU(),
    nn.Linear(64, 3)
)
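For the pieces that don't exist under those names in PyTorch: nn.AvgPool2d takes kernel_size rather than pool_size, and the in_features of the first Linear either has to be computed from the input shape or deferred with nn.LazyLinear. A hedged sketch of the same stack with those substitutions; the layer sizes are taken from the attempt above and the input layout is assumed to be (N, 1, H, W):

import torch
import torch.nn as nn

en_conv = nn.Sequential(
    Conv2D_Norm_Constrained(in_channels=1, out_channels=16, kernel_size=(1, 64),
                            padding="same", max_norm_val=2.0, norm_dim=(0, 1, 2)),
    nn.ELU(),
    nn.BatchNorm2d(16, eps=1e-05, momentum=0.1),
    nn.AvgPool2d(kernel_size=(1, 125 // 125)),   # pool_size -> kernel_size
    nn.Flatten(),
    nn.LazyLinear(64),                           # infers in_features on the first forward pass
    nn.ELU(),
    nn.Linear(64, 3),
)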
I'm trying to implement UNet for image segmentation in TensorFlow 2 using the Keras API, but I am not sure how to implement the Concatenate layer. Here is what I have tried:
def create_model_myunet(depth, start_f, output_channels, encoder_kernel_size):
    # Encoder
    model = tf.keras.Sequential()
    for i in range(0, depth):
        if i == 0:
            print("Specifying an input shape")
            input_shape = [config.img_h, config.img_w, 3]
        else:
            input_shape = [None]

        model.add(tf.keras.layers.Conv2D(filters=2**(start_f+i),
                                         kernel_size=(encoder_kernel_size, encoder_kernel_size),
                                         strides=(1, 1),
                                         padding='same',
                                         input_shape=input_shape,
                                         name="enc_conv2d_" + str(i)))
        model.add(tf.keras.layers.ReLU(name="enc_relu_" + str(i)))
        model.add(tf.keras.layers.MaxPool2D(pool_size=(2, 2), name="enc_maxpool2d_" + str(i)))

    # Decoder
    initializer = tf.random_normal_initializer(0., 0.02)
    for i in range(depth, 1, -1):
        model.add(
            tf.keras.layers.Conv2DTranspose(2**(start_f+i),
                                            encoder_kernel_size,
                                            strides=2,
                                            padding='same',
                                            kernel_initializer=initializer,
                                            use_bias=False)
        )
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.ReLU(name="dec_relu_" + str(i)))
        model.add(tf.keras.layers.Concatenate([
            model.get_layer(name="dec_relu_" + str(i)).output,
            model.get_layer(name="enc_relu_" + str(i-1)).output
        ]))
        pass

    last = tf.keras.layers.Conv2DTranspose(
        output_channels, 3, strides=2,
        padding='same', activation='softmax')  # 64x64 -> 128x128

    model.add(last)
    return model
It gives me the following error:
ValueError: A Concatenate layer should be called on a list of at
least 2 inputs
You need to change

model.add(tf.keras.layers.Concatenate([
    model.get_layer(name="dec_relu_"+str(i)).output,
    model.get_layer(name="enc_relu_"+str(i-1)).output
]))

to

model.add(tf.keras.layers.Concatenate()([  # Sequential api
    model.get_layer(name="dec_relu_"+str(i)).output,
    model.get_layer(name="enc_relu_"+str(i-1)).output
]))

or

model.add(tf.keras.layers.concatenate([  # Functional api
    model.get_layer(name="dec_relu_"+str(i)).output,
    model.get_layer(name="enc_relu_"+str(i-1)).output
]))
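As a side note, skip connections like this are usually easier to express with the functional API, since a Sequential model is a single linear stack of layers. A minimal sketch of one encoder/decoder level with a skip connection; the input shape and filter counts here are illustrative, not taken from the question:

inputs = tf.keras.Input(shape=(128, 128, 3))
enc = tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu', name='enc_relu_0')(inputs)
down = tf.keras.layers.MaxPool2D(2)(enc)
up = tf.keras.layers.Conv2DTranspose(32, 3, strides=2, padding='same', activation='relu')(down)
merged = tf.keras.layers.Concatenate()([up, enc])  # skip connection back to the encoder feature map
outputs = tf.keras.layers.Conv2D(2, 3, padding='same', activation='softmax')(merged)
unet_level = tf.keras.Model(inputs, outputs)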
I'm trying to build a neural network based on the Inception architecture used for images, but for 1D vectors.
I have based my model on this one from the Keras getting started guide, at https://keras.io/getting-started/functional-api-guide/:
tf.keras.backend.clear_session()
logger = tf.get_logger()
logger.setLevel(logging.ERROR)
input_vector = Input(shape=(71276,1),)
tower_1 = tf.keras.layers.Conv1D(filters=64, kernel_size=1, padding='same', activation='relu', name='conv_1')(input_vector)
tower_1 = tf.keras.layers.Conv1D(filters=64, kernel_size=3, padding='same', activation='relu', name='conv_2')(tower_1)
tower_2 = tf.keras.layers.Conv1D(filters=64, kernel_size=1, padding='same', activation='relu', name='conv_3')(input_vector)
tower_2 = tf.keras.layers.Conv1D(filters=64, kernel_size=1, padding='same', activation='relu', name='conv_4')(tower_2)
tower_3 = tf.keras.layers.MaxPooling1D(pool_size=3, strides=1, padding='same')(input_vector)
tower_3 = tf.keras.layers.Conv1D(filters=64, kernel_size=1, padding='same', activation='relu', name='conv_4')(tower_3)
output = tf.keras.layers.concatenate([tower_1, tower_2, tower_3])
model = tf.keras.models.Model(inputs=input_vector, outputs=output)
model.compile(loss='mse',
              optimizer=tf.keras.optimizers.Adam(lr=0.001),
              metrics=['mae'])
model.summary()
This is my code:
from keras.layers import Conv1D, MaxPooling1D, Input
from keras.models import Model
tf.keras.backend.clear_session()
logger = tf.get_logger()
logger.setLevel(logging.ERROR)
input_vector = Input(shape=(71276,1),)
tower_1 = Conv1D(filters=64, kernel_size=1, padding='same', activation='relu', name='conv_1')(input_vector)
tower_1 = Conv1D(filters=64, kernel_size=3, padding='same', activation='relu', name='conv_1')(tower_1)
tower_2 = Conv1D(filters=64, kernel_size=1, padding='same', activation='relu', name='conv_1')(input_vector)
tower_2 = Conv1D(filters=64, kernel_size=1, padding='same', activation='relu', name='conv_1')(tower_2)
tower_3 = MaxPooling1D(pool_size=3, strides=1, padding='same')(input_vector)
tower_3 = Conv1D(filters=64, kernel_size=1, padding='same', activation='relu', name='conv_1')(tower_3)
output = tf.keras.layers.concatenate([tower_1, tower_2, tower_3])
model = Model(inputs=input_vector, outputs=output)
model.compile(loss='mse',
              optimizer=tf.keras.optimizers.Adam(lr=0.001),
              metrics=['mae'])
model.summary()
When executing, I'm getting the following error, and don't really understand why:
AttributeError Traceback (most recent call last)
<ipython-input-9-2931ae837421> in <module>()
6 input_vector = Input(shape=(71276,1),)
7
----> 8 tower_1 = tf.keras.layers.Conv1D(filters=64, kernel_size=1, padding='same', activation='relu', name='conv_1')(input_vector)
9 tower_1 = tf.keras.layers.Conv1D(filters=64, kernel_size=3, padding='same', activation='relu', name='conv_2')(tower_1)
10
5 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/base_layer.py in <lambda>(t)
2056 `call` method of the layer at the call that created the node.
2057 """
-> 2058 inbound_layers = nest.map_structure(lambda t: t._keras_history.layer,
2059 input_tensors)
2060 node_indices = nest.map_structure(lambda t: t._keras_history.node_index,
AttributeError: 'tuple' object has no attribute 'layer'
I don't have a lot of experience with convolutional layers so it is very possible I have made a very obvious mistake. Searching online I haven't been able to find someone else having the same problem.
I'm running this on Google Colaboratory, in a python 3 runtime.
Any help would be appreciated, thank you!
A few things:
All your layers have the same name? I bet that could cause lots of strange bugs.
tower_3 doesn't have the same shape as the other two towers, so it's impossible to concatenate. (You're using a MaxPooling1D; check the summary to confirm.)
You are mixing keras and tf.keras, and that is certainly a huge problem. Choose only one.
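A sketch of the towers with those fixes applied, using tf.keras only and unique layer names; model.summary() is the quick way to check whether the three tower shapes line up before concatenating:

from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, concatenate
from tensorflow.keras.models import Model

input_vector = Input(shape=(71276, 1))

tower_1 = Conv1D(64, 1, padding='same', activation='relu', name='t1_conv_1')(input_vector)
tower_1 = Conv1D(64, 3, padding='same', activation='relu', name='t1_conv_2')(tower_1)

tower_2 = Conv1D(64, 1, padding='same', activation='relu', name='t2_conv_1')(input_vector)
tower_2 = Conv1D(64, 1, padding='same', activation='relu', name='t2_conv_2')(tower_2)

tower_3 = MaxPooling1D(pool_size=3, strides=1, padding='same', name='t3_pool')(input_vector)
tower_3 = Conv1D(64, 1, padding='same', activation='relu', name='t3_conv_1')(tower_3)

output = concatenate([tower_1, tower_2, tower_3], name='towers_concat')
model = Model(inputs=input_vector, outputs=output)
model.summary()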
I'm currently working on an image-generating convolutional NN and an audio-generating recurrent NN. I built the generators for both, but for some reason the build_audio_generator model has in its last layer a tensor (Tensor("model_4/sequential_4/activation_4/Tanh:0")) with shape (?, 1) instead of (?, 28, 28, 1) as needed. My question: how do I have to change the code of build_audio_generator so that its output has the same shape (?, 28, 28, 1) as build_generator's?
Code:
def build_generator(latent_dim, channels, num_classes):
    model = Sequential()

    model.add(Dense(128 * 7 * 7, activation="relu", input_dim=latent_dim))
    model.add(Reshape((7, 7, 128)))
    model.add(BatchNormalization(momentum=0.8))
    model.add(UpSampling2D())
    model.add(Conv2D(128, kernel_size=3, padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(momentum=0.8))
    model.add(UpSampling2D())
    model.add(Conv2D(64, kernel_size=3, padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Conv2D(channels, kernel_size=3, padding='same'))
    model.add(Activation("tanh"))

    model.summary()

    noise = Input(shape=(latent_dim,))
    label = Input(shape=(1,), dtype='int32')
    label_embedding = Flatten()(Embedding(num_classes, 100)(label))

    model_input = multiply([noise, label_embedding])
    img = model(model_input)

    return Model([noise, label], img)

def build_audio_generator(latent_dim, num_classes):
    model = Sequential()

    model.add(LSTM(512, input_dim=latent_dim, return_sequences=True))
    model.add(Dropout(0.3))
    model.add(LSTM(512, return_sequences=True))
    model.add(Dropout(0.3))
    model.add(LSTM(512))
    model.add(Dense(256))
    model.add(Dropout(0.3))
    model.add(Dense(num_classes))
    model.add(Activation('tanh'))

    model.summary()

    noise = Input(shape=(None, latent_dim,))
    label = Input(shape=(1,), dtype='int32')
    label_embedding = Flatten()(Embedding(num_classes, 100)(label))

    model_input = multiply([noise, label_embedding])
    sound = model(model_input)

    return Model([noise, label], sound)
# Build the generator
generator = build_generator(100, 3, 1)
audio_generator = build_audio_generator(100, 1)
# The generator takes noise and the target label as input
# and generates the corresponding digit of that label
noise = Input(shape=(None, 100,))
label = Input(shape=(1,))
img = generator([noise, label])
audio = audio_generator([noise, label])
print('Audio: '+ str(audio))
print('Audio shape: ' + str(audio.shape))
print('IMG: '+str(img))
print('IMG shape: ' + str(img.shape))
Console output:
Audio: Tensor("model_4/sequential_4/activation_4/Tanh:0", shape=(?, 1), dtype=float32)
Audio shape: (?, 1)
IMG: Tensor("model_3/sequential_3/activation_3/Tanh:0", shape=(?, 28, 28, 1), dtype=float32)
IMG shape: (?, 28, 28, 1)
I think you would want 3D for audio instead, no?
Just keep return_sequences=True in all LSTMs.
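A small sketch of the shape difference this refers to; the 28-step sequence length and 100 features are illustrative:

import tensorflow as tf

x = tf.keras.Input(shape=(28, 100))                         # (batch, timesteps, features)
seq = tf.keras.layers.LSTM(512, return_sequences=True)(x)   # -> (batch, 28, 512)
last = tf.keras.layers.LSTM(512)(x)                         # -> (batch, 512), the case hit above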
I'm just getting started with Keras and with Deep learning, so the answer to my question could be obvious to some, but for me it isn't.
I made a model to colorize some black and white photos following the article on Floydhub (where I'm training it), and it works just fine when I train it with similar pictures (such as human faces), but as soon as I use a larger dataset of more varied pictures as input, the loss just remains stable and doesn't improve.
I've tried different learning rates and optimizers but just cannot get a good result.
What could I change to get a better result?
This is the code (thanks to Emil Wallner for the article on Floydhub)
# Get images
X = []
for filename in os.listdir('/data/images/Train/'):
    X.append(img_to_array(load_img('/data/images/Train/'+filename)))
X = np.array(X, dtype=float)
Xtrain = 1.0/255*X
#Load weights
inception = InceptionResNetV2(weights=None, include_top=True)
inception.load_weights('/data/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5')
inception.graph = tf.get_default_graph()
embed_input = Input(shape=(1000,))
#Encoder
encoder_input = Input(shape=(256, 256, 1,))
encoder_output = Conv2D(64, (3,3), activation='relu', padding='same', strides=2)(encoder_input)
encoder_output = Conv2D(128, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(128, (3,3), activation='relu', padding='same', strides=2)(encoder_output)
encoder_output = Conv2D(256, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(256, (3,3), activation='relu', padding='same', strides=2)(encoder_output)
encoder_output = Conv2D(512, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(512, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(256, (3,3), activation='relu', padding='same')(encoder_output)
#Fusion
fusion_output = RepeatVector(32 * 32)(embed_input)
fusion_output = Reshape(([32, 32, 1000]))(fusion_output)
fusion_output = concatenate([encoder_output, fusion_output], axis=3)
fusion_output = Conv2D(256, (1, 1), activation='relu', padding='same')(fusion_output)
#Decoder
decoder_output = Conv2D(128, (3,3), activation='relu', padding='same')(fusion_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)
decoder_output = Conv2D(64, (3,3), activation='relu', padding='same')(decoder_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)
decoder_output = Conv2D(32, (3,3), activation='relu', padding='same')(decoder_output)
decoder_output = Conv2D(16, (3,3), activation='relu', padding='same')(decoder_output)
decoder_output = Conv2D(2, (3, 3), activation='tanh', padding='same')(decoder_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)
model = Model(inputs=[encoder_input, embed_input], outputs=decoder_output)
#Create embedding
def create_inception_embedding(grayscaled_rgb):
    grayscaled_rgb_resized = []
    for i in grayscaled_rgb:
        i = resize(i, (299, 299, 3), mode='constant')
        grayscaled_rgb_resized.append(i)
    grayscaled_rgb_resized = np.array(grayscaled_rgb_resized)
    grayscaled_rgb_resized = preprocess_input(grayscaled_rgb_resized)
    with inception.graph.as_default():
        embed = inception.predict(grayscaled_rgb_resized)
    return embed
# Image transformer
datagen = ImageDataGenerator(
    shear_range=0.4,
    zoom_range=0.4,
    rotation_range=40,
    horizontal_flip=True)
#Generate training data
batch_size = 20
def image_a_b_gen(batch_size):
    for batch in datagen.flow(Xtrain, batch_size=batch_size):
        grayscaled_rgb = gray2rgb(rgb2gray(batch))
        embed = create_inception_embedding(grayscaled_rgb)
        lab_batch = rgb2lab(batch)
        X_batch = lab_batch[:,:,:,0]
        X_batch = X_batch.reshape(X_batch.shape+(1,))
        Y_batch = lab_batch[:,:,:,1:] / 128
        yield ([X_batch, create_inception_embedding(grayscaled_rgb)], Y_batch)
#Train model
tensorboard = TensorBoard(log_dir="/output")
model.compile(optimizer='adam', loss='mse')
model.fit_generator(image_a_b_gen(batch_size), callbacks=[tensorboard], epochs=1000, steps_per_epoch=20)
#Make a prediction on the unseen images
color_me = []
for filename in os.listdir('../Test/'):
    color_me.append(img_to_array(load_img('../Test/'+filename)))
color_me = np.array(color_me, dtype=float)
color_me = 1.0/255*color_me
color_me = gray2rgb(rgb2gray(color_me))
color_me_embed = create_inception_embedding(color_me)
color_me = rgb2lab(color_me)[:,:,:,0]
color_me = color_me.reshape(color_me.shape+(1,))
# Test model
output = model.predict([color_me, color_me_embed])
output = output * 128
# Output colorizations
for i in range(len(output)):
    cur = np.zeros((256, 256, 3))
    cur[:,:,0] = color_me[i][:,:,0]
    cur[:,:,1:] = output[i]
    imsave("result/img_"+str(i)+".png", lab2rgb(cur))