Before this model, I have defined an image shape, img_shape, which is (28, 28, 1):
def make_discriminator(img_shape):
    return keras.Sequential([
        keras.layers.Dropout(0.3),
        keras.layers.Conv2D(32, 5, strides = 2,
                            padding='same',
                            input_shape = img_shape,
                            use_bias = False),
        keras.layers.BatchNormalization(),
        keras.layers.LeakyReLU(),
        keras.layers.Conv2D(64, 5, strides = 2,
                            padding = 'same',
                            use_bias = False),
        keras.layers.BatchNormalization(),
        keras.layers.LeakyReLU(),
        keras.layers.Flatten(),
        keras.layers.Dense(1)
    ], "Discriminator")
Then I tried to use it directly as input and print the structure of this model,
D = make_discriminator(img_shape = img_shape)
print(D.summary())
However, it shows
This model has not yet been built. Build the model first by calling
build() or by calling the model on a batch of data.
But when I tried to add build() before summary,
D = make_discriminator(img_shape = img_shape)
D.build(img_shape = img_shape)
it shows
build() got an unexpected keyword argument 'img_shape'
I don't know how to solve this problem... The process of creating the image dataset is below:
import keras
import tensorflow as tf
import tensorflow_datasets as tfds
fmist = tfds.load('fashion_mnist')
def process(data):
    img = tf.cast(data['image'], tf.float32)
    lab = data['label']
    img = (img / 255.0 - 0.5) * 2.0
    return img
BATCH_SIZE = 256
train = fmist['train'].shuffle(10000).batch(BATCH_SIZE).\
        map(process).prefetch(tf.data.experimental.AUTOTUNE)
img_shape = tf.data.experimental.get_structure(train).shape[1:]
print("image shape:", img_shape)
Try discriminator.build(input_shape=(1, 28, 28, 1)):
def make_discriminator(img_shape):
    return tf.keras.Sequential([
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Conv2D(32, 5, strides = 2,
                               padding='same',
                               input_shape = img_shape,
                               use_bias = False),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.LeakyReLU(),
        tf.keras.layers.Conv2D(64, 5, strides = 2,
                               padding = 'same',
                               use_bias = False),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.LeakyReLU(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(1)
    ], "Discriminator")
discriminator = make_discriminator((28, 28, 1))
discriminator.build(input_shape=(1, 28, 28, 1))
print(discriminator.summary())
Or set the input_shape in the first layer of your model. Then, the remaining output shapes will be inferred and you do not have to call model.build():
def make_discriminator(img_shape):
    return tf.keras.Sequential([
        tf.keras.layers.Dropout(0.3, input_shape = img_shape),
        tf.keras.layers.Conv2D(32, 5, strides = 2,
                               padding='same',
                               use_bias = False),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.LeakyReLU(),
        tf.keras.layers.Conv2D(64, 5, strides = 2,
                               padding = 'same',
                               use_bias = False),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.LeakyReLU(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(1)
    ], "Discriminator")
discriminator = make_discriminator((28, 28, 1))
print(discriminator.summary())
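A third option, echoing the error message itself ("or by calling the model on a batch of data"): run one dummy batch through the first version of the model and it is built as well. A minimal sketch, assuming a dummy batch of ones:

    # Build by calling the model on a (dummy) batch of data, as the error
    # message suggests; this also works with the first make_discriminator version.
    discriminator = make_discriminator((28, 28, 1))
    _ = discriminator(tf.ones((1, 28, 28, 1)))   # builds all weights
    print(discriminator.summary())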
I want to train a VAE on a huge dataset, so I decided to use VAE code written for Fashion-MNIST together with popular modifications for batch loading by filename that I found on GitHub. My research Colab notebook is here, along with a sample section of the dataset.
But the way the VAE class is written, it does not have a call function, which should be there according to the Keras documentation. I am getting the error NotImplementedError: When subclassing the Model class, you should implement a call method.
class VAE(tf.keras.Model):
    """a basic vae class for tensorflow

    Extends:
        tf.keras.Model
    """
    def __init__(self, **kwargs):
        super(VAE, self).__init__()
        self.__dict__.update(kwargs)
        self.enc = tf.keras.Sequential(self.enc)
        self.dec = tf.keras.Sequential(self.dec)

    def encode(self, x):
        mu, sigma = tf.split(self.enc(x), num_or_size_splits=2, axis=1)
        return ds.MultivariateNormalDiag(loc=mu, scale_diag=sigma)

    def reparameterize(self, mean, logvar):
        eps = tf.random.normal(shape=mean.shape)
        return eps * tf.exp(logvar * 0.5) + mean

    def reconstruct(self, x):
        mu, _ = tf.split(self.enc(x), num_or_size_splits=2, axis=1)
        return self.decode(mu)

    def decode(self, z):
        return self.dec(z)

    def compute_loss(self, x):
        q_z = self.encode(x)
        z = q_z.sample()
        x_recon = self.decode(z)
        p_z = ds.MultivariateNormalDiag(
            loc=[0.] * z.shape[-1], scale_diag=[1.] * z.shape[-1]
        )
        kl_div = ds.kl_divergence(q_z, p_z)
        latent_loss = tf.reduce_mean(tf.maximum(kl_div, 0))
        recon_loss = tf.reduce_mean(tf.reduce_sum(tf.math.square(x - x_recon), axis=0))
        return recon_loss, latent_loss

    def compute_gradients(self, x):
        with tf.GradientTape() as tape:
            loss = self.compute_loss(x)
        return tape.gradient(loss, self.trainable_variables)

    #tf.function
    def train(self, train_x):
        gradients = self.compute_gradients(train_x)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
and the encoder and decoder are defined separately and compiled as
N_Z = 8
filt_base = 32
DIMS = (128,128,3)
encoder = [
    tf.keras.layers.InputLayer(input_shape=DIMS),
    tf.keras.layers.Conv2D(
        filters=filt_base, kernel_size=3, strides=(1, 1), activation="relu", padding="same"
    ),
    tf.keras.layers.Conv2D(
        filters=filt_base, kernel_size=3, strides=(2, 2), activation="relu", padding="same"
    ),
    tf.keras.layers.Conv2D(
        filters=filt_base*2, kernel_size=3, strides=(1, 1), activation="relu", padding="same"
    ),
    tf.keras.layers.Conv2D(
        filters=filt_base*2, kernel_size=3, strides=(2, 2), activation="relu", padding="same"
    ),
    tf.keras.layers.Conv2D(
        filters=filt_base*3, kernel_size=3, strides=(1, 1), activation="relu", padding="same"
    ),
    tf.keras.layers.Conv2D(
        filters=filt_base*3, kernel_size=3, strides=(2, 2), activation="relu", padding="same"
    ),
    tf.keras.layers.Conv2D(
        filters=filt_base*4, kernel_size=3, strides=(1, 1), activation="relu", padding="same"
    ),
    tf.keras.layers.Conv2D(
        filters=filt_base*4, kernel_size=3, strides=(2, 2), activation="relu", padding="same"
    ),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=N_Z*2),
]

decoder = [
    tf.keras.layers.Dense(units=8 * 8 * 128, activation="relu"),
    tf.keras.layers.Reshape(target_shape=(8, 8, 128)),
    tf.keras.layers.Conv2DTranspose(
        filters=filt_base*4, kernel_size=3, strides=(2, 2), padding="SAME", activation="relu"
    ),
    tf.keras.layers.Conv2DTranspose(
        filters=filt_base*4, kernel_size=3, strides=(1, 1), padding="SAME", activation="relu"
    ),
    tf.keras.layers.Conv2DTranspose(
        filters=filt_base*3, kernel_size=3, strides=(2, 2), padding="SAME", activation="relu"
    ),
    tf.keras.layers.Conv2DTranspose(
        filters=filt_base*3, kernel_size=3, strides=(1, 1), padding="SAME", activation="relu"
    ),
    tf.keras.layers.Conv2DTranspose(
        filters=filt_base*2, kernel_size=3, strides=(2, 2), padding="SAME", activation="relu"
    ),
    tf.keras.layers.Conv2DTranspose(
        filters=filt_base*2, kernel_size=3, strides=(1, 1), padding="SAME", activation="relu"
    ),
    tf.keras.layers.Conv2DTranspose(
        filters=filt_base, kernel_size=3, strides=(2, 2), padding="SAME", activation="relu"
    ),
    tf.keras.layers.Conv2DTranspose(
        filters=1, kernel_size=3, strides=(1, 1), padding="SAME", activation="sigmoid"
    ),
]

optimizer = tf.keras.optimizers.Adam(1e-3)

model = VAE(
    enc = encoder,
    dec = decoder,
    optimizer = optimizer,
)
model.compile(optimizer=optimizer)
and I am trying to train the model using the fit_generator function:
num_epochs = 50
model.fit_generator(generator=my_training_batch_generator,
                    steps_per_epoch=(num_training_samples // batch_size),
                    epochs=num_epochs,
                    verbose=1,
                    validation_data=my_validation_batch_generator,
                    validation_steps=(num_validation_samples // batch_size),
                    use_multiprocessing=True,
                    workers=16,
                    max_queue_size=32)
I am new to machine learning, and any help to resolve the issue would be appreciated. I think the issue is with the def train line in the VAE class.
An optional request: it would also be appreciated if training could be set up so that I can see the reconstruction after each epoch. I already have a plot_reconstruction function in the research Colab notebook for this purpose that needs to be called.
APaul31,
Specifically, in your code I suggest adding a call() function to the VAE class:
def call(self, x):
    q_z = self.encode(x)
    z = q_z.sample()
    x_recon = self.decode(z)
    return x_recon
I also suggest using a more standard approach to your task, especially as a beginner:
use tf.keras.preprocessing.image_dataset_from_directory() for image loading. Tutorial here.
use a custom Model.train_step() to calculate the VAE losses instead of multiple functions in your VAE class. Example here (a rough sketch follows below).
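A minimal sketch of that train_step idea, reusing the compute_loss() method from the question's VAE class; the metric names, and the assumption that the data pipeline yields plain image batches, are illustrative rather than taken from the linked example:

    def train_step(self, x):
        # x is one batch from the data pipeline; assumed here to be images only.
        with tf.GradientTape() as tape:
            recon_loss, latent_loss = self.compute_loss(x)
            total_loss = recon_loss + latent_loss
        grads = tape.gradient(total_loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
        # The returned dict shows up in the progress bar and history during fit().
        return {"loss": total_loss, "recon_loss": recon_loss, "latent_loss": latent_loss}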
Currently I am getting the error TypeError: 'NoneType' object is not callable.
The problem is with the fit method: when you are passing a data generator, use fit_generator instead of fit. In the Colab it's calling fit.
Also, note that you can use the flow_from_directory method instead of image_dataset_from_directory to lazily generate batches; it won't load the whole dataset into memory (a short sketch follows the link below).
https://keras.io/api/preprocessing/image/#flowfromdirectory-method.
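A minimal sketch of that approach, with placeholder paths and sizes (not taken from the notebook):

    from tensorflow.keras.preprocessing.image import ImageDataGenerator

    # Rough sketch: directory name, image size and batch size are placeholders.
    datagen = ImageDataGenerator(rescale=1.0 / 255)
    train_gen = datagen.flow_from_directory(
        'data/train/',           # expects one subfolder per class
        target_size=(128, 128),
        batch_size=32,
        class_mode='input')      # yields (image, image) pairs, handy for autoencoders/VAEs
    # train_gen can then be passed to fit_generator as shown earlier.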
I want to use subclassing/inheritance of the Keras Model class. When I try to compile my model, it doesn't work.
I started with Keras recently but used PyTorch a lot before.
I currently run TensorFlow and Keras on versions 1.10 and 2.16 respectively, and I really don't know why I can't compile the model. I tried updating TF to version 1.13 but nothing changed.
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from keras.layers import Input,Conv2D,MaxPooling2D,UpSampling2D,BatchNormalization
from keras import Model, layers
print(tf.VERSION)
print(tf.keras.__version__)
batch_size = 128
epochs = 50
inChannel = 1
img_width, img_height = 64, 64
input_shape = (img_width, img_height, 1)
class AE_64x64(Model):
    def __init__(self):
        super(AE_64x64, self).__init__()
        '''
        data_format: channels last
        '''
        self.conv1 = Conv2D(filters=30, kernel_size=(7,7), activation='relu', padding='same', strides=2)(Input(shape=input_shape))
        self.conv2 = Conv2D(filters=40, kernel_size=(5,5), activation='relu', padding='same', strides=2)
        self.batchnorm = BatchNormalization(axis=2)
        self.max_pool = MaxPooling2D((3,3), padding='same')
        self.conv3 = Conv2D(filters=50, kernel_size=(3,3), activation='relu', padding='same', strides=2)
        self.conv4 = Conv2D(filters=60, kernel_size=(3,3), activation='relu')
        self.b1 = Conv2D(filters=80, kernel_size=(3,3), activation='relu')
        self.b2 = Conv2D(filters=99, kernel_size=(3,3), activation='relu')
        self.conv6 = Conv2D(filters=50, kernel_size=(3,3), activation='relu')
        self.conv7 = Conv2D(filters=40, kernel_size=(3,3), activation='relu')
        self.conv8 = Conv2D(filters=30, kernel_size=(3,3), activation='relu')
        self.conv9 = Conv2D(filters=1, kernel_size=(3,3), activation='relu')

    def call(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.batchnorm(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.max_pool(x)
        x = self.batchnorm(x)
        x = self.b1(x)
        x = self.b2(x)
        x = self.batchnorm(x)
        x = self.conv5(x)
        x = self.conv6(x)
        x = self.batchnorm(x)
        x = self.conv7(x)
        x = self.conv8(x)
        x = self.batchnorm(x)
        x = self.conv9(x)
        return x
AE_Model = AE_64x64()
AE_Model.compile(loss='mean_squared_error',optimizer=tf.train.AdamOptimizer(),metrics= ['mean_squared_error'])
AE_Model.summary()
I expected a summary output but instead I received this error message:
RuntimeError: You must compile your model before using it.
Is there a logical mistake in the code or a Hardware/Version problem?
You at least have to build your model, or fit your model with data.
Anyway, when I run your code without data I get this result:
AE_Model.compile(loss='mean_squared_error',optimizer=tf.train.AdamOptimizer(),metrics= ['mean_squared_error'])
AE_Model.build(input_shape)
AE_Model.summary()
1.13.1
2.2.4-tf
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
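For completeness, the other route mentioned above (fitting or calling the model with data) also builds it. A rough sketch follows; note that it only works once call() itself runs cleanly, and as written the call() above references self.conv5, which is never defined, while self.conv1 holds a tensor rather than a layer, so those would need fixing first:

    # Rough sketch: build the subclassed model by running one dummy batch through it
    # (batch size 1 is arbitrary; requires a working call() method).
    dummy = tf.zeros((1, 64, 64, 1))
    AE_Model(dummy)
    AE_Model.summary()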
I'm currently working on an image-generating convolutional NN and an audio-generating recurrent NN. I built the generators for both, but for some reason the build_audio_generator model has in its last layer a tensor (Tensor("model_4/sequential_4/activation_4/Tanh:0")) with shape (?, 1) instead of (?, 28, 28, 1) as needed. My question: how do I have to change the code of build_audio_generator so that it has the same shape (?, 28, 28, 1) as build_generator?
Code:
def build_generator(latent_dim, channels, num_classes):
    model = Sequential()
    model.add(Dense(128 * 7 * 7, activation="relu", input_dim=latent_dim))
    model.add(Reshape((7, 7, 128)))
    model.add(BatchNormalization(momentum=0.8))
    model.add(UpSampling2D())
    model.add(Conv2D(128, kernel_size=3, padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(momentum=0.8))
    model.add(UpSampling2D())
    model.add(Conv2D(64, kernel_size=3, padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Conv2D(channels, kernel_size=3, padding='same'))
    model.add(Activation("tanh"))
    model.summary()

    noise = Input(shape=(latent_dim,))
    label = Input(shape=(1,), dtype='int32')
    label_embedding = Flatten()(Embedding(num_classes, 100)(label))
    model_input = multiply([noise, label_embedding])
    img = model(model_input)

    return Model([noise, label], img)

def build_audio_generator(latent_dim, num_classes):
    model = Sequential()
    model.add(LSTM(512, input_dim=latent_dim, return_sequences=True))
    model.add(Dropout(0.3))
    model.add(LSTM(512, return_sequences=True))
    model.add(Dropout(0.3))
    model.add(LSTM(512))
    model.add(Dense(256))
    model.add(Dropout(0.3))
    model.add(Dense(num_classes))
    model.add(Activation('tanh'))
    model.summary()

    noise = Input(shape=(None, latent_dim,))
    label = Input(shape=(1,), dtype='int32')
    label_embedding = Flatten()(Embedding(num_classes, 100)(label))
    model_input = multiply([noise, label_embedding])
    sound = model(model_input)

    return Model([noise, label], sound)
# Build the generator
generator = build_generator(100, 3, 1)
audio_generator = build_audio_generator(100, 1)
# The generator takes noise and the target label as input
# and generates the corresponding digit of that label
noise = Input(shape=(None, 100,))
label = Input(shape=(1,))
img = generator([noise, label])
audio = audio_generator([noise, label])
print('Audio: '+ str(audio))
print('Audio shape: ' + str(audio.shape))
print('IMG: '+str(img))
print('IMG shape: ' + str(img.shape))
Console output:
Audio: Tensor("model_4/sequential_4/activation_4/Tanh:0", shape=(?, 1), dtype=float32)
Audio shape: (?, 1)
IMG: Tensor("model_3/sequential_3/activation_3/Tanh:0", shape=(?, 28, 28, 1), dtype=float32)
IMG shape: (?, 28, 28, 1)
I think you would want 3D for audio instead, no?
Just keep return_sequences=True in all LSTMs.
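A minimal sketch of that change, keeping the rest of build_audio_generator as in the question. With return_sequences=True on every LSTM, the time dimension is kept and the Dense layers are applied per timestep, so the output becomes 3D, (batch, timesteps, num_classes), instead of (?, 1):

    model = Sequential()
    model.add(LSTM(512, input_dim=latent_dim, return_sequences=True))
    model.add(Dropout(0.3))
    model.add(LSTM(512, return_sequences=True))
    model.add(Dropout(0.3))
    model.add(LSTM(512, return_sequences=True))   # changed: keep the time dimension
    model.add(Dense(256))
    model.add(Dropout(0.3))
    model.add(Dense(num_classes))
    model.add(Activation('tanh'))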
I'm just getting started with Keras and with Deep learning, so the answer to my question could be obvious to some, but for me it isn't.
I made a model to colorize some black-and-white photos following the article on FloydHub (where I'm training it), and it works just fine when I train it with similar pictures (such as human faces). But as soon as I use a larger dataset with different kinds of pictures as input, the loss just remains stable and doesn't improve.
I've tried different learning rates and optimizers but just cannot get a good result.
What could I change to get a better result?
This is the code (thanks to Emil Wallner for the article on Floydhub)
# Get images
X = []
for filename in os.listdir('/data/images/Train/'):
    X.append(img_to_array(load_img('/data/images/Train/'+filename)))
X = np.array(X, dtype=float)
Xtrain = 1.0/255*X
#Load weights
inception = InceptionResNetV2(weights=None, include_top=True)
inception.load_weights('/data/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5')
inception.graph = tf.get_default_graph()
embed_input = Input(shape=(1000,))
#Encoder
encoder_input = Input(shape=(256, 256, 1,))
encoder_output = Conv2D(64, (3,3), activation='relu', padding='same', strides=2)(encoder_input)
encoder_output = Conv2D(128, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(128, (3,3), activation='relu', padding='same', strides=2)(encoder_output)
encoder_output = Conv2D(256, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(256, (3,3), activation='relu', padding='same', strides=2)(encoder_output)
encoder_output = Conv2D(512, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(512, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(256, (3,3), activation='relu', padding='same')(encoder_output)
#Fusion
fusion_output = RepeatVector(32 * 32)(embed_input)
fusion_output = Reshape(([32, 32, 1000]))(fusion_output)
fusion_output = concatenate([encoder_output, fusion_output], axis=3)
fusion_output = Conv2D(256, (1, 1), activation='relu', padding='same')(fusion_output)
#Decoder
decoder_output = Conv2D(128, (3,3), activation='relu', padding='same')(fusion_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)
decoder_output = Conv2D(64, (3,3), activation='relu', padding='same')(decoder_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)
decoder_output = Conv2D(32, (3,3), activation='relu', padding='same')(decoder_output)
decoder_output = Conv2D(16, (3,3), activation='relu', padding='same')(decoder_output)
decoder_output = Conv2D(2, (3, 3), activation='tanh', padding='same')(decoder_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)
model = Model(inputs=[encoder_input, embed_input], outputs=decoder_output)
#Create embedding
def create_inception_embedding(grayscaled_rgb):
    grayscaled_rgb_resized = []
    for i in grayscaled_rgb:
        i = resize(i, (299, 299, 3), mode='constant')
        grayscaled_rgb_resized.append(i)
    grayscaled_rgb_resized = np.array(grayscaled_rgb_resized)
    grayscaled_rgb_resized = preprocess_input(grayscaled_rgb_resized)
    with inception.graph.as_default():
        embed = inception.predict(grayscaled_rgb_resized)
    return embed
# Image transformer
datagen = ImageDataGenerator(
    shear_range=0.4,
    zoom_range=0.4,
    rotation_range=40,
    horizontal_flip=True)
#Generate training data
batch_size = 20
def image_a_b_gen(batch_size):
    for batch in datagen.flow(Xtrain, batch_size=batch_size):
        grayscaled_rgb = gray2rgb(rgb2gray(batch))
        embed = create_inception_embedding(grayscaled_rgb)
        lab_batch = rgb2lab(batch)
        X_batch = lab_batch[:,:,:,0]
        X_batch = X_batch.reshape(X_batch.shape+(1,))
        Y_batch = lab_batch[:,:,:,1:] / 128
        yield ([X_batch, create_inception_embedding(grayscaled_rgb)], Y_batch)
#Train model
tensorboard = TensorBoard(log_dir="/output")
model.compile(optimizer='adam', loss='mse')
model.fit_generator(image_a_b_gen(batch_size), callbacks=[tensorboard], epochs=1000, steps_per_epoch=20)
#Make a prediction on the unseen images
color_me = []
for filename in os.listdir('../Test/'):
    color_me.append(img_to_array(load_img('../Test/'+filename)))
color_me = np.array(color_me, dtype=float)
color_me = 1.0/255*color_me
color_me = gray2rgb(rgb2gray(color_me))
color_me_embed = create_inception_embedding(color_me)
color_me = rgb2lab(color_me)[:,:,:,0]
color_me = color_me.reshape(color_me.shape+(1,))
# Test model
output = model.predict([color_me, color_me_embed])
output = output * 128
# Output colorizations
for i in range(len(output)):
    cur = np.zeros((256, 256, 3))
    cur[:,:,0] = color_me[i][:,:,0]
    cur[:,:,1:] = output[i]
    imsave("result/img_"+str(i)+".png", lab2rgb(cur))