How does Keras(Tensorflow) calc. shape of last layer Tensor? - python

I'm currently working on an image-generating convolutional NN and an audio-generating recurrent NN. I built a generator for each, but for some reason the last layer of the build_audio_generator model outputs a Tensor (Tensor("model_4/sequential_4/activation_4/Tanh:0")) with shape (?, 1) instead of (?, 28, 28, 1) as needed. My question: how do I have to change the code of build_audio_generator so that its output has the same shape (?, 28, 28, 1) as the output of build_generator?
Code:
def build_generator(latent_dim, channels, num_classes):
    """Build a label-conditioned convolutional image generator.

    Args:
        latent_dim: length of the noise vector fed to the generator.
        channels: number of filters of the last Conv2D, i.e. the number of
            channels of the generated image.
        num_classes: number of distinct labels (vocabulary size of the
            label Embedding).

    Returns:
        A Model taking ``[noise, label]`` and returning the tanh-activated
        image tensor.
    """
    model = Sequential()
    # Project the noise to a 7x7x128 feature map ...
    model.add(Dense(128 * 7 * 7, activation="relu", input_dim=latent_dim))
    model.add(Reshape((7, 7, 128)))
    model.add(BatchNormalization(momentum=0.8))
    # ... then upsample twice (7 -> 14 -> 28 with the default factor).
    model.add(UpSampling2D())
    model.add(Conv2D(128, kernel_size=3, padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(momentum=0.8))
    model.add(UpSampling2D())
    model.add(Conv2D(64, kernel_size=3, padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Conv2D(channels, kernel_size=3, padding='same'))
    model.add(Activation("tanh"))
    model.summary()

    noise = Input(shape=(latent_dim,))
    label = Input(shape=(1,), dtype='int32')
    # The embedding is multiplied element-wise with the noise, so it must be
    # latent_dim wide (the original hard-coded 100 only worked for
    # latent_dim == 100).
    label_embedding = Flatten()(Embedding(num_classes, latent_dim)(label))
    model_input = multiply([noise, label_embedding])
    img = model(model_input)
    return Model([noise, label], img)
def build_audio_generator(latent_dim, num_classes, return_sequences=False):
    """Build a label-conditioned recurrent (stacked LSTM) generator.

    Args:
        latent_dim: size of the last axis of the noise sequence.
        num_classes: number of distinct labels; also the number of units of
            the final Dense layer.
        return_sequences: forwarded to the last LSTM. With the default
            ``False`` that LSTM emits only its final step, so the time axis
            is dropped and the model output is 2-D (the ``(?, 1)`` shape
            reported for ``num_classes == 1``). Pass ``True`` to keep the
            time axis all the way to the output.

    Returns:
        A Model taking ``[noise, label]`` and returning the tanh-activated
        output of the stack.
    """
    model = Sequential()
    model.add(LSTM(512, input_dim=latent_dim, return_sequences=True))
    model.add(Dropout(0.3))
    model.add(LSTM(512, return_sequences=True))
    model.add(Dropout(0.3))
    # This is the layer that decides whether the time axis survives.
    model.add(LSTM(512, return_sequences=return_sequences))
    model.add(Dense(256))
    model.add(Dropout(0.3))
    model.add(Dense(num_classes))
    model.add(Activation('tanh'))
    model.summary()

    noise = Input(shape=(None, latent_dim,))
    label = Input(shape=(1,), dtype='int32')
    # Embedding width must match the noise's last axis for the element-wise
    # multiply (the original hard-coded 100).
    label_embedding = Flatten()(Embedding(num_classes, latent_dim)(label))
    model_input = multiply([noise, label_embedding])
    sound = model(model_input)
    return Model([noise, label], sound)
# Instantiate the image generator and the audio generator
generator = build_generator(100, 3, 1)
audio_generator = build_audio_generator(100, 1)

# Both generators are called on the same symbolic noise and label inputs
noise = Input(shape=(None, 100,))
label = Input(shape=(1,))
img = generator([noise, label])
audio = audio_generator([noise, label])

# Report each output tensor followed by its static shape
for title, tensor in (('Audio', audio), ('IMG', img)):
    print(title + ': ' + str(tensor))
    print(title + ' shape: ' + str(tensor.shape))
Console output:
Audio: Tensor("model_4/sequential_4/activation_4/Tanh:0", shape=(?, 1), dtype=float32)
Audio shape: (?, 1)
IMG: Tensor("model_3/sequential_3/activation_3/Tanh:0", shape=(?, 28, 28, 1), dtype=float32)
IMG shape: (?, 28, 28, 1)

I think you would want 3D for audio instead, no?
Just keep return_sequences=True in all LSTMs.

Related

Issues when loading a sequential model in Python

I had trained and saved the following sequential architecture
# CNN classifier: resize + rescale preprocessing, four Conv2D stages with
# 32/64/128/256 filters (each followed by batch normalization and 2x2 max
# pooling), then a 300 -> 150 -> n_classes dense head with softmax output.
# NOTE(review): input_shape is passed to the first Conv2D, which is the third
# layer of the Sequential, not to its first layer. This is presumably related
# to the undefined channel dimension reported when the saved model is loaded;
# confirm by declaring the input shape on the first layer instead.
model = Sequential([layers.Resizing(IMG_SIZE, IMG_SIZE), # Resize every image to IMG_SIZE x IMG_SIZE
layers.Rescaling(1./255), # Multiply pixel values by 1/255
layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu',
padding = 'same', input_shape=(IMG_SIZE,IMG_SIZE,3)),
layers.Dropout(0.2),
layers.BatchNormalization(),
layers.MaxPool2D(pool_size=(2, 2)),
layers.Conv2D(filters=64, kernel_size=(3, 3),
activation='relu', padding = 'same'),
layers.BatchNormalization(),
layers.MaxPool2D(pool_size=(2, 2)),
layers.Conv2D(filters=128, kernel_size=(3, 3),
activation='relu', padding = 'same'),
layers.BatchNormalization(),
layers.MaxPool2D(pool_size=(2, 2)),
layers.Conv2D(filters=256, kernel_size=(3, 3),
activation='relu', padding = 'same'),
layers.BatchNormalization(),
layers.MaxPool2D(pool_size=(2, 2)),
# Classification head
layers.Flatten(),
layers.Dropout(0.2),
layers.Dense(300, activation='relu'),
layers.Dense(150, activation='relu'),
layers.Dense(n_classes, activation='softmax')])
EPOCHS = 50
BATCH_SIZE = 8
# Train for EPOCHS epochs, consuming every training and validation batch once per epoch
history = model.fit(x=train_batches, validation_data=validation_batches,
steps_per_epoch=len(train_batches),
validation_steps=len(validation_batches),
epochs=EPOCHS,
batch_size=BATCH_SIZE)
# Persist the trained model to 'my_model.h5'
model.save('my_model.h5')
When I load the saved model with
model=tf.keras.models.load_model('my_model.h5')
this error occurs:
Traceback (most recent call last):
File "C:\Users\Antonio Di Stolfo\Desktop\nuova codici 3\pythonProject2\main.py", line 88, in <module>
model=tf.keras.models.load_model('my_model.h5')
File "C:\Users\Antonio Di Stolfo\Desktop\nuova codici 3\cnnalzheimer\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
raise e.with_traceback(filtered_tb) from None
File "C:\Users\Antonio Di Stolfo\Desktop\nuova codici 3\cnnalzheimer\lib\site-packages\keras\layers\convolutional\base_conv.py", line 409, in _get_input_channel
raise ValueError( ValueError: The channel dimension of the inputs should be defined. The input_shape received is (None, 224, 224, None), where axis -1 (0-based) is the channel dimension, which found to be `None`.
Does anyone know how to solve this error, or how to define the channel dimension of the inputs?
I tried to load the saved model, but the error above occurred.

Why did this model still need calling 'build()' firstly when I have defined parameters?

Before this model, I defined the image shape, img_shape, which is (28, 28, 1),
def make_discriminator(img_shape):
    """Return the Sequential model named "Discriminator".

    The stack is dropout, two stride-2 5x5 convolutions (32 and 64 filters,
    no bias), each followed by batch normalization and LeakyReLU, then a
    flatten and a single-unit Dense output.

    Note that ``img_shape`` is passed as ``input_shape`` to the first Conv2D,
    which is the second layer of the stack; the leading Dropout receives no
    input shape.
    """
    stack = [
        keras.layers.Dropout(0.3),
        keras.layers.Conv2D(
            32,
            5,
            strides=2,
            padding='same',
            input_shape=img_shape,
            use_bias=False,
        ),
        keras.layers.BatchNormalization(),
        keras.layers.LeakyReLU(),
        keras.layers.Conv2D(64, 5, strides=2, padding='same', use_bias=False),
        keras.layers.BatchNormalization(),
        keras.layers.LeakyReLU(),
        keras.layers.Flatten(),
        keras.layers.Dense(1),
    ]
    return keras.Sequential(stack, "Discriminator")
Then I tried to directly use it as input and print the structure of this model,
D = make_discriminator(img_shape = img_shape)
print(D.summary())
However, it shows
This model has not yet been built. Build the model first by calling
build() or by calling the model on a batch of data.
But when I tried to add build() before summary,
D = make_discriminator(img_shape = img_shape)
it shows
build() got an unexpected keyword argument 'img_shape'
I don't know how to solve this problem. The code that creates the image data is below:
import keras
import tensorflow as tf
import tensorflow_datasets as tfds
fmist = tfds.load('fashion_mnist')
def process(data):
    """Return the 'image' entry of ``data`` as float32, rescaled linearly.

    The rescaling maps a pixel value of 0 to -1.0 and 255 to 1.0. The
    'label' entry is not used: the pipeline only needs the images.
    """
    img = tf.cast(data['image'], tf.float32)
    return (img / 255.0 - 0.5) * 2.0
BATCH_SIZE = 256
# Shuffle, batch, convert every batch with process(), and prefetch
train = (
    fmist['train']
    .shuffle(10000)
    .batch(BATCH_SIZE)
    .map(process)
    .prefetch(tf.data.experimental.AUTOTUNE)
)
# Drop the leading (batch) axis of the dataset's element shape
img_shape = tf.data.experimental.get_structure(train).shape[1:]
print("image shape:", img_shape)
Try discriminator.build(input_shape=(1, 28, 28, 1)):
def make_discriminator(img_shape):
    """Return the Sequential model named "Discriminator".

    ``img_shape`` is attached to the first Conv2D (the second layer), so the
    caller still has to build the model explicitly before asking for a
    summary.
    """
    return tf.keras.Sequential(
        layers=[
            tf.keras.layers.Dropout(0.3),
            tf.keras.layers.Conv2D(
                32, 5, strides=2, padding='same',
                input_shape=img_shape, use_bias=False,
            ),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.LeakyReLU(),
            tf.keras.layers.Conv2D(
                64, 5, strides=2, padding='same', use_bias=False,
            ),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.LeakyReLU(),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(1),
        ],
        name="Discriminator",
    )
discriminator = make_discriminator((28, 28, 1))
# Build explicitly with a full input shape (batch of 1, 28x28x1 images)
discriminator.build(input_shape=(1, 28, 28, 1))
# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line.
discriminator.summary()
Or set the input_shape in the first layer of your model. Then, the remaining output shapes will be inferred and you do not have to call model.build():
def make_discriminator(img_shape):
    """Return the Sequential model named "Discriminator".

    ``img_shape`` is declared on the very first layer (the Dropout), so the
    remaining shapes can be inferred without an explicit build() call.
    """
    conv_options = dict(strides=2, padding='same', use_bias=False)
    model = tf.keras.Sequential(name="Discriminator")
    model.add(tf.keras.layers.Dropout(0.3, input_shape=img_shape))
    model.add(tf.keras.layers.Conv2D(32, 5, **conv_options))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.LeakyReLU())
    model.add(tf.keras.layers.Conv2D(64, 5, **conv_options))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.LeakyReLU())
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(1))
    return model
discriminator = make_discriminator((28, 28, 1))
# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line.
discriminator.summary()

sklearn Logistic Regression “ValueError: Found array with dim 3. Estimator expected <= 2.”

I designed a CNN autoencoder that compresses each image into a four-dimensional vector (named flatten),
and then I want to visualize the result with PCA.
Below is my model:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', validation_size=0, one_hot=True)
logs_path = "./log2/noiseRemoval"
inputs_ = tf.placeholder(tf.float32, (None, 28, 28, 1), name='inputs')
targets_ = tf.placeholder(tf.float32, (None, 28, 28, 1), name='targets')
### Encoder
conv1 = tf.layers.conv2d(inputs_, 16, (3,3), padding='VALID', activation=tf.nn.relu, name='conv1')
# Now 26x26x16
maxpool1 = tf.layers.max_pooling2d(conv1, (2,2), (2,2), padding='VALID')
# Now 13x13x16
conv2 = tf.layers.conv2d(maxpool1, 8, (3,3), padding='VALID', activation=tf.nn.relu, name='conv2')
# Now 11x11x8
maxpool2 = tf.layers.max_pooling2d(conv2, (2,2), (2,2), padding='VALID')
# Now 5x5x8
conv3 = tf.layers.conv2d(maxpool2, 8, (3,3), padding='VALID', activation=tf.nn.relu, name='conv3')
# Now 3x3x8
encoded = tf.layers.max_pooling2d(conv3, (2,2), (2,2), padding='SAME')
# Now 2x2x8
feature_map=tf.layers.conv2d(encoded, 1, (3,3), padding='SAME', activation=tf.nn.relu, name='feature_map')
#Now 2x2x1
###########For PCA Visualize
flatten = tf.reshape(feature_map, [-1, 4], name='flatten')
### Decoder
upsample1 = tf.image.resize_nearest_neighbor(encoded, (4,4))
# 4x4x8
conv4 = tf.layers.conv2d_transpose(upsample1, 8, (3,3), padding='VALID', activation=tf.nn.relu, name='trans1')
# 6x6x8
upsample2 = tf.image.resize_nearest_neighbor(conv4, (11,11))
# 11x11x8
conv5 = tf.layers.conv2d_transpose(upsample2, 8, (3,3), padding='VALID', activation=tf.nn.relu, name='trans2')
# 13x13x8
upsample3 = tf.image.resize_nearest_neighbor(conv5, (26,26))
# 26x26x8
conv6 = tf.layers.conv2d_transpose(upsample3, 16, (3,3), padding='VALID', activation=tf.nn.relu, name='trans3')
# 28x28x16
logits = tf.layers.conv2d_transpose(conv6, 1, (3,3), padding='SAME', activation=None, name='logits')
#Now 28x28x1
decoded = tf.nn.sigmoid(logits, name='decoded')
#############################################################
#decoder2(resize)
upsample1_re = tf.image.resize_nearest_neighbor(encoded, (7,7))
# Now 7x7x8
conv4_re = tf.layers.conv2d(upsample1_re, 8, (3,3), padding='same', activation=tf.nn.relu, name='conv4_re')
# Now 7x7x8
upsample2_re = tf.image.resize_nearest_neighbor(conv4_re, (14,14))
# Now 14x14x8
conv5_re = tf.layers.conv2d(upsample2_re, 8, (3,3), padding='same', activation=tf.nn.relu, name='conv5_re')
# Now 14x14x8
upsample3_re = tf.image.resize_nearest_neighbor(conv5_re, (28,28))
# Now 28x28x8
conv6_re = tf.layers.conv2d(upsample3_re, 16, (3,3), padding='same', activation=tf.nn.relu, name='conv6_re')
# Now 28x28x16
logits_re = tf.layers.conv2d(conv6_re, 1, (3,3), padding='same', activation=None, name='logits_re')
#Now 28x28x1
decoded_re = tf.nn.sigmoid(logits_re, name='decoded_re')
####Optmizer
loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=targets_, logits=logits)
cost = tf.reduce_mean(loss)
loss_re=tf.nn.sigmoid_cross_entropy_with_logits(labels=targets_, logits=logits_re)
cost_re=tf.reduce_mean(loss_re)
opt = tf.train.AdamOptimizer(0.001).minimize(cost)
opt_re = tf.train.AdamOptimizer(0.001).minimize(cost_re)
# Log up to 4 images each of the inputs, the targets and both decoders'
# reconstructions, plus the scalar cost of each decoder
tf.summary.image('inputs', tf.reshape(inputs_, (-1, 28, 28, 1)), max_outputs=4)
tf.summary.image('targets', tf.reshape(targets_, (-1, 28, 28, 1)), max_outputs=4)
tf.summary.image('decoded', tf.reshape(decoded, (-1, 28, 28, 1)), max_outputs=4)
tf.summary.scalar('cost',cost)
tf.summary.image('decoded_re', tf.reshape(decoded_re, (-1, 28, 28, 1)), max_outputs=4)
tf.summary.scalar('cost_re',cost_re)
# Single op that evaluates every summary registered above
merged = tf.summary.merge_all()
#############Train###################
sess = tf.Session()
epochs = 1
batch_size = 200
sess.run(tf.global_variables_initializer())
train_writer = tf.summary.FileWriter(logs_path, sess.graph)
for epoch in range(epochs):
for ii in range(mnist.train.num_examples//batch_size):
batch = mnist.train.next_batch(batch_size)
imgs = batch[0].reshape((-1, 28, 28, 1))
batch_cost, _,batch_cost_re,_re , summary= sess.run([cost, opt,cost_re,opt_re, merged],\
feed_dict={inputs_: imgs,targets_: imgs})
train_writer.add_summary(summary,epoch)
print("Epoch: {}/{}...".format(epoch+1, epochs),
"Training loss: {:.4f}".format(batch_cost),
"Training loss_re: {:.4f}".format(batch_cost_re) )
img2=mnist.train.images[0].reshape((-1, 28, 28, 1))
code=sess.run([flatten],feed_dict={inputs_:img2})
sess.close()
After training my model, I want to use the PCA package, but I get an error there.
####Visualize by PCA
# NOTE(review): `code` was fetched with sess.run([flatten], ...). Fetching a
# list returns a list, so `code` is a one-element list wrapping the (1, 4)
# array, i.e. 3-D once converted -- presumably the cause of the
# "Found array with dim 3" error; confirm by fetching `flatten` directly.
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
X = pca.fit_transform(code)
# NOTE(review): Y covers every training label while `code` was computed from
# a single image, so X and Y presumably differ in length -- verify.
Y = np.argmax(mnist.train.labels, axis=1)
# plot
plt.figure(figsize=(10, 8))
plt.scatter(X[:, 0], X[:, 1], c=Y)
plt.colorbar()
plt.show()
This is the code I am trying to run, and it gives me this error:
ValueError: Found array with dim 3. Estimator expected <= 2.
Edit:
I have solved the problem and provide my code below for those who have a similar problem:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', validation_size=0)
logs_path = "./log2/noiseRemoval"
inputs_ = tf.placeholder(tf.float32, (None, 28, 28, 1), name='inputs')
targets_ = tf.placeholder(tf.float32, (None, 28, 28, 1), name='targets')
### Encoder
conv1 = tf.layers.conv2d(inputs_, 16, (3,3), padding='VALID', activation=tf.nn.relu, name='conv1')
# Now 26x26x16
maxpool1 = tf.layers.max_pooling2d(conv1, (2,2), (2,2), padding='SAME')
# Now 13x13x16
conv2 = tf.layers.conv2d(maxpool1, 8, (3,3), padding='VALID', activation=tf.nn.relu, name='conv2')
# Now 11x11x8
maxpool2 = tf.layers.max_pooling2d(conv2, (2,2), (2,2), padding='SAME')
# Now 6x6x8
conv3 = tf.layers.conv2d(maxpool2, 8, (3,3), padding='VALID', activation=tf.nn.relu, name='conv3')
# Now 4x4x8
encoded = tf.layers.max_pooling2d(conv3, (2,2), (2,2), padding='SAME')
# Now 2x2x8
### Decoder
upsample1 = tf.image.resize_nearest_neighbor(encoded, (4,4))
# 4x4x8
conv4 = tf.layers.conv2d_transpose(upsample1, 8, (3,3), padding='VALID', activation=tf.nn.relu, name='trans1')
# 6x6x8
upsample2 = tf.image.resize_nearest_neighbor(conv4, (11,11))
# 11x11x8
conv5 = tf.layers.conv2d_transpose(upsample2, 8, (3,3), padding='VALID', activation=tf.nn.relu, name='trans2')
# 13x13x8
upsample3 = tf.image.resize_nearest_neighbor(conv5, (26,26))
# 26x26x8
conv6 = tf.layers.conv2d_transpose(upsample3, 16, (3,3), padding='VALID', activation=tf.nn.relu, name='trans3')
# 28x28x16
logits = tf.layers.conv2d_transpose(conv6, 1, (3,3), padding='SAME', activation=None, name='logits')
#Now 28x28x1
decoded = tf.nn.sigmoid(logits, name='decoded')
#############################################################
#decoder2(resize)
upsample1_re = tf.image.resize_nearest_neighbor(encoded, (7,7))
# Now 7x7x8
conv4_re = tf.layers.conv2d(upsample1_re, 8, (3,3), padding='same', activation=tf.nn.relu, name='conv4_re')
# Now 7x7x8
upsample2_re = tf.image.resize_nearest_neighbor(conv4_re, (14,14))
# Now 14x14x8
conv5_re = tf.layers.conv2d(upsample2_re, 8, (3,3), padding='same', activation=tf.nn.relu, name='conv5_re')
# Now 14x14x8
upsample3_re = tf.image.resize_nearest_neighbor(conv5_re, (28,28))
# Now 28x28x8
conv6_re = tf.layers.conv2d(upsample3_re, 16, (3,3), padding='same', activation=tf.nn.relu, name='conv6_re')
# Now 28x28x16
logits_re = tf.layers.conv2d(conv6_re, 1, (3,3), padding='same', activation=None, name='logits_re')
#Now 28x28x1
decoded_re = tf.nn.sigmoid(logits_re, name='decoded_re')
loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=targets_, logits=logits)
cost = tf.reduce_mean(loss)
loss_re=tf.nn.sigmoid_cross_entropy_with_logits(labels=targets_, logits=logits_re)
cost_re=tf.reduce_mean(loss_re)
opt = tf.train.AdamOptimizer(0.001).minimize(cost)
opt_re = tf.train.AdamOptimizer(0.001).minimize(cost_re)
# TensorBoard summaries: up to 12 feature-map images from conv2 and conv3,
# up to 4 images each of the inputs, targets and both reconstructions, and
# the scalar cost of each decoder.
# The feature maps are NHWC with 8 channels. Move the channel axis next to
# the batch axis before reshaping so that every (image, channel) pair becomes
# one single-channel picture; reshaping the NHWC tensor directly would
# interleave values of different channels inside each picture.
tf.summary.image(
    'feature_map',
    tf.reshape(tf.transpose(conv2, [0, 3, 1, 2]), (-1, 11, 11, 1)),
    max_outputs=12)
tf.summary.image(
    'feature_map2',
    tf.reshape(tf.transpose(conv3, [0, 3, 1, 2]), (-1, 4, 4, 1)),
    max_outputs=12)
tf.summary.image('inputs', tf.reshape(inputs_, (-1, 28, 28, 1)), max_outputs=4)
tf.summary.image('targets', tf.reshape(targets_, (-1, 28, 28, 1)), max_outputs=4)
tf.summary.image('decoded', tf.reshape(decoded, (-1, 28, 28, 1)), max_outputs=4)
tf.summary.scalar('cost', cost)
# Log the resize-decoder output here (the original logged `decoded` twice)
tf.summary.image('decoded_re', tf.reshape(decoded_re, (-1, 28, 28, 1)), max_outputs=4)
tf.summary.scalar('cost_re', cost_re)
merged = tf.summary.merge_all()
#############Train###################
sess = tf.Session()
epochs = 50
batch_size = 200
sess.run(tf.global_variables_initializer())
train_writer = tf.summary.FileWriter(logs_path, sess.graph)
try:
    for epoch in range(epochs):
        for _ in range(mnist.train.num_examples // batch_size):
            batch = mnist.train.next_batch(batch_size)
            imgs = batch[0].reshape((-1, 28, 28, 1))
            # The autoencoder reconstructs its own input, so the images are
            # fed as both inputs and targets; both decoders are optimized in
            # the same step.
            batch_cost, _, batch_cost_re, _re, summary = sess.run(
                [cost, opt, cost_re, opt_re, merged],
                feed_dict={inputs_: imgs, targets_: imgs})
        # NOTE(review): the original indentation was lost; reporting once per
        # epoch (using the last batch) is assumed because the summary step and
        # the message are both keyed by epoch -- confirm.
        train_writer.add_summary(summary, epoch)
        print("Epoch: {}/{}...".format(epoch+1, epochs),
              "Training loss: {:.4f}".format(batch_cost),
              "Training loss_re: {:.4f}".format(batch_cost_re))
finally:
    # Flush pending events and release the session even if training fails
    train_writer.close()
    sess.close()
#劉書宏, thank you very much for the solution. For the benefit of the community, I am posting your solution here (in the Answer section).
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', validation_size=0)
logs_path = "./log2/noiseRemoval"
inputs_ = tf.placeholder(tf.float32, (None, 28, 28, 1), name='inputs')
targets_ = tf.placeholder(tf.float32, (None, 28, 28, 1), name='targets')
### Encoder
conv1 = tf.layers.conv2d(inputs_, 16, (3,3), padding='VALID', activation=tf.nn.relu, name='conv1')
# Now 26x26x16
maxpool1 = tf.layers.max_pooling2d(conv1, (2,2), (2,2), padding='SAME')
# Now 13x13x16
conv2 = tf.layers.conv2d(maxpool1, 8, (3,3), padding='VALID', activation=tf.nn.relu, name='conv2')
# Now 11x11x8
maxpool2 = tf.layers.max_pooling2d(conv2, (2,2), (2,2), padding='SAME')
# Now 6x6x8
conv3 = tf.layers.conv2d(maxpool2, 8, (3,3), padding='VALID', activation=tf.nn.relu, name='conv3')
# Now 4x4x8
encoded = tf.layers.max_pooling2d(conv3, (2,2), (2,2), padding='SAME')
# Now 2x2x8
### Decoder
upsample1 = tf.image.resize_nearest_neighbor(encoded, (4,4))
# 4x4x8
conv4 = tf.layers.conv2d_transpose(upsample1, 8, (3,3), padding='VALID', activation=tf.nn.relu, name='trans1')
# 6x6x8
upsample2 = tf.image.resize_nearest_neighbor(conv4, (11,11))
# 11x11x8
conv5 = tf.layers.conv2d_transpose(upsample2, 8, (3,3), padding='VALID', activation=tf.nn.relu, name='trans2')
# 13x13x8
upsample3 = tf.image.resize_nearest_neighbor(conv5, (26,26))
# 26x26x8
conv6 = tf.layers.conv2d_transpose(upsample3, 16, (3,3), padding='VALID', activation=tf.nn.relu, name='trans3')
# 28x28x16
logits = tf.layers.conv2d_transpose(conv6, 1, (3,3), padding='SAME', activation=None, name='logits')
#Now 28x28x1
decoded = tf.nn.sigmoid(logits, name='decoded')
#############################################################
#decoder2(resize)
upsample1_re = tf.image.resize_nearest_neighbor(encoded, (7,7))
# Now 7x7x8
conv4_re = tf.layers.conv2d(upsample1_re, 8, (3,3), padding='same', activation=tf.nn.relu, name='conv4_re')
# Now 7x7x8
upsample2_re = tf.image.resize_nearest_neighbor(conv4_re, (14,14))
# Now 14x14x8
conv5_re = tf.layers.conv2d(upsample2_re, 8, (3,3), padding='same', activation=tf.nn.relu, name='conv5_re')
# Now 14x14x8
upsample3_re = tf.image.resize_nearest_neighbor(conv5_re, (28,28))
# Now 28x28x8
conv6_re = tf.layers.conv2d(upsample3_re, 16, (3,3), padding='same', activation=tf.nn.relu, name='conv6_re')
# Now 28x28x16
logits_re = tf.layers.conv2d(conv6_re, 1, (3,3), padding='same', activation=None, name='logits_re')
#Now 28x28x1
decoded_re = tf.nn.sigmoid(logits_re, name='decoded_re')
loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=targets_, logits=logits)
cost = tf.reduce_mean(loss)
loss_re=tf.nn.sigmoid_cross_entropy_with_logits(labels=targets_, logits=logits_re)
cost_re=tf.reduce_mean(loss_re)
opt = tf.train.AdamOptimizer(0.001).minimize(cost)
opt_re = tf.train.AdamOptimizer(0.001).minimize(cost_re)
# TensorBoard summaries: up to 12 feature-map images from conv2 and conv3,
# up to 4 images each of the inputs, targets and both reconstructions, and
# the scalar cost of each decoder.
# The feature maps are NHWC with 8 channels. Move the channel axis next to
# the batch axis before reshaping so that every (image, channel) pair becomes
# one single-channel picture; reshaping the NHWC tensor directly would
# interleave values of different channels inside each picture.
tf.summary.image(
    'feature_map',
    tf.reshape(tf.transpose(conv2, [0, 3, 1, 2]), (-1, 11, 11, 1)),
    max_outputs=12)
tf.summary.image(
    'feature_map2',
    tf.reshape(tf.transpose(conv3, [0, 3, 1, 2]), (-1, 4, 4, 1)),
    max_outputs=12)
tf.summary.image('inputs', tf.reshape(inputs_, (-1, 28, 28, 1)), max_outputs=4)
tf.summary.image('targets', tf.reshape(targets_, (-1, 28, 28, 1)), max_outputs=4)
tf.summary.image('decoded', tf.reshape(decoded, (-1, 28, 28, 1)), max_outputs=4)
tf.summary.scalar('cost', cost)
# Log the resize-decoder output here (the original logged `decoded` twice)
tf.summary.image('decoded_re', tf.reshape(decoded_re, (-1, 28, 28, 1)), max_outputs=4)
tf.summary.scalar('cost_re', cost_re)
merged = tf.summary.merge_all()
#############Train###################
sess = tf.Session()
epochs = 50
batch_size = 200
sess.run(tf.global_variables_initializer())
train_writer = tf.summary.FileWriter(logs_path, sess.graph)
try:
    for epoch in range(epochs):
        for _ in range(mnist.train.num_examples // batch_size):
            batch = mnist.train.next_batch(batch_size)
            imgs = batch[0].reshape((-1, 28, 28, 1))
            # The autoencoder reconstructs its own input, so the images are
            # fed as both inputs and targets; both decoders are optimized in
            # the same step.
            batch_cost, _, batch_cost_re, _re, summary = sess.run(
                [cost, opt, cost_re, opt_re, merged],
                feed_dict={inputs_: imgs, targets_: imgs})
        # NOTE(review): the original indentation was lost; reporting once per
        # epoch (using the last batch) is assumed because the summary step and
        # the message are both keyed by epoch -- confirm.
        train_writer.add_summary(summary, epoch)
        print("Epoch: {}/{}...".format(epoch+1, epochs),
              "Training loss: {:.4f}".format(batch_cost),
              "Training loss_re: {:.4f}".format(batch_cost_re))
finally:
    # Flush pending events and release the session even if training fails
    train_writer.close()
    sess.close()

Error in K.ctc_batch_cost: tensorflow.python.framework.errors_impl.InvalidArgumentError: sequence_length(0) <= 30

I'm running a CRNN model in Keras to perform some handwriting recognition, but I'm getting an error while computing the CTC loss.
The problem only occurs when I try to load a pre-trained network for my CNN. It works fine if I build my own CNN from scratch.
This is the model where I'm getting error:
def get_DensenetLSTM(training):
    """Build the CRNN that uses an ImageNet-pretrained DenseNet121 as its CNN.

    Args:
        training: if truthy, return the model that takes the image, the
            labels, ``input_length`` and ``label_length`` and outputs the CTC
            loss; otherwise return the model mapping an image to the softmax
            ``y_pred``.

    Returns:
        The Keras Model selected by ``training``.

    NOTE(review): the Reshape below yields 32 time steps and
    ctc_lambda_func drops the first two, leaving 30. The reported
    "sequence_length(0) <= 30" error presumably means the ``input_length``
    values fed to this model exceed 30 -- confirm against the data generator.
    """
    input_shape = (img_w, img_h, 3)
    inputs = Input(name='the_input', shape=input_shape, dtype='float32')
    densenet = DenseNet121(include_top=False, weights='imagenet', input_tensor=inputs)
    inner = densenet.output

    # CNN to RNN
    inner = Reshape(target_shape=((32, 1536)), name='reshape')(inner)  # (None, 32, 1536)
    inner = Dense(64, activation='relu', kernel_initializer='he_normal', name='dense1')(inner)  # (None, 32, 64)

    # RNN layer
    # NOTE(review): with go_backwards=True Keras returns the sequence in
    # reversed time order, so the merges below combine step t with step
    # T-1-t -- verify that this is intended.
    lstm_1 = LSTM(256, return_sequences=True, kernel_initializer='he_normal', name='lstm1')(inner)  # (None, 32, 256)
    lstm_1b = LSTM(256, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='lstm1_b')(inner)
    lstm1_merged = add([lstm_1, lstm_1b])  # (None, 32, 256)
    lstm1_merged = BatchNormalization()(lstm1_merged)
    lstm_2 = LSTM(256, return_sequences=True, kernel_initializer='he_normal', name='lstm2')(lstm1_merged)
    lstm_2b = LSTM(256, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='lstm2_b')(lstm1_merged)
    lstm2_merged = concatenate([lstm_2, lstm_2b])  # (None, 32, 512)
    # The original also applied BatchNormalization to lstm2_merged but never
    # used the result; that dead layer is removed, so the graph is unchanged.

    # transforms RNN output to character activations:
    inner = Dense(num_classes, kernel_initializer='he_normal', name='dense2')(lstm2_merged)  # (None, 32, num_classes)
    y_pred = Activation('softmax', name='softmax')(inner)

    labels = Input(name='the_labels', shape=[max_text_len], dtype='float32')  # (None, max_text_len)
    input_length = Input(name='input_length', shape=[1], dtype='int64')  # (None, 1)
    label_length = Input(name='label_length', shape=[1], dtype='int64')  # (None, 1)

    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])  # (None, 1)

    if training:
        return Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_out)
    else:
        return Model(inputs=[inputs], outputs=y_pred)
This works fine:
def get_Model(training):
    """Build the CRNN with a hand-written seven-convolution CNN front end.

    Args:
        training: if truthy, return the model that takes the image, the
            labels, ``input_length`` and ``label_length`` and outputs the CTC
            loss; otherwise return the model mapping an image to the softmax
            ``y_pred``.

    Returns:
        The Keras Model selected by ``training``.
    """
    input_shape = (img_w, img_h, 1)

    # Make Network
    inputs = Input(name='the_input', shape=input_shape, dtype='float32')

    # Convolution stages: conv -> batch norm -> relu, with pooling in between
    inner = Conv2D(64, (3, 3), padding='same', name='conv1', kernel_initializer='he_normal')(inputs)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(2, 2), name='max1')(inner)

    inner = Conv2D(128, (3, 3), padding='same', name='conv2', kernel_initializer='he_normal')(inner)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(2, 2), name='max2')(inner)

    inner = Conv2D(256, (3, 3), padding='same', name='conv3', kernel_initializer='he_normal')(inner)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = Conv2D(256, (3, 3), padding='same', name='conv4', kernel_initializer='he_normal')(inner)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    # From here on pooling only halves the second spatial axis
    inner = MaxPooling2D(pool_size=(1, 2), name='max3')(inner)

    inner = Conv2D(512, (3, 3), padding='same', name='conv5', kernel_initializer='he_normal')(inner)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = Conv2D(512, (3, 3), padding='same', name='conv6')(inner)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)
    inner = MaxPooling2D(pool_size=(1, 2), name='max4')(inner)

    inner = Conv2D(512, (2, 2), padding='same', kernel_initializer='he_normal', name='con7')(inner)
    inner = BatchNormalization()(inner)
    inner = Activation('relu')(inner)

    # CNN to RNN
    inner = Reshape(target_shape=((62, 2560)), name='reshape')(inner)  # (None, 62, 2560)
    inner = Dense(64, activation='relu', kernel_initializer='he_normal', name='dense1')(inner)  # (None, 62, 64)

    # RNN layer
    # NOTE(review): with go_backwards=True Keras returns the sequence in
    # reversed time order, so the merges below combine step t with step
    # T-1-t -- verify that this is intended.
    lstm_1 = LSTM(256, return_sequences=True, kernel_initializer='he_normal', name='lstm1')(inner)
    lstm_1b = LSTM(256, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='lstm1_b')(inner)
    lstm1_merged = add([lstm_1, lstm_1b])  # (None, 62, 256)
    lstm1_merged = BatchNormalization()(lstm1_merged)
    lstm_2 = LSTM(256, return_sequences=True, kernel_initializer='he_normal', name='lstm2')(lstm1_merged)
    lstm_2b = LSTM(256, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='lstm2_b')(lstm1_merged)
    lstm2_merged = concatenate([lstm_2, lstm_2b])  # (None, 62, 512)
    # The original also applied BatchNormalization to lstm2_merged but never
    # used the result; that dead layer is removed, so the graph is unchanged.

    # transforms RNN output to character activations:
    inner = Dense(num_classes, kernel_initializer='he_normal', name='dense2')(lstm2_merged)
    y_pred = Activation('softmax', name='softmax')(inner)

    labels = Input(name='the_labels', shape=[max_text_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])  # (None, 1)

    if training:
        return Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_out)
    else:
        return Model(inputs=[inputs], outputs=y_pred)
This is the ctc_loss function:
def ctc_lambda_func(args):
    """Compute the batched CTC cost inside a Lambda layer.

    ``args`` unpacks to the predictions, the labels, the prediction lengths
    and the label lengths. The first two time steps of the predictions are
    discarded before the cost is computed, and three debug values are
    printed on the way.
    """
    predictions, label_batch, pred_lengths, label_lengths = args
    # the 2 is critical here since the first couple outputs of the RNN
    # tend to be garbage:
    trimmed = predictions[:, 2:, :]
    for debug_value in (trimmed.shape, pred_lengths, label_batch.shape):
        print(debug_value)
    return K.ctc_batch_cost(label_batch, trimmed, pred_lengths, label_lengths)
When I run the Densenet_LSTM model, I get the following error:
tensorflow.python.framework.errors_impl.InvalidArgumentError: sequence_length(0) <= 30
[[{{node ctc/CTCLoss}} = CTCLoss[_class=["loc:#training/Adam/gradients/ctc/CTCLoss_grad/mul"], ctc_merge_repeated=true, ignore_longer_outputs_than_inputs=false, preprocess_collapse_repeated=false, _device="/job:localhost/replica:0/task:0/device:CPU:0"](ctc/Log/_7309, ctc/ToInt64/_7311, ctc/ToInt32_2/_7313, ctc/ToInt32_1/_7315)]]
Please help.

Keras model doesn't work with a larger dataset

I'm just getting started with Keras and with Deep learning, so the answer to my question could be obvious to some, but for me it isn't.
I made a model to colorize some black and white photos following the article on Floydhub (where I'm training it) and it works just fine when I train it with similar pictures (such as human faces) but as soon as I use a larger dataset as an input with different pictures, the loss just remains stable and doesn't get better.
I've tried different learning rates and optimizers but just cannot get a good result.
What could I change to get a better result?
This is the code (thanks to Emil Wallner for the article on Floydhub)
# Load every file of the training folder as an image array
X = [
    img_to_array(load_img('/data/images/Train/'+filename))
    for filename in os.listdir('/data/images/Train/')
]
X = np.array(X, dtype=float)
# Scale the pixel values by 1/255
Xtrain = 1.0/255*X
#Load weights
inception = InceptionResNetV2(weights=None, include_top=True)
inception.load_weights('/data/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5')
inception.graph = tf.get_default_graph()
embed_input = Input(shape=(1000,))
#Encoder
encoder_input = Input(shape=(256, 256, 1,))
encoder_output = Conv2D(64, (3,3), activation='relu', padding='same', strides=2)(encoder_input)
encoder_output = Conv2D(128, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(128, (3,3), activation='relu', padding='same', strides=2)(encoder_output)
encoder_output = Conv2D(256, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(256, (3,3), activation='relu', padding='same', strides=2)(encoder_output)
encoder_output = Conv2D(512, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(512, (3,3), activation='relu', padding='same')(encoder_output)
encoder_output = Conv2D(256, (3,3), activation='relu', padding='same')(encoder_output)
#Fusion
fusion_output = RepeatVector(32 * 32)(embed_input)
fusion_output = Reshape(([32, 32, 1000]))(fusion_output)
fusion_output = concatenate([encoder_output, fusion_output], axis=3)
fusion_output = Conv2D(256, (1, 1), activation='relu', padding='same')(fusion_output)
#Decoder
decoder_output = Conv2D(128, (3,3), activation='relu', padding='same')(fusion_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)
decoder_output = Conv2D(64, (3,3), activation='relu', padding='same')(decoder_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)
decoder_output = Conv2D(32, (3,3), activation='relu', padding='same')(decoder_output)
decoder_output = Conv2D(16, (3,3), activation='relu', padding='same')(decoder_output)
decoder_output = Conv2D(2, (3, 3), activation='tanh', padding='same')(decoder_output)
decoder_output = UpSampling2D((2, 2))(decoder_output)
model = Model(inputs=[encoder_input, embed_input], outputs=decoder_output)
#Create embedding
def create_inception_embedding(grayscaled_rgb):
    """Return the Inception predictions for a batch of grayscale-as-RGB images.

    Every image is resized to 299x299x3 and passed through
    ``preprocess_input`` before ``inception.predict`` is run under the graph
    stored on ``inception.graph``.
    """
    resized_batch = np.array(
        [resize(image, (299, 299, 3), mode='constant') for image in grayscaled_rgb]
    )
    resized_batch = preprocess_input(resized_batch)
    with inception.graph.as_default():
        return inception.predict(resized_batch)
# Image transformer
datagen = ImageDataGenerator(
shear_range=0.4,
zoom_range=0.4,
rotation_range=40,
horizontal_flip=True)
#Generate training data
batch_size = 20
def image_a_b_gen(batch_size):
    """Yield ``([X_batch, embed], Y_batch)`` training tuples.

    For every augmented batch produced by ``datagen.flow``:
      * ``X_batch`` is channel 0 of the Lab conversion with a trailing
        singleton axis added,
      * ``embed`` is the Inception embedding of the grayscale version,
      * ``Y_batch`` holds the remaining Lab channels divided by 128.

    Args:
        batch_size: batch size passed to ``datagen.flow``.
    """
    for batch in datagen.flow(Xtrain, batch_size=batch_size):
        grayscaled_rgb = gray2rgb(rgb2gray(batch))
        # Run Inception once per batch; the original computed this embedding
        # a second time inside the yield expression.
        embed = create_inception_embedding(grayscaled_rgb)
        lab_batch = rgb2lab(batch)
        X_batch = lab_batch[:, :, :, 0]
        X_batch = X_batch.reshape(X_batch.shape + (1,))
        Y_batch = lab_batch[:, :, :, 1:] / 128
        yield ([X_batch, embed], Y_batch)
#Train model
tensorboard = TensorBoard(log_dir="/output")
model.compile(optimizer='adam', loss='mse')
model.fit_generator(image_a_b_gen(batch_size), callbacks=[tensorboard], epochs=1000, steps_per_epoch=20)
# Load the unseen test images and scale them by 1/255
color_me = np.array(
    [img_to_array(load_img('../Test/'+filename)) for filename in os.listdir('../Test/')],
    dtype=float,
)
color_me = 1.0/255*color_me
# Grayscale version (kept as 3 channels) feeds both the embedding and the encoder
color_me = gray2rgb(rgb2gray(color_me))
color_me_embed = create_inception_embedding(color_me)
# Keep only channel 0 of the Lab conversion, with a trailing singleton axis
color_me = rgb2lab(color_me)[:,:,:,0]
color_me = color_me.reshape(color_me.shape+(1,))
# Predict the two remaining channels and undo the /128 scaling used in training
output = model.predict([color_me, color_me_embed])
output = output * 128
# Reassemble each Lab image and write it out as RGB
for i, predicted_ab in enumerate(output):
    cur = np.zeros((256, 256, 3))
    cur[:,:,0] = color_me[i][:,:,0]
    cur[:,:,1:] = predicted_ab
    imsave("result/img_"+str(i)+".png", lab2rgb(cur))

Categories

Resources