Related
I'm new to Autoencoder. I have built a simple convolution autoencoder as shown below:
# ENCODER
input_img = Input(shape=(64, 64, 1))
encode1 = Conv2D(32, (3, 3), activation=tf.nn.leaky_relu, padding='same')(input_img)
encode2 = MaxPooling2D((2, 2), padding='same')(encode1)
l = Flatten()(encode2)
l = Dense(100, activation='linear')(l)
# DECODER
d = Dense(1024, activation='linear')(l)
d = Reshape((32,32,1))(d)
decode3 = Conv2D(64, (3, 3), activation=tf.nn.leaky_relu, padding='same')(d)
decode4 = UpSampling2D((2, 2))(decode3)
model = models.Model(input_img, decode4)
model.compile(optimizer='adam', loss='mse')
# Train it by providing training images
model.fit(x, y, epochs=20, batch_size=16)
Now after training this model, I want to get output from bottleneck layer i.e dense layer. That means if I throw array of shape (1000, 64, 64) to model, I want compressed array of shape (1000, 100).
I have tried one method as shown below, but it's giving me some error.
model = Model(inputs=[x], outputs=[l])
err:
ValueError: Input tensors to a Functional must come from `tf.keras.Input`.
I have also tried some other method but that's also not working. Can someone tell me how can I get compressed array back after training the model.
You need to create the separate model for the encoder. After you train the whole system encoder-decoder, you can use only encoder for prediction. Code example:
# ENCODER
input_img = layers.Input(shape=(64, 64, 1))
encode1 = layers.Conv2D(32, (3, 3), activation=tf.nn.leaky_relu, padding='same')(input_img)
encode2 = layers.MaxPooling2D((2, 2), padding='same')(encode1)
l = layers.Flatten()(encode2)
encoder_output = layers.Dense(100, activation='linear')(l)
# DECODER
d = layers.Dense(1024, activation='linear')(encoder_output)
d = layers.Reshape((32,32,1))(d)
decode3 = layers.Conv2D(64, (3, 3), activation=tf.nn.leaky_relu, padding='same')(d)
decode4 = layers.UpSampling2D((2, 2))(decode3)
model_encoder = Model(input_img, encoder_output)
model = Model(input_img, decode4)
model.fit(X, y, epochs=20, batch_size=16)
model_encoder.predict(X) should return a vector for each image.
Getting the output of intermediate layer (bottleneck_layer).
# ENCODER
input_img = Input(shape=(64, 64, 1))
encode1 = Conv2D(32, (3, 3), activation=tf.nn.leaky_relu, padding='same')(input_img)
encode2 = MaxPooling2D((2, 2), padding='same')(encode1)
l = Flatten()(encode2)
bottleneck = Dense(100, activation='linear', name='bottleneck_layer')(l)
# DECODER
d = Dense(1024, activation='linear')(bottleneck)
d = Reshape((32,32,1))(d)
decode3 = Conv2D(64, (3, 3), activation=tf.nn.leaky_relu, padding='same')(d)
decode4 = UpSampling2D((2, 2))(decode3)
# full model
model_full = models.Model(input_img, decode4)
model_full.compile(optimizer='adam', loss='mse')
model_full.fit(x, y, epochs=20, batch_size=16)
# bottleneck model
bottleneck_output = model_full.get_layer('bottleneck_layer').output
model_bottleneck = models.Model(inputs = model_full.input, outputs = bottleneck_output)
bottleneck_predictions = model_bottleneck.predict(X_test)
I am beginner in tensorflow and I am trying to create a simple autoencoder for images to detect anomalies.Firstly, I created a simple autoencoder using dogs images , now I want to use this model to reconstruct my tests images and compare the result using some metrics.So how can I do it on tensorflow (because I am beginner on tensorflow )
(I found the same idea implemented on numerical datasets , and also on MNIST dataset ).
this is my code:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler
BATCH_SIZE = 256
EPOCHS = 2
train_datagen = ImageDataGenerator(rescale=1./255)
train_batches = train_datagen.flow_from_directory('C:/MyPath/PetImages1',
target_size=(64,64), shuffle=True, class_mode='input', batch_size=BATCH_SIZE)
input_img = Input(shape=(64, 64, 3))
x = Conv2D(48, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(96, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(192, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
encoded = Conv2D(32, (1, 1), activation='relu', padding='same')(x)
latentSize = (8,8,32)
# DECODER
direct_input = Input(shape=latentSize)
x = Conv2D(192, (1, 1), activation='relu', padding='same')(direct_input)
x = UpSampling2D((2, 2))(x)
x = Conv2D(192, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(96, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(48, (3, 3), activation='relu', padding='same')(x)
decoded = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)
# COMPILE
encoder = Model(input_img, encoded)
decoder = Model(direct_input, decoded)
autoencoder = Model(input_img, decoder(encoded))
autoencoder.compile(optimizer='Adam', loss='binary_crossentropy')
autoencoder.save_weights('autoencoder_DogsAuto.h5')
history=autoencoder.fit_generator(train_batches,steps_per_epoch=10,epochs =
EPOCHS)
#Images for tests
testGene = train_datagen.flow_from_directory('C:/PetImages/',
target_size=(64,64), shuffle=True, class_mode='input',
batch_size=BATCH_SIZE)
restored = autoencoder.predict_generator(testGene,
steps=testGene.n/BATCH_SIZE)
image_height=64
image_width=64
image_channels=3
x_train = np.zeros((0, image_height, image_width, image_channels), dtype=float)
for x, _ in train_batches :
if train_batches.total_batches_seen > train_batches.n/BATCH_SIZE:
break
else:
x_train = np.r_[x_train,x]
pred=autoencoder.predict(train_batches, steps=train_batches.n/BATCH_SIZE)
from sklearn import metrics
score1=np.sqrt(metrics.mean_squared_error(pred,x_train ))
print(score1)
And I got this error:
Traceback (most recent call last):
File "c:\autoencoder_anomaly.py", line 196, in
score1=np.sqrt(metrics.mean_squared_error(pred,x_train ))
File "C:\Users\AppData\Local\Programs\Python\Python36\lib\site-packages\sklearn\metrics_regression.py", line 252, in mean_squared_error
y_true, y_pred, multioutput)
File "C:\Users\AppData\Local\Programs\Python\Python36\lib\site-packages\sklearn\metrics_regression.py", line 84, in _check_reg_targets
check_consistent_length(y_true, y_pred)
ValueError: Found input variables with inconsistent numbers of samples: [6, 0]
Note that I am using only 6 images.
So how can I calculate the error of the reconstructed image using metrics and the autoencoder Model on tensorflow ?
This is simply because of shape mismatch.
when you calculate mean squared error,it calculates the element wise error of ground truth values and estimated values. so pred.shape and train_batches.shape should be equal.check the input data shapes and make sure they are equal.
step 1:
get all training images from the generator and add to one array
x_test = np.zeros((0, image_height, image_width, image_color), dtype=float)
for x, _ in testGene:
if testGene.total_batches_seen > testGene.n/BATCH_SIZE:
break
else:
x_test = np.r_[x_test , x]
step 2 : prediction
pred=autoencoder.predict(testGene, steps=testGene.n/BATCH_SIZE)
step 3 : calculate the difference
score1=np.sqrt(metrics.mean_squared_error(pred,testGene))
I have an autoencoder that has two output( decoded,pred_w), one output is the reconstructed input image and other one is a reconstructed binary image. I used sigmoid activation function in the last layer but the outputs are float number and I need the network label each pixel as 0 or 1. I attached my code here. could you please guide me what should I do to solve this problem?
thanks.
from keras.layers import Input, Concatenate, GaussianNoise,Dropout
from keras.layers import Conv2D
from keras.models import Model
from keras.datasets import mnist
from keras.callbacks import TensorBoard
from keras import backend as K
from keras import layers
import matplotlib.pyplot as plt
import tensorflow as tf
import keras as Kr
import numpy as np
import pylab as pl
import matplotlib.cm as cm
import keract
from tensorflow.python.keras.layers import Lambda;
#-----------------building w train---------------------------------------------
w_main = np.random.randint(2,size=(1,4,4,1))
w_main=w_main.astype(np.float32)
w_expand=np.zeros((1,28,28,1),dtype='float32')
w_expand[:,0:4,0:4]=w_main
w_expand.reshape(1,28,28,1)
w_expand=np.repeat(w_expand,49999,0)
#-----------------building w validation---------------------------------------------
w_valid = np.random.randint(2,size=(1,4,4,1))
w_valid=w_valid.astype(np.float32)
wv_expand=np.zeros((1,28,28,1),dtype='float32')
wv_expand[:,0:4,0:4]=w_valid
wv_expand.reshape(1,28,28,1)
wv_expand=np.repeat(wv_expand,9999,0)
#-----------------building w test---------------------------------------------
w_test = np.random.randint(2,size=(1,4,4,1))
w_test=w_test.astype(np.float32)
wt_expand=np.zeros((1,28,28,1),dtype='float32')
wt_expand[:,0:4,0:4]=w_test
wt_expand.reshape(1,28,28,1)
#wt_expand=np.repeat(wt_expand,10000,0)
#-----------------------encoder------------------------------------------------
#------------------------------------------------------------------------------
wtm=Input((28,28,1))
image = Input((28, 28, 1))
conv1 = Conv2D(16, (3, 3), activation='relu', padding='same', name='convl1e')(image)
conv2 = Conv2D(32, (3, 3), activation='relu', padding='same', name='convl2e')(conv1)
conv3 = Conv2D(8, (3, 3), activation='relu', padding='same', name='convl3e')(conv2)
DrO1=Dropout(0.25)(conv3)
encoded = Conv2D(1, (3, 3), activation='relu', padding='same',name='reconstructed_I')(DrO1)
#-----------------------adding w---------------------------------------
#add_const = Kr.layers.Lambda(lambda x: x + Kr.backend.constant(w_expand))
#encoded_merged=Kr.layers.Add()([encoded,wtm])
add_const = Kr.layers.Lambda(lambda x: x + wtm)
encoded_merged = add_const(encoded)
encoder=Model(inputs=image, outputs= encoded_merged)
encoder.summary()
#-----------------------decoder------------------------------------------------
#------------------------------------------------------------------------------
#encoded_merged = Input((28, 28, 2))
deconv1 = Conv2D(16, (3, 3), activation='relu', padding='same', name='convl1d')(encoded_merged)
deconv2 = Conv2D(32, (3, 3), activation='relu', padding='same', name='convl2d')(deconv1)
deconv3 = Conv2D(8, (3, 3), activation='relu',padding='same', name='convl3d')(deconv2)
DrO2=Dropout(0.25)(deconv3)
decoded = Conv2D(1, (3, 3), activation='relu', padding='same', name='decoder_output')(DrO2)
#decoder=Model(inputs=encoded_merged, outputs=decoded)
#decoder.summary()
model=Model(inputs=image,outputs=decoded)
#----------------------w extraction------------------------------------
convw1 = Conv2D(16, (3,3), activation='relu', padding='same', name='conl1w')(decoded)
convw2 = Conv2D(32, (3, 3), activation='relu', padding='same', name='convl2w')(convw1)
convw3 = Conv2D(8, (3, 3), activation='relu', padding='same', name='conl3w')(convw2)
DrO3=Dropout(0.25)(convw3)
pred_w = Conv2D(1, (1, 1), activation='sigmoid', padding='same', name='reconstructed_W')(DrO3)
# reconsider activation (is W positive?)
# should be filter=1 to match W
w_extraction=Model(inputs=[image,wtm],outputs=[decoded,pred_w])
#----------------------training the model--------------------------------------
#------------------------------------------------------------------------------
#----------------------Data preparesion----------------------------------------
(x_train, _), (x_test, _) = mnist.load_data()
x_validation=x_train[1:10000,:,:]
x_train=x_train[10001:60000,:,:]
#
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
x_validation = x_validation.astype('float32') / 255.
x_train = np.reshape(x_train, (len(x_train), 28, 28, 1)) # adapt this if using `channels_first` image data format
x_test = np.reshape(x_test, (len(x_test), 28, 28, 1)) # adapt this if using `channels_first` image data format
x_validation = np.reshape(x_validation, (len(x_validation), 28, 28, 1))
#---------------------compile and train the model------------------------------
# is accuracy sensible metric for this model?
w_extraction.compile(optimizer='adadelta', loss={'decoder_output':'mse','reconstructed_W':'mse'}, metrics=['mae'])
w_extraction.fit([x_train,w_expand], [x_train,w_expand],
epochs=100,
batch_size=128,
validation_data=([x_validation,wv_expand], [x_validation,wv_expand]),
callbacks=[TensorBoard(log_dir='E:/tmp/AutewithW200', histogram_freq=0, write_graph=False)])
model.summary()
If you need this inside the model, you can use K.round() from keras.backend. Notice this will not be differentiable and will not be able to be used very well in training.
If you need just the results, you can simple define a threshold (usually 0.5) and:
binary_reslts = results > threshold
Add metrics to your model
You can see the results by adding metrics that round the data.
Standard metrics for this could be "accuracy" or "categorical_accuracy". You you can define your own metrics like:
def diceMetric(yTrue, yPred):
yTrue = K.batch_flatten(yTrue)
yPred = K.batch_flatten(yPred)
#round
yPred = K.greater(yPred, 0.5)
yPred = K.cast(yPred, K.floatx())
intersection = yPred * yTrue
sum = yTrue + yPred
return (2*intersection + K.epsilon())/(sum + K.epsilon())
Metrics are added in compile:
model.compile(optimizer=..., loss=..., metrics = [diceMetric, 'categorical_accuracy'])
Metrics do not affect training, they're just feedback for you to know what's happening.
Why would you need your network to output exactly 0 or 1? You can interpret the output of your network as probability measure, of how likely it is that an input pixel correponds to class 0 or 1.
So during training the model tries to approximate the unknown probability distribution.
When it comes to predictions, you can use a threshold like .5 or you could use something like otsu threshold. Then you will obtain an binary output. Unfortunately thresholds will create some gaps or shrink the area of some predicted shapes.
Note:
typically you want to down and upsample in an autoencoder, because otherwise the model,could learn that the idendity function is optimal.
I am building a convolutional encoder to deal with some 128x128 images - like this.
I have been testing the structure by testing it with a image set of 500 images. The resulting decoded images are basically entirely black (not want I want!)
I was hoping to get some advice on here as I think I am making some obvious mistakes.
A small group of images can be downloaded here -> https://www.dropbox.com/sh/0oj1p6sqal32cvx/AAAYQJSK2SPfynD8wYMSo9bPa?dl=0
Current Code
################################# SETUP #######################################
import glob
import pandas as pd
import numpy as np
import sys
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import random
np.set_printoptions(threshold=np.nan)
######################### DATA PREPARATION #####################################
# create a list of XML files within the raw data folder
image_list = glob.glob("Images/test_images/*.jpeg")
print(image_list)
l = []
for i in image_list:
img = np.array(cv2.imread(i, 0))
l.append(img)
T = np.array(l)
# split into training and testing sets
labels = image_list
data_train, data_test, labels_train, labels_test = train_test_split(T, labels, test_size=0.5, random_state=42)
# convert to 0-1 floats (reconversion by * 255)
data_train = data_train.astype('float32') / 255.
data_test = data_test.astype('float32') / 255.
print(data_train.shape)
# reshape from channels first to channels last
data_train = np.rollaxis(data_train, 0, 3)
data_test = np.rollaxis(data_test, 0, 3)
######################### ENCODER MODELING #####################################
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D
from keras.models import Model
from keras import backend as K
input_img = Input(shape=(128, 128, 1)) # adapt this if using `channels_first` image data format
x = Conv2D(64, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
# create the model
autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adadelta', loss='mse')
# reference for reordering
data_train_dimensions = data_train.shape
data_test_dimensions = data_test.shape
# reshape the data sets
data_train = np.reshape(data_train, (data_train_dimensions[2], 128, 128, 1)) # adapt this if using `channels_first` image data format
data_test = np.reshape(data_test, (data_test_dimensions[2], 128, 128, 1))
from keras.callbacks import TensorBoard
autoencoder.fit(data_train, data_test,
epochs=10,
batch_size=128,
shuffle=True,
validation_data=(data_train, data_test),
callbacks=[TensorBoard(log_dir='/tmp/autoencoder')])
# create decoded images from model
decoded_imgs = autoencoder.predict(data_test)
# reorder columns
decoded_imgs = np.rollaxis(decoded_imgs, 3, 1)
# reshape from channels first to channels last
data_train = np.rollaxis(data_train, 0, 3)
data_test = np.rollaxis(data_test, 0, 3)
# convert to 0-1 floats (reconversion by * 255)
data_train = data_train.astype('float32') * 255.
data_test = data_test.astype('float32') * 255.
I think the main problem is that you fit on data_train, data_test instead of data_train, labels_train, that is, you should fit your model on samples and corresponding outputs but you train it only on inputs which are by a coincidence are of compatible shapes because of the 50/50 split.
If the intention of the model is to reproduce an image from compressed representation, then you could train fit(data_train, data_train, ..., validation_data=(data_test, data_test)).
hi I am building a image classifier for one-class classification in which i've used autoencoder while running this model I am getting this error by this line (autoencoder_model.fit) (ValueError: Error when checking target: expected model_2 to have shape (None, 252, 252, 1) but got array with shape (300, 128, 128, 3).)
num_of_samples = img_data.shape[0]
labels = np.ones((num_of_samples,),dtype='int64')
labels[0:376]=0
names = ['cats']
input_shape=img_data[0].shape
X_train, X_test = train_test_split(img_data, test_size=0.2, random_state=2)
inputTensor = Input(input_shape)
x = Conv2D(16, (3, 3), activation='relu', padding='same')(inputTensor)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
encoded_data = MaxPooling2D((2, 2), padding='same')(x)
encoder_model = Model(inputTensor,encoded_data)
# at this point the representation is (4, 4, 8) i.e. 128-dimensional
encoded_input = Input((4,4,8))
x = Conv2D(8, (3, 3), activation='relu', padding='same')(encoded_input)
x = UpSampling2D((2, 2))(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(16, (3, 3), activation='relu',padding='same')(x)
x = UpSampling2D((2, 2))(x)
decoded_data = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)
decoder_model = Model(encoded_input,decoded_data)
autoencoder_input = Input(input_shape)
encoded = encoder_model(autoencoder_input)
decoded = decoder_model(encoded)
autoencoder_model = Model(autoencoder_input, decoded)
autoencoder_model.compile(optimizer='adadelta',
`enter code here`loss='binary_crossentropy')
autoencoder_model.fit(X_train, X_train,
epochs=50,
batch_size=32,
validation_data=(X_test, X_test),
callbacks=[TensorBoard(log_dir='/tmp/autoencoder')])
It's a simple incompatibility between the output shape of the decoder and the shape of your training data. (Target means output).
I see you've got 2 MaxPoolings (dividing your image size by 4), and three upsamplings (multiplying the decoder's input by 8).
The final output of the autoencoder is too big and doesn't match your data. You must simply work in the model to make the output shape match your training data.
You're using wrong API
autoencoder_model.fit(X_train, X_train, <--- This one is wrong
epochs=50,
batch_size=32,
validation_data=(X_test, X_test),
callbacks=[TensorBoard(log_dir='/tmp/autoencoder')])
Take a look at .fit method source code
from https://github.com/keras-team/keras/blob/master/keras/models.py
def fit(self,
x=None,
y=None,
batch_size=None,
epochs=1,
verbose=1,
callbacks=None,
validation_split=0.,
validation_data=None,
shuffle=True,
class_weight=None,
sample_weight=None,
initial_epoch=0,
steps_per_epoch=None,
validation_steps=None,
**kwargs):
"""Trains the model for a fixed number of epochs (iterations on a dataset).
# Arguments
x: Numpy array of training data.
If the input layer in the model is named, you can also pass a
dictionary mapping the input name to a Numpy array.
`x` can be `None` (default) if feeding from
framework-native tensors (e.g. TensorFlow data tensors).
y: Numpy array of target (label) data.
If the output layer in the model is named, you can also pass a
dictionary mapping the output name to a Numpy array.
`y` can be `None` (default) if feeding from
framework-native tensors (e.g. TensorFlow data tensors).
So the x should be data, and the y should be label of the data.
Hope that help