I'm a beginner in generative adversarial networks and representation learning. My goal is to train a simple GAN using dense layers to generate Fashion-MNIST images. I've tried many code samples online that gave successful outputs for everyone else, but in my case all of them produce the same output: a blank white image with some dots that frequently appear in particular areas. I also checked the activation functions, but they seem to be fine. I should also mention that I run my code in Google Colab. Here is my code:
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
from tensorflow.keras.datasets import fashion_mnist
# Load the Fashion-MNIST splits; the label arrays are bound to `_` and discarded.
(X_train, _), (X_test, _) = fashion_mnist.load_data()
def plot_images(images, n_cols):
    """Show `images` on a grid that is `n_cols` columns wide.

    A trailing axis of length 1 is squeezed away before drawing. Every image
    is rendered with the "binary" colormap and with its axes hidden; the
    figure is displayed once all images have been placed.
    """
    # Ceiling division: the smallest row count that fits every image.
    n_rows = (len(images) - 1) // n_cols + 1
    has_singleton_channel = images.shape[-1] == 1
    if has_singleton_channel:
        images = np.squeeze(images, axis=-1)
    plt.figure(figsize=(n_cols, n_rows))
    for position, picture in enumerate(images, start=1):
        plt.subplot(n_rows, n_cols, position)
        plt.imshow(picture, cmap="binary")
        plt.axis("off")
    plt.show()
codings_size = 100

# Generator: latent vector -> 28x28 image. The sigmoid on the last Dense
# layer means every generated pixel lies in [0, 1].
generator = tf.keras.models.Sequential([
    tf.keras.layers.Dense(100, activation="selu", input_shape=[codings_size]),
    tf.keras.layers.Dense(150, activation="selu"),
    tf.keras.layers.Dense(28 * 28, activation="sigmoid"),
    tf.keras.layers.Reshape([28, 28])
])

# Discriminator: 28x28 image -> probability that the image is real.
discriminator = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=[28, 28]),
    tf.keras.layers.Dense(150, activation="selu"),
    tf.keras.layers.Dense(100, activation="selu"),
    tf.keras.layers.Dense(1, activation="sigmoid")
])

# Stacked model used to train the generator through the discriminator.
gan = tf.keras.models.Sequential([generator, discriminator])

# The discriminator is compiled first (while trainable) and then frozen
# before the stacked model is compiled.
discriminator.compile(loss="binary_crossentropy", optimizer="rmsprop")
discriminator.trainable = False
gan.compile(loss="binary_crossentropy", optimizer="rmsprop")

batch_size = 32
n_epochs = 50

# The real images must live in the same range as the generator output.
# Fashion-MNIST pixels are 0-255, whereas the generator emits values in
# [0, 1]; without this rescaling the discriminator can tell real from fake
# by magnitude alone and the generator never receives a useful signal.
X_train_scaled = X_train.astype("float32") / 255.0
n_batches = len(X_train_scaled) // batch_size

for epoch in range(n_epochs):
    print("Epoch {}/{}".format(epoch + 1, n_epochs))
    for i in range(n_batches):
        # Batch i covers rows [i * batch_size, (i + 1) * batch_size), so the
        # batches do not overlap and together span the whole training set.
        X_batch = X_train_scaled[i * batch_size:(i + 1) * batch_size]

        # Phase 1: train the discriminator on generated (label 0) and real
        # (label 1) images.
        noise = np.random.normal(0, 1, size=[batch_size, codings_size])
        generated_images = generator(noise)
        X_fake_and_real = np.concatenate([generated_images, X_batch], axis=0)
        y1 = np.array([[0.]] * batch_size + [[1.]] * batch_size)
        discriminator.trainable = True
        discriminator.train_on_batch(X_fake_and_real, y1)

        # Phase 2: train the generator through the frozen discriminator,
        # asking for its samples to be classified as real.
        noise = np.random.normal(0, 1, size=[batch_size, codings_size])
        y2 = np.array([[1.]] * batch_size)
        discriminator.trainable = False
        gan.train_on_batch(noise, y2)
    # Show the last generated batch of the epoch to monitor progress.
    plot_images(generated_images, 8)
This sounds like mode collapse, where the generator keeps producing the same or similar outputs and the discriminator fails to reject them. It could also be due to a problem in your implementation.
Try training the discriminator first for one step, or for a chosen K steps, before training the generator.
Keras has many simple examples for GANs. You can modify the DCGAN implementation below to work with Fashion-MNIST.
https://keras.io/examples/generative/dcgan_overriding_train_step/
https://keras.io/examples/generative/
Related
I applied PCA on MNIST with a reduced dimensionality of 32. Then, to test it, I created a simple classification network. The train accuracy is good: 96%, but on the other hand, the test accuracy is 2%.
So what's wrong?
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import tensorflow.keras.layers as layers
from tensorflow.keras.models import Sequential
import numpy as np
# Load MNIST as two (images, labels) pairs: (x, y) and (x2, y2).
(x,y),(x2,y2) = mnist.load_data()
# Convert both label arrays to one-hot (categorical) form.
y = tf.keras.utils.to_categorical(y)
y2 = tf.keras.utils.to_categorical(y2)
def pca(x):
    """Return the first 32 columns of U * diag(S) from an SVD of `x`.

    `x` is reshaped to rows of 784 values, scaled by 1/255, and the mean of
    each row is subtracted before the decomposition. The decomposition is
    computed from the array passed in, so every call derives its own basis.
    """
    k = 32  # DIM_REDUCED
    flat = np.reshape(x, (x.shape[0], 784)).astype("float32") / 255.
    row_means = flat.mean(axis=1)
    flat -= row_means[:, None]
    singular_values, left_vectors, _ = tf.linalg.svd(flat)
    sigma = tf.linalg.diag(singular_values)
    return tf.matmul(left_vectors[:, 0:k], sigma[0:k, 0:k])
# Reduce each split with its own call to pca().
# NOTE(review): the two calls are independent, so x and x2 are not guaranteed
# to be expressed in the same basis — confirm this is intended.
x = pca(x)
x2 = pca(x2)
## BUILD A SUPER SIMPLE CLASSIFIC. NET
# Three Dense layers (32 -> 16 -> 10) on a 32-value input, softmax output.
model = Sequential()
model.add(layers.Dense(32, activation="relu", input_shape=(32,)))
model.add(layers.Dense(16, activation="relu"))
model.add(layers.Dense(10, activation="softmax"))
model.compile(loss = "categorical_crossentropy", optimizer = "adam", metrics = ["acc"])
# Train on (x, y) for 5 epochs and validate on (x2, y2) after each epoch.
model.fit(x,y, epochs = 5, verbose = 1, batch_size = 64, validation_data = (x2,y2))
OUTPUT:
Epoch 5/5
60000/60000 [==============================] - 1s 23us/sample - loss: 0.1278 - acc: 0.9626 - val_loss: 11.0141 - val_acc: 0.0202
First, you are applying two different "PCA" transforms, one to each set. The eigenvectors and eigenvalues of the training set can differ from those of the test set.
Second, you are using the SVD to obtain the principal components, but these components are not the result you want. Instead, use the principal axes as a projection matrix to obtain a better, compressed representation of the data.
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import tensorflow.keras.layers as layers
from tensorflow.keras.models import Sequential
import numpy as np
# Load MNIST as two (images, labels) pairs: (x, y) and (x2, y2).
(x,y),(x2,y2) = mnist.load_data()
# Convert both label arrays to one-hot (categorical) form.
y = tf.keras.utils.to_categorical(y)
y2 = tf.keras.utils.to_categorical(y2)
def pca(x, k=32):
    """Fit a PCA projection on `x`.

    Args:
        x: array whose first axis indexes samples and whose remaining
            entries flatten to 784 values per sample.
        k: number of principal axes to keep. Defaults to 32, the value that
            was previously hard-coded.

    Returns:
        A ``(mean, projM)`` pair: the per-feature mean of the scaled data,
        and a matrix whose ``k`` columns are the leading right-singular
        vectors of the centred data. Pass both to ``apply_pca``.
    """
    x = np.reshape(x, (x.shape[0], 784)).astype("float32") / 255.
    # Centre each feature (column), not each sample.
    mean = x.mean(axis=0)
    x -= mean[None, :]
    # tf.linalg.svd returns (s, u, v) with v *not* transposed, so the
    # principal axes are the columns of v. Only v is needed here.
    _, _, v = tf.linalg.svd(x)
    projM = v[:, 0:k]
    return mean, projM
def apply_pca(mean, projM, x):
    """Centre `x` with `mean` and project it onto the columns of `projM`.

    `x` is reshaped to rows of 784 values and scaled by 1/255 before the
    mean is subtracted.
    """
    flat = np.reshape(x, (x.shape[0], 784)).astype("float32") / 255.
    flat -= mean[None, :]
    projected = tf.matmul(flat, projM)
    return projected
# Fit the mean and projection matrix on x only, then apply that same
# transform to both x and x2.
mean, projM = pca(x)
x = apply_pca(mean, projM, x)
x2 = apply_pca(mean, projM, x2)
## BUILD A SUPER SIMPLE CLASSIFIC. NET
# Three Dense layers (32 -> 16 -> 10) on a 32-value input, softmax output.
model = Sequential()
model.add(layers.Dense(32, activation="relu", input_shape=(32,)))
model.add(layers.Dense(16, activation="relu"))
model.add(layers.Dense(10, activation="softmax"))
model.compile(loss = "categorical_crossentropy", optimizer = "adam", metrics = ["acc"])
# Train on (x, y) for 5 epochs and validate on (x2, y2) after each epoch.
model.fit(x,y, epochs = 5, verbose = 1, batch_size = 64, validation_data = (x2,y2))
I am new to machine learning and keras library and I made a CNN code for regression like below.
%matplotlib inline
from __future__ import division
import numpy as np
from numpy.random import rand
import matplotlib.pyplot as plt
def initial_spin_state(N):
    """Return an N x N array with entries drawn at random from {0.11111, 0.99999}."""
    levels = (0.11111, 0.99999)
    lattice_shape = (N, N)
    return np.random.choice(levels, lattice_shape)
def metropolis_algorithm(config, beta):
    """Apply N*N single-site Metropolis updates to `config` in place.

    For each update a site is picked uniformly at random; `delta` is twice
    the site's value times the sum of its four periodic neighbours. The
    site's sign is flipped when `delta` is negative, and otherwise with
    probability ``exp(-delta * beta)``. The mutated `config` is returned.
    """
    size = len(config)
    for _ in range(size * size):
        row = np.random.randint(0, size)
        col = np.random.randint(0, size)
        spin = config[row, col]
        neighbours = (
            config[(row + 1) % size, col]
            + config[row, (col + 1) % size]
            + config[(row - 1) % size, col]
            + config[row, (col - 1) % size]
        )
        delta = 2 * spin * neighbours
        # Short-circuit evaluation: a random number is only drawn when
        # delta is not negative.
        if delta < 0 or rand() < np.exp(-delta * beta):
            spin *= -1
        config[row, col] = spin
    return config
def get_energy(config):
    """Sum, over every site, the sum of its four periodic neighbours times the site's value."""
    size = len(config)
    total = 0
    for row in range(size):
        below = (row + 1) % size
        above = (row - 1) % size
        for col in range(size):
            right = (col + 1) % size
            left = (col - 1) % size
            neighbours = (
                config[below, col]
                + config[row, right]
                + config[above, col]
                + config[row, left]
            )
            total += neighbours * config[row, col]
    return total
def _sample_configs(n_samples, size=16):
    """Draw `n_samples` random lattices and return (lattices, energies) as arrays."""
    lattices, energies = [], []
    for _ in range(n_samples):
        lattice = initial_spin_state(size)
        lattices.append(lattice)
        energies.append(get_energy(lattice))
    return np.array(lattices), np.array(energies)


# Training set: 50000 random 16x16 lattices with their energies.
x_train, y_train = _sample_configs(50000)
print(x_train.shape)
print(y_train.shape)

# Test set: 20000 further lattices, drawn after the training set.
x_test, y_test = _sample_configs(20000)
print(x_test.shape)
print(y_test.shape)

# Append a trailing axis of length 1 to each lattice.
x_train = x_train.reshape(50000, 16, 16, 1)
x_test = x_test.reshape(20000, 16, 16, 1)
print(x_train.shape)
print(x_test.shape)
import numpy as np
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.optimizers import Adadelta
from keras.regularizers import l2
# CNN for 16x16x1 inputs: two Conv2D + MaxPooling2D stages, then Flatten,
# Dense(512) and a single-unit output layer with no activation specified.
model = Sequential()
model.add(Conv2D(32, (2, 2), input_shape = (16, 16, 1), activation = 'relu'))
model.add(MaxPooling2D(2,2))
model.add(Conv2D(16, (2, 2), activation = 'relu'))
model.add(MaxPooling2D(2,2))
model.add(Flatten())
model.add(Dense(512, activation = 'relu'))
#model.add(Dense(1024, activation = 'relu'))
model.add(Dense(1))
# NOTE(review): the NumPy seed is set after the layers were added — confirm
# whether it is meant to affect weight initialisation.
np.random.seed(0)
model.summary()
# NOTE(review): 'accuracy' is tracked alongside an MSE loss on the energy
# targets — confirm this metric is meaningful for a regression output.
model.compile(loss = "mse", metrics = ['accuracy'], optimizer = 'adam')
%%time
# Train for 200 epochs in batches of 500, validating on the test arrays.
hist = model.fit(x_train, y_train, epochs = 200, batch_size = 500,
validation_data = (x_test, y_test), verbose = 2)
import matplotlib.pyplot as plt
# Plot the 'acc' and 'val_acc' series recorded in the training history.
plt.plot(hist.history['acc'], '_b', label = "training")
plt.plot(hist.history['val_acc'], 'r:', label = "test")
plt.legend()
plt.grid("on")
plt.show()
This code takes an image as input and outputs a continuous energy value.
So if I feed an image (an Ising configuration) to the CNN, it should predict the energy of that configuration.
The problem is this:
when I train the CNN, the training loss and validation loss decrease very slowly.
Of course, the training accuracy and validation accuracy also increase very slowly.
And sometimes only the training accuracy increases while the validation accuracy does not.
genious guys.. what`s wrong with my code??
teach me plz
First of all, since this is a regression problem, I don't think it is a good idea to use acc as your metric; instead, you might consider using mean absolute error (mae) as your metric.
The loss you are using is mse (mean squared error), so its value will be quite large, especially since you are not normalising your y values. However, after running ~25 epochs of your provided code, the validation loss dropped to 290.xx with an mae of 13.xx (and it had not converged yet). I also tried using your model to predict some validation data, and it works fine. Maybe you should test your model before assuming that something went wrong.
I'm new to Tensorflow and I'm trying to rebuild a simple network, that I've built in Keras (TF backend), with Tensorflows Python API. It is a simple function approximator (z = sin(x + y)).
I've tried different architectures, optimizers and learning rates, but I'm not getting the new network to train properly. However in my eyes, the networks seem to be identical. Both get the exact same feature vectors and labels:
# making training data
# Sample `samp` evenly spaced values in [0, 2*pi) along each input axis.
start = 0
end = 2*np.pi
samp = 1000
num_samp = samp**2
step = end / samp
x_train = np.arange(start, end, step)
y_train = np.arange(start, end, step)
# Every (x, y) combination of the two axes, one pair per row: shape (num_samp, 2).
data = np.array(np.meshgrid(x_train,y_train)).T.reshape(-1,2)
# Target z = sin(x + y); this is a 1-D array with one value per row of `data`.
z_label = np.sin(data[:,0] + data[:,1])
Here is the Keras model:
#start model
# Sequential network: 2 inputs -> 128 sigmoid -> 64 sigmoid -> 1 linear output.
model = Sequential()
#stack layers
model.add(Dense(units=128, activation='sigmoid', input_dim=2, name='dense_1'))
model.add(Dense(units=64, activation='sigmoid', input_dim=128, name='dense_2'))
model.add(Dense(units=1, activation='linear', name='output'))
#compile model
model.compile(loss='mean_squared_error',
optimizer='sgd',
metrics=['accuracy'])
# Callbacks: checkpoint the best weights to disk and log to TensorBoard.
checkpointer = ModelCheckpoint(filepath='./weights/weights.h5',
verbose=1, save_best_only=True)
tensorboard = TensorBoard(log_dir="logs/{}".format(time()))
# NOTE(review): `shuffle` receives the string 'true' rather than a boolean,
# and `data_val` / `z_label_val` are not defined in this snippet — confirm.
model.fit(data, z_label, epochs=20, batch_size=32,
shuffle='true',validation_data=(data_val, z_label_val),
callbacks=[checkpointer, tensorboard])
Here is the new network, built with Tensorflows Python API:
# hyperparameter
# Layer sizes and learning rate for the graph built below.
n_inputs = 2
n_hidden1 = 128
n_hidden2 = 64
n_outputs = 1
learning_rate = 0.01
# construction phase
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='input')
# NOTE(review): `(None)` is plain None, so this placeholder has no fixed
# shape. If a 1-D label vector is fed, `logits - y` below would broadcast a
# (batch, 1) tensor against (batch,) — verify the shape of the fed labels.
y = tf.placeholder(tf.float32, shape=(None), name="target")
# Two sigmoid hidden layers followed by a linear output of n_outputs units.
hidden1 = tf.layers.dense(X, n_hidden1, name="hidden1", activation=tf.nn.sigmoid)
hidden2 = tf.layers.dense(hidden1, n_hidden2, name="hidden2", activation=tf.nn.sigmoid)
logits = tf.layers.dense(hidden2, n_outputs, activation='linear', name='output')
# Mean of the squared differences between the network output and the target.
loss = tf.reduce_mean(tf.square(logits - y), name='loss')
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
training_op = optimizer.minimize(loss, name='train')
init = tf.global_variables_initializer()
saver = tf.train.Saver()
# --- execution phase ---
n_epochs = 40
batch_size = 32
n_batches = int(num_samp/batch_size)

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        print("Epoch: ", epoch, " Running...")
        batch_losses = []
        for iteration in range(n_batches):
            # Consecutive, non-overlapping slices of the training arrays.
            start = iteration * batch_size
            end = start + batch_size
            batch = {X: data[start:end], y: z_label[start:end]}
            sess.run(training_op, feed_dict=batch)
            # Re-evaluate the loss on the same batch after the update step.
            batch_losses.append(loss.eval(feed_dict=batch))
        mean_loss = np.mean(np.asarray(batch_losses, dtype=np.float64))
        print("Epoch: ", epoch, " Calculated ==> Loss: ", mean_loss)
While the Keras model train properly with a decreasing loss and proper test results, the new model converges pretty fast and stops learning. Accordingly the results are completely useless.
Am I building/training the the model incorrectly or is Keras doing anything in the background, that I'm not aware of?
Solved this issue. The problem was the shape of the label vector. It was a flat (1-D) vector with shape (1000000,). While Keras is apparently capable of dealing with different shapes of output and label vectors, TensorFlow could not infer the right shape for the placeholder, so subtracting the 1-D labels from the (batch, 1) network output broadcast to a (batch, batch) matrix, and the loss function
loss = tf.reduce_mean(tf.square(logits - y), name='loss')
didn't make sense anymore, and thus training failed. Adding
z_label = z_label.reshape(-1,1)
reshaped the label vector to (1000000, 1) and solved it. Alternatively one can specify the shape of the placeholder more precisely
y = tf.placeholder(tf.float32, shape=(None,1), name="target")
I am new to Generative-Adversarial Networks (GAN) and Neural Networks in general.
Using Python and Keras, I want to apply GANs to time-series prediction. My final goal also includes detecting anomalies in the time series.
I'm using the popular Air Passengers time-series data.
Here is the code I am using for time-series prediction. However, the result I get from the GAN is a bit hard for me to interpret, and I think it needs some improvement.
Thanks for your help.
from __future__ import print_function, division
from keras.datasets import mnist
from keras.layers import Input, Dense, Reshape, Flatten, Dropout
from keras.layers import BatchNormalization, Activation, ZeroPadding2D
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D
from keras.models import Sequential, Model
from keras.optimizers import Adam
#import matplotlib.pyplot as plt
from matplotlib import pyplot as plt
import pandas as pd
import sys
import numpy as np
class GAN():
    """Fully-connected GAN trained on single values of the AirPassengers series.

    Every sample has shape ``(1, 1)``. The generator maps a 48-dimensional
    noise vector to one such sample; the discriminator outputs a sigmoid
    score for a sample, trained towards 1 for real data and 0 for generated
    data.
    """

    def __init__(self):
        self.data_rows = 1
        self.data_cols = 1
        self.data_shape = (self.data_rows, self.data_cols)
        self.latent_dim = 48

        optimizer = Adam(0.0002, 0.5)

        # The discriminator is compiled on its own, before it is frozen below.
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss='binary_crossentropy',
                                   optimizer=optimizer,
                                   metrics=['accuracy'])

        self.generator = self.build_generator()

        # Combined model: noise -> generator -> discriminator score.
        z = Input(shape=(self.latent_dim,))
        data = self.generator(z)

        # Frozen for the combined model so that only the generator is meant
        # to be updated when `self.combined` is trained.
        self.discriminator.trainable = False
        validity = self.discriminator(data)

        self.combined = Model(z, validity)
        self.combined.compile(loss='binary_crossentropy', optimizer=optimizer)

    def build_generator(self):
        """Return a Model mapping a latent vector to a sample of `data_shape`."""
        model = Sequential()

        model.add(Dense(256, input_dim=self.latent_dim))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(512))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(1024))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        # Linear output, so generated values are unbounded.
        model.add(Dense(np.prod(self.data_shape), activation='linear'))
        model.add(Reshape(self.data_shape))

        model.summary()

        noise = Input(shape=(self.latent_dim,))
        data = model(noise)

        return Model(noise, data)

    def build_discriminator(self):
        """Return a Model mapping a sample of `data_shape` to a sigmoid score."""
        model = Sequential()

        model.add(Flatten(input_shape=self.data_shape))
        model.add(Dense(512))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(256))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(1, activation='sigmoid'))

        model.summary()

        data = Input(shape=self.data_shape)
        validity = model(data)

        return Model(data, validity)

    def train(self, epochs, batch_size=128, sample_interval=50):
        """Train the GAN on the "#Passengers" column of AirPassengers.csv.

        Args:
            epochs: number of training iterations; each draws one random
                batch of real samples.
            batch_size: number of real and of generated samples per step.
            sample_interval: a plot of generated samples is produced every
                `sample_interval` iterations (starting with iteration 0).
        """
        df = pd.read_csv("AirPassengers.csv")
        ts = df[["#Passengers"]]
        # DataFrame.as_matrix() was removed in pandas 1.0; .values returns
        # the same 2-D array and works on old and new versions alike.
        X_train = ts.values

        # (n, 1) -> (n, 1, 1) so each sample matches self.data_shape. The
        # new axis must be the last one; axis=3 is out of range for a 2-D
        # input.
        # NOTE(review): the values are fed unscaled (no rescaling step is
        # applied) — confirm whether normalisation is wanted.
        X_train = np.expand_dims(X_train, axis=-1)

        print("X_train")
        print(X_train.shape)

        # Targets: 1 for real samples, 0 for generated samples.
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        for epoch in range(epochs):

            # ---------------------
            #  Train Discriminator
            # ---------------------
            idx = np.random.randint(0, X_train.shape[0], batch_size)
            data_s = X_train[idx]

            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
            gen_data = self.generator.predict(noise)

            d_loss_real = self.discriminator.train_on_batch(data_s, valid)
            d_loss_fake = self.discriminator.train_on_batch(gen_data, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))

            # Train the generator (to have the discriminator label samples as valid)
            g_loss = self.combined.train_on_batch(noise, valid)

            print ("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[1], g_loss))

            if epoch % sample_interval == 0:
                self.plot_gan_result(epoch, batch_size)

        # Plot the real series for comparison; reshape(-1, 1) avoids
        # hard-coding the number of rows in the CSV.
        c = X_train.reshape(-1, 1)
        fig, axs = plt.subplots()
        axs.plot(c, color = "blue", label = 'true')

    def plot_gan_result(self, epoch, batch_size):
        """Generate `batch_size` samples from fresh noise and plot them in red."""
        noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
        gen_data = self.generator.predict(noise)
        # One row per generated sample, whatever batch_size was requested.
        b = gen_data.reshape(-1, 1)
        fig, axs = plt.subplots()
        print("noise shape")
        print(noise.shape)
        print(noise[0])
        axs.plot(b, color = "red", label = 'generated')
if __name__ == '__main__':
    # Script entry point: build the GAN and run a short training session.
    run_settings = {"epochs": 30, "batch_size": 24, "sample_interval": 200}
    gan = GAN()
    gan.train(**run_settings)
I'm using this tutorial about autoencoders: https://blog.keras.io/building-autoencoders-in-keras.html
All the code is working, however the performance is very bad (the results are blurred) when I set 10e-5 for the regularization parameter, which is the parameter defined in the tutorial code. In fact, I need to decrease the regularization to 10e-8 to have a correct output.
My question is as follows: Why the result is so different from the tutorial? Data is the same and parameters are the same, I didn't expect a large difference.
I suspect that the default behavior of the Keras functions has been changed (automatic batch normalization performed in all cases?) from May 14th, 2016.
Outputs
With 10e-5 regularization (blurred); val_loss of 0.2967 after 50 epochs and 0.2774 after 100 epochs.
With 10e-8 regularization: val_loss of 0.1080 after 50 epochs and 0.1009 after 100 epochs.
With no regularization: val_loss of 0.1018 after 50 epochs and 0.0944 after 100 epochs.
Complete code (for reference)
# Source: https://blog.keras.io/building-autoencoders-in-keras.html
import numpy as np
np.random.seed(2713)
from keras.layers import Input, Dense
from keras.models import Model
from keras import regularizers
# Size of the encoded representation.
encoding_dim = 32
input_img = Input(shape=(784,))
# add a Dense layer with a L1 activity regularizer
# (the regularization factor passed here is 10e-5, i.e. 1e-4)
encoded = Dense(encoding_dim, activation='relu',
activity_regularizer=regularizers.l1(10e-5))(input_img)
# Reconstruct the 784-value input from the encoding with a sigmoid layer.
decoded = Dense(784, activation='sigmoid')(encoded)
# Full autoencoder: input -> encoding -> reconstruction.
autoencoder = Model(input_img, decoded)
# this model maps an input to its encoded representation
encoder = Model(input_img, encoded)
# create a placeholder for an encoded (32-dimensional) input
encoded_input = Input(shape=(encoding_dim,))
# retrieve the last layer of the autoencoder model
decoder_layer = autoencoder.layers[-1]
# create the decoder model
decoder = Model(encoded_input, decoder_layer(encoded_input))
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
from keras.datasets import mnist
# Load MNIST images only; the label arrays are bound to `_` and discarded.
(x_train, _), (x_test, _) = mnist.load_data()
# Convert to float32 and divide by 255.
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
# Flatten every image into a single vector (product of the non-batch dims).
x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
print(x_train.shape)
print(x_test.shape)
# The input is also the target: the network learns to reproduce its input.
autoencoder.fit(x_train, x_train,
epochs=100,
batch_size=256,
shuffle=True,
validation_data=(x_test, x_test))
# encode and decode some digits
# note that we take them from the *test* set
encoded_imgs = encoder.predict(x_test)
decoded_imgs = decoder.predict(encoded_imgs)

# use Matplotlib (don't ask)
import matplotlib.pyplot as plt

n = 10  # how many digits we will display
plt.figure(figsize=(20, 4))
for i in range(n):
    # Top row shows the original digit, bottom row its reconstruction.
    for row_offset, images in ((0, x_test), (n, decoded_imgs)):
        ax = plt.subplot(2, n, i + 1 + row_offset)
        plt.imshow(images[i].reshape(28, 28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
plt.show()
I have the same issue. And it is on GitHub here https://github.com/keras-team/keras/issues/5414
It seems like you were correct in just changing the constant.