Related
I understand the Vanishing and exploding gradients problem in Vanilla RNNs and why this happens. However, I would like to create this problem purposefully to understand in a better way. I have taken a below code from https://www.datatechnotes.com/2018/12/rnn-example-with-keras-simplernn-in.html.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN
# convert into dataset matrix
def convertToMatrix(data, step):
X, Y =[], []
for i in range(len(data)-step):
d=i+step
X.append(data[i:d,])
Y.append(data[d,])
return np.array(X), np.array(Y)
step = 4
N = 1000
Tp = 800
t=np.arange(0,N)
x=np.sin(0.02*t)+2*np.random.rand(N)
df = pd.DataFrame(x)
df.head()
plt.plot(df)
plt.show()
values=df.values
train,test = values[0:Tp,:], values[Tp:N,:]
# add step elements into train and test
test = np.append(test,np.repeat(test[-1,],step))
train = np.append(train,np.repeat(train[-1,],step))
trainX,trainY =convertToMatrix(train,step)
testX,testY =convertToMatrix(test,step)
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
model = Sequential()
model.add(SimpleRNN(units=32, input_shape=(1,step), activation="relu"))
model.add(Dense(8, activation="relu"))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='rmsprop')
model.summary()
model.fit(trainX,trainY, epochs=100, batch_size=16, verbose=2)
trainPredict = model.predict(trainX)
testPredict= model.predict(testX)
predicted=np.concatenate((trainPredict,testPredict),axis=0)
trainScore = model.evaluate(trainX, trainY, verbose=0)
print(trainScore)
How should I modify this code to create this problem? Thank you.
Vanishing gradient is the problem when we use the sigmoid activation function. If you change relu to sigmoid, you may encounter vanishing gradient problem.
model = Sequential()
model.add(SimpleRNN(units=32, input_shape=(1,step), activation="sigmoid"))
model.add(Dense(8, activation="sigmoid"))
after a few days where my code was reproducible everytime - it is now not! i don't know what happend, i just changed some lines of codes and i don't know how to fix it!
# Code reproduzierbar machen
import numpy as np
import os
import random as rn
import tensorflow as tf
import keras
from keras import backend as K
#-----------------------------Keras reproducible------------------#
SEED = 1234
tf.set_random_seed(SEED)
os.environ['PYTHONHASHSEED'] = str(SEED)
np.random.seed(SEED)
rn.seed(SEED)
session_conf = tf.ConfigProto(
intra_op_parallelism_threads=1,
inter_op_parallelism_threads=1
)
sess = tf.Session(
graph=tf.get_default_graph(),
config=session_conf
)
K.set_session(sess)
# Importiere Datasets (Training und Test)
import pandas as pd
poker_train = pd.read_csv("C:/Users/elihe/Documents/Studium Master/WS 19 und 20/Softwareprojekt/poker-hand-training-true.data")
poker_test = pd.read_csv("C:/Users/elihe/Documents/Studium Master/WS 19 und 20/Softwareprojekt/poker-hand-testing.data")
X_tr = poker_train.iloc[:, 0:10]
y_train = poker_train.iloc[:, 10:11]
X_te = poker_test.iloc[:, 0:10]
y_test = poker_test.iloc[:, 10:11]
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_tr)
X_test = sc.transform(X_te)
# NN mit Keras erstellen
import keras
from keras.models import Sequential
from keras.layers import Dense
nen = Sequential()
nen.add(Dense(100, input_dim = 10, activation = 'relu'))
nen.add(Dense(50, activation = 'relu'))
nen.add(Dense(10, activation = 'softmax'))
# Kompilieren
from keras import metrics
nen.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
nen_fit = nen.fit(X_train, y_train,epochs=2, batch_size=50, verbose=1, validation_split = 0.2, shuffle = False)
I took just 2 epochs so that i can see immediately if my Output is the same, normally there would be 500 epochs. The first lines of code made it until today reproducible, but now it is not! I changed the part with the X_te and X_tr, because first i made a OneHotEncoding with the classes y_train and y_test, but now i am not doing it. Also i changed the activation functions from sigmoid to relu and the optimizer from RMSprop to adam. I don't know what to do:(
I am new to machine learning and keras library and I made a CNN code for regression like below.
%matplotlib inline
from __future__ import division
import numpy as np
from numpy.random import rand
import matplotlib.pyplot as plt
def initial_spin_state(N):
state = np.random.choice((0.11111, 0.99999), (N, N))
return state
def metropolis_algorithm(config, beta):
N = len(config)
for i in range(N):
for j in range(N):
a = np.random.randint(0, N)
b = np.random.randint(0, N)
s = config[a, b]
near=config[(a+1)%N,b] + config[a,(b+1)%N] + config[(a-1)%N,b] + config[a,(b-1)%N]
delta = 2 * s *near
if delta < 0:
s *= -1
elif rand() < np.exp(-delta * beta):
s *= -1
config[a, b] = s
return config
def get_energy(config):
energy = 0
N = len(config)
for i in range(N):
for j in range(N):
S = config[i, j]
near = config[(i+1)%N, j] + config[i,(j+1)%N] + config[(i-1)%N, j] + config[i,(j-1)%N]
energy += near*S
return energy
x_train = []
y_train = []
for i in range(50000):
config = initial_spin_state(16)
energy = get_energy(config)
x_train.append(config)
y_train.append(energy)
x_train = np.array(x_train)
y_train = np.array(y_train)
print(x_train.shape)
print(y_train.shape)
x_test = []
y_test = []
for j in range(20000):
config = initial_spin_state(16)
energy = get_energy(config)
x_test.append(config)
y_test.append(energy)
x_test = np.array(x_test)
y_test = np.array(y_test)
print(x_test.shape)
print(y_test.shape)
x_train = x_train.reshape(50000, 16, 16, 1)
x_test = x_test.reshape(20000, 16, 16, 1)
print(x_train.shape)
print(x_test.shape)
import numpy as np
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.optimizers import Adadelta
from keras.regularizers import l2
model = Sequential()
model.add(Conv2D(32, (2, 2), input_shape = (16, 16, 1), activation = 'relu'))
model.add(MaxPooling2D(2,2))
model.add(Conv2D(16, (2, 2), activation = 'relu'))
model.add(MaxPooling2D(2,2))
model.add(Flatten())
model.add(Dense(512, activation = 'relu'))
#model.add(Dense(1024, activation = 'relu'))
model.add(Dense(1))
np.random.seed(0)
model.summary()
model.compile(loss = "mse", metrics = ['accuracy'], optimizer = 'adam')
%%time
hist = model.fit(x_train, y_train, epochs = 200, batch_size = 500,
validation_data = (x_test, y_test), verbose = 2)
import matplotlib.pyplot as plt
plt.plot(hist.history['acc'], '_b', label = "training")
plt.plot(hist.history['val_acc'], 'r:', label = "test")
plt.legend()
plt.grid("on")
plt.show()
this code is for image input, and contiuous energy value output.
so if I put a image(ising configuration) to CNN, it should predict a energy for the configuration.
the problem is..
when I train CNN, training loss and validation loss is decreased very slowly.
of course, training accuracy and validation accuracy is incread very slowly.
and, sometimes only training accuracy is increasd, val-accuracy is not increased.
genious guys.. what`s wrong with my code??
teach me plz
First of all, since you are doing regression problem, I dont think its a good idea to use acc as your metric, instead, you might consider using mean absolute error mae as your matric.
The loss you are using is mse (mean squared error), so the value would be pretty large especially when you are not normalising your y values. However, after running ~25 epochs of your provided code, the validation loss dropped to 290.xx with 13.xx of mae (And it is not converging yet). And I tried to use your model to predict some validation data, it works fine. Maybe you should test your model before you assume there is something went wrong.
I have to perform a simple FSGM attack to a convolutional neural network. The code for the CNN works correctly, and the model is saved without a problem, but when i try to perform the attack an error is shown.
HERE'S THE CODE FOR THE CNN
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D
import matplotlib.pyplot as plt
from keras.datasets import mnist
from keras.utils import to_categorical
import json
import tensorflow as tf
#Using TensorFlow backend.
#download mnist data and split into train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()
#plot the first image in the dataset
plt.imshow(X_train[0])
#check image shape
X_train[0].shape
#reshape data to fit model
X_train = X_train.reshape(60000,28,28,1)
X_test = X_test.reshape(10000,28,28,1)
#one-hot encode target column
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
y_train[0]
#create model
model = Sequential()
#add model layers
model.add(Conv2D(32, kernel_size=(5,5), activation='relu', input_shape= (28,28,1)))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(64, kernel_size=(5,5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
model.add(Dense(10, activation='softmax'))
#compile model using accuracy as a measure of model performance
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics= ['accuracy'])
#train model
model.fit(X_train, y_train,validation_data=(X_test, y_test), epochs=5)
json.dump({'model':model.to_json()},open("model.json", "w"))
model.save_weights("model_weights.h5")
THEN I TRY TO PERFORM THE ATTACK WITH THE FOLLOWING CODE:
import json
import foolbox
import keras
import numpy as np
from keras import backend
from keras.models import load_model
from keras.datasets import mnist
from keras.utils import np_utils
from foolbox.attacks import FGSM
from foolbox.criteria import Misclassification
from foolbox.distances import MeanSquaredDistance
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
import numpy as np
import tensorflow as tf
from keras.models import model_from_json
import os
############## Loading the model and preprocessing #####################
backend.set_learning_phase(False)
model = tf.keras.models.model_from_json(json.load(open("model.json"))["model"],custom_objects={})
model.load_weights("model_weights.h5")
fmodel = foolbox.models.KerasModel(model, bounds=(0,1))
_,(images, labels) = mnist.load_data()
images = images.reshape(10000,28,28)
images= images.astype('float32')
images /= 255
######################### Attacking the model ##########################
attack=foolbox.attacks.FGSM(fmodel, criterion=Misclassification())
adversarial=attack(images[12],labels[12]) # for single image
adversarial_all=attack(images,labels) # for all the images
adversarial =adversarial.reshape(1,28,28,1) #reshaping it for model prediction
model_predictions = model.predict(adversarial)
print(model_predictions)
########################## Visualization ################################
images=images.reshape(10000,28,28)
adversarial =adversarial.reshape(28,28)
plt.figure()
plt.subplot(1,3,1)
plt.title('Original')
plt.imshow(images[12])
plt.axis('off')
plt.subplot(1, 3, 2)
plt.title('Adversarial')
plt.imshow(adversarial)
plt.axis('off')
plt.subplot(1, 3, 3)
plt.title('Difference')
difference = adversarial - images[124]
plt.imshow(difference / abs(difference).max() * 0.2 + 0.5)
plt.axis('off')
plt.show()
this error is shown when the adversarial examples are generated:
c_api.TF_GetCode(self.status.status))
InvalidArgumentError: Matrix size-incompatible: In[0]: [1,639232], In[1]: [1024,10]
[[{{node dense_4_5/MatMul}}]]
[[{{node dense_4_5/BiasAdd}}]]
What could it be?
here is my solution.
First of all modify the model code as follows
import tensorflow as tf
import json
# download mnist data and split into train and test sets
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
# reshape data to fit model
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)
X_train, X_test = X_train/255, X_test/255
# one-hot encode target column
y_train = tf.keras.utils.to_categorical(y_train)
y_test = tf.keras.utils.to_categorical(y_test)
# create model
model = tf.keras.models.Sequential()
# add model layers
model.add(tf.keras.layers.Conv2D(32, kernel_size=(5, 5),
activation='relu', input_shape=(28, 28, 1)))
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(tf.keras.layers.Conv2D(64, kernel_size=(5, 5), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(10, activation='softmax'))
# compile model using accuracy as a measure of model performance
model.compile(optimizer='adam', loss='categorical_crossentropy',
metrics=['accuracy'])
# train model
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5)
json.dump({'model': model.to_json()}, open("model.json", "w"))
model.save_weights("model_weights.h5")
You just forgot to divide each pixel by the maximum value of RGB (255)
As for the attacker code
import json
import foolbox
from foolbox.attacks import FGSM
from foolbox.criteria import Misclassification
import numpy as np
import tensorflow as tf
############## Loading the model and preprocessing #####################
tf.enable_eager_execution()
tf.keras.backend.set_learning_phase(False)
model = tf.keras.models.model_from_json(
json.load(open("model.json"))["model"], custom_objects={})
model.load_weights("model_weights.h5")
model.compile(optimizer='adam', loss='categorical_crossentropy',
metrics=['accuracy'])
_, (images, labels) = tf.keras.datasets.mnist.load_data()
images = images.reshape(images.shape[0], 28, 28, 1)
images = images/255
images = images.astype(np.float32)
fmodel = foolbox.models.TensorFlowEagerModel(model, bounds=(0, 1))
######################### Attacking the model ##########################
attack = foolbox.attacks.FGSM(fmodel, criterion=Misclassification())
adversarial = np.array([attack(images[0], label=labels[0])])
model_predictions = model.predict(adversarial)
print('real label: {}, label prediction; {}'.format(
labels[0], np.argmax(model_predictions)))
I used TensorFlowEagerModel instead of KerasModel for simplicty. The error you were encountering was due to the fact that model.predict expects a 4d matrix while you were passing a 3d matrix, so I just wrapped up the attack to the image example into a numpy array to make it 4d.
Hope it helps
I'm trying to train a deep classifier in Keras both with and without pretraining of the hidden layers via stacked autoencoders. My problem is that the pretraining seems to drastically degrade performance (i.e. if pretrain is set to False in the code below the training error of the final classification layer converges much faster). This seems completely outrageous to me given that pretraining should only initialize the weights of the hidden layers and I don't see how that could completely kill the models performance even if that initialization does not work very well. I can not include the specific dataset I used but the effect should occur for any appropriate dataset (e.g. minist). What is going on here and how can I fix it?
EDIT: code is now reproducible with the MNIST data, final line prints change in loss function, which is significantly lower with pre-training.
I have also slightly modified the code and added sample learning curves below:
from functools import partial
import matplotlib.pyplot as plt
from keras.datasets import mnist
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import SGD
from keras.regularizers import l2
from keras.utils import to_categorical
(inputs_train, targets_train), _ = mnist.load_data()
inputs_train = inputs_train[:1000].reshape(1000, 784)
targets_train = to_categorical(targets_train[:1000])
hidden_nodes = [256] * 4
learning_rate = 0.01
regularization = 1e-6
epochs = 30
def train_model(pretrain):
model = Sequential()
layer = partial(Dense,
activation='sigmoid',
kernel_initializer='random_normal',
kernel_regularizer=l2(regularization))
for i, hn in enumerate(hidden_nodes):
kwargs = dict(units=hn, name='hidden_{}'.format(i + 1))
if i == 0:
kwargs['input_dim'] = inputs_train.shape[1]
model.add(layer(**kwargs))
if pretrain:
# train autoencoders
inputs_train_ = inputs_train.copy()
for i, hn in enumerate(hidden_nodes):
autoencoder = Sequential()
autoencoder.add(layer(units=hn,
input_dim=inputs_train_.shape[1],
name='hidden'))
autoencoder.add(layer(units=inputs_train_.shape[1],
name='decode'))
autoencoder.compile(optimizer=SGD(lr=learning_rate, momentum=0.9),
loss='binary_crossentropy')
autoencoder.fit(
inputs_train_,
inputs_train_,
batch_size=32,
epochs=epochs,
verbose=0)
autoencoder.pop()
model.layers[i].set_weights(autoencoder.layers[0].get_weights())
inputs_train_ = autoencoder.predict(inputs_train_)
num_classes = targets_train.shape[1]
model.add(Dense(units=num_classes,
activation='softmax',
name='classify'))
model.compile(optimizer=SGD(lr=learning_rate, momentum=0.9),
loss='categorical_crossentropy')
h = model.fit(
inputs_train,
targets_train,
batch_size=32,
epochs=epochs,
verbose=0)
return h.history['loss']
plt.plot(train_model(pretrain=False), label="Without Pre-Training")
plt.plot(train_model(pretrain=True), label="With Pre-Training")
plt.xlabel("Epoch")
plt.ylabel("Cross-Entropy")
plt.legend()
plt.show()