I wrote a little script that has Neural Network approximate polynomial, and plots the result every epoch, but the problem is that I want that every iteration the new plot will overwrite the previous plot, so I can see how it changes over training.
I searched around the web and found that I need to use either ion() or isinteractive() or clear(), but I tried them all and it still does not work.
Edit:
For the sake of clarification, I am using Jupyter notebook, so I want it to work on this platform.
Here's my code:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from numpy import asarray
from matplotlib import pyplot
from tensorflow.keras.layers import Conv1D
import tensorflow
class myCallback(tensorflow.keras.callbacks.Callback):
def on_train_begin(self, logs={}):
pyplot.ion()
def on_epoch_end(self, epoch, logs=None):
yhat = model.predict(x)
# inverse transforms
x_plot = scale_x.inverse_transform(x)
y_plot = scale_y.inverse_transform(y)
yhat_plot = scale_y.inverse_transform(yhat)
# report model error
print('MSE: %.3f' % mean_squared_error(y_plot, yhat_plot))
# plot x vs y
plt = pyplot.scatter(x_plot,y_plot, label='Actual')
# plot x vs yhat
pyplot.scatter(x_plot,yhat_plot, label='Predicted')
pyplot.title('Input (x) versus Output (y)')
pyplot.xlabel('Input Variable (x)')
pyplot.ylabel('Output Variable (y)')
pyplot.legend()
pyplot.show()
# define the dataset
x = asarray([i for i in range(-50,51)])
y = asarray([i**3 for i in x])
print(x.min(), x.max(), y.min(), y.max())
# reshape arrays into into rows and cols
x = x.reshape((len(x), 1))
y = y.reshape((len(y), 1))
# separately scale the input and output variables
scale_x = MinMaxScaler()
x = scale_x.fit_transform(x)
scale_y = MinMaxScaler()
y = scale_y.fit_transform(y)
print(x.min(), x.max(), y.min(), y.max())
# design the neural network model
model = Sequential()
model.add(Dense(10, input_dim=1, activation='relu', kernel_initializer='he_uniform'))
#Conv1D(32, 5, activation='relu')
model.add(Dense(10, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(1))
opt = tensorflow.keras.optimizers.Adam(learning_rate=0.01)
# define the loss function and optimization algorithm
model.compile(loss='mse', optimizer=opt)
# ft the model on the training dataset
model.fit(x, y, epochs=10, batch_size=10, verbose=0, callbacks=[myCallback()])
# make predictions for the input data
Your help would be highly appreciated!
You are getting a new plot after each epoch but the changes are not really visible because your model is too weak. Here is an example with significant differences:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from numpy import asarray
from matplotlib import pyplot
import tensorflow
from IPython.display import clear_output
class myCallback(tensorflow.keras.callbacks.Callback):
def on_epoch_end(self, epoch, logs=None):
clear_output(wait=True)
yhat = model.predict(x)
# inverse transforms
x_plot = scale_x.inverse_transform(x)
y_plot = scale_y.inverse_transform(y)
yhat_plot = scale_y.inverse_transform(yhat)
# report model error
print('MSE: %.3f' % mean_squared_error(y_plot, yhat_plot))
# plot x vs y
plt = pyplot.scatter(x_plot,y_plot, label='Actual')
# plot x vs yhat
pyplot.scatter(x_plot,yhat_plot, label='Predicted')
pyplot.title('Input (x) versus Output (y)')
pyplot.xlabel('Input Variable (x)')
pyplot.ylabel('Output Variable (y)')
pyplot.legend()
pyplot.show()
# define the dataset
x = asarray([i for i in range(-50,51)])
y = asarray([i**3 for i in x])
print(x.shape)
print(x.min(), x.max(), y.min(), y.max())
# reshape arrays into into rows and cols
x = x.reshape((len(x), 1))
y = y.reshape((len(y), 1))
# separately scale the input and output variables
scale_x = MinMaxScaler()
x = scale_x.fit_transform(x)
scale_y = MinMaxScaler()
y = scale_y.fit_transform(y)
print(x.min(), x.max(), y.min(), y.max())
# design the neural network model
model = Sequential()
model.add(Dense(64, input_dim=1, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1))
opt = tensorflow.keras.optimizers.Adam(learning_rate=0.01)
# define the loss function and optimization algorithm
model.compile(loss='mse', optimizer=opt)
# ft the model on the training dataset
model.fit(x, y, epochs=50, batch_size=10, verbose=0, callbacks=[myCallback()])
# make predictions for the input data
Here is the plot of the final epoch:
Related
I have trained a deep neural network for regression, with 2 input neurons, 1 output neuron and some hidden layers, as in the following (Tensorflow 2):
import numpy as np
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import losses
import tensorflow as tf
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
#Creation of a "synthetic" dataset
x1 = np.linspace(0, 6*np.pi, 2000)
x2 = 1.5 * np.linspace(0, 6*np.pi, 2000)
y = np.sin(x1) + np.cos(x2)
data = pd.DataFrame(np.array([x1, x2, y]).transpose(), columns = ['x1', 'x2', 'y'])
# train/test split and definition of the normalization over the training set
train_df, test_df = train_test_split(data, test_size=0.2, random_state=0)
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(np.array(train_df.iloc[:, :-1]))
#Definition of the DNN structure
def build_and_compile_model(norm):
model = keras.Sequential([
norm,
layers.Dense(64, input_dim=2, activation='LeakyReLU'),
layers.Dense(64, activation='LeakyReLU'),
layers.Dense(32, activation='LeakyReLU'),
layers.Dense(32, activation='LeakyReLU'),
layers.Dense(16, activation='LeakyReLU'),
layers.Dense(16, activation='LeakyReLU'),
layers.Dense(8, activation='LeakyReLU'),
layers.Dense(1, activation = 'linear')
])
model.compile(loss='mean_absolute_error',
optimizer=tf.keras.optimizers.Adam(0.001))
return model
model = build_and_compile_model(normalizer)
# Train of the DNN
%%time
history = model.fit(
train_df.iloc[:, :-1],
train_df.iloc[:, -1],
validation_split=0.2,
verbose=2, epochs=100)
Now, if y is the prediction of the network, I want to compute partial derivatives dy/dx1 and dy/dx2. To achieve this, I have tried
x = tf.constant(data.iloc[:, :-1].values)
with tf.GradientTape(persistent = True) as t:
t.watch(x)
y = model(x)
dy_dx = t.gradient(y, x)
dy_dx.numpy()
If I plot the y as a function of x1 (or of x2), and I compare it with the analytical result from the definition I have given above, I get a good agreement:
plt.figure(figsize = (5, 3), dpi = 190)
plt.plot(x1, model.predict(x), label = 'model prediction')
plt.plot(x1, np.sin(x1) + np.cos(x2), label = 'analytical result')
plt.xlabel('$x_1$')
plt.legend()
plt.show()
On the contrary, if I plot the first column of the vector dy_dx and I compare it with the analytical derivative (dy/dx1 = cos(x1)), they do not match (similar situation for the other partial derivative):
plt.figure(figsize = (5, 3), dpi = 190)
plt.plot(x1, dy_dx[:, 0], label = 'autodiff result')
plt.plot(x1, np.cos(x1), label = 'analytical result')
plt.xlabel('$x_1$')
plt.legend()
plt.show()
If I compare this gradient with the finite differences, I get
plt.figure(figsize = (5, 3), dpi = 190)
plt.plot(x1, dy_dx[:, 0], label = 'autodiff result')
plt.plot(x1[0:-1], np.diff(y.numpy()[:, 0])/.1e-1, label = 'finite differences')
plt.xlabel('$x_1$')
plt.legend()
plt.show()
So, since the autodiff result and the finite difference result are equal up to a scaling constant, this means that autodiff is not computing the partial derivative dy/dx1, but it is only computing the total derivative, plotting it over one of the variables.
So, my question remains: how to compute partial derivatives?
I am running many iterations of a train so I can smooth out the loss curves. I would like an elegant way to average all the losses from history.history['loss'] but haven't found an easy way to do it. Here's a minimal example:
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from matplotlib import pyplot as plt
(x_train, y_train), _ = tf.keras.datasets.mnist.load_data()
x_train = x_train.reshape(60000, 784).astype('float32')/255
y_train = to_categorical(y_train, num_classes=10)
def get_model():
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(10, activation='sigmoid',
input_shape=(784,)))
model.add(tf.keras.layers.Dense(10, activation='softmax'))
model.compile(loss="categorical_crossentropy", optimizer="sgd",
metrics = ['accuracy'])
return model
all_trains = []
for i in range(3):
model = get_model()
history = model.fit(x_train, y_train, epochs=2)
all_trains.append(history)
If I wanted to plot just one example, I would do this:
plt.plot(history.epoch, history.history['loss'])
plt.show()
But instead, I want to average the loss from each train in all_trains and plot them. I can think of many clunky ways to do it but would like to find a clean way.
You could simply do:
import numpy as np
import matplotlib.pyplot as plt
losses = [h.history['loss'] for h in all_trains]
mean_loss = np.mean(losses, axis=0)
std = np.std(losses, axis=0)
plt.errorbar(range(len(mean_loss)), mean_loss, yerr=std, capsize=5, marker='o')
plt.title('Average loss per epoch (± std)')
plt.xlabel('Epoch')
plt.ylabel('Categorical crossentropy')
plt.show()
I also added the standard deviation in this case.
I'm new to Keras and I'm currently using it to build a neural network that will predict a given function, one with Gaussian noise and without. Here's the implementation with the GaussianNoise layer:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import tensorflow.keras as keras
from sklearn.metrics import mean_squared_error
from keras.layers import Dense, GaussianNoise
from scipy.interpolate import make_interp_spline, BSpline
x = [i for i in range(-5, 5)]
x1 = np.asarray(x)
xnew = np.linspace(-5.5, 5.5, 300)
y = [(i**4.0 - 22 * (i ** 2.0)) for i in x]
xnew = np.linspace(-5.5, 5.5, 300)
spl = make_interp_spline(x, y, k=3)
power_smooth = spl(xnew)
y = [(i**4.0 - 22 * (i ** 2.0)) for i in xnew]
y1 = np.asarray(y)
input_layer = keras.layers.Input(shape=(1,))
dense = Dense(10, activation='relu')(input_layer)
gauss = GaussianNoise(stddev=25)(dense)
dense = Dense(10, activation='relu')(gauss)
dense = Dense(10, activation='relu')(dense)
dense = Dense(10, activation='relu')(dense)
dense = Dense(10, activation='relu')(dense)
output = Dense(1)(dense)
model = keras.Model(inputs=input_layer, outputs=output)
model.compile(loss='mse', optimizer='adam')
model.fit(xnew, y1, epochs=1000, batch_size=10, verbose=0)
yhat = model.predict(xnew)
print('MSE: %.3f' % mean_squared_error(y1, yhat))
spl1 = make_interp_spline(xnew, yhat, k=3)
power_smooth1 = spl1(xnew)
plt.plot(xnew,power_smooth1, label='Predicted')
plt.plot(xnew,power_smooth, label = 'Actual')
plt.title('Input (x) versus Output (y)')
plt.xlabel('Input Variable (x)')
plt.ylabel('Output Variable (y)')
plt.legend()
plt.show()
When I remove the GaussianNoise layer, it produced a decent prediction:
However, I then added the GaussianNoise layer back into my code and this is the result:
I'm not sure what's going on here for this layer to obscure the prediction so badly. How could I remedy this? Any help would be appreciated. Thank you!
The X represents features and Y represents labels for image classification. I am using CNN for binary image classification purpose like that of cats and dogs.
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
import pickle
import numpy as np
from sklearn.metrics import confusion_matrix
X = np.array(pickle.load(open("X.pickle","rb")))
Y = np.array(pickle.load(open("Y.pickle","rb")))
x_test = np.array(pickle.load(open("x_test.pickle","rb")))
y_test = np.array(pickle.load(open("y_test.pickle","rb")))
# X = np.array(pickle.load(open("x_train.pickle","rb")))
# Y = np.array(pickle.load(open("y_train.pickle","rb")))
#scaling our image data
X = X/255.0
model = Sequential()
#model.add(Conv2D(64 ,(3,3), input_shape = X.shape[1:]))
model.add(Conv2D(64 ,(3,3), input_shape = X.shape[1:]))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size = (2,2)))
model.add(Conv2D(128 ,(3,3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size = (2,2)))
model.add(Conv2D(256 ,(3,3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size = (2,2)))
model.add(Conv2D(512 ,(3,3)))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size = (2,2)))
model.add(Flatten())
model.add(Dense(2048))
model.add(Activation("relu"))
model.add(Dropout(0.5))
np.argmax(model.add(Dense(2)))
model.add(Activation('softmax'))
model.compile(loss="binary_crossentropy",
optimizer = "adam",
metrics = ['accuracy'])
predicted = model.predict(x_test)
print(predicted.shape)
print(y_test.shape)
print(confusion_matrix(y_test,predicted))
The output of predicted and y_test shapes are (90, 2) and
(90,) and when I used confusion matrix it flushes:-
ValueError: Classification metrics can't handle a mix of binary and continuous-multioutput targets.
You can use scikit-learn:
from sklearn.metrics import confusion_matrix
predicted = model.predict(x_test)
print(confusion_matrix(y_test,predicted.round()))
Here's scikit learn documentation for confusion matrix:
https://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html
Edit:
Advice:
Prefer using Softmax activation on output layer and whether it is binary or multi label classification. Use softmax with number of nodes in output layer = no of classes.
Here's one example how we can get the confusion matrix using the PyCM library:
you need to install the pycm library in anaconda or by using pip3
conda install -c sepandhaghighi pycm
from pycm import *
def plot_confusion_matrix(cm, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
"""
This function modified to plots the ConfusionMatrix object.
Normalization can be applied by setting 'normalize=True'.
Code Reference :
http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
"""
plt_cm = []
for i in cm.classes :
row=[]
for j in cm.classes:
row.append(cm.table[i][j])
plt_cm.append(row)
plt_cm = np.array(plt_cm)
if normalize:
plt_cm = plt_cm.astype('float') / plt_cm.sum(axis=1)[:, np.newaxis]
plt.imshow(plt_cm, interpolation='nearest', cmap=cmap)
plt.title(title)
plt.colorbar()
tick_marks = np.arange(len(cm.classes))
plt.xticks(tick_marks, cm.classes, rotation=45)
plt.yticks(tick_marks, cm.classes)
fmt = '.2f' if normalize else 'd'
thresh = plt_cm.max() / 2.
for i, j in itertools.product(range(plt_cm.shape[0]), range(plt_cm.shape[1])):
plt.text(j, i, format(plt_cm[i, j], fmt),
horizontalalignment="center",
color="white" if plt_cm[i, j] > thresh else "black")
plt.tight_layout()
plt.ylabel('Actual')
plt.xlabel('Prediction')
.....
svm.fit(x_train, y_train)
y_predicted = svm.predict(x_test)
#Get the confusion Matrix:
cm = ConfusionMatrix(actual_vector=y_test, predict_vector=y_predicted)
#Print classes
print("[INFO] Clases")
print(cm.classes)
#Print the table of cmatrix
print(cm.table)
#indicators of the confusion matrix
print(cm)
greetings!
After spending days failing to use neural network for Q learning, I decided to go back to the basics and do a simple function approximation to see if everything was working correctly and see how some parameters affected the learning process.
Here is the code that I came up with
from keras.models import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt
import random
import numpy
from sklearn.preprocessing import MinMaxScaler
regressor = Sequential()
regressor.add(Dense(units=20, activation='sigmoid', kernel_initializer='uniform', input_dim=1))
regressor.add(Dense(units=20, activation='sigmoid', kernel_initializer='uniform'))
regressor.add(Dense(units=20, activation='sigmoid', kernel_initializer='uniform'))
regressor.add(Dense(units=1))
regressor.compile(loss='mean_squared_error', optimizer='sgd')
#regressor = ExtraTreesRegressor()
N = 5000
X = numpy.empty((N,))
Y = numpy.empty((N,))
for i in range(N):
X[i] = random.uniform(-10, 10)
X = numpy.sort(X).reshape(-1, 1)
for i in range(N):
Y[i] = numpy.sin(X[i])
Y = Y.reshape(-1, 1)
X_scaler = MinMaxScaler()
Y_scaler = MinMaxScaler()
X = X_scaler.fit_transform(X)
Y = Y_scaler.fit_transform(Y)
regressor.fit(X, Y, epochs=2, verbose=1, batch_size=32)
#regressor.fit(X, Y.reshape(5000,))
x = numpy.mgrid[-10:10:100*1j]
x = x.reshape(-1, 1)
y = numpy.mgrid[-10:10:100*1j]
y = y.reshape(-1, 1)
x = X_scaler.fit_transform(x)
for i in range(len(x)):
y[i] = regressor.predict(numpy.array([x[i]]))
plt.figure()
plt.plot(X_scaler.inverse_transform(x), Y_scaler.inverse_transform(y))
plt.plot(X_scaler.inverse_transform(X), Y_scaler.inverse_transform(Y))
The problem is that all my predictions are around 0 in value. As you can see I used an ExtraTreesRegressor from sklearn (commented lines) to check that the protocol is actually correct. So what is wrong with my neural network ? Why is it not working ?
(The actual problem that I'm trying to solve is to compute the Q function for the mountain car problem using neural network. How is it different from this function approximator ?)
With these changes:
Activations to relu
Remove kernel_initializer (i.e. leave the default 'glorot_uniform')
Adam optimizer
100 epochs
i.e.
regressor = Sequential()
regressor.add(Dense(units=20, activation='relu', input_dim=1))
regressor.add(Dense(units=20, activation='relu'))
regressor.add(Dense(units=20, activation='relu'))
regressor.add(Dense(units=1))
regressor.compile(loss='mean_squared_error', optimizer='adam')
regressor.fit(X, Y, epochs=100, verbose=1, batch_size=32)
and the rest of your code unchanged, here is the result:
Tinker, again and again...
A more concise version of your code that works:
def data_gen():
while True:
x = (np.random.random([1024])-0.5) * 10
y = np.sin(x)
yield (x,y)
regressor = Sequential()
regressor.add(Dense(units=20, activation='tanh', input_dim=1))
regressor.add(Dense(units=20, activation='tanh'))
regressor.add(Dense(units=20, activation='tanh'))
regressor.add(Dense(units=1, activation='linear'))
regressor.compile(loss='mse', optimizer='adam')
regressor.fit_generator(data_gen(), epochs=3, steps_per_epoch=128)
x = (np.random.random([1024])-0.5)*10
x = np.sort(x)
y = np.sin(x)
plt.plot(x, y)
plt.plot(x, regressor.predict(x))
plt.show()
Changes made: replacing low layer activations with hyperbolic tangents, replacing the static dataset with a random generator, replacing sgd with adam. That said, there still are problems with other parts of your code that I haven't been able to locate yet (most likely your scaler and random process).
I managed to get a good approximation by changing the architecture and the training as in the following code. It's a bit of an overkill but at least I know where the problem was coming from.
from keras.models import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt
import random
import numpy
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import ExtraTreesRegressor
from keras import optimizers
regressor = Sequential()
regressor.add(Dense(units=500, activation='sigmoid', kernel_initializer='uniform', input_dim=1))
regressor.add(Dense(units=500, activation='sigmoid', kernel_initializer='uniform'))
regressor.add(Dense(units=1, activation='sigmoid'))
regressor.compile(loss='mean_squared_error', optimizer='adam')
#regressor = ExtraTreesRegressor()
N = 5000
X = numpy.empty((N,))
Y = numpy.empty((N,))
for i in range(N):
X[i] = random.uniform(-10, 10)
X = numpy.sort(X).reshape(-1, 1)
for i in range(N):
Y[i] = numpy.sin(X[i])
Y = Y.reshape(-1, 1)
X_scaler = MinMaxScaler()
Y_scaler = MinMaxScaler()
X = X_scaler.fit_transform(X)
Y = Y_scaler.fit_transform(Y)
regressor.fit(X, Y, epochs=50, verbose=1, batch_size=2)
#regressor.fit(X, Y.reshape(5000,))
x = numpy.mgrid[-10:10:100*1j]
x = x.reshape(-1, 1)
y = numpy.mgrid[-10:10:100*1j]
y = y.reshape(-1, 1)
x = X_scaler.fit_transform(x)
for i in range(len(x)):
y[i] = regressor.predict(numpy.array([x[i]]))
plt.figure()
plt.plot(X_scaler.inverse_transform(x), Y_scaler.inverse_transform(y))
plt.plot(X_scaler.inverse_transform(X), Y_scaler.inverse_transform(Y))
However I'm still baffled that I found papers saying that they were using only two hidden layers of five neurons to approximate the Q function of the mountain car problem and training their network for only a few minutes and get good results. I will try changing my batch size in my original problem to see what results I can get but I'm not very optimistic