I am running many iterations of a train so I can smooth out the loss curves. I would like an elegant way to average all the losses from history.history['loss'] but haven't found an easy way to do it. Here's a minimal example:
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from matplotlib import pyplot as plt
(x_train, y_train), _ = tf.keras.datasets.mnist.load_data()
x_train = x_train.reshape(60000, 784).astype('float32')/255
y_train = to_categorical(y_train, num_classes=10)
def get_model():
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(10, activation='sigmoid',
input_shape=(784,)))
model.add(tf.keras.layers.Dense(10, activation='softmax'))
model.compile(loss="categorical_crossentropy", optimizer="sgd",
metrics = ['accuracy'])
return model
all_trains = []
for i in range(3):
model = get_model()
history = model.fit(x_train, y_train, epochs=2)
all_trains.append(history)
If I wanted to plot just one example, I would do this:
plt.plot(history.epoch, history.history['loss'])
plt.show()
But instead, I want to average the loss from each train in all_trains and plot them. I can think of many clunky ways to do it but would like to find a clean way.
You could simply do:
import numpy as np
import matplotlib.pyplot as plt
losses = [h.history['loss'] for h in all_trains]
mean_loss = np.mean(losses, axis=0)
std = np.std(losses, axis=0)
plt.errorbar(range(len(mean_loss)), mean_loss, yerr=std, capsize=5, marker='o')
plt.title('Average loss per epoch (± std)')
plt.xlabel('Epoch')
plt.ylabel('Categorical crossentropy')
plt.show()
I also added the standard deviation in this case.
Related
I'm a rookie at machine learning so please bear with me.
I have a model that trains images and classifies them in 3 different classes. I'm trying to get the confusion matrix for the test data, but either I don't understand it or it's not making any sense. When the model is done training after 200 epochs it shows that the accuracy is around 65%:
loss: 1.5386 - accuracy: 0.6583
But then when the confusion matrix is printed like this:
[[23 51 42]
[20 27 25]
[47 69 56]]
Which isn't correct because the "true" results (23, 27 and 56) don't make up for 65% of all the results. If you add up the numbers in the matrix it adds to the amount of test images so I know that part is correct.
I had a tensorflow warning just before printing the confusion matrix that says the following, but I don't really get its meaning:
WARNING:tensorflow:Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least steps_per_epoch * epochs batches (in this case, 13 batches). You may need to use the repeat() function when building your dataset
This is my code:
import sys
from matplotlib import pyplot
import numpy as np
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import SGD, RMSprop, Adam
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix
CLASSES = 3
# define cnn model
def define_model():
# load model
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
#add new classifier layers
x = base_model.output
x = Dropout(0.4)(x)
x = GlobalAveragePooling2D(name='avg_pool')(x)
predictions = Dense(CLASSES, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)
# mark loaded layers as not trainable
for layer in base_model.layers:
layer.trainable = False
# compile model
opt = RMSprop(lr=0.0001)
#momentum=0.9)
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
return model
# plot diagnostic learning curves
def summarize_diagnostics(history):
# plot loss
pyplot.subplot(211)
pyplot.title('Cross Entropy Loss')
pyplot.plot(history.history['loss'], color='blue', label='train')
pyplot.plot(history.history['val_loss'], color='orange', label='test')
# plot accuracy
pyplot.subplot(212)
pyplot.title('Classification Accuracy')
pyplot.plot(history.history['accuracy'], color='blue', label='train')
pyplot.plot(history.history['val_accuracy'], color='orange', label='test')
# save plot to file
filename = sys.argv[0].split('/')[-1]
pyplot.savefig(filename + '_plot.png')
pyplot.close()
# run the test harness for evaluating a model
def run_test_harness():
# define model
model = define_model()
# create data generator
datagen = ImageDataGenerator(featurewise_center=True)
# specify imagenet mean values for centering
datagen.mean = [123.68, 116.779, 103.939]
# prepare iterators
train_it = datagen.flow_from_directory('../datasetMainPruebas3ClasesQuitandoConfusosCV1/train/',
class_mode='categorical', batch_size=32, target_size=(224, 224))
test_it = datagen.flow_from_directory('../datasetMainPruebas3ClasesQuitandoConfusosCV1/test/',
class_mode='categorical', batch_size=32, target_size=(224, 224))
# fit model
history = model.fit_generator(train_it, steps_per_epoch=len(train_it),
validation_data=test_it, validation_steps=len(test_it), epochs=200, verbose=1)
# evaluate model
_, acc = model.evaluate_generator(test_it, steps=len(test_it), verbose=1)
print('> %.3f' % (acc * 100.0))
#confusion matrix
Y_pred = model.predict_generator(test_it, len(test_it) + 1)
y_pred = np.argmax(Y_pred, axis=1)
print('Confusion Matrix')
cm = confusion_matrix(test_it.classes, y_pred)
print(cm)
# learning curves
summarize_diagnostics(history)
# entry point, run the test harness
run_test_harness()
Any help or tip is welcome, thank you
Edit: after checking one of the comments I changed my CM code to the following:
#confusion matrix
all_y_pred = []
all_y_true = []
for i in range(len(test_it)):
x, y = test_it[i]
y_pred = model.predict(x)
all_y_pred.append(y_pred)
all_y_true.append(y)
all_y_pred = np.concatenate(all_y_pred, axis=0)
all_y_true = np.concatenate(all_y_true, axis=0)
print('Confusion Matrix')
cm = confusion_matrix(all_y_true, all_y_pred)
print(cm)
And now the error I get says "Classification metrics can't handle a mix of multilabel-indicator and continuous-multioutput targets"
Any idea why?
I wrote a little script that has Neural Network approximate polynomial, and plots the result every epoch, but the problem is that I want that every iteration the new plot will overwrite the previous plot, so I can see how it changes over training.
I searched around the web and found that I need to use either ion() or isinteractive() or clear(), but I tried them all and it still does not work.
Edit:
For the sake of clarification, I am using Jupyter notebook, so I want it to work on this platform.
Here's my code:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from numpy import asarray
from matplotlib import pyplot
from tensorflow.keras.layers import Conv1D
import tensorflow
class myCallback(tensorflow.keras.callbacks.Callback):
def on_train_begin(self, logs={}):
pyplot.ion()
def on_epoch_end(self, epoch, logs=None):
yhat = model.predict(x)
# inverse transforms
x_plot = scale_x.inverse_transform(x)
y_plot = scale_y.inverse_transform(y)
yhat_plot = scale_y.inverse_transform(yhat)
# report model error
print('MSE: %.3f' % mean_squared_error(y_plot, yhat_plot))
# plot x vs y
plt = pyplot.scatter(x_plot,y_plot, label='Actual')
# plot x vs yhat
pyplot.scatter(x_plot,yhat_plot, label='Predicted')
pyplot.title('Input (x) versus Output (y)')
pyplot.xlabel('Input Variable (x)')
pyplot.ylabel('Output Variable (y)')
pyplot.legend()
pyplot.show()
# define the dataset
x = asarray([i for i in range(-50,51)])
y = asarray([i**3 for i in x])
print(x.min(), x.max(), y.min(), y.max())
# reshape arrays into into rows and cols
x = x.reshape((len(x), 1))
y = y.reshape((len(y), 1))
# separately scale the input and output variables
scale_x = MinMaxScaler()
x = scale_x.fit_transform(x)
scale_y = MinMaxScaler()
y = scale_y.fit_transform(y)
print(x.min(), x.max(), y.min(), y.max())
# design the neural network model
model = Sequential()
model.add(Dense(10, input_dim=1, activation='relu', kernel_initializer='he_uniform'))
#Conv1D(32, 5, activation='relu')
model.add(Dense(10, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(1))
opt = tensorflow.keras.optimizers.Adam(learning_rate=0.01)
# define the loss function and optimization algorithm
model.compile(loss='mse', optimizer=opt)
# ft the model on the training dataset
model.fit(x, y, epochs=10, batch_size=10, verbose=0, callbacks=[myCallback()])
# make predictions for the input data
Your help would be highly appreciated!
You are getting a new plot after each epoch but the changes are not really visible because your model is too weak. Here is an example with significant differences:
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from numpy import asarray
from matplotlib import pyplot
import tensorflow
from IPython.display import clear_output
class myCallback(tensorflow.keras.callbacks.Callback):
def on_epoch_end(self, epoch, logs=None):
clear_output(wait=True)
yhat = model.predict(x)
# inverse transforms
x_plot = scale_x.inverse_transform(x)
y_plot = scale_y.inverse_transform(y)
yhat_plot = scale_y.inverse_transform(yhat)
# report model error
print('MSE: %.3f' % mean_squared_error(y_plot, yhat_plot))
# plot x vs y
plt = pyplot.scatter(x_plot,y_plot, label='Actual')
# plot x vs yhat
pyplot.scatter(x_plot,yhat_plot, label='Predicted')
pyplot.title('Input (x) versus Output (y)')
pyplot.xlabel('Input Variable (x)')
pyplot.ylabel('Output Variable (y)')
pyplot.legend()
pyplot.show()
# define the dataset
x = asarray([i for i in range(-50,51)])
y = asarray([i**3 for i in x])
print(x.shape)
print(x.min(), x.max(), y.min(), y.max())
# reshape arrays into into rows and cols
x = x.reshape((len(x), 1))
y = y.reshape((len(y), 1))
# separately scale the input and output variables
scale_x = MinMaxScaler()
x = scale_x.fit_transform(x)
scale_y = MinMaxScaler()
y = scale_y.fit_transform(y)
print(x.min(), x.max(), y.min(), y.max())
# design the neural network model
model = Sequential()
model.add(Dense(64, input_dim=1, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1))
opt = tensorflow.keras.optimizers.Adam(learning_rate=0.01)
# define the loss function and optimization algorithm
model.compile(loss='mse', optimizer=opt)
# ft the model on the training dataset
model.fit(x, y, epochs=50, batch_size=10, verbose=0, callbacks=[myCallback()])
# make predictions for the input data
Here is the plot of the final epoch:
So my main goal is to use data from 2018 and try to predict data for 2019. I'm using a GRU model and I have the following code. I have a few issues, I'm not sure if the code is actually correct or if I am missing something, and also for model.fit should I use validation_split=0.1 or validation_data=X_test,y_test since I'm using a different dataframe for tesing.
Regarding the accuracy, it is very small and doesn't make any sense and I have no idea why.
import pandas as pd
import tensorflow as tf
from keras.layers.core import Dense
from keras.layers.recurrent import GRU
from keras.models import Sequential
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorboardcolab import TensorBoardColab, TensorBoardColabCallback
df = pd.read_csv('IF 10 PERCENT.csv',index_col=None)
#Loading Second Dataframe
df2 = pd.read_csv('2019 10minutes IF 10 PERCENT.csv',index_col=None)
tbc=TensorBoardColab() # Tensorboard
X_train= df[['WindSpeed_mps','AmbTemp_DegC','RotorSpeed_rpm','RotorSpeedAve','NacelleOrientation_Deg','MeasuredYawError','Pitch_Deg','WindSpeed1','WindSpeed2','WindSpeed3','GeneratorTemperature_DegC','GearBoxTemperature_DegC']]
X_train=X_train.values
y_train= df['Power_kW']
y_train=y_train.values
X_test= df2[['WindSpeed_mps','AmbTemp_DegC','RotorSpeed_rpm','RotorSpeedAve','NacelleOrientation_Deg','MeasuredYawError','Pitch_Deg','WindSpeed1','WindSpeed2','WindSpeed3','GeneratorTemperature_DegC','GearBoxTemperature_DegC']]
X_test=X_test.values
y_test= df2['Power_kW']
y_test=y_test.values
# conversion to numpy array
# scaling values for model
x_scale = MinMaxScaler()
y_scale = MinMaxScaler()
X_train= x_scale.fit_transform(X_train)
y_train= y_scale.fit_transform(y_train.reshape(-1,1))
X_test=x_scale.fit_transform(X_test)
y_test=y_scale.fit_transform(y_test.reshape(-1,1))
X_train = X_train.reshape((-1,1,12))
X_test = X_test.reshape((-1,1,12))
# splitting train and test
# creating model using Keras
model = Sequential()
model.add(GRU(units=512, return_sequences=True, input_shape=(1,12)))
model.add(GRU(units=256, return_sequences=True))
model.add(GRU(units=256))
model.add(Dense(units=1, activation='sigmoid'))
model.compile(loss=['mse'], optimizer='adam',metrics=['accuracy'])
model.summary()
#model.fit(X_train, y_train, batch_size=250, epochs=10, validation_split=0.1, verbose=1, callbacks=[TensorBoardColabCallback(tbc)])
model.fit(X_train, y_train, batch_size=250, epochs=10, validation_data=(X_test,y_test), verbose=1, callbacks=[TensorBoardColabCallback(tbc)])
score = model.evaluate(X_test, y_test)
print('Score: {}'.format(score))
print('Accuracy: {}'.format(acc))
y_predicted = model.predict(X_test)
y_predicted = y_scale.inverse_transform(y_predicted)
y_t
est = y_scale.inverse_transform(y_test)
plt.plot(y_predicted, label='Predicted')
plt.plot(y_test, label='Measurements')
plt.legend()
plt.show()
Thank you
It sounds to me that you are trying to solve a regression problem here. if it is so, It does not make sense to measure accuracy as a metric, since accuracy is about to measure the exact label matching. MSE should be pretty good for the regression
I have a regression problem and I am using a keras fully connected layer to model my problem. I am using cross_val_score and my question is: how can I extract the model and the history of each train/validation combination the cross_val_score does?
Assuming this example:
from sklearn import datasets
from sklearn.model_selection import cross_val_score, KFold
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
seed = 1
diabetes = datasets.load_diabetes()
X = diabetes.data[:150]
y = diabetes.target[:150]
def baseline_model():
model = Sequential()
model.add(Dense(10, input_dim=10, activation='relu'))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
return model
estimator = KerasRegressor(build_fn=baseline_model, nb_epoch=100, batch_size=100, verbose=False)
kfold = KFold(n_splits=10, random_state=seed)
results = cross_val_score(estimator, X, y, cv=kfold)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))
My understanding is that I only get the overall mse over each fold, so to say.
But I want to compare the train to validation mse over the epochs of the model for each fold, i.e. for 10 in this case.
When not using kfold, but simple train/validation split, then one can do:
hist = model.fit(X_tr, y_tr, validation_data=val_data,
epochs=100, batch_size=100,
verbose=1)
plt.plot(history.history['loss'])
plt.plot(history.history['loss'])
This would return a plot representing the evolution of the mse w.r.t. to the epochs for the train and validation datasets, allowing to spot over/underfitting.
How to do this for each fold when using cross validation?
You can go for a "manual" CV procedure, and plot the loss (or any other available metric you might want to use) for each fold, i.e. something like this:
from sklearn.metrics import mean_squared_error
cv_mse = []
for train_index, val_index in kfold.split(X):
history = estimator.fit(X[train_index], y[train_index])
pred = estimator.predict(X[val_index])
err = mean_squared_error(y[val_index], pred)
cv_mse.append(err)
plt.plot(history.history['loss'])
In which case, the cv_mse list will contain the final MSE for each fold, and you also get the respective plots for its evolution per epoch for each fold.
I'm trying to train a deep classifier in Keras both with and without pretraining of the hidden layers via stacked autoencoders. My problem is that the pretraining seems to drastically degrade performance (i.e. if pretrain is set to False in the code below the training error of the final classification layer converges much faster). This seems completely outrageous to me given that pretraining should only initialize the weights of the hidden layers and I don't see how that could completely kill the models performance even if that initialization does not work very well. I can not include the specific dataset I used but the effect should occur for any appropriate dataset (e.g. minist). What is going on here and how can I fix it?
EDIT: code is now reproducible with the MNIST data, final line prints change in loss function, which is significantly lower with pre-training.
I have also slightly modified the code and added sample learning curves below:
from functools import partial
import matplotlib.pyplot as plt
from keras.datasets import mnist
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import SGD
from keras.regularizers import l2
from keras.utils import to_categorical
(inputs_train, targets_train), _ = mnist.load_data()
inputs_train = inputs_train[:1000].reshape(1000, 784)
targets_train = to_categorical(targets_train[:1000])
hidden_nodes = [256] * 4
learning_rate = 0.01
regularization = 1e-6
epochs = 30
def train_model(pretrain):
model = Sequential()
layer = partial(Dense,
activation='sigmoid',
kernel_initializer='random_normal',
kernel_regularizer=l2(regularization))
for i, hn in enumerate(hidden_nodes):
kwargs = dict(units=hn, name='hidden_{}'.format(i + 1))
if i == 0:
kwargs['input_dim'] = inputs_train.shape[1]
model.add(layer(**kwargs))
if pretrain:
# train autoencoders
inputs_train_ = inputs_train.copy()
for i, hn in enumerate(hidden_nodes):
autoencoder = Sequential()
autoencoder.add(layer(units=hn,
input_dim=inputs_train_.shape[1],
name='hidden'))
autoencoder.add(layer(units=inputs_train_.shape[1],
name='decode'))
autoencoder.compile(optimizer=SGD(lr=learning_rate, momentum=0.9),
loss='binary_crossentropy')
autoencoder.fit(
inputs_train_,
inputs_train_,
batch_size=32,
epochs=epochs,
verbose=0)
autoencoder.pop()
model.layers[i].set_weights(autoencoder.layers[0].get_weights())
inputs_train_ = autoencoder.predict(inputs_train_)
num_classes = targets_train.shape[1]
model.add(Dense(units=num_classes,
activation='softmax',
name='classify'))
model.compile(optimizer=SGD(lr=learning_rate, momentum=0.9),
loss='categorical_crossentropy')
h = model.fit(
inputs_train,
targets_train,
batch_size=32,
epochs=epochs,
verbose=0)
return h.history['loss']
plt.plot(train_model(pretrain=False), label="Without Pre-Training")
plt.plot(train_model(pretrain=True), label="With Pre-Training")
plt.xlabel("Epoch")
plt.ylabel("Cross-Entropy")
plt.legend()
plt.show()