I am trying to classify the medical images taken from publicly available datasets. I used transfer learning for this task. Initially, when I had used the following code on the same dataset with VGG, Resnet, Densenet, and Inception, the accuracy was above 85% without fine-tuning (TensorFlow version 1.15.2.) Now, after upgrading the TensorFlow to 2.x, when I try the same code on the same dataset, the accuracy is never crossing 32%. Can anyone please help me rectify the issue? Is it something to do with the TensorFlow version or something else? I have tried varying the learning rate, fine-tuning the model, etc. Is this the issue with batch normalization error in Keras?
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
import sys
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.densenet import DenseNet121
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.inception_v3 import InceptionV3
import cv2
import glob
from tensorflow.keras.layers import Input, Dense, Flatten, Conv2D, MaxPooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import tensorflow.keras as keras
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from keras import backend as k
from mlxtend.evaluate import confusion_matrix
from mlxtend.plotting import plot_confusion_matrix
import math
import pandas as pd
from openpyxl import load_workbook
save_dir = '../new_tran/'
def extract_outputs(cnf_matrix, mdl_name):
cnf_matrix_T=np.transpose(cnf_matrix)
recall = np.diag(cnf_matrix) / np.sum(cnf_matrix, axis = 1)
precision = np.diag(cnf_matrix) / np.sum(cnf_matrix, axis = 0)
n_class=3
TP=np.zeros(n_class)
FN=np.zeros(n_class)
FP=np.zeros(n_class)
TN=np.zeros(n_class)
for i in range(n_class):
TP[i]=cnf_matrix[i,i]
FN[i]=np.sum(cnf_matrix[i])-cnf_matrix[i,i]
FP[i]=np.sum(cnf_matrix_T[i])-cnf_matrix[i,i]
TN[i]=np.sum(cnf_matrix)-TP[i]-FP[i]-FN[i]
P=TP+FN
N=FP+TN
classwise_sensitivity=np.true_divide(TP,P)
classwise_specificity=np.true_divide(TN,N)
classwise_accuracy=np.true_divide((TP+TN), (P+N))
OS=np.mean(classwise_sensitivity)
OSp=np.mean(classwise_specificity)
OA=np.sum(np.true_divide(TP,(P+N)))
Px=np.sum(P)
TPx=np.sum(TP)
FPx=np.sum(FP)
TNx=np.sum(TN)
FNx=np.sum(FN)
Nx=np.sum(N)
pox=OA
pex=((Px*(TPx+FPx))+(Nx*(FNx+TNx)))/(math.pow((TPx+TNx+FPx+FNx),2))
kappa_overall=[np.true_divide(( pox-pex ), ( 1-pex )),np.true_divide(( pex-pox ), ( 1-pox ))]
kappa=np.max(kappa_overall)
Rcl=np.mean(recall)
Prcn=np.mean(precision)
#######--------------------- Print all scores
print('classwise_sen',classwise_sensitivity*100)
print('classwise_spec',classwise_specificity*100)
print('classwise_acc',classwise_accuracy*100)
print('overall_sen',OS*100)
print('overall_spec',OSp*100)
print('overall_acc',OA*100)
print('overall recall', Rcl)
print('overall precision',Prcn)
f1score=(2 * Prcn * Rcl) / (Prcn + Rcl)
print('overall F1-score',f1score )
print('Kappa',kappa)
def preProcess(X):
X = X.astype('float32')
# scale from [0,255] to [-1,1]
X = (X - 127.5) / 127.5
return X
train_datagen = ImageDataGenerator(preprocessing_function=preProcess)
test_datagen = ImageDataGenerator(preprocessing_function=preProcess)
IMG_SIZE = 256
batch_size = 16
train_data_dir = '../data/train/'
test_dir = '../data/test/'
val_dir = '../data/val/'
train_generator = train_datagen.flow_from_directory(
train_data_dir,
target_size=(IMG_SIZE , IMG_SIZE),
batch_size=16,
class_mode='sparse')
valid_generator = train_datagen.flow_from_directory(
val_dir,
target_size=(IMG_SIZE , IMG_SIZE),
batch_size=16,
class_mode='sparse')
test_generator = train_datagen.flow_from_directory(
test_dir,
target_size=(IMG_SIZE , IMG_SIZE),
batch_size=16,
class_mode='sparse')
test_im=np.concatenate([test_generator.next()[0] for i in range(test_generator.__len__())])
test_lb=np.concatenate([test_generator.next()[1] for i in range(test_generator.__len__())])
t_x, t_y = next(train_generator)
checkpoint1 = ModelCheckpoint(save_dir+"best_res.hdf5", monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
checkpoint2 = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
callbacks_list1 = [checkpoint1, checkpoint2]
def new_model():
img_in = Input(t_x.shape[1:])
model = DenseNet121(include_top= False ,
layers=tf.keras.layers,
weights='imagenet',
input_tensor= img_in,
input_shape= t_x.shape[1:],
pooling ='avg')
x = model.output
predictions = Dense(3, activation="softmax", name="predictions")(x)
model = Model(inputs=img_in, outputs=predictions)
return model
model1 = new_model()
opt = Adam(lr=3E-4)
model1.compile(optimizer = opt, loss = 'sparse_categorical_crossentropy',metrics = ['accuracy'])
history1 = model1.fit(train_generator,
validation_data = valid_generator,
epochs = 200,
callbacks=callbacks_list1)
model1.load_weights(save_dir+'best_res.hdf5')
model1.compile(optimizer = opt, loss = 'sparse_categorical_crossentropy',metrics = ['accuracy'])
y_pred1 = model1.predict(test_im)
pred_class1=np.argmax(y_pred1,axis=1)
print('accuracy = ',accuracy_score(pred_class1,test_lb))
cm = confusion_matrix(y_target=test_lb,y_predicted=pred_class1, binary=False)
print(cm)
fig, ax = plot_confusion_matrix(conf_mat=cm)
plt.gcf().savefig(save_dir+"resnet.png", dpi=144)
plt.close()
extract_outputs(cm, 'Resnet')
Here are some of the screenshots of the output from tensorflow 2.x
Basically the flow_from_directory by default shuffle the data and you didn't change it.
Just add shuffle=False to your test_generator should be enough.
Like
test_generator = train_datagen.flow_from_directory(
test_dir,
target_size=(IMG_SIZE, IMG_SIZE),
batch_size=16,
class_mode='sparse',
shuffle=False)
Or if you really want to have it shuffled then test_im and test_lb have to be in the same order. For example
test_im = []
test_lb = []
for im, lb in test_generator:
test_im.append(im)
test_lb.append(lb)
test_im = np.array(test_im)
test_lb = np.array(test_lb)
Related
I am new in TensorFlow. Code above is a python code, which trains NN for spoken language identification task.
How can I add ability to predict a single audio file and give predictes class(label) name, and also how can I save classes(labels) in model before output?
from glob import glob
import os
import yaml
import math
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Input, Concatenate
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler
def step_decay(epoch, lr):
drop = 0.94
epochs_drop = 2.0
lrate = lr * math.pow(drop, math.floor((1+epoch)/epochs_drop))
return lrate
main_dir = os.getcwd()
#Load configurations for startup
config_file_path = os.path.join(main_dir, "config.yaml")
config_file = open(config_file_path, 'r+')
main_config = yaml.safe_load(config_file)
languages = main_config["Languages"]
dataset_root_path = main_config["Target Root Path"]
batch_size = main_config["Batch Size"]
image_width = main_config["Image Width"]
image_height = main_config["Image Height"]
validation_split = main_config["Validation Split"]
initial_learning_rate = main_config["Initial Learning Rate"]
config_file.close()
categories = ["train", "test"]
train_path = dataset_root_path + categories[0]
num_classes = len(languages)
model_file = dataset_root_path + 'model.h5'
all_files = glob(train_path + '/*/*.png')
num_validation = len(all_files) * validation_split
num_train = len(all_files) - num_validation
validation_steps = int(num_validation / batch_size)
steps_per_epoch = int(num_train / batch_size)
print('Steps per Epoch: ' + str(steps_per_epoch))
print('Validation steps: ' + str(validation_steps))
image_data_generator = ImageDataGenerator(rescale=1./255, validation_split=validation_split)
train_generator = image_data_generator.flow_from_directory(train_path, batch_size=batch_size, class_mode='categorical', target_size=(image_height, image_width), color_mode='grayscale', subset='training')
validation_generator = image_data_generator.flow_from_directory(train_path, batch_size=batch_size, class_mode='categorical', target_size=(image_height, image_width), color_mode='grayscale', subset='validation')
#Model definition
img_input = Input(shape=(image_height, image_width, 1))
img_conc = Concatenate(axis=3, name='input_concat')([img_input, img_input, img_input])
model = InceptionV3(input_tensor=img_conc, weights=None, include_top=True, classes=2)
model.summary()
model.compile(optimizer=RMSprop(learning_rate=initial_learning_rate, clipvalue=2.0), loss='categorical_crossentropy', metrics=['accuracy'])
early_stopping = EarlyStopping(monitor='val_accuracy', mode='max', patience=10, restore_best_weights=True)
learning_rate_decay = LearningRateScheduler(step_decay, verbose=1)
history = model.fit(train_generator, validation_data=validation_generator, epochs=60, steps_per_epoch=steps_per_epoch, validation_steps=validation_steps, callbacks=[early_stopping, learning_rate_decay])
model.save(model_file)
Config file (yaml) for project looks like this:
Project: Language Identification
Languages:
hy: Armenian
as: Assamese
Target Root Path: /home/nn/Desktop/Language_Identification/Data/
Batch Size: 1
Image Width: 500
Image Height: 128
Validation Split: 0.1
Initial Learning Rate: 0.045
Currently, I am looking for ways to optimize my model (image classification for simple triangles and squares) and I've been stuck on understanding what these file paths are supposed to be referencing. Is it a path on your computer, or is it something else?
checkpoint_filepath = 'C:\tempfile'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
filepath=checkpoint_filepath,
save_weights_only=True,
monitor='val_accuracy',
mode='max',
save_best_only=True)
model_fit = model.fit(train_dataset,
steps_per_epoch = 5,
epochs = 30,
validation_data= validation_dataset,
callbacks=[reduce_lr, model_checkpoint_callback]
)
I've had the same issues also with Tensorboard. The code runs just fine when with the reduce_lr callback, but has issues when I add the modelCheckpoint callback.
Here is the rest of my code for reference:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras.optimizers import RMSprop
import tensorflow as tf
import matplotlib.pyplot as plt
import cv2
import os
import numpy as np
from keras.callbacks import ReduceLROnPlateau
from tensorflow.keras import layers
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import ModelCheckpoint
img = image.load_img('C:/Input_DataTS/Triangles/triangle.jpg')
plt.imshow(img)
cv2.imread('C:\Input_DataTS')
train = ImageDataGenerator(rescale= 1/255)
validation = ImageDataGenerator(rescale= 1/255)
train_dataset = train.flow_from_directory('C:\Input_DataTS',
target_size= (200,200),
batch_size= 3,
class_mode = 'binary')
validation_dataset = train.flow_from_directory('C:\Validiation_DataTS',
target_size= (200,200),
batch_size= 3,
class_mode = 'binary')
model = tf.keras.models.Sequential([ tf.keras.layers.Conv2D(16,(3,3),activation = 'relu', input_shape =(200, 200, 3)),
tf.keras.layers.MaxPool2D(2,2),
#
tf.keras.layers.Conv2D(32,(3,3),activation = 'relu'),
tf.keras.layers.MaxPool2D(2,2),
#
tf.keras.layers.Conv2D(64,(3,3),activation = 'relu'),
tf.keras.layers.MaxPool2D(2,2),
#
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(32,activation = 'relu'),
tf.keras.layers.Dense(1,activation= 'sigmoid')
])
model.compile(loss= 'binary_crossentropy',
optimizer = RMSprop(lr=0.001),
metrics =['accuracy'])
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
patience=5, min_lr=0.001)
checkpoint_filepath = 'C:\tempfile'
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
filepath=checkpoint_filepath,
save_weights_only=True,
monitor='val_accuracy',
mode='max',
save_best_only=True)
model_fit = model.fit(train_dataset,
steps_per_epoch = 5,
epochs = 30,
validation_data= validation_dataset,
callbacks=[reduce_lr, model_checkpoint_callback]
)
dir_path = 'C:/Testing_DataTS'
for i in os.listdir(dir_path ):
img = image.load_img(dir_path + '//' + i, target_size=(200,200))
plt.imshow(img)
plt.show()
X = image.img_to_array(img)
X = np.expand_dims(X,axis =0)
images = np.vstack([X])
val = model.predict(images)
if val == 0:
print("square")
else:
print("triangle")
Although this isn't needed for this model, I would like to learn how to do it properly for future cases. If anyone can help me with this issue, I'd greatly appreciate it. Thank you for your time!
import numpy as np
import pandas as pd
from pathlib import Path
import os.path
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.metrics import r2_score
from keras.applications.efficientnet import EfficientNetB3
import gc
from keras.models import Sequential
from keras import layers, models
from keras import Input
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers, initializers, regularizers, metrics
from keras.callbacks import ModelCheckpoint
import os
from glob import glob
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras import optimizers
from keras.layers import Conv2D,MaxPool2D,GlobalAveragePooling2D,AveragePooling2D
from keras.layers import Dense,Dropout,Activation,Flatten
import sys
# Repository source: https://github.com/qubvel/efficientnet
sys.path.append(os.path.abspath('../input/efficientnet/efficientnet-master/efficientnet-master/'))
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
image_dir = Path('/content/drive/MyDrive/processed')
filepaths = pd.Series(list(image_dir.glob(r'**/*.jpg')), name='Filepath').astype(str)
TS = pd.Series(sorted([int(l.split('TS_')[1].split('/pre')[0]) for l in filepaths]),name='TS').astype(np.int)
images = pd.concat([filepaths, TS], axis=1).sample(frac=1.0,
random_state=1).reset_index(drop=True)
image_df = images.sample(2020, random_state=1).reset_index(drop=True)
train_df, test_df = train_test_split(image_df, train_size=0.7, shuffle=True, random_state=1)
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
rescale=1./255,
validation_split=0.2
)
test_generator = tf.keras.preprocessing.image.ImageDataGenerator(
rescale=1./255
)
train_input = train_generator.flow_from_dataframe(
dataframe=train_df,
x_col='Filepath',
y_col='TS',
target_size=(256, 256),
color_mode='grayscale',
class_mode='raw',
batch_size=1,
shuffle=True,
seed=42,
subset='training'
)
val_input = train_generator.flow_from_dataframe(
dataframe=train_df,
x_col='Filepath',
y_col='TS',
target_size=(256, 256),
color_mode='grayscale',
class_mode='raw',
batch_size=1,
shuffle=True,
seed=42,
subset='validation'
)
test_input = test_generator.flow_from_dataframe(
dataframe=test_df,
x_col='Filepath',
y_col='TS',
target_size=(256, 256),
color_mode='grayscale',
class_mode='raw',
batch_size=1,
shuffle=False
)
inputs = tf.keras.Input(shape=(256, 256, 1))
x = tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(inputs)
x = tf.keras.layers.Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(x)
x = tf.keras.layers.MaxPool2D()(x)
x = tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(x)
x = tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(x)
x = tf.keras.layers.MaxPool2D()(x)
x = tf.keras.layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(x)
x = tf.keras.layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(x)
x = tf.keras.layers.MaxPool2D()(x)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(128, kernel_initializer='he_normal')(x)
x = tf.keras.layers.Dense(64, kernel_initializer='he_normal')(x)
outputs = tf.keras.layers.Dense(1, activation='linear')(x)
model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.compile(
optimizer='adam',
loss='mae'
)
history = model.fit(
train_input,
validation_data=val_input,
epochs=10,
callbacks=[
tf.keras.callbacks.EarlyStopping(
monitor='val_loss',
patience=5,
restore_best_weights=True
)
]
)
#Results
predicted_TS = np.squeeze(model.predict(test_input))
true_TS = test_input.labels
rmse = np.sqrt(model.evaluate(test_input, verbose=0))
print(" Test RMSE: {:.5f}".format(rmse))
r2 = r2_score(true_TS, predicted_TS)
print("Test R^2 Score: {:.5f}".format(r2))
null_rmse = np.sqrt(np.sum((true_TS - np.mean(true_TS))**2) / len(true_TS))
print("Null/Baseline Model Test RMSE: {:.5f}".format(null_rmse))
Image is alloy microstrure and TS is tensile strength of alloy.
I thought if I put images in this model this can predict scattered prediction values.
I can't understand why prediction result have almost same values.
And how can I reduce RMSE?
This results terrible RMSE
i'm doing this kaggle contest where i have to classify this x-ray in 3 category bacteria,virus or normal. Problem is that my accuracy is really low like 25% and loss is stuck at 0. I use a pretrained nn using weight that come from a dataset of xray chest images. This nn use keras.losses.CategoricalCrossentropy as loss function and keras.metrics.Accuracy() for accuracy
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
TRAIN_DIR = 'D:/tf/archiveBilanciato/chest_xray/train/PNEUMONIA'
TEST_DIR = 'D:/tf/archiveBilanciato/chest_xray/test'
IMG_SIZE = 224 #224 รจ quella migliore
image_size = (IMG_SIZE, IMG_SIZE)
batch_size = 32
LR = 1e-3
import os
nt = 0
for folder_name in ("bacteria", "normal","virus"):
folder_path = os.path.join("D:/tf/NeoArchiveBilanciato/chest_xray", folder_name)
for fname in os.listdir(folder_path):
fpath = os.path.join(folder_path, fname)
nt += 1
print("Totale immagini: %d" % nt)
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
"D:/tf/NeoArchiveBilanciato/chest_xray",
validation_split=0.2,
subset="training",
seed=1337,
color_mode='rgb',
image_size=image_size,
batch_size=batch_size,
)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
"D:/tf/NeoArchiveBilanciato/chest_xray",
validation_split=0.2,
subset="validation",
seed=1337,
color_mode='rgb',
image_size=image_size,
batch_size=batch_size,
)
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras.applications import DenseNet121
from keras.layers import GlobalAveragePooling2D,Dense
def pre_model():
base_model = tf.keras.applications.DenseNet121(
weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(14, activation="softmax")(x)
pre_model = keras.Model(inputs=base_model.input, outputs=predictions)
return pre_model
base_model = pre_model()
base_model.load_weights("D:/tf/nih_pretrained_chest_model.h5")
#print(base_model.summary())
from keras.layers import Input
#from tensorflow.keras.layers import Input
from kerassurgeon.operations import delete_layer, insert_layer
from keras.models import load_model
new_input = Input(shape=(IMG_SIZE, IMG_SIZE, 3), name='image_input')
model_imp = base_model
model_imp = Dense(3, activation='softmax')(model_imp.layers[-2].output)
base_model.trainable = False
mio_classificatore = Dense(1, activation='softmax')(base_model.layers[-2].output)
nuovo_model = keras.Model(inputs=base_model.input, outputs=mio_classificatore)
#print(nuovo_model.summary())
train = train_ds
val = val_ds
nuovo_model.compile(optimizer=keras.optimizers.Adam(),
loss=keras.losses.CategoricalCrossentropy(),
metrics=[keras.metrics.Accuracy()])
nuovo_model.fit(train,batch_size=32, epochs=14, validation_data=val)
how can I solve this problem?
There are 3 categories to predict, so the last layer in your model should contain 3 neurons(1 for each class), not 1 neuron
Try to change
mio_classificatore = Dense(1, activation='softmax')(base_model.layers[-2].output)
to
mio_classificatore = Dense(3, activation='softmax')(base_model.layers[-2].output)
Im new to Keras and I need your professional help.
I have used GridSearchCV to optmize my regression network. When i try to use the results, the newly created network is far worse in regards to the mean squared error than the one calculated by GridSearch.
The GridSearchCV code:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from time import time
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from tensorflow import keras
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, Dropout, LeakyReLU
from keras.utils import plot_model
from keras.optimizers import SGD, rmsprop, adam
from keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor
from keras.initializers import uniform, normal, glorot_uniform
from keras.losses import MAPE
#Data preprocessing
def get_data():
data = pd.read_csv("test.csv", sep=";", usecols=["rHsubLS","b","lowerSetpoint"])
test = data.loc[:,['rHsubLS','b']]
target = data.loc[:,'lowerSetpoint']
print(test.shape)
print(target.shape)
return test.astype(float), target.astype(float)
def split_data(test, target):
X_train, X_test, y_train, y_test = train_test_split(test, target)
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)
stdsc1 = StandardScaler()
train_data_std = stdsc1.fit_transform(X_train)
test_data_std = stdsc1.fit_transform(X_test)
y_train_1 = np.reshape(y_train, (-1, 1))
y_test_1 = np.reshape(y_test, (-1, 1))
train_target_std = stdsc1.fit_transform(y_train_1)
test_target_std = stdsc1.fit_transform(y_test_1)
return train_data_std, test_data_std, train_target_std, test_target_std
#Network Creation
def create_NN(optimizer='rmsprop', init='glorot_uniform', alpha=0.15, activation_func='tanh'):
NN_model = Sequential()
#input layer
NN_model.add(Dense(128, kernel_initializer=init, input_dim=2, activation=activation_func))
#hidden layers
NN_model.add(LeakyReLU(alpha=alpha))
NN_model.add(Dense(256, kernel_initializer=init, activation='relu'))
#output layer
NN_model.add(Dense(1, kernel_initializer=init, activation='linear'))
NN_model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=["mse", "mape"])
NN_model.summary()
return NN_model
#GridSearchCV
def train_NN(NN_model, train_data, train_target):
seed = 4
np.random.seed(seed)
model = KerasRegressor(build_fn=create_NN, verbose=1)
optimizers = ['rmsprop', 'adam', 'SGD']
inits = ['glorot_uniform', 'normal', 'uniform', 'he_uniform']
activation_funcs = ['tanh','relu','softmax']
epochs = [50, 100, 150]
batches = [50, 100, 500]
alphas = [0.15, 0.45, 0.3]
grid_parameter = dict(optimizer=optimizers, epochs=epochs, batch_size=batches, init=inits, alpha=alphas, activation_func=activation_funcs)#, dropout_rate=dropout)
if __name__ == '__main__':
grid = GridSearchCV(estimator=model, scoring='neg_mean_squared_error' , param_grid=grid_parameter, verbose=1, cv=3)
grid_results = grid.fit(train_data, train_target, use_multiprocessing=True, shuffle=True, workers=8)
print("Best: %f using %s" % (grid_results.best_score_, grid_results.best_params_))
means = grid_results.cv_results_['mean_test_score']
stds = grid_results.cv_results_['std_test_score']
params = grid_results.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
print("%f (%f) with: %r" % (mean, stdev, param))
try:
test, target = get_data()
train_data, test_data, train_target, test_target = split_data(test, target)
print("Data split\n")
NN_model = create_NN()
train_NN(NN_model, train_data, train_target)
except (KeyboardInterrupt, SystemExit):
raise
The results of the GridSearch:
Best: -0.000064 using {'activation_func': 'relu', 'alpha': 0.3, 'batch_size': 50, 'epochs': 150, 'init': 'he_uniform', 'optimizer': 'adam'}
When I try to reproduce this network with this code:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from time import time
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from tensorflow import keras
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, Dropout, PReLU, LeakyReLU
from keras.utils import plot_model
from keras.optimizers import SGD
from keras.losses import MeanAbsolutePercentageError
def get_data():
data = pd.read_csv("test.csv", sep=";", usecols=["rHsubLS","b","lowerSetpoint"])
test = data.loc[:,['rHsubLS','b']]
target = data.loc[:,'lowerSetpoint']
print(test.shape)
print(target.shape)
return test.astype(float), target.astype(float)
def split_data(test, target):
X_train, X_test, y_train, y_test = train_test_split(test, target)
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)
stdsc1 = StandardScaler()
train_data_std = stdsc1.fit_transform(X_train)
test_data_std = stdsc1.fit_transform(X_test)
y_train_1 = np.reshape(y_train, (-1, 1))
y_test_1 = np.reshape(y_test, (-1, 1))
train_target_std = stdsc1.fit_transform(y_train_1)
test_target_std = stdsc1.fit_transform(y_test_1)
return train_data_std, test_data_std, train_target_std, test_target_std
def create_NN():
NN_model = Sequential()
#input layer
NN_model.add(Dense(128, input_dim=2, kernel_initializer='he_uniform', activation='relu'))
#hidden layers
NN_model.add(LeakyReLU(0.3))
NN_model.add(Dense(256, kernel_initializer='he_uniform', activation='relu'))
#output layer
NN_model.add(Dense(1, activation='linear'))
keras.backend.set_epsilon(1)
NN_model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse','mape'])
NN_model.summary()
return NN_model
def train_NN(NN_model, train_data, train_target, test_data, test_target):
history = NN_model.fit(train_data, train_target, epochs=150, shuffle=True, batch_size=50, verbose=1, use_multiprocessing=True)
return history
def test_NN(NN_model, test_data, test_target, train_data, train_target):
mean_test = NN_model.evaluate(test_data, test_target, verbose=1)
mean_train = NN_model.evaluate(train_data, train_target, verbose=1)
return mean_test, mean_train
try:
seed = 4
np.random.seed(seed)
test, target = get_data()
train_data, test_data, train_target, test_target = split_data(test, target)
print("Data split\n")
NN_model = create_NN()
print("Neural Network created\n")
history = train_NN(NN_model, train_data, train_target, test_data, test_target)
mean_test, mean_train = test_NN(NN_model, test_data, test_target, train_data, train_target)
print("Durchschnittliche Abweichung Training: ", mean_train)
print("Durchschnittliche Abweichung Test: ", mean_test)
print(NN_model.metrics_names)
NN_model.save('Regelung_v1.h5')
print("Neural Network saved")
except (KeyboardInterrupt, SystemExit):
raise
I get this result:
mse loss training data: 0.028168134637475015;
mse loss test data: 0.028960488473176955
The mean average percentage error is at about 9%. This result is not what i expected.
Where is my mistake?
Thank you for your help in advance
Have a nice day!
PC Specs:
Intel i5 4570
16GB RAM + 16 GB page file
Nvidia GTX 1070
3 TB HDD
Software:
Windows 10
Geforce Game ready driver 451.48
Tensorflow 2.2.0
Keras 2.3.1
Sklearn 0.23.1
Cuda 10.1
Python 3.7.7
Edit: Here are a few lines of the test.csv
TIMESTAMP;rHsubLS;b;lowerSetpoint
20200714091423000.00000000000;2.28878288783;-0.74361743617;-0.27947195702
20200714091423000.00000000000;0.13274132741;-0.94552945529;-0.32351276857
20200714091423000.00000000000;1.85753857539;0.77844778448;0.22244954249
20200714091423000.00000000000;1.31896318963;0.44518445184;0.33573301999
20200714091423000.00000000000;2.55885558856;-0.77792777928;-0.28837806344
The Output layer had its initialization weight missing:
NN_model.add(Dense(1, kernel_initializer='he_uniform', activation='linear'))