Training a neural network to learn polynomial equation - python

I have created a data set of y ~ x**2
However, when I train a neural network, it just can't fit a quadratic equation.
This is my model.
model2 = tf.keras.models.Sequential(
[tf.keras.layers.Dense(100, activation='relu'),
tf.keras.layers.Dense(100, activation='relu'),
tf.keras.layers.Dense(100, activation='relu'),
tf.keras.layers.Dense(100, activation='relu'),
tf.keras.layers.Dense(100, activation='relu'),
tf.keras.layers.Dense(100, activation='relu'),
tf.keras.layers.Dense(1)]
)
loss = tf.keras.losses.mse
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
model2.compile(optimizer=optimizer, loss=loss, metrics=tf.metrics.RootMeanSquaredError())
model2.fit(tf.expand_dims(X_train, -1), y_train, epochs=1000, verbose=1)
My thought process of above model is that I thought that each relu activation will fit a local linear line, and slowly connect all the neurons to form a quadratic line.
In the end, I managed fit it by using an activation of lambda x:x**2 on the output layer, However, that is because I know the function is an x**2.
So my question is, without knowing the true function, how do I train a neural network to fit a non-linear curve?

Your code works fine for me.
Note, I use larger learning rate and an early stop (with 300 patience of total 2000 epochs).
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
train_x = np.linspace(0, 80, 160)
train_y = train_x**2
test_x = np.linspace(80, 100, 40)
test_y = test_x**2
model2 = tf.keras.models.Sequential(
[tf.keras.layers.Dense(100, activation='relu'),
tf.keras.layers.Dense(100, activation='relu'),
tf.keras.layers.Dense(100, activation='relu'),
tf.keras.layers.Dense(100, activation='relu'),
tf.keras.layers.Dense(100, activation='relu'),
tf.keras.layers.Dense(100, activation='relu'),
tf.keras.layers.Dense(1)]
)
loss = tf.keras.losses.mse
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)
early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=300, restore_best_weights=True)
model2.compile(optimizer=optimizer, loss=loss, metrics=tf.metrics.RootMeanSquaredError())
model2.fit(tf.expand_dims(train_x, -1), train_y, epochs=2000, verbose=1, callbacks=[early_stop])
train_pred = model2.predict(train_x)
test_pred = model2.predict(test_x)
plt.scatter(train_x, train_y, c='blue', label='train x')
plt.scatter(test_x, test_y, c='green', label='test x')
plt.scatter(train_x, train_pred, c='red', label='train pred')
plt.scatter(test_x, test_pred, c='orange', label='test pred')
plt.legend()
plt.show()
Training and test results photo here

Related

Wrong Confusion Matrix Display - CCN Image Keras model

am new to Deep Learning and using CNNs. I am trying to predict the quality of coffee seeds using images where good quality is Coffee AA and the other is Poor Quality. I built the model tried to evaluate its performance by using a Confusion Matrix on a test data(test_ds) but it seems to give me a one column prediction instead of a diagonal form. I don't know where the issue is coming from whether its how i built the model or how am trying to build the confusion matrix. Kindly help. The rest of the code is here https://colab.research.google.com/drive/1cNgAbCy8e4lG-dCepVxIpFx1Wfeof5wb?usp=sharing
Here is part of the Code:
data = tf.keras.preprocessing.image_dataset_from_directory(
"/data/train2 cofi",
shuffle=True,
image_size= (IMAGE_SIZE,IMAGE_SIZE),
batch_size= BATCH_SIZE)
img_height = 256
img_width = 256
batch_size = 32
test_ds = tf.keras.preprocessing.image_dataset_from_directory(
"/data/test2 cofi",
image_size=(img_height, img_width),
batch_size=batch_size)
#preparing the model layers
input_shape = (BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, CHANNELS)
n_classes = 1
model = models.Sequential([
resize_and_rescale,
data_augmentation,
layers.Conv2D(32, (3,3), activation='relu', input_shape = input_shape), # 32-layers, (3,3)-filter size,activation function
layers.MaxPooling2D((2, 2)),
layers.Conv2D(64, kernel_size = (3,3), activation='relu'),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(64, (3, 3), activation='relu'),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(64, (3, 3), activation='relu'),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(64, (3, 3), activation='relu'),
layers.MaxPooling2D((2, 2)),
layers.Flatten(),
layers.Dense(64, activation='relu'),
layers.Dense(n_classes, activation='sigmoid') # softmax helps to normalize the probabilities in the dense layer
])
model.build(input_shape=input_shape)
#Training the model
metrics = [TruePositives(name='TruePov'), FalsePositives(name='FalsePov'),
TrueNegatives(name='TrueNeg'), FalseNegatives(name='FalseNeg'),
BinaryAccuracy(name='accuracy'), Precision(name='Precision'), Recall(name='recall'), AUC(name='auc')]
# Using the adam optimizer to track the gradient descent in the training process
model.compile(
optimizer= Adam(learning_rate = 0.01),
loss = BinaryCrossentropy(),
metrics=metrics
)
history = model.fit(
train_ds,
epochs=EPOCHS,
batch_size=BATCH_SIZE,
verbose=1,
validation_data=val_ds
)
# Building a Confusion Matrix
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
sns.set_style('darkgrid')
classes=test_ds.class_names # ordered list of class names
class_names = ['Arabica AA', 'Poor Arabica Quality']
ytrue=[]
for images, label in test_ds:
for e in label:
ytrue.append(classes[e]) # list of class names associated with each image file in test dataset
ypred=[]
errors=0
count=0
preds=model.predict(test_ds, verbose=1) # predict on the test data
for i, p in enumerate(preds):
count +=1
index=np.argmax(p) # get index of prediction with highest probability
klass=classes[index]
ypred.append(klass)
if klass != ytrue[i]:
errors +=1
acc= (count-errors)* 100/count
msg=f'there were {count-errors} correct predictions in {count} tests for an accuracy of {acc:6.2f} % '
print(msg)
ypred=np.array(ypred)
ytrue=np.array(ytrue)
if len(classes)<= 2: # if more than 30 classes plot is not useful to cramed
# create a confusion matrix
cm = confusion_matrix(ytrue, ypred )
length=len(classes)
if length<8:
fig_width=5
fig_height=5
else:
fig_width= int(length * .4)
fig_height= int(length * .4)
plt.figure(figsize=(fig_width, fig_height))
sns.heatmap(cm, annot=True, vmin=0, fmt='g', cmap='Blues', cbar=False)
plt.xticks(np.arange(length)+.3, classes, rotation= 90)
plt.yticks(np.arange(length)+.3, classes, rotation=0)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.show()
clr = classification_report(ytrue, ypred, target_names=class_namz)
print("Classification Report:\n----------------------\n", clr)
The output of the Confusion Matrix
enter image description here

custom metric function with additional parameter

def custom_metric(y_prem):
def score_func(y_true, y_pred):
diff = y_pred - y_true
return tf.reduce_sum(diff[y_prem>=y_pred])
return score_func
model = tf.keras.models.Sequential([
tf.keras.layers.Dense(32, input_shape=[len(X_train[0, :])], activation='tanh'),
tf.keras.layers.Dense(8, input_shape=[len(X_train[0, :])], activation='linear'),
tf.keras.layers.Dense(4, input_shape=[len(X_train[0, :])], activation='tanh'),
tf.keras.layers.Dense(1, activation='relu'),
])
model.compile(optimizer='adam', loss='mean_squared_error', metrics=[custom_metric(y_prem)])
model.summary()
model.fit(X_train_minmax, y_train, epochs=30, batch_size=len(y_train))
y_prem and y_train are both the same size(50646)
I have tried to define this custom metric function where y_prem is a vector in the size of the prediction. I want to sum the diff between the pred and the true only on the indexes where the pred is lower than y_prem but when I trained the model I received an error message:
File "C:/Users/zehavi kelman/PycharmProjects/Accident_predicting/simpego_test.py", line 61, in score_func *
return K.sum(diff[y_prem>=y_pred])
ValueError: Shapes (50646, 1) and (50646, 50646) are incompatible
How can I fix that?
I am not sure of what you want to do but I implemented a reproducible example that do not output an error message (pay attention to the x and y shapes):
import tensorflow as tf
x = tf.random.uniform(shape=[50646, 5], minval=0, maxval=1)
y = tf.random.uniform(shape=[50646, 1], minval=0, maxval=1)
y_prem = tf.random.uniform(shape=[50646, 1], minval=0, maxval=1)
def custom_metric(y_prem):
def score_func(y_true, y_pred):
diff = y_pred - y_true
return tf.reduce_sum(diff[y_prem>=y_pred])
return score_func
model = tf.keras.models.Sequential([
tf.keras.layers.Dense(32, input_shape=[len(x[0, :])], activation='tanh'),
tf.keras.layers.Dense(8, activation='linear'),
tf.keras.layers.Dense(4, activation='tanh'),
tf.keras.layers.Dense(1, activation='relu'),
])
model.compile(optimizer='adam', loss='mean_squared_error', metrics=[custom_metric(y_prem)])
model.summary()
model.fit(x, y, epochs=30, batch_size=len(y))

Is there a way to print the calculated max gradient of each layer for a given mini-batch?

I am implementing a fully-connected model for classification using the MNIST dataset. A part of the code is the following:
model=tf.keras.models.Sequential([
tf.keras.layers.Input(shape=(28, 28, 1)),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(32, activation='relu'),
tf.keras.layers.Dense(32, activation='relu'),
tf.keras.layers.Dense(32, activation='relu'),
tf.keras.layers.Dense(10, activation='softmax')
])
model.compile(
loss='categorical_crossentropy',
optimizer=tf.optimizers.SGD(),
metrics=["accuracy"]
)
model.fit(
x_train,
y_train,
batch_size=64,
epochs=3,
validation_data=(x_test, y_test)
)
Is there a way to print the max gradient for each layer for a given mini-batch?
Define a custom training loop instead of invoking compile() and fit().
optimizer=tf.keras.optimizers.Adam(0.001)
loss=tf.keras.losses.SparseCategoricalCrossentropy()
for x, y in zip(x_train, y_train):
with tf.GradientTape() as tape:
predictions = model(x)
loss_value = loss(y, predictions)
gradients = tape.gradient(loss_value, model.trainable_weights)
grads_and_vars = zip(gradients, model.trainable_weights)
optimizer.apply_gradients(grads_and_vars)
for layer in range(0, 4): # for 4 layers
print('max gradient of layer={}, kernel={}, bias={}'.format(
layer, gradients[layer].numpy().max(), gradients[layer*2+1].numpy().max()))
Check this out : About Keras

Need help in understanding shape error while Building a CNN with sklearn and keras?

I try to load my dataset and design a CNN but when I try to train my model this error came up and I want to know how can I fix this?
ValueError: The channel dimension of the inputs should be defined. Found None.
and this is the way I split data
xtrain, xtest, ytrain, ytest = trian_test_split(images_total,
image_labels,
stratify=image_labels,
random_state=1234,
test_size=0.2)
xvalid, xtest, yvalid, ytest= trian_test_split(xtest,
ytest,
stratify=ytest,
random_state=1234,
test_size=0.5)
def read_img(path, label):
file = tf.io.read_file(path)
img = tf.image.decode_png(file)
img = tf.image.resize(img, (32, 32))
return img, label
train_dataset = tf.data.Dataset.from_tensor_slices((xtrain, ytrain))
train_dataset = train_dataset.map(read_img).batch(batch_size)
valid_dataset = tf.data.Dataset.from_tensor_slices((xvalid, yvalid))
valid_dataset = valid_dataset.map(read_img).batch(batch_size)
test_dataset = tf.data.Dataset.from_tensor_slices((xtest, ytest))
test_dataset = test_dataset.map(read_img).batch(batch_size)
And this is my model
num_classes = 26
model = tf.keras.Sequential([
tf.keras.layers.experimental.preprocessing.Resizing(32, 32),
tf.keras.layers.experimental.preprocessing.Rescaling(1./255),
tf.keras.layers.Conv2D(32, 3, activation='relu'),
tf.keras.layers.MaxPooling2D(),
tf.keras.layers.Conv2D(32, 3, activation='relu'),
tf.keras.layers.MaxPooling2D(),
tf.keras.layers.Conv2D(32, 3, activation='relu'),
tf.keras.layers.MaxPooling2D(),
layers.Dropout(0.2),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Dense(num_classes)
])
model.compile(
optimizer='adam',
loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])
model.fit(train_dataset, epochs=5, validation_data=valid_dataset)
when I try to fit this model the error came up and I can't fix this
You haven't passed y label to model while calling, model needs both and x and y label to train accordingly,
Here i added an example
please take a pause and go through videos of keras architecture and documentation in keras io these would give more information about deep learning modelling
once after you got familiar with all, jump to modelling
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=3)

Keras - min and max in predicting Y

How can i set min and max values so the model only predicts Y output between them?
model = Sequential([
Dense(units=100, input_shape=(3, ), activation='tanh'),
Dense(units=18, activation='tanh'),
Dense(units=1, activation='tanh'),
Dense(units=1, activation='softmax')
])
opt = keras.optimizers.SGD(learning_rate=0.0001, momentum=0.9999, nesterov=False)
model.compile(optimizer=opt, loss='mae', metrics=[tf.keras.metrics.MeanAbsoluteError()])
model.fit(df, target, epochs=300, shuffle=False, verbose=1, callbacks=[estop, rlronp], validation_split=0.2)
tf.clip_by_value:
tf.clip_by_value(
t, clip_value_min, clip_value_max, name=None
)
In your code:
Dense(units=1, activation=lambda x: tf.clip_by_value(x, -5, 5))

Categories

Resources