I applied PCA to MNIST, reducing the dimensionality to 32. Then, to test it, I built a simple classification network. The training accuracy is good (96%), but the test accuracy is only 2%.
So what's wrong?
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import tensorflow.keras.layers as layers
from tensorflow.keras.models import Sequential
import numpy as np
(x,y),(x2,y2) = mnist.load_data()
y = tf.keras.utils.to_categorical(y)
y2 = tf.keras.utils.to_categorical(y2)
def pca(x):
    x = np.reshape(x, (x.shape[0], 784)).astype("float32") / 255.
    mean = x.mean(axis=1)
    #print(mean)
    #print(mean[:,None])
    x -= mean[:,None]
    s, u, v = tf.linalg.svd(x)
    s = tf.linalg.diag(s)
    k = 32  # DIM_REDUCED
    pca = tf.matmul(u[:,0:k], s[0:k,0:k])
    #print(pca)
    #print(pca.shape)
    return pca
x = pca(x)
x2 = pca(x2)
## BUILD A SUPER SIMPLE CLASSIFIC. NET
model = Sequential()
model.add(layers.Dense(32, activation="relu", input_shape=(32,)))
model.add(layers.Dense(16, activation="relu"))
model.add(layers.Dense(10, activation="softmax"))
model.compile(loss = "categorical_crossentropy", optimizer = "adam", metrics = ["acc"])
model.fit(x,y, epochs = 5, verbose = 1, batch_size = 64, validation_data = (x2,y2))
OUTPUT:
Epoch 5/5
60000/60000 [==============================] - 1s 23us/sample - loss: 0.1278 - acc: 0.9626 - val_loss: 11.0141 - val_acc: 0.0202
First, you are applying a different "pca" to each set. The eigenvectors and eigenvalues computed from the training set can differ from those of the test set, so the two sets end up projected into different coordinate systems.
Second, you are using the SVD to obtain the principal components, but those components are not the representation you want to feed the network. Use the principal axes as a projection matrix to obtain a better, compressed representation of the data: fit the mean and the axes on the training set, then project both sets with them.
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import tensorflow.keras.layers as layers
from tensorflow.keras.models import Sequential
import numpy as np
(x,y),(x2,y2) = mnist.load_data()
y = tf.keras.utils.to_categorical(y)
y2 = tf.keras.utils.to_categorical(y2)
def pca(x):
    x = np.reshape(x, (x.shape[0], 784)).astype("float32") / 255.
    mean = x.mean(axis=0)      # per-feature mean, not per-sample
    x -= mean[None, :]
    s, u, v = tf.linalg.svd(x)
    k = 32  # DIM_REDUCED
    projM = v[:, 0:k]          # principal axes, instead of tf.matmul(u[:,0:k], s[0:k,0:k])
    return mean, projM

def apply_pca(mean, projM, x):
    x = np.reshape(x, (x.shape[0], 784)).astype("float32") / 255.
    x -= mean[None, :]         # reuse the training-set mean
    return tf.matmul(x, projM)
mean, projM = pca(x)
x = apply_pca(mean, projM, x)
x2 = apply_pca(mean, projM, x2)
## BUILD A SUPER SIMPLE CLASSIFIC. NET
model = Sequential()
model.add(layers.Dense(32, activation="relu", input_shape=(32,)))
model.add(layers.Dense(16, activation="relu"))
model.add(layers.Dense(10, activation="softmax"))
model.compile(loss = "categorical_crossentropy", optimizer = "adam", metrics = ["acc"])
model.fit(x,y, epochs = 5, verbose = 1, batch_size = 64, validation_data = (x2,y2))
I'm a beginner with generative adversarial networks and representation learning. My goal is to train a simple GAN using dense layers to generate Fashion-MNIST images. I've tried many examples online that gave successful outputs for other people, but in my case they all produce the same output: a blank white image with some dots in particular areas. I also checked the activation functions and they seem fine. I run my code in Google Colab. Here is my code:
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
from tensorflow.keras.datasets import fashion_mnist
(X_train, _), (X_test, _) = fashion_mnist.load_data()
def plot_images(images, n_cols):
    n_rows = (len(images) - 1) // n_cols + 1
    if images.shape[-1] == 1:
        images = np.squeeze(images, axis=-1)
    plt.figure(figsize=(n_cols, n_rows))
    for index, image in enumerate(images):
        plt.subplot(n_rows, n_cols, index + 1)
        plt.imshow(image, cmap="binary")
        plt.axis("off")
    plt.show()
codings_size = 100
generator = tf.keras.models.Sequential([
    tf.keras.layers.Dense(100, activation="selu", input_shape=[codings_size]),
    tf.keras.layers.Dense(150, activation="selu"),
    tf.keras.layers.Dense(28 * 28, activation="sigmoid"),
    tf.keras.layers.Reshape([28, 28])
])
discriminator = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=[28, 28]),
    tf.keras.layers.Dense(150, activation="selu"),
    tf.keras.layers.Dense(100, activation="selu"),
    tf.keras.layers.Dense(1, activation="sigmoid")
])
gan = tf.keras.models.Sequential([generator, discriminator])
discriminator.compile(loss="binary_crossentropy", optimizer="rmsprop")
discriminator.trainable = False
gan.compile(loss="binary_crossentropy", optimizer="rmsprop")
batch_size = 32
n_epochs = 50
for epoch in range(n_epochs):
    print("Epoch {}/{}".format(epoch + 1, n_epochs))
    for i in range(len(X_train) // batch_size):
        X_batch = X_train[i: i+batch_size]
        noise = np.random.normal(0, 1, size=[batch_size, codings_size])
        generated_images = generator(noise)
        X_fake_and_real = np.concatenate([generated_images, X_batch], axis=0)
        y1 = np.array([[0.]] * batch_size + [[1.]] * batch_size)
        discriminator.trainable = True
        discriminator.train_on_batch(X_fake_and_real, y1)
        noise = np.random.normal(0, 1, size=[batch_size, codings_size])
        y2 = np.array([[1.]] * batch_size)
        discriminator.trainable = False
        gan.train_on_batch(noise, y2)
    plot_images(generated_images, 8)
This sounds like mode collapse, where the generator keeps producing the same or very similar outputs and the discriminator fails to reject them. It could also be due to some problem in your implementation.
Try training the discriminator for one step (or some chosen K steps) before each generator update.
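As an illustration of that schedule, here is a hedged sketch of the inner loop with a hypothetical k_disc parameter controlling how many discriminator updates happen per generator update (everything else as in your code):

k_disc = 3  # hypothetical: discriminator steps per generator step
for i in range(len(X_train) // batch_size):
    discriminator.trainable = True
    for _ in range(k_disc):
        idx = np.random.randint(0, len(X_train), batch_size)
        noise = np.random.normal(0, 1, size=[batch_size, codings_size])
        X_fake_and_real = np.concatenate([generator(noise), X_train[idx]], axis=0)
        y1 = np.array([[0.]] * batch_size + [[1.]] * batch_size)
        discriminator.train_on_batch(X_fake_and_real, y1)
    discriminator.trainable = False
    noise = np.random.normal(0, 1, size=[batch_size, codings_size])
    gan.train_on_batch(noise, np.array([[1.]] * batch_size))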
Keras has many simple GAN examples. You can modify the DCGAN implementation below to work with Fashion-MNIST:
https://keras.io/examples/generative/dcgan_overriding_train_step/
https://keras.io/examples/generative/
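For the linked DCGAN example, switching datasets is essentially just the loading step; a sketch, assuming you keep the tutorial's 28x28x1 preprocessing:

from tensorflow.keras.datasets import fashion_mnist
import numpy as np

# load Fashion-MNIST instead of MNIST, then preprocess as in the tutorial
(x_train, _), (x_test, _) = fashion_mnist.load_data()
all_images = np.concatenate([x_train, x_test])
all_images = all_images.astype("float32") / 255.0
all_images = np.reshape(all_images, (-1, 28, 28, 1))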
I am new to machine learning and the Keras library, and I wrote the following CNN code for regression.
%matplotlib inline
from __future__ import division
import numpy as np
from numpy.random import rand
import matplotlib.pyplot as plt
def initial_spin_state(N):
    state = np.random.choice((0.11111, 0.99999), (N, N))
    return state

def metropolis_algorithm(config, beta):
    N = len(config)
    for i in range(N):
        for j in range(N):
            a = np.random.randint(0, N)
            b = np.random.randint(0, N)
            s = config[a, b]
            near = config[(a+1)%N, b] + config[a, (b+1)%N] + config[(a-1)%N, b] + config[a, (b-1)%N]
            delta = 2 * s * near
            if delta < 0:
                s *= -1
            elif rand() < np.exp(-delta * beta):
                s *= -1
            config[a, b] = s
    return config

def get_energy(config):
    energy = 0
    N = len(config)
    for i in range(N):
        for j in range(N):
            S = config[i, j]
            near = config[(i+1)%N, j] + config[i, (j+1)%N] + config[(i-1)%N, j] + config[i, (j-1)%N]
            energy += near * S
    return energy
x_train = []
y_train = []
for i in range(50000):
    config = initial_spin_state(16)
    energy = get_energy(config)
    x_train.append(config)
    y_train.append(energy)
x_train = np.array(x_train)
y_train = np.array(y_train)
print(x_train.shape)
print(y_train.shape)
x_test = []
y_test = []
for j in range(20000):
    config = initial_spin_state(16)
    energy = get_energy(config)
    x_test.append(config)
    y_test.append(energy)
x_test = np.array(x_test)
y_test = np.array(y_test)
print(x_test.shape)
print(y_test.shape)
x_train = x_train.reshape(50000, 16, 16, 1)
x_test = x_test.reshape(20000, 16, 16, 1)
print(x_train.shape)
print(x_test.shape)
import numpy as np
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.optimizers import Adadelta
from keras.regularizers import l2
model = Sequential()
model.add(Conv2D(32, (2, 2), input_shape = (16, 16, 1), activation = 'relu'))
model.add(MaxPooling2D(2,2))
model.add(Conv2D(16, (2, 2), activation = 'relu'))
model.add(MaxPooling2D(2,2))
model.add(Flatten())
model.add(Dense(512, activation = 'relu'))
#model.add(Dense(1024, activation = 'relu'))
model.add(Dense(1))
np.random.seed(0)
model.summary()
model.compile(loss = "mse", metrics = ['accuracy'], optimizer = 'adam')
%%time
hist = model.fit(x_train, y_train, epochs = 200, batch_size = 500,
                 validation_data = (x_test, y_test), verbose = 2)
import matplotlib.pyplot as plt
plt.plot(hist.history['acc'], '-b', label = "training")
plt.plot(hist.history['val_acc'], 'r:', label = "test")
plt.legend()
plt.grid("on")
plt.show()
This code takes an image (an Ising configuration) as input and outputs a continuous energy value, so given a configuration, the CNN should predict its energy.
The problem is that when I train the CNN, the training loss and validation loss decrease very slowly, and the training and validation accuracies increase very slowly. Sometimes only the training accuracy increases while the validation accuracy does not.
What's wrong with my code?
First of all, since you are doing regression problem, I dont think its a good idea to use acc as your metric, instead, you might consider using mean absolute error mae as your matric.
The loss you are using is mse (mean squared error), so the value would be pretty large especially when you are not normalising your y values. However, after running ~25 epochs of your provided code, the validation loss dropped to 290.xx with 13.xx of mae (And it is not converging yet). And I tried to use your model to predict some validation data, it works fine. Maybe you should test your model before you assume there is something went wrong.
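Concretely, the two suggestions look something like this; a sketch assuming the arrays and model from the question, with an illustrative normalisation constant:

# report MAE for the regression head, and put the targets on a smaller
# scale so the MSE loss is easier to interpret
y_scale = np.abs(y_train).max()   # illustrative choice of scale
model.compile(loss='mse', metrics=['mae'], optimizer='adam')
model.fit(x_train, y_train / y_scale, epochs=25, batch_size=500,
          validation_data=(x_test, y_test / y_scale), verbose=2)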
I am new to generative adversarial networks (GANs) and neural networks in general.
Using Python and Keras, I want to apply GANs to time-series prediction. My final goal is also to detect anomalies in the time series.
I'm using the popular Air Passengers time-series data.
Here is the code I am using for time-series prediction. However, the result I get using GANs is a bit uninterpretable for me, and I think it needs some improvement.
Thanks for your help.
from __future__ import print_function, division
from keras.datasets import mnist
from keras.layers import Input, Dense, Reshape, Flatten, Dropout
from keras.layers import BatchNormalization, Activation, ZeroPadding2D
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D
from keras.models import Sequential, Model
from keras.optimizers import Adam
#import matplotlib.pyplot as plt
from matplotlib import pyplot as plt
import pandas as pd
import sys
import numpy as np
class GAN():
    def __init__(self):
        self.data_rows = 1
        self.data_cols = 1
        self.data_shape = (self.data_rows, self.data_cols)
        self.latent_dim = 48
        optimizer = Adam(0.0002, 0.5)
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss='binary_crossentropy',
                                   optimizer=optimizer,
                                   metrics=['accuracy'])
        self.generator = self.build_generator()
        z = Input(shape=(self.latent_dim,))
        data = self.generator(z)
        self.discriminator.trainable = False
        validity = self.discriminator(data)
        self.combined = Model(z, validity)
        self.combined.compile(loss='binary_crossentropy', optimizer=optimizer)

    def build_generator(self):
        model = Sequential()
        model.add(Dense(256, input_dim=self.latent_dim))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(512))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(1024))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(np.prod(self.data_shape), activation='linear'))
        model.add(Reshape(self.data_shape))
        model.summary()
        noise = Input(shape=(self.latent_dim,))
        data = model(noise)
        return Model(noise, data)

    def build_discriminator(self):
        model = Sequential()
        model.add(Flatten(input_shape=self.data_shape))
        model.add(Dense(512))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(256))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(1, activation='sigmoid'))
        model.summary()
        data = Input(shape=self.data_shape)
        validity = model(data)
        return Model(data, validity)
    def train(self, epochs, batch_size=128, sample_interval=50):
        df = pd.read_csv("AirPassengers.csv")
        ts = df[["#Passengers"]]
        X_train = ts.values  # .as_matrix() was removed in recent pandas
        # Rescale -1 to 1
        #X_train = X_train / 127.5 - 1.
        X_train = np.expand_dims(X_train, axis=2)  # axis=3 is out of range for a 2-D array
        print("X_train")
        print(X_train.shape)

        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))
        for epoch in range(epochs):
            # ---------------------
            #  Train Discriminator
            # ---------------------
            idx = np.random.randint(0, X_train.shape[0], batch_size)
            data_s = X_train[idx]
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
            gen_data = self.generator.predict(noise)
            d_loss_real = self.discriminator.train_on_batch(data_s, valid)
            d_loss_fake = self.discriminator.train_on_batch(gen_data, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------
            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
            # Train the generator (to have the discriminator label samples as valid)
            g_loss = self.combined.train_on_batch(noise, valid)

            print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[1], g_loss))
            if epoch % sample_interval == 0:
                self.plot_gan_result(epoch, batch_size)

        c = X_train.reshape(144, 1)
        fig, axs = plt.subplots()
        axs.plot(c, color="blue", label='true')

    def plot_gan_result(self, epoch, batch_size):
        noise = np.random.normal(0, 1, (batch_size, self.latent_dim))
        gen_data = self.generator.predict(noise)
        b = gen_data.reshape(24, 1)
        fig, axs = plt.subplots()
        print("noise shape")
        print(noise.shape)
        print(noise[0])
        axs.plot(b, color="red", label='generated')

if __name__ == '__main__':
    gan = GAN()
    gan.train(epochs=30, batch_size=24, sample_interval=200)
After spending days failing to use a neural network for Q-learning, I decided to go back to basics and do a simple function approximation to see whether everything was working correctly and how some parameters affect the learning process.
Here is the code that I came up with
from keras.models import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt
import random
import numpy
from sklearn.preprocessing import MinMaxScaler
regressor = Sequential()
regressor.add(Dense(units=20, activation='sigmoid', kernel_initializer='uniform', input_dim=1))
regressor.add(Dense(units=20, activation='sigmoid', kernel_initializer='uniform'))
regressor.add(Dense(units=20, activation='sigmoid', kernel_initializer='uniform'))
regressor.add(Dense(units=1))
regressor.compile(loss='mean_squared_error', optimizer='sgd')
#regressor = ExtraTreesRegressor()
N = 5000
X = numpy.empty((N,))
Y = numpy.empty((N,))
for i in range(N):
    X[i] = random.uniform(-10, 10)
X = numpy.sort(X).reshape(-1, 1)
for i in range(N):
    Y[i] = numpy.sin(X[i])
Y = Y.reshape(-1, 1)
X_scaler = MinMaxScaler()
Y_scaler = MinMaxScaler()
X = X_scaler.fit_transform(X)
Y = Y_scaler.fit_transform(Y)
regressor.fit(X, Y, epochs=2, verbose=1, batch_size=32)
#regressor.fit(X, Y.reshape(5000,))
x = numpy.mgrid[-10:10:100*1j]
x = x.reshape(-1, 1)
y = numpy.mgrid[-10:10:100*1j]
y = y.reshape(-1, 1)
x = X_scaler.fit_transform(x)
for i in range(len(x)):
    y[i] = regressor.predict(numpy.array([x[i]]))
plt.figure()
plt.plot(X_scaler.inverse_transform(x), Y_scaler.inverse_transform(y))
plt.plot(X_scaler.inverse_transform(X), Y_scaler.inverse_transform(Y))
The problem is that all my predictions are around 0. As you can see, I used an ExtraTreesRegressor from sklearn (commented lines) to check that the protocol itself is correct. So what is wrong with my neural network? Why is it not working?
(The actual problem I'm trying to solve is computing the Q function for the mountain car problem with a neural network. How is that different from this function approximator?)
With these changes:
Activations to relu
Remove kernel_initializer (i.e. leave the default 'glorot_uniform')
Adam optimizer
100 epochs
i.e.
regressor = Sequential()
regressor.add(Dense(units=20, activation='relu', input_dim=1))
regressor.add(Dense(units=20, activation='relu'))
regressor.add(Dense(units=20, activation='relu'))
regressor.add(Dense(units=1))
regressor.compile(loss='mean_squared_error', optimizer='adam')
regressor.fit(X, Y, epochs=100, verbose=1, batch_size=32)
and the rest of your code unchanged, the predicted curve closely tracks the true sine.
Tinker, again and again...
A more concise version of your code that works:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt

def data_gen():
    # endless stream of batches: 1024 points drawn uniformly from [-5, 5)
    while True:
        x = (np.random.random([1024, 1]) - 0.5) * 10  # column vectors for Dense(input_dim=1)
        y = np.sin(x)
        yield (x, y)

regressor = Sequential()
regressor.add(Dense(units=20, activation='tanh', input_dim=1))
regressor.add(Dense(units=20, activation='tanh'))
regressor.add(Dense(units=20, activation='tanh'))
regressor.add(Dense(units=1, activation='linear'))
regressor.compile(loss='mse', optimizer='adam')
regressor.fit_generator(data_gen(), epochs=3, steps_per_epoch=128)  # recent Keras also accepts generators in fit()

x = np.sort((np.random.random([1024, 1]) - 0.5) * 10, axis=0)
y = np.sin(x)
plt.plot(x, y)
plt.plot(x, regressor.predict(x))
plt.show()
Changes made: replacing the lower layers' activations with hyperbolic tangents, replacing the static dataset with a random generator, and replacing sgd with adam. That said, there are still problems with other parts of your code that I haven't been able to locate yet (most likely your scaler and random process).
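One concrete scaler problem is visible in the plotting section of the original code: the evaluation grid is passed through fit_transform, which re-fits the scaler on the grid instead of reusing the statistics learned from the training data. A sketch of the fix:

# reuse the scaler fitted on the training data; only transform new inputs
x = numpy.mgrid[-10:10:100*1j].reshape(-1, 1)
x_scaled = X_scaler.transform(x)               # transform, not fit_transform
y_pred = regressor.predict(x_scaled)
plt.plot(x, Y_scaler.inverse_transform(y_pred))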
I managed to get a good approximation by changing the architecture and the training as in the following code. It's a bit of overkill, but at least I know where the problem was coming from.
from keras.models import Sequential
from keras.layers import Dense
import matplotlib.pyplot as plt
import random
import numpy
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import ExtraTreesRegressor
from keras import optimizers
regressor = Sequential()
regressor.add(Dense(units=500, activation='sigmoid', kernel_initializer='uniform', input_dim=1))
regressor.add(Dense(units=500, activation='sigmoid', kernel_initializer='uniform'))
regressor.add(Dense(units=1, activation='sigmoid'))
regressor.compile(loss='mean_squared_error', optimizer='adam')
#regressor = ExtraTreesRegressor()
N = 5000
X = numpy.empty((N,))
Y = numpy.empty((N,))
for i in range(N):
    X[i] = random.uniform(-10, 10)
X = numpy.sort(X).reshape(-1, 1)
for i in range(N):
    Y[i] = numpy.sin(X[i])
Y = Y.reshape(-1, 1)
X_scaler = MinMaxScaler()
Y_scaler = MinMaxScaler()
X = X_scaler.fit_transform(X)
Y = Y_scaler.fit_transform(Y)
regressor.fit(X, Y, epochs=50, verbose=1, batch_size=2)
#regressor.fit(X, Y.reshape(5000,))
x = numpy.mgrid[-10:10:100*1j]
x = x.reshape(-1, 1)
y = numpy.mgrid[-10:10:100*1j]
y = y.reshape(-1, 1)
x = X_scaler.fit_transform(x)
for i in range(len(x)):
    y[i] = regressor.predict(numpy.array([x[i]]))
plt.figure()
plt.plot(X_scaler.inverse_transform(x), Y_scaler.inverse_transform(y))
plt.plot(X_scaler.inverse_transform(X), Y_scaler.inverse_transform(Y))
However, I'm still baffled: I found papers saying that they used only two hidden layers of five neurons each to approximate the Q function of the mountain car problem, trained their network for only a few minutes, and got good results. I will try changing the batch size in my original problem to see what results I can get, but I'm not very optimistic.
I have a problem that involves predicting two outputs given a vector of predictors.
Assume that a predictor vector looks like x1, y1, att1, att2, ..., attn, where x1, y1 are coordinates and the att's are other attributes attached to the occurrence of the x1, y1 coordinates. Based on this predictor set I want to predict x2, y2. This is a time-series problem, which I am trying to solve using multiple regression.
My question is: how do I set up Keras so that it gives me 2 outputs in the final layer?
from keras.models import Model
from keras.layers import *
#inp is a "tensor", that can be passed when calling other layers to produce an output
inp = Input((10,)) #supposing you have ten numeric values as input
#here, SomeLayer() is defining a layer,
#and calling it with (inp) produces the output tensor x
x = SomeLayer(blablabla)(inp)
x = SomeOtherLayer(blablabla)(x) #here, I just replace x, because this intermediate output is not interesting to keep
#here, I want to keep the two different outputs for defining the model
#notice that both left and right are called with the same input x, creating a fork
out1 = LeftSideLastLayer(balbalba)(x)
out2 = RightSideLastLayer(banblabala)(x)
#here, you define which path you will follow in the graph you've drawn with layers
#notice the two outputs passed in a list, telling the model I want it to have two outputs.
model = Model(inp, [out1,out2])
model.compile(optimizer = ...., loss = ....) #loss can be one for both sides or a list with different loss functions for out1 and out2
model.fit(inputData,[outputYLeft, outputYRight], epochs=..., batch_size=...)
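A concrete, runnable instantiation of that skeleton, with arbitrary placeholder layer sizes and dummy data just to show the call signatures:

from keras.models import Model
from keras.layers import Input, Dense
import numpy as np

inp = Input((10,))
x = Dense(64, activation='relu')(inp)
x = Dense(32, activation='relu')(x)
out1 = Dense(1)(x)   # e.g. the x2 prediction
out2 = Dense(1)(x)   # e.g. the y2 prediction
model = Model(inp, [out1, out2])
model.compile(optimizer='adam', loss=['mse', 'mse'])

X = np.random.rand(100, 10)
yL = np.random.rand(100, 1)
yR = np.random.rand(100, 1)
model.fit(X, [yL, yR], epochs=2, batch_size=16)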
You can make a model with multiple outputs with the Functional API, or by subclassing tf.keras.Model.
Here's an example of dual outputs (regression and classification) on the Iris Dataset, using the Functional API:
from sklearn.datasets import load_iris
from tensorflow.keras.layers import Dense
from tensorflow.keras import Input, Model
import tensorflow as tf
data, target = load_iris(return_X_y=True)
X = data[:, (0, 1, 2)]
Y = data[:, 3]
Z = target
inputs = Input(shape=(3,), name='input')
x = Dense(16, activation='relu', name='16')(inputs)
x = Dense(32, activation='relu', name='32')(x)
output1 = Dense(1, name='cont_out')(x)
output2 = Dense(3, activation='softmax', name='cat_out')(x)
model = Model(inputs=inputs, outputs=[output1, output2])
model.compile(loss={'cont_out': 'mean_absolute_error',
                    'cat_out': 'sparse_categorical_crossentropy'},
              optimizer='adam',
              metrics={'cat_out': tf.metrics.SparseCategoricalAccuracy(name='acc')})
history = model.fit(X, {'cont_out': Y, 'cat_out': Z}, epochs=10, batch_size=8)
Here's a simplified version:
from sklearn.datasets import load_iris
from tensorflow.keras.layers import Dense
from tensorflow.keras import Input, Model
data, target = load_iris(return_X_y=True)
X = data[:, (0, 1, 2)]
Y = data[:, 3]
Z = target
inputs = Input(shape=(3,))
x = Dense(16, activation='relu')(inputs)
x = Dense(32, activation='relu')(x)
output1 = Dense(1)(x)
output2 = Dense(3, activation='softmax')(x)
model = Model(inputs=inputs, outputs=[output1, output2])
model.compile(loss=['mae', 'sparse_categorical_crossentropy'], optimizer='adam')
history = model.fit(X, [Y, Z], epochs=10, batch_size=8)
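In both variants, predict then returns one array per output, in the order the outputs were passed to Model:

y_pred, z_pred = model.predict(X)
print(y_pred.shape)   # (150, 1) regression output
print(z_pred.shape)   # (150, 3) class probabilities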
Here's the same example, subclassing tf.keras.Model and with a custom training loop:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras import Model
from sklearn.datasets import load_iris
tf.keras.backend.set_floatx('float64')
iris, target = load_iris(return_X_y=True)
X = iris[:, :3]
y = iris[:, 3]
z = target
ds = tf.data.Dataset.from_tensor_slices((X, y, z)).shuffle(150).batch(8)
class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.d0 = Dense(16, activation='relu')
        self.d1 = Dense(32, activation='relu')
        self.d2 = Dense(1)
        self.d3 = Dense(3, activation='softmax')

    def call(self, x, training=None, **kwargs):
        x = self.d0(x)
        x = self.d1(x)
        a = self.d2(x)
        b = self.d3(x)
        return a, b
model = MyModel()
loss_obj_reg = tf.keras.losses.MeanAbsoluteError()
loss_obj_cat = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
loss_reg = tf.keras.metrics.Mean(name='regression loss')
loss_cat = tf.keras.metrics.Mean(name='categorical loss')
error_reg = tf.keras.metrics.MeanAbsoluteError()
error_cat = tf.keras.metrics.SparseCategoricalAccuracy()
@tf.function
def train_step(inputs, y_reg, y_cat):
    with tf.GradientTape() as tape:
        pred_reg, pred_cat = model(inputs)
        reg_loss = loss_obj_reg(y_reg, pred_reg)
        cat_loss = loss_obj_cat(y_cat, pred_cat)
    gradients = tape.gradient([reg_loss, cat_loss], model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    loss_reg(reg_loss)
    loss_cat(cat_loss)
    error_reg(y_reg, pred_reg)
    error_cat(y_cat, pred_cat)
for epoch in range(50):
    for xx, yy, zz in ds:
        train_step(xx, yy, zz)
    template = 'Epoch {:>2}, SCCE: {:>5.2f},' \
               ' MAE: {:>4.2f}, SAcc: {:>5.1%}'
    print(template.format(epoch+1,
                          loss_cat.result(),
                          error_reg.result(),
                          error_cat.result()))
    loss_reg.reset_states()
    loss_cat.reset_states()
    error_reg.reset_states()
    error_cat.reset_states()
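After training, the subclassed model can be called directly; it returns the (regression, classification) pair from call:

pred_reg, pred_cat = model(X[:5])
print(pred_reg.numpy().ravel())              # predicted petal widths
print(tf.argmax(pred_cat, axis=1).numpy())   # predicted class indices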