I am new to TensorFlow 2 and I was starting my learning curve with the following simple linear-regression model:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Make data
num_samples, w, b = 20, 0.5, 2
xs = np.asarray(range(num_samples))
ys = np.asarray([x*w + b + np.random.normal() for x in range(num_samples)])
xts = tf.convert_to_tensor(xs, dtype=tf.float32)
yts = tf.convert_to_tensor(xs, dtype=tf.float32)
plt.plot(xs, ys, 'ro')
class Linear(tf.keras.Model):
    def __init__(self, name='linear', **kwargs):
        super().__init__(name='linear', **kwargs)
        self.w = tf.Variable(0, True, name="w", dtype=tf.float32)
        self.b = tf.Variable(1, True, name="b", dtype=tf.float32)

    def call(self, inputs):
        return self.w*inputs + self.b

class Custom(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        if epoch % 20 == 0:
            preds = self.model.predict(xts)
            plt.plot(xs, preds, label='{} {:7.2f}'.format(epoch, logs['loss']))
            print('The average loss for epoch {} is {:7.2f}.'.format(epoch, logs['loss']))
x = tf.keras.Input(dtype=tf.float32, shape=[])
#model = tf.keras.Sequential([tf.keras.layers.Dense(units=1, input_shape=[1])])
model = Linear()
optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='MSE')
model.fit(x=xts, y=yts, verbose=1, batch_size=4, epochs=250, callbacks=[Custom()])
plt.legend()
plt.show()
For a reason I don't understand, it seems like my model is not fitting the curve.
I also tried with keras.layers.Dense(1) and got the exact same result.
Also, the results don't seem to correspond to a proper loss function, as around epoch 120 the model reports less loss than it does at epoch 250.
Can you maybe help me understand what I am doing wrong?
Thanks a lot!
There is a small bug in your code: xts and yts are identical to each other, i.e. you wrote
xts = tf.convert_to_tensor(xs, dtype=tf.float32)
yts = tf.convert_to_tensor(xs, dtype=tf.float32)
instead of
xts = tf.convert_to_tensor(xs, dtype=tf.float32)
yts = tf.convert_to_tensor(ys, dtype=tf.float32)
which is why the loss doesn't make sense. Once this has been fixed, the results are as expected; see the plot below.
Related
I am trying to implement ridge regression in PyTorch, defining the loss function and plotting it over different iterations. The only issue is, I keep getting an error: mat1 and mat2 shapes cannot be multiplied (1000x10 and 1x1). I would like to convert the second matrix to a 1x10 in order to complete the code, but I can't seem to get it to work.
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
n = 1000
p = 10
mean = np.zeros((p))
val = 0.8
cov = np.ones((p,p))*val
cov = cov + np.eye(p)*(1-val)
np.random.seed(10)
X = np.random.multivariate_normal(mean, cov, n)
theta_true = np.concatenate((np.ones((5,1)), np.zeros((5,1))),axis=0)
delta=0.5
Sigma = np.eye(n,n,k=-1)*0.4 + np.eye(n,n)*1 + np.eye(n,n,k=1)*0.4
mean = np.zeros(n)
e = np.random.multivariate_normal(mean, Sigma, 1)
y = X @ theta_true + delta*e.T
import torch
X_t = torch.from_numpy(X).float()
y_t = torch.from_numpy(y).float()
Sigma_t = torch.from_numpy(Sigma).float()
import torch.nn as nn
import torch.nn.functional as F
class MyLinear(nn.Module):
    def __init__(self):
        super(MyLinear, self).__init__()
        self.linear = nn.Linear(1, 1)

    def forward(self, x):
        out = self.linear(x)
        return out

def L2_norm(model):
    return torch.sum(list(model.parameters())[0]**2)

def L1_norm(model):
    return torch.sum(torch.abs(list(model.parameters())[0]))

def ridge_loss(y_pred, y_true, model, lambda_):
    mse = F.mse_loss(y_pred, y_true)
    regularization = lambda_ * L2_norm(model)
    return mse + regularization
import matplotlib.pyplot as plt
model = MyLinear()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
lambda_ = 0.1
num_epochs = 1000
loss_values = []
for epoch in range(num_epochs):
    optimizer.zero_grad()
    y_pred = model(X_t)
    loss = ridge_loss(y_pred, y_t, model, lambda_)
    loss_values.append(loss.item())
    loss.backward()
    optimizer.step()
plt.plot(loss_values)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Ridge Regression Loss over Iterations')
plt.show()
I tried changing the theta_true definition to transform the matrix but the same error occurred.
theta_true = np.concatenate((np.ones((5,1)), np.zeros((5,1)))).reshape(10, 1)
The Linear layer in your MyLinear class is what is causing the issue.
self.linear = nn.Linear(1, 1)
means one input feature and one output feature, but X, as you have it here, has shape (1000, 10), meaning it has 10 features. So you will need to change that line to
self.linear = nn.Linear(10, 1)
That will do the trick; here is the plot I get with that change:
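For reference, here is a minimal, self-contained sketch (not from the original answer) of the corrected module, with a quick shape check on a random tensor shaped like X_t:
import torch
import torch.nn as nn

# Sketch of the corrected module: in_features of nn.Linear must match the
# number of columns of the input (10 here), while out_features stays 1.
class MyLinear(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(10, 1)

    def forward(self, x):
        return self.linear(x)

model = MyLinear()
X_t = torch.randn(1000, 10)   # same shape as the X_t in the question
print(model(X_t).shape)       # torch.Size([1000, 1]), matching y_t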
I am a beginner in programming who is trying to study machine learning. I used TensorFlow for Python. Here's the code, written following (but not 100% copied from) the official TensorFlow guide (https://www.tensorflow.org/guide/basics). I can't see the final graph with the results after training. I've tried two methods of training and both share the same problem. Could anyone help me?
import matplotlib as mp
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as pl
mp.rcParams["figure.figsize"] = [20, 10]
precision = 500
x = tf.linspace(-10.0, 10.0, precision)
def y(x): return 4 * np.sin(x - 1) + 3
newY = y(x) + tf.random.normal(shape=[precision])
class Model(tf.keras.Model):
    def __init__(self, units):
        super().__init__()
        self.dense1 = tf.keras.layers.Dense(units = units, activation = tf.nn.relu,
                                            kernel_initializer=tf.random.normal,
                                            bias_initializer=tf.random.normal)
        self.dense2 = tf.keras.layers.Dense(1)

    def __call__(self, x, training = True):
        x = x[:, tf.newaxis]
        x = self.dense1(x)
        x = self.dense2(x)
        return tf.squeeze(x, axis=1)
model = Model(164)
pl.plot(x, y(x), label = "origin")
pl.plot(x, newY, ".", label = "corrupted")
pl.plot(x, model(x), label = "before training")
""" The first method
vars = model.variables
optimizer = tf.optimizers.SGD(learning_rate = 0.01)
for i in range(1000):
    with tf.GradientTape() as tape:
        prediction = model(x)
        error = (newY-prediction)**2
        mean_error = tf.reduce_mean(error)
    gradient = tape.gradient(mean_error, vars)
    optimizer.apply_gradients(zip(gradient, vars))
"""
model.compile(loss = tf.keras.losses.MSE, optimizer = tf.optimizers.SGD(learning_rate = 0.01))
model.fit(x, newY, epochs=100,batch_size=32,verbose=0)
pl.plot(x, model(x), label = "after training")
pl.legend()
pl.show()
I copied your code and investigated it. Your model returns a NaN loss during training; I removed the kernel and bias initializers and it works. For now I don't know what's wrong with your initialization. It seems that some weights got initialized with NaNs, which then made the predictions become NaNs, hence you couldn't plot them.
Update: use the initializers module (like tensorflow.initializers or tensorflow.keras.initializers, not tensorflow.random). For example, use kernel_initializer=tf.initializers.random_normal instead of what you have.
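As a minimal sketch of what the corrected layer could look like (not from the original answer; it assumes the rest of the Model class stays unchanged, and stddev=0.05 is just the RandomNormal default made explicit):
import tensorflow as tf

# Pass an initializer object (or a string such as "random_normal"), not the
# tf.random.normal sampling op, which is not an initializer.
dense1 = tf.keras.layers.Dense(
    units=164,
    activation=tf.nn.relu,
    kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.05),
    bias_initializer=tf.keras.initializers.RandomNormal(stddev=0.05),
)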
As far as I can see, your third graph and your fourth graph are the same. They are
pl.plot(x, model(x), label = "before training") and pl.plot(x, model(x), label = "after training"); you can see that the x-axis and y-axis data of the two graphs are the same.
Hope my answer is helpful to you!
I am making a simple PyTorch neural net to approximate the sine function on x = [0, 2pi]. This is a simple architecture I use with different deep learning libraries to test whether I understand how to use it or not. The neural net, when untrained, always produces a straight horizontal line, and when trained, produces a straight line at y = 0. In general, it always produces a straight line at y = (The mean of the function). This leads me to believe something is wrong with the forward prop portion of it, as the boundary should not just be a straight line when untrained. Here is the code for the net:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(1, 20),
            nn.Sigmoid(),
            nn.Linear(20, 50),
            nn.Sigmoid(),
            nn.Linear(50, 50),
            nn.Sigmoid(),
            nn.Linear(50, 1)
        )

    def forward(self, x):
        x = self.model(x)
        return x
Here is the training loop
def train(net, trainloader, valloader, learningrate, n_epochs):
    net = net.train()
    loss = nn.MSELoss()
    optimizer = torch.optim.SGD(net.parameters(), lr = learningrate)
    for epoch in range(n_epochs):
        for X, y in trainloader:
            X = X.reshape(-1, 1)
            y = y.view(-1, 1)
            optimizer.zero_grad()
            outputs = net(X)
            error = loss(outputs, y)
            error.backward()
            #net.parameters() net.parameters() * learningrate
            optimizer.step()
        total_loss = 0
        for X, y in valloader:
            X = X.reshape(-1, 1).float()
            y = y.view(-1, 1)
            outputs = net(X)
            error = loss(outputs, y)
            total_loss += error.data
        print('Val loss for epoch', epoch, 'is', total_loss / len(valloader))
it is called as:
net = Net()
losslist = train(net, trainloader, valloader, .0001, n_epochs = 4)
Where trainloader and valloader are the training and validation loaders. Can anyone help me see what's wrong with this? I know it's not the learning rate, since it's the one I use in other frameworks, and I know it's not the fact that I'm using SGD or sigmoid activation functions, although I have a suspicion the error is in the activation functions somewhere.
Does anyone know how to fix this? Thanks.
After playing with some hyperparameters for a while, modifying the net and changing the optimizer (following this excellent recipe), I ended up changing the line optimizer = torch.optim.SGD(net.parameters(), lr = learningrate) to optimizer = torch.optim.Adam(net.parameters()) (the default optimizer parameters were used), running for 100 epochs with a batch size equal to 1.
The following code was used (tested on CPU only):
import torch
import torch.nn as nn
from torch.utils import data
import numpy as np
import matplotlib.pyplot as plt
# for reproducibility
torch.manual_seed(0)
np.random.seed(0)
class Dataset(data.Dataset):
    def __init__(self, init, end, n):
        self.n = n
        self.x = np.random.rand(self.n, 1) * (end - init) + init
        self.y = np.sin(self.x)

    def __len__(self):
        return self.n

    def __getitem__(self, idx):
        x = self.x[idx, np.newaxis]
        y = self.y[idx, np.newaxis]
        return torch.Tensor(x), torch.Tensor(y)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(1, 20),
            nn.Sigmoid(),
            nn.Linear(20, 50),
            nn.Sigmoid(),
            nn.Linear(50, 50),
            nn.Sigmoid(),
            nn.Linear(50, 1)
        )

    def forward(self, x):
        x = self.model(x)
        return x

def train(net, trainloader, valloader, n_epochs):
    loss = nn.MSELoss()
    # Switch the two following lines and run the code
    # optimizer = torch.optim.SGD(net.parameters(), lr = 0.0001)
    optimizer = torch.optim.Adam(net.parameters())
    for epoch in range(n_epochs):
        net.train()
        for x, y in trainloader:
            optimizer.zero_grad()
            outputs = net(x).view(-1)
            error = loss(outputs, y)
            error.backward()
            optimizer.step()
        net.eval()
        total_loss = 0
        for x, y in valloader:
            outputs = net(x)
            error = loss(outputs, y)
            total_loss += error.data
        print('Val loss for epoch', epoch, 'is', total_loss / len(valloader))

    net.eval()
    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)

    def plot_result(ax, dataloader):
        out, xx, yy = [], [], []
        for x, y in dataloader:
            out.append(net(x))
            xx.append(x)
            yy.append(y)
        out = torch.cat(out, dim=0).detach().numpy().reshape(-1)
        xx = torch.cat(xx, dim=0).numpy().reshape(-1)
        yy = torch.cat(yy, dim=0).numpy().reshape(-1)
        ax.scatter(xx, yy, facecolor='green')
        ax.scatter(xx, out, facecolor='red')
        xx = np.linspace(0.0, 3.14159*2, 1000)
        ax.plot(xx, np.sin(xx), color='green')

    plot_result(ax1, trainloader)
    plot_result(ax2, valloader)
    plt.show()

train_dataset = Dataset(0.0, 3.14159*2, 100)
val_dataset = Dataset(0.0, 3.14159*2, 30)

params = {'batch_size': 1,
          'shuffle': True,
          'num_workers': 4}

trainloader = data.DataLoader(train_dataset, **params)
valloader = data.DataLoader(val_dataset, **params)

net = Net()
losslist = train(net, trainloader, valloader, n_epochs = 100)
Result with Adam optimizer:
Result with SGD optimizer:
In general, it always produces a straight line at y = (The mean of the function).
Usually, this means that the NN has only successfully trained the final layer so far. You need to train it for longer or with better optimization, as ViniciusArruda shows here.
Edit: To explain further: when only the final layer has been trained, the NN is effectively trying to guess the output y with no knowledge of the input X. In that case, the best guess it can make is the mean value, because that is what minimizes its MSE loss.
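A quick numerical illustration of that last point (not from the original answer): among all constant predictions, the mean of y gives the smallest MSE.
import numpy as np

# Among constant predictions c, MSE(c) = mean((y - c)**2) is minimized at c = mean(y).
y = np.sin(np.linspace(0, 2 * np.pi, 1000))
cs = np.linspace(-1.0, 1.0, 201)
mses = [np.mean((y - c) ** 2) for c in cs]
print(cs[np.argmin(mses)], y.mean())  # both are ~0, the mean of sin over [0, 2*pi]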
I am trying to build a custom loss function that takes the previous output (the output from the previous iteration) of the network and uses it together with the current output.
Here is what I am trying to do, but I don't know how to complete it:
def l_loss(prev_output):
    def loss(y_true, y_pred):
        pix_loss = K.mean(K.square(y_pred - y_true), axis=-1)
        pase = K.variable(100)
        diff = K.mean(K.abs(prev_output - y_pred))
        movement_loss = K.abs(pase - diff)
        total_loss = pix_loss + movement_loss
        return total_loss
    return loss

self.model.compile(optimizer=Adam(0.001, beta_1=0.5, beta_2=0.9),
                   loss=l_loss(?))
I hope you can help me.
This is what I tried:
import numpy as np
from tensorflow import keras
from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential
from tensorflow.keras import backend as K

class MovementLoss(object):
    def __init__(self):
        self.var = None

    def __call__(self, y_true, y_pred, sample_weight=None):
        mse = K.mean(K.square(y_true - y_pred), axis=-1)
        if self.var is None:
            z = np.zeros((32,))
            self.var = K.variable(z)
        delta = K.update(self.var, mse - self.var)
        return mse + delta

def make_model():
    model = Sequential()
    model.add(Dense(1, input_shape=(4,)))
    loss = MovementLoss()
    model.compile('adam', loss)
    return model

model = make_model()
model.summary()
model = make_model()
model.summary()
Using an example test data.
import numpy as np
X = np.random.rand(32, 4)
POLY = [1.0, 2.0, 0.5, 3.0]
def test_fn(xi):
return np.dot(xi, POLY)
Y = np.apply_along_axis(test_fn, 1, X)
history = model.fit(X, Y, epochs=4)
I do see the loss function oscillate in a way that appears to be influenced by the delta from the last batch. Note that the loss function details are not tailored to your application.
The crucial step is that the K.update step must be part of the graph (as far as I understand it).
That is achieved by:
delta = K.update(var, delta)
return x + delta
Hi, I am trying to make a NN model that satisfies a simple formula:
y = X1^2 + X2^2
But when I use CrossEntropyLoss as the loss function, I get two different error messages.
First, when I set up the code like this
x = torch.randn(batch_size, 2)
y_hat = model(x)
y = answer(x).long()
optimizer.zero_grad()
loss = loss_func(y_hat, y)
loss.backward()
optimizer.step()
I get this message:
RuntimeError: Assertion `cur_target >= 0 && cur_target < n_classes' failed. at c:\programdata\miniconda3\conda-bld\pytorch_1533090623466\work\aten\src\thnn\generic/ClassNLLCriterion.c:93
Second, when I change the code like this
x = torch.randn(batch_size, 2)
y_hat = model(x)
y = answer(x).long().view(batch_size,1,1)
optimizer.zero_grad()
loss = loss_func(y_hat, y)
loss.backward()
optimizer.step()
then I get a message like
RuntimeError: multi-target not supported at c:\programdata\miniconda3\conda-bld\pytorch_1533090623466\work\aten\src\thnn\generic/ClassNLLCriterion.c:21
How can I solve this problem? Thanks. (Sorry for my English.)
This is my code:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
def answer(x):
    y = x[:,0].pow(2) + x[:,1].pow(2)
    return y

class Model(nn.Module):
    def __init__(self, input_size, output_size):
        super(Model, self).__init__()
        self.linear1 = nn.Linear(input_size, 10)
        self.linear2 = nn.Linear(10, 1)

    def forward(self, x):
        y = F.relu(self.linear1(x))
        y = F.relu(self.linear2(y))
        return y

model = Model(2,1)
print(model, '\n')

loss_func = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.001)

batch_size = 3
epoch_n = 100
iter_n = 100

for epoch in range(epoch_n):
    loss_avg = 0
    for i in range(iter_n):
        x = torch.randn(batch_size, 2)
        y_hat = model(x)
        y = answer(x).long().view(batch_size,1,1)

        optimizer.zero_grad()
        loss = loss_func(y_hat, y)
        loss.backward()
        optimizer.step()

        loss_avg += loss

    loss_avg = loss_avg / iter_n
    if epoch % 10 == 0:
        print(loss_avg)
    if loss_avg < 0.001:
        break
Can I make such a dataset using a DataLoader in PyTorch? Thanks for your help.
You are using the wrong loss function. CrossEntropyLoss is generally used for classification problems, whereas your problem is a regression problem. So you should use losses that are meant for regression-like tasks, such as Mean Squared Error Loss, L1 Loss, etc. Take a look at this, this, this and this.
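Not from the original answer, but a minimal sketch of what a training step could look like with nn.MSELoss, assuming the setup from the question (an equivalent nn.Sequential stands in for the Model class here): MSE expects float targets with the same shape as the model output, so the .long() cast and the extra view to (batch_size, 1, 1) are not needed.
import torch
import torch.nn as nn

batch_size = 3
model = nn.Sequential(nn.Linear(2, 10), nn.ReLU(), nn.Linear(10, 1))  # stand-in for Model(2, 1)
loss_func = nn.MSELoss()                       # regression loss instead of CrossEntropyLoss
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

x = torch.randn(batch_size, 2)
y = (x[:, 0] ** 2 + x[:, 1] ** 2).view(batch_size, 1)  # float targets, shape (3, 1)

optimizer.zero_grad()
loss = loss_func(model(x), y)                  # output and target both have shape (3, 1)
loss.backward()
optimizer.step()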