Parametric estimation of a Gaussian Mixture Model - python

I am trying to train a model to estimate a GMM. However, the means of the GMM are computed each time from a mean_placement parameter. I am following the solution provided here; I'll copy and paste the original code:
import numpy as np
import matplotlib.pyplot as plt
import sklearn.datasets as datasets
import torch
from torch import nn
from torch import optim
import torch.distributions as D
num_layers = 8
weights = torch.ones(8,requires_grad=True)
means = torch.tensor(np.random.randn(8,2),requires_grad=True)
stdevs = torch.tensor(np.abs(np.random.randn(8,2)),requires_grad=True)
parameters = [weights, means, stdevs]
optimizer1 = optim.SGD(parameters, lr=0.001, momentum=0.9)
num_iter = 10001
for i in range(num_iter):
    mix = D.Categorical(weights)
    comp = D.Independent(D.Normal(means, stdevs), 1)
    gmm = D.MixtureSameFamily(mix, comp)
    optimizer1.zero_grad()
    x = torch.randn(5000, 2)  # this can be an arbitrary batch of x samples
    loss2 = -gmm.log_prob(x).mean()
    loss2.backward()
    optimizer1.step()
    print(i, loss2)
What I would like to do is this:
num_layers = 8
weights = torch.ones(8,requires_grad=True)
means_coef = torch.tensor(10.,requires_grad=True)
means = torch.tensor(torch.dstack([torch.linspace(1,means_coef.detach().item(),8)]*2).squeeze(),requires_grad=True)
stdevs = torch.tensor(np.abs(np.random.randn(8,2)),requires_grad=True)
parameters = [means_coef]
optimizer1 = optim.SGD(parameters, lr=0.001, momentum=0.9)
num_iter = 10001
for i in range(num_iter):
    means = torch.tensor(torch.dstack([torch.linspace(1, means_coef.detach().item(), 8)]*2).squeeze(), requires_grad=True)
    mix = D.Categorical(weights)
    comp = D.Independent(D.Normal(means, stdevs), 1)
    gmm = D.MixtureSameFamily(mix, comp)
    optimizer1.zero_grad()
    x = torch.randn(5000, 2)  # this can be an arbitrary batch of x samples
    loss2 = -gmm.log_prob(x).mean()
    loss2.backward()
    optimizer1.step()
    print(i, means_coef)

print(means_coef)
However, in this case the parameter is not updated and its grad value is always None. Any ideas how to fix this?

Your parameter never updates because means is rebuilt every iteration via means_coef.detach().item(): both detach() and item() cut the value out of the computation graph, so no gradient can flow back to means_coef. Based on your requirements, I have rewritten your model so that the means are computed from learnable parameters inside the forward pass.
If you run it, you can see that all the parameters change after the model is optimized. I have also provided the computation graph of the model at the end. You can simply modify the GMM class as needed if you want to make a new one.
import numpy as np
import matplotlib.pyplot as plt
import sklearn.datasets as datasets
import torch
from torch import nn
from torch import optim
import torch.distributions as D
class GMM(nn.Module):
    def __init__(self, weights, base, scale, n_cell=8, shift=0, dim=2):
        super(GMM, self).__init__()
        self.weight = nn.Parameter(weights)
        self.base = nn.Parameter(base)
        self.scale = nn.Parameter(scale)
        self.grid = torch.arange(1, n_cell+1)
        self.shift = shift
        self.n_cell = n_cell
        self.dim = dim

    def trsf_grid(self):
        trsf = (
            torch.log(self.scale * self.grid + self.shift)
            / torch.log(self.base)
        ).reshape(-1, 1)
        return trsf.expand(self.n_cell, self.dim)

    def forward(self, x, std):
        means = self.trsf_grid()
        mix = D.Categorical(self.weight)
        comp = D.Independent(D.Normal(means, std), 1)
        gmm = D.MixtureSameFamily(mix, comp)
        return -gmm.log_prob(x).mean()

if __name__ == "__main__":
    weight = torch.ones(8)
    base = torch.tensor(3.)
    scale = torch.tensor(1.)
    stds = torch.tensor(np.abs(np.random.randn(8, 2)), requires_grad=False)

    model = GMM(weight, base, scale)
    print(list(model.parameters()))

    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    for i in range(1000):
        optimizer.zero_grad()
        x = torch.randn(5000, 2)
        loss = model(x, stds)
        loss.backward()
        optimizer.step()

    print(list(model.parameters()))
In my case it returned the following parameters:
[Parameter containing:
tensor([1., 1., 1., 1., 1., 1., 1., 1.], requires_grad=True), Parameter containing:
tensor(3., requires_grad=True), Parameter containing:
tensor(1., requires_grad=True)]
[Parameter containing:
tensor([0.7872, 1.1010, 1.3390, 1.3757, 0.5122, 0.2884, 1.2597, 0.7597],
requires_grad=True), Parameter containing:
tensor(3.3207, requires_grad=True), Parameter containing:
tensor(0.2814, requires_grad=True)]
which indeed shows that the parameters are updating.
You can also see the computation graph of the model below (graph image omitted here).
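As an addendum (my own sketch, not part of the original answer): if you would rather keep your original loop structure, the key is to build means from means_coef without detach() or item(). torch.linspace is a factory function and will not carry gradients back to its endpoints, but an equivalent affine construction does:

import torch

means_coef = torch.tensor(10., requires_grad=True)

# differentiable equivalent of torch.linspace(1, means_coef, 8):
steps = torch.arange(8, dtype=torch.float32) / 7   # 0, 1/7, ..., 1
means_1d = 1 + (means_coef - 1) * steps            # gradient flows back to means_coef
means = means_1d.unsqueeze(-1).expand(8, 2)        # shape (8, 2), both columns equal

means.sum().backward()
print(means_coef.grad)  # tensor(8.) -- no longer None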

Related

Error: mat1 and mat2 shapes cannot be multiplied (1000x10 and 1x1)

I am trying to implement Ridge Regression in PyTorch, defining the loss function and plotting it over different iterations. The only issue is, I keep getting an error: mat1 and mat2 shapes cannot be multiplied (1000x10 and 1x1). I would like to convert the second matrix to 1x10 in order to complete the code, but I can't seem to get it to work.
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
n = 1000
p = 10
mean = np.zeros((p))
val = 0.8
cov = np.ones((p,p))*val
cov = cov + np.eye(p)*(1-val)
np.random.seed(10)
X = np.random.multivariate_normal(mean, cov, n)
theta_true = np.concatenate((np.ones((5,1)), np.zeros((5,1))),axis=0)
delta=0.5
Sigma = np.eye(n,n,k=-1)*0.4 + np.eye(n,n)*1 + np.eye(n,n,k=1)*0.4
mean = np.zeros(n)
e = np.random.multivariate_normal(mean, Sigma, 1)
y = X @ theta_true + delta*e.T
import torch
X_t = torch.from_numpy(X).float()
y_t = torch.from_numpy(y).float()
Sigma_t = torch.from_numpy(Sigma).float()
import torch.nn as nn
import torch.nn.functional as F
class MyLinear(nn.Module):
    def __init__(self):
        super(MyLinear, self).__init__()
        self.linear = nn.Linear(1, 1)

    def forward(self, x):
        out = self.linear(x)
        return out

def L2_norm(model):
    return torch.sum(list(model.parameters())[0]**2)

def L1_norm(model):
    return torch.sum(torch.abs(list(model.parameters())[0]))

def ridge_loss(y_pred, y_true, model, lambda_):
    mse = F.mse_loss(y_pred, y_true)
    regularization = lambda_ * L2_norm(model)
    return mse + regularization
import matplotlib.pyplot as plt
model = MyLinear()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
lambda_ = 0.1
num_epochs = 1000
loss_values = []
for epoch in range(num_epochs):
    optimizer.zero_grad()
    y_pred = model(X_t)
    loss = ridge_loss(y_pred, y_t, model, lambda_)
    loss_values.append(loss.item())
    loss.backward()
    optimizer.step()
plt.plot(loss_values)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Ridge Regression Loss over Iterations')
plt.show()
I tried changing the theta_true definition to transform the matrix but the same error occurred.
theta_true = np.concatenate((np.ones((5,1)), np.zeros((5,1)))).reshape(10, 1)
The Linear layer in your MyLinear class is what is causing the issue.
self.linear = nn.Linear(1, 1)
means one input feature and one output feature, but x, as you have it here, has shape (1000, 10), meaning it has 10 features. So you will need to change that line to
self.linear = nn.Linear(10, 1)
That will do the trick. Here is the loss plot I get with that change (image omitted here).
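As a quick sanity check (a standalone sketch, not from the original answer), you can confirm the shapes nn.Linear expects and produces:

import torch
import torch.nn as nn

x = torch.randn(1000, 10)   # 1000 samples, 10 features each
layer = nn.Linear(10, 1)    # in_features must match x's last dimension
print(layer(x).shape)       # torch.Size([1000, 1])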

How to fit a small dataset with a perceptron model using Pytorch

I'm trying to fit a small dataset (just 7x1 in size) with a 3-layer perceptron model, but the loss won't converge. I'm new to machine learning; can someone please give me a hint on how to adjust my code?
import torch
import torch.nn as nn
import torch.nn.functional as F
vec_shape = [7, 1]
x_0 = [500, 1000, 2000, 4000, 5000, 8000, 10000]
y_0 = [1.171467, 1.486507, 11.7738, 34.448421, 75.402871, 225.319848, 492.262426]
# x = torch.tensor(x_0).reshape(vec_shape).float()
x = torch.log(torch.tensor(x_0).reshape(vec_shape).float())
y = torch.tensor(y_0).reshape(vec_shape).float()
class Net(nn.Module):
    def __init__(self, n_input, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden1 = nn.Linear(n_input, n_hidden)
        self.hidden2 = nn.Linear(n_hidden, n_hidden)
        self.predict = nn.Linear(n_hidden, n_output)

    def forward(self, input):
        out = self.hidden1(input)
        out = F.relu(out)
        out = self.hidden2(out)
        out = torch.sigmoid(out)
        out = self.predict(out)
        return out

    def weight_init(self):
        for op in self.modules():
            if isinstance(op, nn.Linear):
                nn.init.normal_(op.weight.data)
                nn.init.normal_(op.bias.data)

net = Net(1, 10, 1)
net.weight_init()
# print(net)

optimizer = torch.optim.SGD(net.parameters(), lr=0.1)
loss_func = torch.nn.MSELoss()

for t in range(500):
    prediction = net(x)
    loss = loss_func(prediction, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if t % 50 == 0:
        print('Loss = %.4f' % loss.data)
I tried expanding the model and shrinking it, but neither change works.
Rescaling and normalization are key in machine learning. Your setup is pretty good and you already apply some rescaling, but it is simply not enough: with the limited number of datapoints you have, the range of the targets is way too large. So, just as you do with x_0, apply torch.log to y_0. You can always scale the predictions back after training. Below you can find the adapted code; I changed four things:
torch.log on y_0
learning rate to 0.01
number of iterations to 50000
added a print statement to show rescaling of the predictions
import torch
import torch.nn as nn
import torch.nn.functional as F
vec_shape = [7, 1]
x_0 = [500, 1000, 2000, 4000, 5000, 8000, 10000]
y_0 = [1.171467, 1.486507, 11.7738, 34.448421, 75.402871, 225.319848, 492.262426]
# x = torch.tensor(x_0).reshape(vec_shape).float()
x = torch.log(torch.tensor(x_0).reshape(vec_shape).float())
y = torch.log(torch.tensor(y_0).reshape(vec_shape).float()) # modified
class Net(nn.Module):
    def __init__(self, n_input, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden1 = nn.Linear(n_input, n_hidden)
        self.hidden2 = nn.Linear(n_hidden, n_hidden)
        self.predict = nn.Linear(n_hidden, n_output)

    def forward(self, input):
        out = self.hidden1(input)
        out = F.relu(out)
        out = self.hidden2(out)
        out = torch.sigmoid(out)
        out = self.predict(out)
        return out

    def weight_init(self):
        for op in self.modules():
            if isinstance(op, nn.Linear):
                nn.init.normal_(op.weight.data)
                nn.init.normal_(op.bias.data)

net = Net(1, 10, 1)
net.weight_init()
# print(net)

optimizer = torch.optim.SGD(net.parameters(), lr=0.01)  # modified
loss_func = torch.nn.MSELoss()

for t in range(50000):  # modified
    prediction = net(x)
    loss = loss_func(prediction, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if t % 50 == 0:
        print('Loss = %.4f' % loss.data)

print(torch.exp(net(x)))  # added
I also recommend normalizing your dataset after the logarithmic rescaling, for instance by subtracting the mean and dividing by the standard deviation.
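A minimal sketch of that normalization, reusing y_0 and vec_shape from the code above; the commented line at the end is the inverse transform that maps predictions back to the original scale:

import torch

y_0 = [1.171467, 1.486507, 11.7738, 34.448421, 75.402871, 225.319848, 492.262426]
vec_shape = [7, 1]

y_log = torch.log(torch.tensor(y_0).reshape(vec_shape).float())
y_mean, y_std = y_log.mean(), y_log.std()
y = (y_log - y_mean) / y_std   # standardized log-targets for training

# after training, undo both transforms to recover predictions on the original scale:
# preds = torch.exp(net(x) * y_std + y_mean)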

Derivative of Neural Network in Pytorch

I have implemented and trained a neural network in PyTorch; however, I am interested in the derivative of the network's output with respect to the input.
I have extensively searched for any procedure that would allow evaluating this derivative for a given input, but I did not find anything.
I know that I can compute the gradients of a function in the following way.
external_grad = torch.tensor([1., 1.])
Q.backward(gradient=external_grad)
But how would I do that with a trained neural network instead of a function Q?
Thanks in advance.
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import numpy as np
from scipy.stats import norm
from numpy import linalg as la
import numpy.random as npr
from tabulate import tabulate
from matplotlib import pyplot as plt
import random
import os

import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader
#from torchvision import datasets, transforms
from torch.autograd import Variable
# In[3]:
nSimul = 32768
T1 = 1.0
T2 = 2.0
K = 110.0
spot = 100.0
vol = 0.2
vol0 = 0.5 # vol is increased over the 1st period so we have more points in the wings
# simulate all Gaussian returns (N1, N2) first
# returns: matrix of shape [nSimul, TimeSteps=2]
returns = np.random.normal(size=[nSimul,2])
# generate paths, step by step, and not path by path as customary
# this is to avoid slow Python loops, using NumPy's optimized vector functions instead
# generate the vector of all scenarios for S1, of shape [nSimul]
S1 = spot * np.exp(-0.5*vol0*vol0*T1 + vol0*np.sqrt(T1)*returns[:,0])
# generate the vector of all scenarios for S2, of shape [nSimul]
S2 = S1 * np.exp(-0.5*vol*vol*(T2-T1) + vol*np.sqrt(T2-T1)*returns[:,1])
# training set, X and Y are both vectors of shape [nSimul]
X = S1
Y = np.maximum(0, S2 - K)
xAxis = np.linspace(20, 200, 100)
xAxis=xAxis.reshape(-1,1)
# In[4]:
#Normalization of the simulated data:
meanX = np.mean(X)
stdX = np.std(X)
meanY = np.mean(Y)
stdY = np.std(Y)
normX = (X - meanX) / stdX
normY = (Y - meanY) / stdY
normX=normX.reshape(-1,1)
normY=normY.reshape(-1,1)
# In[5]:
class NeuralNetwork(nn.Module):
    def __init__(self, inputsize, outputsize):
        super(NeuralNetwork, self).__init__()
        #self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(inputsize, 3),
            nn.ELU(),
            nn.Linear(3, 5),
            nn.ELU(),
            nn.Linear(5, 3),
            nn.ELU(),
            nn.Linear(3, outputsize),
        )
        w = torch.empty(0, 1)
        nn.init.normal_(w)

    def forward(self, x):
        #x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
# In[6]:
inputDim = 1 # takes variable 'x'
outputDim = 1 # takes variable 'y'
learningRate = 0.05
epochs = 10000
#weight=torch.empty(3)
model = NeuralNetwork(inputDim, outputDim)
##### For GPU #######
if torch.cuda.is_available():
    model.cuda()
# In[7]:
#criterion = torch.nn.MSELoss()
#optimizer = torch.optim.SGD(model.parameters(), lr=learningRate)
# In[ ]:
def ridge_loss(outputs, labels):
    torch.mean((outputs - labels)**2)  # note: missing return; this function is unused below
# In[ ]:
# In[9]:
#Adam optmization
criterion = torch.nn.MSELoss()
#optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
optimizer = optim.Adam(model.parameters(), lr=0.05)
# In[10]:
for epoch in range(epochs):
    # Converting inputs and labels to Variable
    if torch.cuda.is_available():
        inputs = Variable(torch.from_numpy(normX).cuda().float())
        labels = Variable(torch.from_numpy(normY).cuda().float())
    else:
        inputs = Variable(torch.from_numpy(normX).float())
        labels = Variable(torch.from_numpy(normY).float())

    # Clear gradient buffers: we don't want gradients from a previous epoch to accumulate
    optimizer.zero_grad()

    # get output from the model, given the inputs
    outputs = model(inputs)

    # get loss for the predicted output
    loss = criterion(outputs, labels)
    print(loss)
    # get gradients w.r.t. the parameters
    loss.backward()

    # update parameters
    optimizer.step()

    print('epoch {}, loss {}'.format(epoch, loss.item()))
# In[11]:
def predict(xs):
    # first, normalize
    nxs = (xs - meanX) / stdX
    # forward feed through ANN
    # we don't need gradients in the testing phase
    with torch.no_grad():
        if torch.cuda.is_available():
            nys = model(Variable(torch.from_numpy(nxs.reshape(-1,1)).cuda().float())).cpu().data.numpy()
        else:
            nys = model(Variable(torch.from_numpy(nxs.reshape(-1,1))).float()).data.numpy()
    # de-normalize output
    ys = meanY + stdY * nys
    # we get a matrix of shape [size of xs][1], which we reshape as vector [size of xs]
    return np.reshape(ys, [-1])
# In[13]:
def BlackScholes(S0, r, sigma, T, K):
    d1 = 1 / (sigma * np.sqrt(T)) * (np.log(S0/K) + (r + sigma**2/2)*T)
    d2 = d1 - sigma * np.sqrt(T)
    return norm.cdf(d1) * S0 - norm.cdf(d2) * K * np.exp(-r*T)

def BlackScholesCallDelta(S0, r, sigma, T, K):
    d1 = 1 / (sigma * np.sqrt(T)) * (np.log(S0/K) + (r + sigma**2/2)*T)
    return norm.cdf(d1)

BlackScholes_vec = np.vectorize(BlackScholes)
BlackScholesCallDelta_vec = np.vectorize(BlackScholesCallDelta)
# In[14]:
BS_price=BS_prices=BlackScholes_vec(S0=xAxis,r=0,sigma=0.2,T=1.0,K=110.0)
predicted=predict(xAxis)
S1=1
#line_learn = plt.plot(Sval,y,label="Deep Neural Net")
line_learn = plt.plot(xAxis,predicted,label="Neural Regression")
line_BS = plt.plot(xAxis,BS_price, label="Black-Scholes")
plt.xlabel("Spot Price")
plt.ylabel("Option Price")
#plt.title(r'Time: %1.1f' % time, loc='left', fontsize=11)
plt.title(r'Strike: %1.2f' % K, loc='right', fontsize=11)
plt.title(r'Initial price: %1.2f' % S1, loc='center', fontsize=11)
plt.legend()
plt.show()
#plt.savefig("deephedge.png", dpi=150)
plt.savefig("deephedge.pdf")
# In[15]:
Prices_rg_mc_diff = []
for i in range(len(xAxis)-1):
    delta = (predicted[i+1]-predicted[i])/(xAxis[i+1]-xAxis[i])
    Prices_rg_mc_diff.append(delta)
# In[16]:
BS_delta=BlackScholesCallDelta(S0=xAxis,r=0,sigma=0.2,T=1.0,K=110.0)
predicted=predict(xAxis)
S1=1
#line_learn = plt.plot(Sval,y,label="Deep Neural Net")
line_learn = plt.plot(xAxis[1:],Prices_rg_mc_diff,label="Neural Regression")
line_BS = plt.plot(xAxis[1:],BS_delta[1:], label="Black-Scholes")
plt.xlabel("Spot Price")
plt.ylabel("Option Price")
#plt.title(r'Time: %1.1f' % time, loc='left', fontsize=11)
plt.title(r'Strike: %1.2f' % K, loc='right', fontsize=11)
plt.title(r'Initial price: %1.2f' % S1, loc='center', fontsize=11)
plt.legend()
plt.show()
#plt.savefig("deephedge.png", dpi=150)
plt.savefig("deephedge.pdf")
# In[17]:
model.backward(retain_graph=True)
# In[ ]:
print(NeuralNetwork.weight.grad)
# In[21]:
c3=torch.from_numpy((predicted.reshape(-1,1)), requires_grad=True)
c4=torch.from_numpy(xAxis, requires_grad=True)
#c5=torch.Tensor(c3)
#c6=torch.Tensor(c4)
loss = criterion(c3,c4) # calculating loss
loss.backward()
# In[28]:
torch.tensor(predicted.reshape(-1,1), requires_grad=True)
torch.tensor(xAxis, requires_grad=True)
criterion(torch.tensor(predicted.reshape(-1,1), requires_grad=True),torch.tensor(xAxis, requires_grad=True))
loss.backward()
You need to explicitly set requires_grad=True when creating a tensor. And to calculate the gradient, you first need to apply some operation to the tensor.
Here is an example:
import torch
x = torch.rand(2, 2, requires_grad=True)
y = x + 2
z = y * y * 3
out = z.mean()
out.backward()
print(x.grad)
Output:
tensor([[3.3720, 3.4302],
        [3.4030, 3.3605]])
In this way you are using torch.autograd to calculate the gradient for tensor x. See autograd for more.
And for a neural network, you can simply run a forward pass and call backward on the result afterwards.
A neural network example:
import torch
import torch.nn as nn
import torch.nn.functional as f

x = torch.rand(2, 2)

# define a neural network
network = nn.Sequential(
    nn.Linear(2, 100),
    nn.Linear(100, 2)
)

pred = network(x)
loss = f.l1_loss(pred, x)  # calculating the (mean absolute error) loss
loss.backward()

# update weights with their gradients (a plain gradient-descent step)
with torch.no_grad():
    network[0].weight -= 0.1 * network[0].weight.grad
    network[1].weight -= 0.1 * network[1].weight.grad
Note: I didn't put any activation function in the network for the sake of simplicity.
Example of backward() using torch.nn.MSELoss():
import torch
from torch.nn import MSELoss
criterion = MSELoss()
a = torch.tensor([1.,2.], requires_grad=True)
b = a**2
loss = criterion(b, a)
loss.backward()
print(a.grad)
Output:
tensor([0., 6.])
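Finally, for the derivative of a trained network's output with respect to its input (which is what the question ultimately asks for), here is a minimal standalone sketch; the small Sequential model is a stand-in, since the question's trained model isn't available here:

import torch
import torch.nn as nn

# stand-in for a trained network; any trained nn.Module behaves the same way
model = nn.Sequential(nn.Linear(1, 3), nn.ELU(), nn.Linear(3, 1))

x = torch.randn(5, 1, requires_grad=True)  # 5 input samples we differentiate w.r.t.
y = model(x)
y.sum().backward()  # each output depends only on its own input row,
                    # so this yields the per-sample derivative dy/dx
print(x.grad)       # shape (5, 1): one derivative per input sample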

Pytorch - RuntimeError: Trying to backward through the graph a second time while trying to find a sparse feature map

I am trying to find a sparse feature map for a multi-scale CNN. Because I am new to PyTorch, I first attempted to implement the code for only one scale. I defined two optimizers, one for the filters and one for the feature map. When I run the code, I face the error in the title. Any suggestion or help is greatly appreciated. I have included the code below.
The algorithm should work like the Expectation-Maximization algorithm: first I fix the kernels and try to optimize the sparse feature map (z), and then the kernels (weights) are updated. At least that is what I intended to implement; I am not sure the code actually does it. I attached an image file showing the algorithm in case I haven't conveyed it correctly.
import torch
import torch.nn.functional as F
from torchvision import datasets
from torchvision import transforms
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('device is:', device)
# dataset definition
mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transforms.Compose([transforms.ToTensor()]))
mnist_testset = datasets.MNIST(root='./data', train=False, download=True, transform=transforms.Compose([transforms.ToTensor()]))
mnist_trainset.data = mnist_trainset.data[:10000]
mnist_testset.data = mnist_testset.data[:5000]
from torch.utils.data import DataLoader
train_dl = DataLoader(mnist_trainset, batch_size=16, shuffle=True)
test_dl = DataLoader(mnist_testset, batch_size=1024, shuffle=False)
from torch.optim import SGD
from torch.nn import Module
from torch.nn import Conv2d
from tqdm import tqdm
from torch.autograd import Variable
import scipy.signal as sps
class MNIST_ISTA(Module):
    # define model elements
    def __init__(self):
        self.lambda_ = 0.5e-5
        super(MNIST_ISTA, self).__init__()
        self.scale1 = Conv2d(in_channels=1, out_channels=1, kernel_size=3, bias=False)
        self.z = None
        self.alpha = 1

    def ista_(self, img_batch):
        self.z = torch.normal(0, 1, size=(img_batch.shape[0], img_batch.shape[1], img_batch.shape[2], img_batch.shape[3]), requires_grad=True)
        converged = False
        optim = SGD([{'params': self.z, "lr": 1e-20}])
        while not converged:
            z_old = self.z.clone().detach()
            feature_map = self.scale1(img_batch)
            output_image = sps.fftconvolve(feature_map.detach().numpy(), self.scale1.weight.detach().numpy())
            output_image = torch.from_numpy(output_image)
            loss = ((img_batch - pred)**2).sum() + self.alpha*torch.norm(self.z, p=1)
            loss.backward()
            optim.step()
            self.z.grad.zero_()
            self.z.data = self.soft_thresholding_(self.z, self.lambda_)
            converged = torch.norm(self.z - z_old)/torch.norm(z_old) < 1e+8

    def num_flat_features(self, x):
        size = x.size()[1:]
        num = 1
        for s in size:
            num *= s
        return num

    def soft_thresholding_(self, x, alpha):
        with torch.no_grad():
            rtn = F.relu(x - alpha) - F.relu(-x - alpha)
        return rtn.data

    def forward(self, img_batch):
        self.ista_(img_batch)
        pred = self.scale1(img_batch)
        return pred

    def zero_grad(self):
        self.scale1.zero_grad()
ista_model = MNIST_ISTA()
optim = SGD([{'params': ista_model.scale1.weight, "lr": 1e-15}])

for epoch in range(5):
    running_loss = 0
    with torch.no_grad():
        ista_model.scale1.weight.div_(torch.norm(ista_model.scale1.weight, dim=None, keepdim=True))
    for data in tqdm(train_dl, desc='training', total=len(train_dl)):
        img_batch = data[0]
        pred = ista_model(img_batch)
        loss = ((img_batch - pred) ** 2).sum()
        running_loss += loss.item()
        loss.backward()
        optim.step()
        ista_model.zero_grad()
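For context on the error itself (a standalone sketch, not taken from the question's code): "Trying to backward through the graph a second time" is raised when part of a computation graph that was already freed by an earlier backward() call is reused in a later one, which is easy to trigger when two optimizers share intermediate results across iterations:

import torch

w = torch.randn(3, requires_grad=True)
h = (w * 2).sum()   # intermediate result; its graph is freed by the first backward
h.backward()
# h.backward()      # would raise: "Trying to backward through the graph a second time"
# typical fixes: recompute the intermediate result inside each iteration,
# pass retain_graph=True to backward(), or detach() tensors carried across iterations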

How to save tensor to Numpy array in my customized loss function?

I want to check intermediate results when training the model, so I need to save a tensor out of my customized loss.
Here is my code:
from util import *
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
from gen_model import read_cache_data
from numpy import random
from ml_util import *
from catboost import CatBoostRegressor
import warnings
warnings.filterwarnings('ignore')
class myloss(keras.losses.Loss):
    def __init__(self, coef, name='myloss'):
        super().__init__(name=name)
        self.coef = coef

    def call(self, y, y_pred):
        # I want to save y_pred here; the following are the methods I tried, none of them works!
        #a = (tf.print(y_pred))
        #b = (tf.print(y))
        print(type(y_pred))
        #sess = tf.Session()
        sess = tf.compat.v1.Session()
        with sess.as_default():
            print(y_pred.eval())
        #print(y_pred.eval())
        #print(y_pred.numpy())
        return tf.math.reduce_mean(tf.square(y - y_pred), axis=1)
def train_mlp(train_df, valid_df, test_df, fv_cols, res_col):
    callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10)
    model = keras.Sequential([
        layers.Dense(50, input_shape=(len(fv_cols),), activation='relu'),
        layers.Dense(30, activation='relu'),
        layers.Dense(1),
    ])
    model.compile(optimizer=keras.optimizers.SGD(0.1), loss=myloss(0.1))
    model.summary()
    #sess.run(tf.compat.v1.global_variables_initializer())
    model.fit(train_df[fv_cols], np.reshape(train_df[res_col].tolist(), (-1, 1)),
              callbacks=[callback],
              validation_data=(valid_df[fv_cols], valid_df[res_col]),
              epochs=100, batch_size=65536)
    train_pred = (model.predict(train_df[fv_cols])).flatten()
    test_pred = (model.predict(test_df[fv_cols])).flatten()
    d = pd.DataFrame([train_pred, train_df[res_col], test_pred, test_df[res_col]]).T
    d.columns = ['train_pred', 'train_y', 'test_pred', 'test_y']
    print(d)
    print('MLP is R2 =', r2(y_pred=train_pred, y=train_df[res_col]))
    print('MLP os R2 =', r2(y_pred=test_pred, y=test_df[res_col]))
if __name__ == '__main__':
    df = read_cache_data('cache')
    df = df.replace(-np.inf, np.nan).replace(np.inf, np.nan).dropna()
    fv_cols = df.columns[21:-3]
    res_col = 'res_10'
    train, test_df = df.iloc[:int(0.5*len(df))], df.iloc[int(0.5*len(df)):]
    train = train.sample(frac=1, random_state=1).reset_index(drop=True)
    train_df, valid_df = train.iloc[:int(0.7*len(train))], train.iloc[int(0.7*len(train)):]
    train_mlp(train_df, valid_df, test_df, fv_cols, res_col)
I tried some methods, including eval() and session.run(), but none of them worked.
For eval(), the error is:
ValueError: Cannot evaluate tensor using `eval()`: No default session is registered. Use `with sess.as_default()` or pass an ex
For Session, the error is:
InvalidArgumentError: You must feed a value for placeholder tensor 'sequential/dense/MatMul/ReadVariableOp/resource' with dtype reso
[[node sequential/dense/MatMul/ReadVariableOp/resource (defined at lstm.py:58) ]]
Can anyone help with this?
Something like this:
# custom loss function
class myloss(tf.keras.losses.Loss):
    def __init__(self, coef=None, name='myloss'):
        super().__init__(name=name)
        self.coef = coef

    def call(self, y, y_pred):
        return tf.math.reduce_mean(tf.square(y - y_pred), axis=1)

# some dummies
import numpy as np
y_true = np.array([[0., 1.], [0., 0.]])
y_pred = np.array([[1., 1.], [1., 0.]])

# calling the loss
m = myloss()
m(y_true, y_pred).numpy()
# 0.5

# saving the loss value into a numpy file and reloading it
np.save('./loss', m(y_true, y_pred).numpy())
rloss = np.load('loss.npy')
rloss
# 0.5
I just noticed your comment in the call method, where you mention saving y_pred. I'm not fully sure I'm following you properly, but here is an attempt (let me know if this is what you meant):
def call(self, y, y_pred):
    np.save('./loss', y_pred)
    return tf.math.reduce_mean(tf.square(y - y_pred), axis=1)

...

rloss = np.load('loss.npy')
rloss
# array([[1., 1.],
#        [1., 0.]])
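One caveat worth adding (my own note, not from the original answer): inside model.fit the loss runs in graph mode by default, where y_pred is a symbolic tensor with no .numpy() method; that is exactly why the eval()/Session attempts in the question fail. A sketch of one workaround, reusing model and myloss from the question's train_mlp, is to compile with run_eagerly=True, at some cost in speed:

model.compile(optimizer=keras.optimizers.SGD(0.1),
              loss=myloss(0.1),
              run_eagerly=True)  # the loss now executes eagerly,
                                 # so y_pred.numpy() works inside call()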
