I am making a neural network model using pytorch.
I built a simple and shallow 3 layer model by referring to the tutorial.
However, training is random despite using the same model and script.
In other words, it can be seen that the loss does not drop about once out of 4, so it is not trained. I don't know why the model is shallow and unstable. I would be grateful if someone with the same experience as me or who has solved the problem can advise.
enter image description here
It's same script running result.
1 out of 4 times don't trained.
but I used same script and model.
The value of the input tensor is the same in both the case of learning and the case of not learning.
my script is under here. and x input shape is [10000, 1]
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler
import numpy as np
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
import sys
import torch
from sklearn.preprocessing import StandardScaler
import re
os.chdir("...")
F1 = os.listdir(os.getcwd())
print(F1)
df = pd.read_excel('10000.xlsx', sheet_name=1)
Ang_tilt = torch.from_numpy(df['Ang_tilt'].values).unsqueeze(dim=1).float()
x_list = [Ang_tilt]
nb_epochs = 3000
import sys
#from aug_data_processing import *
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from matplotlib import pyplot as plt
########################################
####################model
#print(x_list)
net = Net(x_dim=Ang_tilt.size()[1])
criterion = nn.MSELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=1e-9)
optimizer = torch.optim.SGD(net.parameters(), lr=1e-6, momentum=0.7)
losses = []
################forward
for step in range(nb_epochs + 1):
scaler = StandardScaler()
Ang_tilt = scaler.fit_transform(Ang_tilt)
Ang_tilt = torch.from_numpy(Ang_tilt).float()
#print(x_list[i])
prediction = net(Ang_tilt)
#print(prediction)
loss = criterion(input=prediction, target=y_label)
optimizer.zero_grad()
losses.append(loss.item())
loss.backward()
optimizer.step()
#print(Ang_tilt)
plt.title('3_layer_NN_loss_pre+post')
plt.xlabel('epoch')
plt.ylabel('losses')
plt.plot(range(nb_epochs+1), losses)
plt.show()
torch.save(obj=net, f='aug.pt')
And this is Network
from torch import nn
from torch.nn import functional as F
import torch
import torch
from torch.autograd import Variable
'''
x_dim = dimension을 바로
'''
class Net(nn.Module):
def __init__(self, x_dim):
super(Net, self).__init__()
self.fc1 = nn.Linear(x_dim, 150)
self.fc2 = nn.Linear(150, 100)
self.fc3 = nn.Linear(100, 40)
self.fc4 = nn.Linear(40,1)
self.dropout = nn.Dropout(p=0.5)
torch.nn.init.xavier_uniform_(self.fc1.weight)
torch.nn.init.xavier_uniform_(self.fc2.weight)
torch.nn.init.xavier_uniform_(self.fc3.weight)
torch.nn.init.xavier_uniform_(self.fc4.weight)
def forward(self, x):
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = F.relu(self.fc3(x))
x = F.relu(self.fc4(x))
return x
I found it by printing my parameters.
When not training, weight is so low.
so I changed model structrue than solved.
Related
I'm trying to implement a Bayesian neural network for genomic predictions. My X is a matrix that is scaled and gets normalized so that the values are between 0 and 1. The y is a vector of values that are again normalized so that the values are between 0 and 1.
The network seems to learn as seen here:
But, when I try to make predictions these look strange and seem to behave randomly. While the true values of y are distributed between 0 and 1. The predicted values are between ~ 0.4 - 0.6 and my R2 is negative. The MSE is around 0.02, what seems not to bad, but might be caused by the fact that the range of the predictions is quite narrow.
I'm a bit running out of ideas what could be wrong. Any suggestions are appreciated :).
I've also tried to predict the training data. That is also not working and I'm getting a negative R2.
X has the dimensions (5000,500) and y (5000,)
Increasing the number of hidden layers (up to 3) and units (upt to 128) doesn't change anything.
# Import necessary packages:
import sys
from os.path import join
import warnings
warnings.filterwarnings('ignore')
from IPython import display
import tensorflow as tf
from tensorflow import keras
import tensorflow_probability as tfp
import kerastuner as kt
from keras import backend as K
from keras import activations, initializers
from keras.layers import Layer
import tensorflow_docs as tfdocs
import tensorflow_docs.modeling
import tensorflow_docs.plots
import numpy as np
import numpy.ma as ma
import pandas as pd
import seaborn as sns
import matplotlib.pylab as plt
import time
import tempfile
import math
import statsmodels.api as sm
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import r2_score,mean_absolute_error
from sklearn.linear_model import LinearRegression, BayesianRidge
from sklearn.utils import shuffle
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, scale
from pandas_plink import read_plink
from pandas_plink import read_plink1_bin
from pandas_plink import get_data_folder
tfd = tfp.distributions
# Set random seed and start timer
np.random.seed(12345)
start = time.time()
### functions
def get_optimizer():
return tf.keras.optimizers.SGD()
def get_callbacks():
return [
#tfdocs.modeling.EpochDots(),
tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=1, patience=500, restore_best_weights=True),
]
def normalize_data(df):
return (df - df.min())/(df.max() - df.min())
def compile_model(model, optimizer=None):
if optimizer is None:
optimizer = get_optimizer()
model.compile(optimizer=optimizer,
loss=keras.losses.MeanSquaredError())
return model
def MSE(test,pred):
sqr_err = np.subtract(test,pred)**2
return sqr_err.mean()
# ## load & preprocess data
# ### load genotype data
G = np.genfromtxt("genotype.txt")
G[np.isnan(G)] = 0.
G = normalize_data(G)
print(G.mean())
print(G.var())
# ### load phenotype data
traits = np.genfromtxt("phenotype.txt")
traits = normalize_data(traits)
print(traits.mean())
print(traits.var())
# ### split training and validation set
train_X, test_X, train_y, test_y = train_test_split(G, traits, test_size = 0.2, random_state = 42)
X = np.concatenate((train_X, test_X), axis=0)
y = np.concatenate((train_y, test_y), axis=0)
# ### parameter definition
N = G.shape[0]
p = G.shape[1]
NUM_FOLDS = 5
kfold = KFold(n_splits=NUM_FOLDS, shuffle=True)
INPUT_SHAPE = X.shape[1]
OUTPUT_SHAPE = y.shape[0]
BATCH_SIZE = 32
STEPS_PER_EPOCH = math.ceil((X.shape[0]*(1-1/NUM_FOLDS)*0.8)/BATCH_SIZE)
MAX_EPOCHS = 5000
df = pd.DataFrame(columns = ['method','MSE','R2'])
histories = {}
# Specify the surrogate posterior over `keras.layers.Dense` `kernel` and `bias`.
def posterior_mean_field(kernel_size, bias_size=0, dtype=None):
n = kernel_size + bias_size
c = np.log(np.expm1(1.))
return tf.keras.Sequential([
tfp.layers.VariableLayer(2 * n, dtype=dtype),
tfp.layers.DistributionLambda(lambda t: tfd.Independent(
tfd.Normal(loc=t[..., :n],
scale=1e-5 + tf.nn.softplus(c + t[..., n:])),
reinterpreted_batch_ndims=1)),
])
# Specify the prior over `keras.layers.Dense` `kernel` and `bias`.
def prior_trainable(kernel_size, bias_size=0, dtype=None):
n = kernel_size + bias_size
return tf.keras.Sequential([
tfp.layers.VariableLayer(n, dtype=dtype),
tfp.layers.DistributionLambda(lambda t: tfd.Independent(
tfd.Normal(loc=t, scale=1),
reinterpreted_batch_ndims=1)),
])
def neg_log_likelihood(y_true, y_pred, sigma=1.0):
dist = tfp.distributions.Normal(loc=y_pred, scale=sigma)
return K.sum(-dist.log_prob(y_true))
#neg_log_likelihood = lambda y, p_y: -p_y.log_prob(y)
kl_loss_weight = 1.0 / STEPS_PER_EPOCH
histories = {}
fold_no = 1
for train, test in kfold.split(X, y):
model = tf.keras.Sequential([
keras.layers.InputLayer(input_shape=(INPUT_SHAPE,)),
tfp.layers.DenseVariational(units=32,
make_posterior_fn=posterior_mean_field,
make_prior_fn=prior_trainable,
kl_weight=kl_loss_weight,
activation='sigmoid'),
tfp.layers.DenseVariational(units=1,
make_posterior_fn=posterior_mean_field,
make_prior_fn=prior_trainable,
kl_weight=kl_loss_weight,
activation='sigmoid'),
tfp.layers.DistributionLambda(lambda t: tfd.Normal(loc=t, scale=1)),
])
model.compile(loss=neg_log_likelihood, optimizer=tf.keras.optimizers.Adam(lr=0.0001), metrics=['mse'])
history = model.fit(X[train], y[train],
validation_split = 0.2,
steps_per_epoch = STEPS_PER_EPOCH,
epochs=MAX_EPOCHS,
callbacks=get_callbacks(),
verbose=0)
histories['BNN2_'+str(fold_no)] = history
y_pred_list = []
for i in range(500):
y_pred = model.predict(X[test])
y_pred_list.append(y_pred)
y_preds = np.concatenate(y_pred_list, axis=1)
y_mean = np.mean(y_preds, axis=1)
m_err = MSE(y[test],y_mean)
r2_acc = r2_score(y[test],y_mean)
df = df.append({'MSE':m_err, 'R2':r2_acc, 'method':'BNN2'}, ignore_index=True)
fold_no = fold_no + 1
df.to_csv("results.csv")
I'm working with some code that classifies the infamous dog vs cat image classification using a ResNet-18 model and I'd like to extend it to be able to classify for greater than two image categories (like dog vs cat vs hamster vs ....). In particular I've got 5 categories. I'm new at transfer learning and I'm not sure what I have to change in my code to make this work.
import torch
import numpy as np
import torch.nn.functional as F
from torch.nn import Linear
from torch.utils.data import DataLoader, random_split
from torch.optim import Adam
from torchvision.transforms import Compose, Resize, ToTensor
from torchvision.datasets import ImageFolder
from torchvision.models import resnet18
from matplotlib import pyplot as plt
import random
transform = Compose([Resize((128,128)), ToTensor()])
ds = ImageFolder("*Image_Folder*", transform=transform)
ds_train, ds_val = random_split(ds, [3250, 1073])
dl_train = DataLoader(ds_train, batch_size= 32, shuffle=True)
dl_val = DataLoader(ds_val, batch_size= len(ds_val), shuffle= True)
model = resnet18(pretrained=True)
model.requires_grad_(False)
model.fc = Linear(model.fc.in_features, 5)
X_val, y_val = next(dl_val.__iter__())
opt = torch.optim.Adam(model.parameters(), lr=0.001)
def accuracy(yy, y):
return torch.mean(1.0*(yy == y))
X_val.shape, y_val.shape
y_val = y_val.reshape(-1, 1).float()
for epoch in range(10):
losses = []
accs = []
losses_val = []
accs_val = []
model.train()
for X, y in dl_train:
y = y.reshape(-1, 1).float()
yy = torch.sigmoid(model(X))
loss = F.binary_cross_entropy(yy, y)
losses.append(loss.item())
loss.backward()
opt.step()
opt.zero_grad()
acc = accuracy(torch.round(yy), y)
accs.append(acc.item())
model.eval()
with torch.no_grad():
yy_val = torch.sigmoid(model(X_val))
loss_val = F.binary_cross_entropy(yy_val, y_val)
losses_val.append(loss_val.item())
acc_val = accuracy(torch.round(yy_val), y_val)
accs_val.append(acc_val.item())
print(f"Epoch {epoch}: t-loss = {np.mean(losses):.4f}, t-acc = {np.mean(accs):.4f}, v-loss = {loss_val:.4f}, v-acc = {acc_val:.4f}")
I believe the code is fine up to the for loop, however it could be something I need to add or alter. Currently the line loss = F.binary_cross_entropy(yy, y) is what's giving me an error ValueError: Using a target size (torch.Size([32, 1])) that is different to the input size (torch.Size([32, 5])) is deprecated. Please ensure they have the same size.
This is the data I'm working from: https://www.kaggle.com/alxmamaev/flowers-recognition
Binary Cross Entropy is a loss function designed for binary classification tasks.
In order to convert this model into one capable of 5-class classification, in addition to changing the final layer's width to 5, you need to change the loss function to a multinomial scorer e.g. CrossEntropyLoss().
I'm working on a linear regression problem with Pytorch (y=A*x, where the dimensions of A are 2x2). I wrote the following code. I don't know why the loss doesn't change... Can someone help me ?
Thanks,
Thomas
import torch
import numpy as np
from scipy.integrate import odeint
from matplotlib import pyplot as plt
from torch.autograd import Variable
def EDP(X,t):
X_0=-2*X[0]
X_1=-2*X[1]
grad=np.array([X_0,X_1])
return grad
T=np.arange(0,10,0.1)
X_train=odeint(EDP,[10,20],T)
Y_train=np.zeros_like(X_train)
for i in range(Y_train.shape[0]):
Y_train[i,:]=np.dot(np.array([[2,0],[0,2]]),X_train[i,:])
print(X_train,Y_train)
X_train=torch.Tensor(X_train)
torch.transpose(X_train,0,1)
Y_train=torch.Tensor(Y_train)
print(X_train.shape)
import torch.nn as nn
class LinearRegression(torch.nn.Module):
def __init__(self):
super(LinearRegression, self).__init__()
self.linear = torch.nn.Linear(2,2,bias = False) # bias is default True
def forward(self, x):
y_pred = self.linear(x)
return y_pred
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(our_model.parameters(), lr = 0.0001)
our_model = LinearRegression()
x_train = X_train
y_train = Y_train
#x_train.requires_grad=True
print(x_train.shape)
print(y_train.shape)
ntrain=10
for t in range(ntrain):
y_pred=our_model(x_train)
loss=criterion(y_train,y_pred)
loss.backward()
optimizer.step()
optimizer.zero_grad()
print(t,loss)
print(our_model.linear.weight)
In my laptop it worked ...
since you are running it on just 10 epochs ...and using lr = 0.0001 ,you wont see it in just 10 epochs.
i did this optimizer = torch.optim.SGD(our_model.parameters(), lr = 0.01) (increased lr )which actually decreased the loss in just 10 epochs
I'm new to developing CNNs and currently making a binary image classifier using PyTorch. My dataset was heavily unbalanced, and I've manually augmented my a testing split and training split to be balanced. I have a class 0 (Training set has 6500 images), and class 1 (training set has 5200ish images). When I try to use skorch's fit function, I get a validation accuracy equivalent to the percent of class 1 images in the set, and my prediction function only outputs 1 for all images.
This is the tutorial I adapted my CNN from: https://colab.research.google.com/github/dnouri/skorch/blob/master/notebooks/Transfer_Learning.ipynb#scrollTo=cane7VRWW3dO
Here's my CNN: (it's adapted from a tutorial)
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
import numpy as np
from numpy import array
import pandas as pd
from skorch import NeuralNetClassifier
from skorch.helper import predefined_split
from skorch.callbacks import LRScheduler
from skorch.callbacks import Checkpoint
from skorch.callbacks import Freezer
from PIL import Image
import skorch
import os
import cv2
import glob
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler, Normalizer
#Define transforms (will be same w/o online transforms)
#Manually augmented earlier
train_transforms = transforms.Compose([
#transforms.RandomResizedCrop(224),
#transforms.RandomHorizontalFlip(),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406],
[0.229, 0.224, 0.225])
])
val_transforms = transforms.Compose([
#transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406],
[0.229, 0.224, 0.225])
])
train_ds = datasets.ImageFolder(train_split_path, train_transforms)
val_ds = datasets.ImageFolder(test_aug_split_path, val_transforms)
checkpoint = Checkpoint(
f_params='best_model.pt', monitor='valid_acc_best')
#Using ResNet with some layers for now
class PretrainedModel(nn.Module):
def __init__(self, output_features):
super().__init__()
model = models.resnet152(pretrained=True)
#Don't want to change pretrained weights
for param in model.parameters():
param.requires_grad = False
num_features = model.fc.in_features
fc_layers = nn.Sequential(
nn.Linear(num_features, 4096),
nn.ReLU(inplace=True),
nn.Dropout(p=0.3),
nn.Linear(4096, output_features),
nn.ReLU(inplace=True),
nn.Dropout(p=0.3),
)
model.fc = fc_layers
self.model = model
def forward(self, x):
return self.model(x)
use_cuda = torch.cuda.is_available()
net = NeuralNetClassifier(
module=PretrainedModel,
module__output_features = 2,
criterion=nn.CrossEntropyLoss,
batch_size = 16,
lr=0.0001,
max_epochs=3,
optimizer=optim.Adam,
train_split=predefined_split(val_ds),
callbacks=[checkpoint],
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
)
net.fit(train_ds, y=None)
Here are the results of the fit function:
Epoch 1: Train loss = .46 Valid_acc = .4301 Valid Loss = .6931
Epoch 2: Train loss = .6931 Valid_acc = .4301 Valid Loss = .6931
Epoch 3: Train loss = .6931 Valid_acc = .4301 Valid Loss = .6931
As it turns out for this particular dataset, exactly 43% of my validation images are Class 1 images.
y_pred = net.predict(val_ds) gives me the following:
array([1, 1, 1, ..., 1, 1, 1], dtype=int64)
I guess I have two questions:
1) Is there anything that I have done incorrectly in my initialization of my CNN that would cause this?
2) What would cause this, and is there anything I can do to correct for it?
I have the below code.
I would like to see how the weights and bias changes during training.
Ideally I would like to see it in tensorboard.
Would someone be able to show me how to do this.
from time import time
import numpy as np
import matplotlib.pyplot as plt
import keras
import tensorflow as tf
from keras.callbacks import TensorBoard
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
x = scaler.fit_transform(np.array([[1965.0], [1980.0]])).reshape(-1,1)
y = scaler.fit_transform(np.array([[320.0], [345.0]])).reshape(-1,1)
tensorboard = TensorBoard(log_dir='logs/{}'.format(time()), write_grads=True)
model = keras.Sequential([keras.layers.Dense(1, activation='linear')])
model.compile(optimizer='sgd',
loss="mean_squared_error")
model.fit(x=x, y=y, epochs=1000, callbacks=[tensorboard])
yHat = model.predict(x)
Based on the Keras documentation, all you need to do maybe is just run the command line:
tensorboard --logdir=logs
Notice that the logdir setting is pointing to the root of your log directory.