I am trying to build an LSTM model using Keras. I created a simple sine wave example containing more than 1000 points and want to predict the next point. But the result is not as good as I expected: when I fit the model, the predictions stay between 0 and 1 and do not follow the sine wave. I have tried changing parameters like the number of epochs, the batch size, and the learning rate, but it does not get better.
(image: plot of the model's predictions)
What am I doing wrong?
import joblib
import numpy as np
import matplotlib.pyplot as plt
import copy
import gc
import os
import sys
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras
import tensorflow as tf
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from keras.callbacks import Callback
learning_rate = 0.001
len_train = 30
total_predict = 300
len_test = 400
epoch = 100
batch_size = 32
workers = -1
class Callback_Class(Callback):
    def load_data(self, x_test, y_test):
        self.x_test = x_test
        self.y_test = np.array(y_test)

    def model_predict(self, data_close):
        output_predict = []
        for i in range(total_predict):
            if i == 0:
                data_close_ = data_close.reshape(-1, len_train, 1)
            else:
                data_close_ = np.delete(data_close_, 0)
                data_close_ = np.append(data_close_, pred_close)
                data_close_ = data_close_.reshape(-1, len_train, 1)
            pred_close = model.predict(data_close_)
            pred_close = pred_close.ravel()
            pred_close = np.array(pred_close).reshape(len(pred_close), 1)
            pred_cl = sc.inverse_transform(pred_close)
            output_predict.append(pred_cl)
        output_predict = np.array(output_predict)
        return output_predict

    def on_epoch_end(self, epoch, logs=None):
        if epoch % 20 == 0:
            output_predict = self.model_predict(self.x_test)
            fig, ax = plt.subplots(figsize=(12, 6))
            ax.grid(True)
            plt.title("Model predict")
            plt.plot(output_predict.ravel(), color="red", label='Predict')
            plt.plot(self.y_test.ravel(), color="blue", label='REAL')
            fig.tight_layout()
            plt.legend(loc='lower left')
            plt.savefig(f'Demo_lstm_epoch_{epoch}.png')
            plt.clf()
            plt.close()
def lstm_reg(input_shape=(60, 1), unit=40, clustering_params=None):
    inputs = Input(input_shape)
    lstm1f = Bidirectional(LSTM(units=32, return_sequences=True))(inputs)
    lstm1f = Bidirectional(LSTM(units=32, return_sequences=False))(lstm1f)
    outputs = Dense(units=1, activation='linear')(lstm1f)
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='mean_squared_error', metrics=["accuracy"])
    return model
def create_data_train(data_time_series):
    data_time_series = np.array(data_time_series).ravel()
    X_train = []
    y_train = []
    for i in range(len_train, len(data_time_series)):
        X_train.append(data_time_series[i-len_train:i])
        y_train.append(data_time_series[i])
    X_train, y_train = np.array(X_train), np.array(y_train)
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    return X_train, y_train
x = np.linspace(-20*np.pi, 20*np.pi, 2001)
sin_alpha = np.sin(x).ravel()
sin_alpha_train = np.array(copy.deepcopy(sin_alpha))[:len(sin_alpha)-len_test]
sin_alpha_train = np.array(sin_alpha_train).reshape(len(sin_alpha_train), 1)
sc = MinMaxScaler(feature_range=(0, 1))
sin_alpha_train = sc.fit_transform(sin_alpha_train)
X_train, y_train = create_data_train(sin_alpha_train)
joblib.dump(sc, f'Demo_MinMaxScaler.gz')
sc = joblib.load(f"Demo_MinMaxScaler.gz")
X_test = np.array(copy.deepcopy(sin_alpha))[len(sin_alpha)-len_test:len(sin_alpha)-len_test+len_train]
X_test = np.array(X_test).reshape(len(X_test), 1)
X_test = sc.fit_transform(X_test)
y_test = np.array(copy.deepcopy(sin_alpha))[len(sin_alpha)-len_test+len_train:len(sin_alpha)-len_test+len_train+total_predict]
model = lstm_reg(input_shape=(len_train, 1), unit=int(2*(len_train+len(y_train))/3))
model.summary()
callback_class = Callback_Class()
callback_class.load_data(X_test, y_test)
model.fit(X_train, y_train, epochs=epoch, use_multiprocessing=True, verbose=1, callbacks=[callback_class], workers=workers, batch_size=batch_size)
It seems like you are normalizing both your features and your labels in these lines:
sc = MinMaxScaler(feature_range=(0, 1))
sin_alpha_train = sc.fit_transform(sin_alpha_train)
X_train, y_train = create_data_train(sin_alpha_train)
Try it without scaling your label set. Since your output layer uses a linear activation function, which is correct for a regression problem, the model can handle unscaled labels. As written, the model only learns targets in the range 0 to 1, while your sine wave goes from -1 to 1.
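For illustration, here is a minimal sketch of that change, reusing sin_alpha_train and create_data_train from the question (the exact wiring is an assumption):

# Scale only the input windows; keep the raw sine values (in [-1, 1]) as labels.
sc = MinMaxScaler(feature_range=(0, 1))
sin_alpha_scaled = sc.fit_transform(sin_alpha_train)
X_train, _ = create_data_train(sin_alpha_scaled)   # inputs built from the scaled series
_, y_train = create_data_train(sin_alpha_train)    # labels stay unscaled

With unscaled labels, the sc.inverse_transform step in the prediction callback is no longer needed; the autoregressive loop would instead need sc.transform to bring each prediction back to the input scale before feeding it back in.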
I'm currently trying to visualise the performance of my prediction model by showing the val_mse at every epoch. The code that used to work for model.fit() doesn't work for tuner.search(). Can anyone give me some guidance on this? Thank you.
Previous code:
import pandas as pd
import matplotlib.pyplot as plt

def plot_model(history):
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch

    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel('Mean Absolute Error')
    plt.plot(hist['epoch'], hist['mae'], label='Train Error')
    plt.plot(hist['epoch'], hist['val_mae'], label='Val Error')
    plt.legend()
    plt.ylim([0, 20])

    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel('Mean Square Error')
    plt.plot(hist['epoch'], hist['mse'], label='Train Error')
    plt.plot(hist['epoch'], hist['val_mse'], label='Val Error')
    plt.legend()
    plt.ylim([0, 400])

plot_model(history)
Keras Tuner code:
history = tuner.search(x=normed_train_data,
                       y=y_train,
                       epochs=200,
                       batch_size=64,
                       validation_data=(normed_test_data, y_test),
                       callbacks=[early_stopping])
Before using tuner.search to search for the best model, you need to install and import keras_tuner:
!pip install keras-tuner --upgrade
import keras_tuner as kt
from tensorflow import keras
Then, define the hyperparameters (hp) in the model definition, for instance as below:
def build_model(hp):
    model = keras.Sequential()
    model.add(keras.layers.Dense(
        hp.Choice('units', [8, 16, 32]),  # define the hyperparameter
        activation='relu'))
    model.add(keras.layers.Dense(1, activation='relu'))
    model.compile(loss='mse')
    return model
Initialize the tuner:
tuner = kt.RandomSearch(build_model, objective='val_loss', max_trials=5)
Now start the search and get the best model using tuner.search:
tuner.search(x=normed_train_data,
             y=y_train,
             epochs=200,
             batch_size=64,
             validation_data=(normed_test_data, y_test),
             callbacks=[early_stopping])
best_model = tuner.get_best_models()[0]
You can now use this best_model to train and evaluate on your dataset, and you should see a significant decrease in loss.
Please check this link as a reference for more detail.
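To get back the per-epoch val_mse plot from the original question: tuner.search() itself returns None rather than a History object, so one option (a sketch, assuming the data, callbacks, and plot_model helper from the question, and a model compiled with the metrics that helper expects, e.g. metrics=['mae', 'mse']) is to refit the best model and plot that run:

history = best_model.fit(normed_train_data, y_train,
                         epochs=200, batch_size=64,
                         validation_data=(normed_test_data, y_test),
                         callbacks=[early_stopping])
plot_model(history)  # the plotting helper from the question works unchanged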
I've made a predictive model using LSTM which predicts future prices for raw materials like cotton, fibre, yarn, etc. At the end of the code I used the matplotlib library to plot a graph which displays the original prices, the predicted prices, and the future predicted prices.
(image: graph of future predicted prices by date)
How do I display this graph in the Django framework? I need to deploy this model in a web application using Django, but the tutorials I've seen so far show predictive models that take user input and don't really cover anything related to plots or graphs.
Following is the code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
from datetime import datetime
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard
import os
import glob
from sklearn import preprocessing
# Importing Training Set
dataset_train = pd.read_csv('201222-yarn-market-price-china--034.csv1.csv')
dataset_train.info()

# Select features (columns) to be involved in training and predictions
cols = list(dataset_train)[1:5]

# Extract dates (will be used in visualization)
datelist_train = list(dataset_train.iloc[:, 0])
datelist_train = [dt.datetime.strptime(date, '%m/%d/%Y').date() for date in datelist_train]

print('Training set shape == {}'.format(dataset_train.shape))
print('All timestamps == {}'.format(len(datelist_train)))
print('Features selected: {}'.format(cols))

dataset_train = dataset_train[cols].astype(str)
for i in cols:
    for j in range(0, len(dataset_train)):
        dataset_train[i][j] = dataset_train[i][j].replace(',', '')
dataset_train = dataset_train.astype(float)
# Using multiple features (predictors)
training_set = dataset_train.values
print('Shape of training set == {}.'.format(training_set.shape))

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
training_set_scaled = sc.fit_transform(training_set)

sc_predict = StandardScaler()
sc_predict.fit_transform(training_set[:, 0:1])

# Creating a data structure with 90 timestamps and 1 output
X_train = []
y_train = []

n_future = 60   # Number of days we want to predict into the future
n_past = 90     # Number of past days we want to use to predict the future

for i in range(n_past, len(training_set_scaled) - n_future + 1):
    X_train.append(training_set_scaled[i - n_past:i, 0:dataset_train.shape[1] - 1])
    y_train.append(training_set_scaled[i + n_future - 1:i + n_future, 0])
X_train, y_train = np.array(X_train), np.array(y_train)

print('X_train shape == {}.'.format(X_train.shape))
print('y_train shape == {}.'.format(y_train.shape))
# Import Libraries and packages from Keras
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.optimizers import Adam

# Initializing the Neural Network based on LSTM
model = Sequential()
# Adding 1st LSTM layer
model.add(LSTM(units=64, return_sequences=True, input_shape=(n_past, dataset_train.shape[1] - 1)))
# Adding 2nd LSTM layer
model.add(LSTM(units=10, return_sequences=False))
# Adding Dropout
model.add(Dropout(0.25))
# Output layer
model.add(Dense(units=1, activation='linear'))
# Compiling the Neural Network
model.compile(optimizer=Adam(learning_rate=0.01), loss='mean_squared_error')

es = EarlyStopping(monitor='val_loss', min_delta=1e-10, patience=10, verbose=1)
rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=1)
mcp = ModelCheckpoint(filepath='weights.h5', monitor='val_loss', verbose=1,
                      save_best_only=True, save_weights_only=True)
tb = TensorBoard('logs')

history = model.fit(X_train, y_train, shuffle=True, epochs=30, callbacks=[es, rlr, mcp, tb],
                    validation_split=0.2, verbose=1, batch_size=256)
# Generate list of sequence of days for predictions
datelist_future = pd.date_range(datelist_train[-1], periods=n_future, freq='1d').tolist()
'''
Remember, we have had datelist_train from the beginning.
'''
# Convert Pandas Timestamps to datetime objects (for transformation) --> FUTURE
datelist_future_ = []
for this_timestamp in datelist_future:
    datelist_future_.append(this_timestamp.date())

# Perform predictions
predictions_future = model.predict(X_train[-n_future:])
predictions_train = model.predict(X_train[n_past:])

# Inverse the predictions to original measurements
# ---> Special function: convert <datetime.date> to <Timestamp>
def datetime_to_timestamp(x):
    '''
    x : a given datetime value (datetime.date)
    '''
    return datetime.strptime(x.strftime('%m%d%Y'), '%m%d%Y')

y_pred_future = sc_predict.inverse_transform(predictions_future)
y_pred_train = sc_predict.inverse_transform(predictions_train)

a = dataset_train.iloc[:, 3]
print(a)
PREDICTIONS_FUTURE = pd.DataFrame(y_pred_future, columns=['Cotton Yarn1']).set_index(pd.Series(datelist_future))
PREDICTION_TRAIN = pd.DataFrame(y_pred_train, columns=['Cotton Yarn1']).set_index(pd.Series(datelist_train[2 * n_past + n_future - 1:]))

# Convert <datetime.date> to <Timestamp> for PREDICTION_TRAIN
PREDICTION_TRAIN.index = PREDICTION_TRAIN.index.to_series().apply(datetime_to_timestamp)
print(PREDICTION_TRAIN.head(3))
#plt.rcParams["figure.figsize"] = (20, 3)
#rcParams['figure.figsize'] = 14, 5

# Plot parameters
START_DATE_FOR_PLOTTING = '12/24/2019'

dataset_train = pd.DataFrame(dataset_train, columns=cols)
dataset_train.index = datelist_train
dataset_train.index = pd.to_datetime(dataset_train.index)

plt.plot(PREDICTIONS_FUTURE.index, PREDICTIONS_FUTURE['Cotton Yarn1'], color='r',
         label='Predicted Stock Price')
plt.plot(PREDICTION_TRAIN.loc[START_DATE_FOR_PLOTTING:].index,
         PREDICTION_TRAIN.loc[START_DATE_FOR_PLOTTING:]['Cotton Yarn1'], color='orange',
         label='Training predictions')
plt.plot(dataset_train.loc[START_DATE_FOR_PLOTTING:].index,
         dataset_train.loc[START_DATE_FOR_PLOTTING:]['Cotton Yarn1'], color='b',
         label='Actual Stock Price')

plt.axvline(x=min(PREDICTIONS_FUTURE.index), color='green', linewidth=2, linestyle='--')

plt.grid(which='major', color='#cccccc', alpha=0.5)
plt.legend(shadow=True)
plt.title('Predictions and Actual Stock Prices', family='Arial', fontsize=12)
plt.xlabel('Timeline', family='Arial', fontsize=10)
plt.ylabel('Stock Price Value', family='Arial', fontsize=10)
plt.xticks(rotation=45, fontsize=8)
plt.show()
When training my model on the adult income dataset using minibatches, training is very slow, regardless of whether I use PyTorch's DataLoader or a basic implementation of minibatch training.
Is there a problem with my code, or is there another way to speed up training on the adult income dataset? I want to use one-hot encoding and cross-entropy loss + softmax. Do I have to use a different loss function or remove the softmax layer?
import pandas as pd
from pandas import read_csv
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.model_selection import train_test_split
import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset, TensorDataset
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import warnings
warnings.filterwarnings('ignore')
device = torch.device("cpu")
class Model(nn.Module):
    def __init__(self, input_dim):
        super(Model, self).__init__()
        self.layer1 = nn.Linear(input_dim, 12)
        self.layer2 = nn.Linear(12, 2)

    def forward(self, x):
        x = F.sigmoid(self.layer1(x))
        x = F.softmax(self.layer2(x))  # To check with the loss function
        return x
# load dataset
filename = './datasets/adult-all.csv'
dataframe = read_csv(filename, header=None, na_values='?')
# drop rows with missing
dataframe = dataframe.dropna()
# summarize the class distribution
target = dataframe.values[:, -1]
# split into inputs and outputs
last_ix = len(dataframe.columns) - 1
X_, y = dataframe.drop(last_ix, axis=1), dataframe[last_ix]
# select categorical and numerical features
cat_ix = X_.select_dtypes(include=['object', 'bool']).columns
num_ix = X_.select_dtypes(include=['int64', 'float64']).columns
# label encode the target variable to have the classes 0 and 1
y = LabelEncoder().fit_transform(y)
# one-hot encoding of categorical features
df_cat = pd.get_dummies(X_[cat_ix])
# binning of numerical features
x = X_.drop(columns=cat_ix, axis=1)
est = KBinsDiscretizer(n_bins=3, encode='onehot-dense', strategy='uniform')
df_num = est.fit_transform(x)
X = pd.concat([df_cat.reset_index(drop=True), pd.DataFrame(df_num).reset_index(drop=True)], axis=1)
# split training and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_tr = Variable(torch.tensor(X_train.values, dtype=torch.float))
X_te = Variable(torch.tensor(X_test.values, dtype=torch.float))
y_tr = Variable(torch.tensor(y_train, dtype=torch.long))
y_te = Variable(torch.tensor(y_test, dtype=torch.long))
def binary_cross_entropy_one_hot(input, target):
    return torch.nn.CrossEntropyLoss()(input, target)

def _accuracy(y_pred, y_true):
    classes = torch.argmax(y_pred, dim=1)
    labels = y_true
    accuracy = torch.mean((classes == labels).float())
    return accuracy
model = Model(X.shape[1])
learning_rate = 1e-3
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
epochs = 1000
accuracy = 0.0
minibatch = True
# training loop
train_loss = []
for epoch in range(epochs):
    if minibatch:
        batch_size = 128  # or whatever
        permutation = torch.randperm(X_tr.size()[0])
        for i in range(0, X_tr.size()[0], batch_size):
            optimizer.zero_grad()
            indices = permutation[i:i + batch_size]
            batch_x, batch_y = X_tr[indices], y_tr[indices]
            # in case you wanted a semi-full example
            outputs = model.forward(batch_x)
            loss = binary_cross_entropy_one_hot(outputs, batch_y)
            loss.backward()
            optimizer.step()
        if epoch % 100 == 0:
            print(f'epoch: {epoch:2} loss: {loss:10.8f}')
        # train_ds = TensorDataset(X_tr, y_tr)
        # train_dl = DataLoader(train_ds, batch_size=256, shuffle=True)
        # batch_loss = 0.0
        # batch_accuracy = 0.0
        # for nb, (x_batch, y_batch) in enumerate(train_dl):  # manually set number of batches?
        #     optimizer.zero_grad()
        #     y_pred_train = model(x_batch)
        #     loss = binary_cross_entropy_one_hot(y_pred_train, y_batch)
        #     loss.backward()
        #     optimizer.step()
        #     batch_loss += loss.item()
        #     batch_accuracy += _accuracy(y_pred_train, y_batch)
        # train_loss.append(batch_loss / (nb + 1))
        # accuracy = batch_accuracy / (nb + 1)
        # if epoch % 100 == 0:
        #     print(f'epoch: {epoch:2} loss: {train_loss[epoch]:10.8f}')
    else:
        optimizer.zero_grad()
        y_pred = model(X_tr)
        # computing the loss function
        loss = binary_cross_entropy_one_hot(y_pred, y_tr)
        if epoch % 100 == 0:
            print(f'epoch: {epoch:2} loss: {loss.item():10.8f}')
        loss.backward()
        optimizer.step()
        accuracy = _accuracy(y_pred, y_tr)
# evaluation on test data
with torch.no_grad():
    model.eval()
    y_pred = model(X_te)
    test_loss = binary_cross_entropy_one_hot(y_pred, y_te)
    test_acc = _accuracy(y_pred, y_te)
print("Loss on test data: {:.4}".format(test_loss))
print("Accuracy on test data: {:.4}".format(test_acc))
Time would depend on your input_dim, the size of your dataset, and the number of updates per epoch (i.e. dataset size // batch size). From what you've shared, I'm not exactly sure what the issue is or whether there actually is a bottleneck. However, here are a couple of things I would point out, which might help you (in no particular order):
There's no need to wrap your data in torch.autograd.Variable. It has been deprecated and is no longer needed; Autograd automatically supports torch.tensors with requires_grad set to True.
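For example, the tensors from the question can be built directly; a sketch of the same lines without Variable:

X_tr = torch.tensor(X_train.values, dtype=torch.float)
X_te = torch.tensor(X_test.values, dtype=torch.float)
y_tr = torch.tensor(y_train, dtype=torch.long)
y_te = torch.tensor(y_test, dtype=torch.long)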
If you are using torch.nn.CrossEntropyLoss, you shouldn't apply F.softmax to your model's output, because CrossEntropyLoss combines nn.LogSoftmax() and nn.NLLLoss(). There is also no need to initialize the module each time you want to call it:
criterion = torch.nn.CrossEntropyLoss()

def binary_cross_entropy_one_hot(input, target):
    return criterion(input, target)
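Putting both points together, here is a minimal sketch of a forward pass that returns raw logits (one possible rewrite, not the only option):

class Model(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.layer1 = nn.Linear(input_dim, 12)
        self.layer2 = nn.Linear(12, 2)

    def forward(self, x):
        x = torch.sigmoid(self.layer1(x))
        return self.layer2(x)  # raw logits; CrossEntropyLoss applies log-softmax internally

At inference time you can still recover probabilities with F.softmax(logits, dim=1) if you need them.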
I see you are redefining your data loader on each epoch. Is that what you really want? If not, you can just define it outside the training loop:
train_ds = TensorDataset(X_tr, y_tr)
train_dl = DataLoader(train_ds, batch_size=256, shuffle=True)

for epoch in range(epochs):
    for x, y in train_dl:
        # ...
I would call .item() on your accuracy (when calling _accuracy) so you keep a plain Python number instead of a tensor, which avoids keeping it attached to the computation graph and lets the memory be released when it is ready.
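In the commented-out loop from the question, that would look like:

batch_accuracy += _accuracy(y_pred_train, y_batch).item()  # a plain Python float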
Following the PyTorch transfer learning tutorial, I am interested in reporting only the train and test accuracy as well as a confusion matrix (say, using sklearn's confusion_matrix). How can I do that? The current tutorial only reports the train/val accuracy, and I am having a hard time figuring out how to incorporate the sklearn confusion_matrix code there. Link to the original tutorial: https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html
%matplotlib inline
from __future__ import print_function, division
# Author: Sasank Chilamkurthy
from graphviz import Digraph
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

plt.ion()
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

data_dir = "images"
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                          data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                              shuffle=True, num_workers=4)
               for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def imshow(inp, title=None):
    """Imshow for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated

# Get a batch of training data
inputs, classes = next(iter(dataloaders['train']))
# Make a grid from batch
out = torchvision.utils.make_grid(inputs)
imshow(out, title=[class_names[x] for x in classes])
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
def visualize_model(model, num_images=6):
    was_training = model.training
    model.eval()
    images_so_far = 0
    fig = plt.figure()

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloaders['val']):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            for j in range(inputs.size()[0]):
                images_so_far += 1
                ax = plt.subplot(num_images//2, 2, images_so_far)
                ax.axis('off')
                ax.set_title('predicted: {}'.format(class_names[preds[j]]))
                imshow(inputs.cpu().data[j])

                if images_so_far == num_images:
                    model.train(mode=was_training)
                    return
        model.train(mode=was_training)
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 9)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
num_epochs=25)
visualize_model(model_ft)
Answer given by ptrblck of PyTorch community. Thanks a lot!
nb_classes = 9

confusion_matrix = torch.zeros(nb_classes, nb_classes)
with torch.no_grad():
    for i, (inputs, classes) in enumerate(dataloaders['val']):
        inputs = inputs.to(device)
        classes = classes.to(device)
        outputs = model_ft(inputs)
        _, preds = torch.max(outputs, 1)
        for t, p in zip(classes.view(-1), preds.view(-1)):
            confusion_matrix[t.long(), p.long()] += 1

print(confusion_matrix)
To get the per-class accuracy:
print(confusion_matrix.diag() / confusion_matrix.sum(1))
Here is a slightly modified (direct) approach using sklearn's confusion_matrix:
from sklearn.metrics import confusion_matrix

nb_classes = 9

# Initialize the prediction and label lists (tensors)
predlist = torch.zeros(0, dtype=torch.long, device='cpu')
lbllist = torch.zeros(0, dtype=torch.long, device='cpu')

with torch.no_grad():
    for i, (inputs, classes) in enumerate(dataloaders['val']):
        inputs = inputs.to(device)
        classes = classes.to(device)
        outputs = model_ft(inputs)
        _, preds = torch.max(outputs, 1)

        # Append batch prediction results
        predlist = torch.cat([predlist, preds.view(-1).cpu()])
        lbllist = torch.cat([lbllist, classes.view(-1).cpu()])

# Confusion matrix
conf_mat = confusion_matrix(lbllist.numpy(), predlist.numpy())
print(conf_mat)

# Per-class accuracy
class_accuracy = 100 * conf_mat.diagonal() / conf_mat.sum(1)
print(class_accuracy)
Following the answers above, here is an answer with some visualization:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

nb_classes = 9
confusion_matrix = np.zeros((nb_classes, nb_classes))

with torch.no_grad():
    for i, (inputs, classes) in enumerate(test_loader):
        inputs = inputs.to(DEVICE)
        classes = classes.to(DEVICE)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        for t, p in zip(classes.view(-1), preds.view(-1)):
            confusion_matrix[t.long(), p.long()] += 1

plt.figure(figsize=(15, 10))

class_names = list(label2class.values())  # label2class: your mapping from label index to class name
df_cm = pd.DataFrame(confusion_matrix, index=class_names, columns=class_names).astype(int)
heatmap = sns.heatmap(df_cm, annot=True, fmt="d")

heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=0, ha='right', fontsize=15)
heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(), rotation=45, ha='right', fontsize=15)
plt.ylabel('True label')
plt.xlabel('Predicted label')
Another simple way to get accuracy is to use sklearn's accuracy_score.
Here's an example:

import numpy as np
from sklearn.metrics import accuracy_score

y_pred = y_pred.data.numpy()
accuracy = accuracy_score(labels, np.argmax(y_pred, axis=1))

First you need to get the data out of the variable: y_pred holds the predictions from your model, and labels are of course your ground-truth labels.
np.argmax returns the index of the largest value in each row. We want that index because it corresponds to the highest-probability class when using softmax for multi-class classification. accuracy_score then returns the fraction of matches between the labels and y_pred.
I used the following to convert the torch tensors to an int defining the predicted class (torch.argmax gives the class index, whereas torch.max would give the value itself):

x = [torch.argmax(tensor).item() for tensor in x_data]
y = [torch.argmax(tensor).item() for tensor in y_data]
I hope this helps! I'm still a noob, so please be gentle...