How can I change threshold value in the evaluation of deep model? - python

import pandas as pd
import tensorflow as tf
import tempfile
CSV_COLUMNS = [ ]
train_file = '/home/nick/
test_file = '/home/nick/
def input_fn(data_file, num_epochs, shuffle):
    """Input builder function."""
    df_data = pd.read_csv(
        tf.gfile.Open(data_file),
        names=CSV_COLUMNS,
        skipinitialspace=True,
        engine="python",
        skiprows=1)
    # remove NaN elements
    df_data = df_data.dropna(how="any", axis=0)
    labels = df_data["NPK"].apply(lambda x: "<10" in x).astype(int)
    return tf.estimator.inputs.pandas_input_fn(
        x=df_data,
        y=labels,
        batch_size=100,
        num_epochs=num_epochs,
        shuffle=shuffle,
        num_threads=5)
DA = tf.feature_column.categorical_column_with_vocabulary_list( )
LO = tf.contrib.layers.sparse_column_with_hash_bucket( )
deep_columns = [tf.feature_column.indicator_column(DA), tf.feature_column.indicator_column(PD)]
model_dir = tempfile.mkdtemp()
m = tf.contrib.learn.DNNClassifier(
    feature_columns=deep_columns,
    hidden_units=[1024, 512, 256],
    optimizer=tf.train.ProximalAdagradOptimizer(
        learning_rate=0.1,
        l1_regularization_strength=0.001
    ))
# Set num_epochs to None to get an infinite stream of data.
m.fit(
    input_fn=input_fn(train_file, num_epochs=None, shuffle=True),
    steps=20000)
# Set steps to None to run evaluation until all data is consumed.
results = m.evaluate(
    input_fn=input_fn(test_file, num_epochs=1, shuffle=False),
    steps=None)
print("model directory = %s" % model_dir)
# The results include accuracy, precision, AUC and other metrics. How can I choose among them?
for key in sorted(results):
    print("%s: %s" % (key, results[key]))
I'd like to know how I can change the threshold value used in the evaluation of the deep model. This is the code; if you run it you can see that this value is 0.5. I'd like to vary it between 0 and 1 to improve the model.
I hope you can help me. Thank you.
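One way to explore different operating points is to pull per-example probabilities out of the model and apply your own cutoff, instead of relying on the 0.5 default baked into the evaluation metrics. Below is a minimal sketch, assuming you can obtain an array `probs` of positive-class probabilities for the test set (for example from the estimator's prediction output) and the corresponding `y_true` labels; the threshold sweep itself is plain NumPy/scikit-learn and is not specific to this estimator:

import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score

# probs: 1-D array of P(label == 1) for the test set (obtained from the model, hypothetical here)
# y_true: 1-D array of the true 0/1 labels for the same examples
def sweep_thresholds(probs, y_true, thresholds=np.linspace(0.0, 1.0, 21)):
    results = []
    for t in thresholds:
        preds = (probs >= t).astype(int)  # apply the custom cutoff
        results.append({
            "threshold": t,
            "precision": precision_score(y_true, preds, zero_division=0),
            "recall": recall_score(y_true, preds, zero_division=0),
            "f1": f1_score(y_true, preds, zero_division=0),
        })
    return results

# For example, pick the threshold with the best F1:
# best = max(sweep_thresholds(probs, y_true), key=lambda r: r["f1"])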

Related

How to fix the ValueError 'too many values to unpack (expected 2)' that occurs when training on the CIFAR10 dataset using the Torch framework

I am trying to train on the CIFAR10 dataset in the Torch framework. First I download the dataset and load it with the first two functions below. Then I train using PyTorch. Eventually, I receive this error; I would appreciate help fixing it. My code is long, so I only include a summary of the functions used in training.
too many values to unpack (expected 2)
def load_cifar10_batch(filename):
    """ Load a single batch from CIFAR10 """
    with open(filename, 'rb') as f:
        datadict = pickle.load(f, encoding='bytes')
    X = datadict[b'data']
    Y = datadict[b'labels']
    X = X.reshape(10000, 3, 32, 32).transpose(0, 2, 3, 1).astype('float')
    Y = np.array(Y)
    return X, Y
def load_cifar10(dir):
    """ Load all batches of CIFAR10 """
    # load train batch file
    xs = []
    ys = []
    for i in range(1, 6):
        filename = os.path.join(dir, 'data_batch_%d' % i)
        X, Y = load_cifar10_batch(filename)
        xs.append(X)
        ys.append(Y)
    Xtr = np.concatenate(xs)
    Ytr = np.concatenate(ys)
    del X, Y
    # load test batch
    Xte, Yte = load_cifar10_batch(os.path.join(dir, 'test_batch'))
    return Xtr, Ytr, Xte, Yte
X_train, y_train, X_test, y_test = load_cifar10('cifar-10-batches-py')
'''we use just the test set, because the full train set is too big a file to train on '''
from torch.utils.data import random_split
val_size = 3000
train_size = len(X_test) - val_size
train_ds, val_ds = random_split(X_test, [train_size, val_size])
len(train_ds), len(val_ds)
'''loading data '''
from torch.utils.data.dataloader import DataLoader
batch_size=16
train_dl = DataLoader(train_ds, batch_size, shuffle=True, num_workers=4, pin_memory=True)
val_dl = DataLoader(val_ds, batch_size, num_workers=4, pin_memory=True)
'''our model '''
class Cifar10CnnModel(ImageClassificationBase):
    def __init__(self):
        ...  # network definition omitted in this summary
    def forward(self, xb):
        return self.network(xb)
'''ImageClassificationBase'''
class ImageClassificationBase(nn.Module):
    def training_step(self, batch):
        images, labels = batch
        out = self(images)                   # Generate predictions
        loss = F.cross_entropy(out, labels)  # Calculate loss
        accu = accuracy(out, labels)
        return loss, accu
def fit(model, train_loader, val_loader, epochs=2, learning_rate=0.001):
    best_valid = None
    history = []
    optimizer = torch.optim.Adam(model.parameters(), learning_rate, weight_decay=0.0005)
    for epoch in range(epochs):
        # Training Phase
        model.train()
        train_losses = []
        train_accuracy = []
        for batch in tqdm(train_loader):
            loss, accu = model.training_step(batch)
            train_losses.append(loss)
            train_accuracy.append(accu)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        result['train_accuracy'] = torch.stack(train_accuracy).mean().item()
        model.epoch_end(epoch, result)
        if best_valid is None or best_valid < result['Accuracy']:
            best_valid = result['Accuracy']
            torch.save(model.state_dict(), 'cifar10-cnn.pth')
        history.append(result)
    return history
'''But the call to this function'''
''' train dataset '''
history = fit(model, train_dl, val_dl)
'''gives this error'''
0%| | 0/438 [00:31<?, ?it/s]
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [44], in <cell line: 1>()
----> 1 history = fit(model, train_dl, val_dl)
Input In [43], in fit(model, train_loader, val_loader, epochs, learning_rate)
9 train_accuracy = []
10 for batch in tqdm(train_loader):
---> 11 loss,accu = model.training_step(batch)
12 train_losses.append(loss)
13 train_accuracy.append(accu)
Input In [27], in ImageClassificationBase.training_step(self, batch)
7 def training_step(self, batch):
----> 8 images, labels = batch
9 out = self(images) # Generate predictions
10 loss = F.cross_entropy(out, labels) # Calculate loss
ValueError: too many values to unpack (expected 2)
You perform the split on X_test only, losing the labels this way.
Try something like
dataset = torch.utils.data.TensorDataset(torch.from_numpy(X_test), torch.from_numpy(y_test))
train_ds, val_ds = random_split(dataset, [train_size, val_size])
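For completeness, here is a minimal sketch of how the fixed pipeline could look. The permute/float/long conversions are assumptions about what Conv2d layers and cross_entropy typically expect, since load_cifar10 returns X in HWC float format and Y as an int array:

import torch
from torch.utils.data import TensorDataset, DataLoader, random_split

# X_test: (N, 32, 32, 3) float array, y_test: (N,) int array from load_cifar10
images = torch.from_numpy(X_test).permute(0, 3, 1, 2).float()  # to NCHW for Conv2d
labels = torch.from_numpy(y_test).long()                       # int64 labels for cross_entropy

dataset = TensorDataset(images, labels)
val_size = 3000
train_size = len(dataset) - val_size
train_ds, val_ds = random_split(dataset, [train_size, val_size])

train_dl = DataLoader(train_ds, batch_size=16, shuffle=True, num_workers=4, pin_memory=True)
# Each batch now unpacks cleanly into (images, labels) inside training_step.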

Tensorflow Recommender - Saving large model with ScaNN index - memory bottleneck

I have a relatively large TF retrieval model using the TFRS library. It uses a ScaNN layer for indexing the recommendations. I am having a system host memory issue when I try to save this model via the tf.saved_model.save() method. I am running the official TF 2.9.1 Docker Container with TFRS on a VM in the cloud. I have 28 GB of memory to try to save the model.
Here is the quickstart example:
Basically we create the first embedding
user_model = tf.keras.Sequential([
tf.keras.layers.StringLookup(
vocabulary=unique_user_ids, mask_token=None),
# We add an additional embedding to account for unknown tokens.
tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
])
Then create the model
class MovielensModel(tfrs.Model):
    def __init__(self, user_model, movie_model):
        super().__init__()
        self.movie_model: tf.keras.Model = movie_model
        self.user_model: tf.keras.Model = user_model
        self.task: tf.keras.layers.Layer = task
    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        # We pick out the user features and pass them into the user model.
        user_embeddings = self.user_model(features["user_id"])
        # And pick out the movie features and pass them into the movie model,
        # getting embeddings back.
        positive_movie_embeddings = self.movie_model(features["movie_title"])
        # The task computes the loss and the metrics.
        return self.task(user_embeddings, positive_movie_embeddings)
Next we create the ScaNN indexing layer
scann_index = tfrs.layers.factorized_top_k.ScaNN(model.user_model)
scann_index.index_from_dataset(
tf.data.Dataset.zip((movies.batch(100), movies.batch(100).map(model.movie_model)))
)
# Get recommendations.
_, titles = scann_index(tf.constant(["42"]))
print(f"Recommendations for user 42: {titles[0, :3]}")
Finally the model is sent out to be saved
# Export the query model.
with tempfile.TemporaryDirectory() as tmp:
    path = os.path.join(tmp, "model")
    # Save the index.
    tf.saved_model.save(
        index,
        path,
        options=tf.saved_model.SaveOptions(namespace_whitelist=["Scann"])
    )
    # Load it back; can also be done in TensorFlow Serving.
    loaded = tf.saved_model.load(path)
    # Pass a user id in, get top predicted movie titles back.
    scores, titles = loaded(["42"])
    print(f"Recommendations: {titles[0][:3]}")
This is the problem line:
# Save the index.
tf.saved_model.save(
index,
path,
options=tf.saved_model.SaveOptions(namespace_whitelist=["Scann"])
)
I'm not sure if there is a memory leak or what, but when I train my model on 5M+ records... I can watch the host system memory spike to 100% and the process is killed. If I train on a smaller dataset... there is no problem, so I know the code is okay.
Can anyone suggest how to get around the memory bottleneck when saving a large ScaNN retrieval model, so I can eventually load the model back in for inference?
I think you are saving the full TF model after training has completed; you only need the saved model in order to get the trained weights back out of it.
You can try the following code:
sku_ids = df['SKU_ID']
sku_ids_list = sku_ids.to_list()
q = embedding(sku_ids, output_mode='distance_matrix')
dist_mat = tf.cast(q, tf.float32)
tree = scann.Scann(n_tables=scann_tables_file_name,
n_clusters_per_table=scann_clusters_file_name,
dimension=embedding_dimensions,
space_type=dist_mat.dtype,
metric_type=tf.float32,
random_seed=seed,
transport_dtype=tf.float32,
symmetrize_query_and_dataset=True,
num_neighbors_per_table=scann_tables_number_of_neighbors)
q = tree.build_index(dist_mat)
p = tree.run(dist_mat)
model = keras.models.Sequential([
scann.Dense(1, use_bias=False, activation='linear', dtype=tf.float32),
keras.layers.Activation('sigmoid')
])
model.compile(
keras.optimizers.Adam(1e-3),
'binary_crossentropy', metrics=[metrics.BinaryAccuracy()])
idx = -1
number_of_epochs = 10
optimizer = keras.optimizers.Adam(1e-3)
optimizer_state = None
random_seed = seed
callbacks = [
keras.callbacks.EarlyStopping(
monitor='binary_accuracy', mode='max',
patience=10, restore_best_weights=True)]
batch_size = 1000
total_records = len(sku_ids)
epochs = number_of_epochs
epochs_completed = 0
while epochs_completed < epochs:
    idx += 1
    if idx * batch_size >= total_records:
        idx = 0
        epochs_completed += 1
        optimizer_state = None
    print("training epoch: {}".format(idx))
    q_ = tree.transform(dist_mat[idx * batch_size : (idx + 1) * batch_size])
    p_ = tree.transform(dist_mat)
    y = p_[:, :, 0]
    print("callbacks: {}".format(callbacks))
    print("model compile: {}".format(model.compile))
    model.fit(q_, y, epochs=1, batch_size=batch_size,
              callbacks=callbacks,
              validation_split=0.2,
              verbose=0,
              shuffle=True,
              initial_epoch=0,
              steps_per_epoch=None,
              validation_steps=None,
              validation_batch_size=None,
              validation_freq=1,
              class_weight=None,
              max_queue_size=10,
              workers=1,
              use_multiprocessing=False)
sku_ids_tensor = tf.constant(sku_ids_list,
shape=[len(sku_ids_list), 1],
dtype=tf.int64)
print("sku_ids_tensor shape: {}".format(sku_ids_tensor.shape))
tree_tensor = tree.transform(dist_mat)
print("tree_tensor shape: {}".format(tree_tensor.shape))
predictions = tf.constant(tf.sigmoid(model.predict(tree_tensor)),
dtype=tf.float32)
print("predictions shape: {}".format(predictions.shape))
recommendations = tf.concat([sku_ids_tensor, predictions], axis=1)
print("recommendations shape: {}".format(recommendations.shape))
retrieval_user_sku_recommendations = []
for u in unique_sku_list:
    print("u: {}".format(u))
    user_skus = sku_ids[sku_ids.isin([u])]
    print("user_skus: {}".format(user_skus))
    user_sku_id = user_skus.index[0]
    print("user_sku_id: {}".format(user_sku_id))
    user_sku_recommendations = recommendations[sku_ids.isin([u])]
    print("user_sku_recommendations: {}".format(user_sku_recommendations))
    retrieval_user_sku_recommendations.append(user_sku_recommendations)
retrieval_skus_df = pd.DataFrame(sku_ids_list, columns=['SKU_ID'])
retrieval_skus_df['SKU_ID'] = retrieval_skus_df['SKU_ID'].astype(int)
retrieval_skus_df.head()
user_sku_recommendations_list = []
for sku in retrieval_skus_df['SKU_ID']:
    for u in unique_sku_list:
        print("sku: {}".format(sku))
        print("u: {}".format(u))
        if sku == u:
            user_skus = sku_ids[sku_ids.isin([sku])]
            user_sku_id = user_skus.index[0]
            user_sku_recommendations = recommendations[sku_ids.isin([sku])]
            user_sku_recommendations_list.append(user_sku_recommendations)
tf.saved_model.save(model, ss_model_dir)

Save images from sagemaker training

I am trying to save images that I configure during training to the output bucket in sagemaker. I've read that all the information that needs to be saved during training goes into the model.tar.gz file. I've tried saving plots using the model_dir and the output_data_dir to no avail. The model itself is saved properly, but the additional information is not being stored with it. I want to reload this additional information (the saved images) during inference but have heard that storing all the information in the model.tar.gz can cause slow inference. I would love some help.
Here is my estimator
from sagemaker.pytorch import PyTorch
estimator = PyTorch(entry_point='XXXXXXXX/AWS/mnist.py',
role=role,
py_version='py3',
framework_version='1.8.0',
instance_count=1,
instance_type='ml.c5.xlarge',
output_path='s3://XXXXX-bucket/',
)
and the code in mnist.py:
import os
import pandas as pd
import torch
import matplotlib.pyplot as plt
import argparse
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
from torchvision.io import read_image
from torch import nn
import matplotlib.pyplot as plt
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )
    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X.to(device))
        loss = loss_fn(pred, y.to(device))
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X.to(device))
            test_loss += loss_fn(pred, y.to(device)).item()
            correct += (pred.argmax(1) == y.to(device)).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
# Initialize the loss function
if __name__=='__main__':
# default to the value in environment variable `SM_MODEL_DIR`. Using args makes the script more portable.
parser = argparse.ArgumentParser()
parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])
parser.add_argument('--output-data-dir', type=str, default=os.environ['SM_OUTPUT_DATA_DIR'])
args, _ = parser.parse_known_args()
training_data = datasets.FashionMNIST(
root="data",
train=True,
download=True,
transform=ToTensor()
)
test_data = datasets.FashionMNIST(
root="data",
train=False,
download=True,
transform=ToTensor()
)
labels_map = {
0: "T-Shirt",
1: "Trouser",
2: "Pullover",
3: "Dress",
4: "Coat",
5: "Sandal",
6: "Shirt",
7: "Sneaker",
8: "Bag",
9: "Ankle Boot",
}
figure = plt.figure(figsize=(8, 8))
cols, rows = 3, 3
for i in range(1, cols * rows + 1):
sample_idx = torch.randint(len(training_data), size=(1,)).item()
img, label = training_data[sample_idx]
figure.add_subplot(rows, cols, i)
plt.title(labels_map[label])
plt.axis("off")
plt.imsave(args.output_data_dir+'plot'+str(i)+'.jpg', img.squeeze(), cmap="gray")
train_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=True)
# Display image and label.
train_features, train_labels = next(iter(train_dataloader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")
img = train_features[0].squeeze()
label = train_labels[0]
plt.imsave(args.output_data_dir+'sample.jpg', img, cmap="gray")
print("Saved img.")
print(f"Label: {label}")
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")
model = NeuralNetwork().to(device)
print(model)
learning_rate = 1e-3
batch_size = 64
epochs = 5
# ... train `model`, then save it to `model_dir`
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
epochs = 1
for t in range(epochs):
print(f"Epoch {t+1}\n-------------------------------")
train_loop(train_dataloader, model, loss_fn, optimizer)
test_loop(test_dataloader, model, loss_fn)
print("Done!")
with open(os.path.join(args.model_dir, 'model.pth'), 'wb') as f:
torch.save(model.state_dict(), f)
plt.plot([1,2,3,4])
plt.ylabel('some numbers')
plt.show()
plt.savefig('test.jpeg')
I suspect there is an issue with string concatenation in plt.imsave: the environment variable SM_OUTPUT_DATA_DIR points by default to /opt/ml/output/data (that's the actual value of args.output_data_dir, since you don't pass this parameter), so the outcome is something like /opt/ml/output/dataplot1.jpg. The same happens if you use model_dir in the same way. I'd rather use something like os.path.join, as you're already doing for the model. Here is a nice explanation of these folders and environment variables in SageMaker.
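A minimal sketch of the suggested fix, building the image paths with os.path.join instead of string concatenation (args, plt, img and i are the names already used in the question's mnist.py script):

import os

# args.output_data_dir comes from SM_OUTPUT_DATA_DIR, e.g. /opt/ml/output/data
out_dir = args.output_data_dir

# Before: plt.imsave(args.output_data_dir + 'plot' + str(i) + '.jpg', ...)
# After:  join directory and filename so a path separator is always inserted
plt.imsave(os.path.join(out_dir, 'plot{}.jpg'.format(i)), img.squeeze(), cmap="gray")
plt.imsave(os.path.join(out_dir, 'sample.jpg'), img, cmap="gray")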

Convolutional Neural Network Model - Why do I get different results on the same image

I'm new to neural networks and I'm trying to train a CNN model on a custom dataset (cat and dog images in a single directory). I guess I'm doing the very usual stuff here, which appears in most tutorials, but just in case I will give my full code here.
First I generate a .csv file to be processed:
import os
import pandas as pd
import torch
device = ("cuda" if torch.cuda.is_available() else "cpu")
train_df = pd.DataFrame(columns=["img_name","label"])
train_df["img_name"] = os.listdir("train/")
for idx, i in enumerate(os.listdir("train/")):
    if "cat" in i:
        train_df["label"][idx] = 0
    if "dog" in i:
        train_df["label"][idx] = 1
train_df.to_csv(r'train_csv.csv', index=False, header=True)
Then I prepare the dataset:
from torch.utils.data import Dataset
import pandas as pd
import os
from PIL import Image
import torch
class CatsAndDogsDataset(Dataset):
    def __init__(self, root_dir, annotation_file, transform=None):
        self.root_dir = root_dir
        self.annotations = pd.read_csv(annotation_file)
        self.transform = transform
    def __len__(self):
        return len(self.annotations)
    def __getitem__(self, index):
        img_id = self.annotations.iloc[index, 0]
        img = Image.open(os.path.join(self.root_dir, img_id)).convert("RGB")
        y_label = torch.tensor(float(self.annotations.iloc[index, 1]))
        if self.transform is not None:
            img = self.transform(img)
        return (img, y_label)
This is my model:
import torch.nn as nn
import torchvision.models as models
class CNN(nn.Module):
    def __init__(self, train_CNN=False, num_classes=1):
        super(CNN, self).__init__()
        self.train_CNN = train_CNN
        self.inception = models.inception_v3(pretrained=True, aux_logits=False)
        self.inception.fc = nn.Linear(self.inception.fc.in_features, num_classes)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.sigmoid = nn.Sigmoid()
    def forward(self, images):
        features = self.inception(images)
        return self.sigmoid(self.dropout(self.relu(features))).squeeze(1)
This is my hyper-params, transformations and dataloaders:
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
num_epochs = 10
learning_rate = 0.00001
train_CNN = False
batch_size = 32
shuffle = True
pin_memory = True
num_workers = 0
transform = transforms.Compose(
[
transforms.Resize((356, 356)),
transforms.RandomCrop((299, 299)),
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]
)
dataset = CatsAndDogsDataset("train","train_csv.csv",transform=transform)
print(len(dataset))
train_set, validation_set = torch.utils.data.random_split(dataset,[162,40])
train_loader = DataLoader(dataset=train_set, shuffle=shuffle, batch_size=batch_size,num_workers=num_workers,pin_memory=pin_memory)
validation_loader = DataLoader(dataset=validation_set, shuffle=shuffle, batch_size=batch_size,num_workers=num_workers, pin_memory=pin_memory)
model = CNN().to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
for name, param in model.inception.named_parameters():
    if "fc.weight" in name or "fc.bias" in name:
        param.requires_grad = True
    else:
        param.requires_grad = train_CNN
and accuracy check:
def check_accuracy(loader, model):
    if loader == train_loader:
        print("Checking accuracy on training data")
    else:
        print("Checking accuracy on validation data")
    num_correct = 0
    num_samples = 0
    model.eval()
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)
            scores = model(x)
            predictions = torch.tensor([1.0 if i >= 0.5 else 0.0 for i in scores]).to(device)
            num_correct += (predictions == y).sum()
            num_samples += predictions.size(0)
    print(
        f"Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}"
    )
    model.train()
    return f"{float(num_correct)/float(num_samples)*100:.2f}"
And this is my training function:
from tqdm import tqdm
def train():
    model.train()
    for epoch in range(num_epochs):
        loop = tqdm(train_loader, total=len(train_loader), leave=True)
        if epoch % 2 == 0:
            loop.set_postfix(val_acc=check_accuracy(validation_loader, model))
        for imgs, labels in loop:
            imgs = imgs.to(device)
            labels = labels.to(device)
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loop.set_description(f"Epoch [{epoch}/{num_epochs}]")
            loop.set_postfix(loss=loss.item())
if __name__ == "__main__":
    train()
0%| | 0/6 [00:00<?, ?it/s]Checking accuracy on validation data
0%| | 0/6 [01:13<?, ?it/s, val_acc=60.00]Got 24 / 40 with accuracy 60.00
Epoch [0/10]: 100%|██████████| 6/6 [06:02<00:00, 60.39s/it, loss=0.693]
Epoch [1/10]: 100%|██████████| 6/6 [04:49<00:00, 48.23s/it, loss=0.693]
...
Epoch [8/10]: 100%|██████████| 6/6 [06:07<00:00, 61.29s/it, loss=0.693]
Epoch [9/10]: 100%|██████████| 6/6 [04:55<00:00, 49.19s/it, loss=0.781]
The model trains fine, but when I try to use it for prediction I get different results each time I run this last piece in my Jupyter notebook:
model.eval()
img = Image.open('train/cat.22.png').convert("RGB")
img_t = transform(img)
batch_t = torch.unsqueeze(img_t, 0)
out = model(batch_t)
print(out)
tensor([0.5276], grad_fn=)
tensor([0.5000], grad_fn=)
tensor([0.5064], grad_fn=)
etc. Each time I get a different result for the same image. Is this normal? Why is this happening?
I don't see you loading your trained model. This means that every time you initialize the CNN module, the inception.fc layer gets initialized with random weights, which is most probably the reason why you are getting different results on each inference.
Edit: You also have a random transform in your transformation pipeline, namely RandomCrop.
According to this answer on the use of model.eval(), I believe you might also want to ensure that the lower half of the code cell is wrapped in a with torch.no_grad(): context, so that gradients are not tracked during inference.
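Putting those suggestions together, here is a minimal sketch of deterministic inference. It assumes the trained weights were saved to a checkpoint file ('catsdogs.pth' is a hypothetical name) and reuses the CNN class and device from the question; the key points are loading the weights, using a non-random transform, eval mode, and no_grad:

import torch
import torchvision.transforms as transforms
from PIL import Image

# Deterministic transform for inference: CenterCrop instead of RandomCrop
eval_transform = transforms.Compose([
    transforms.Resize((356, 356)),
    transforms.CenterCrop((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

model = CNN().to(device)
model.load_state_dict(torch.load("catsdogs.pth", map_location=device))  # hypothetical checkpoint path
model.eval()  # disables dropout

with torch.no_grad():  # no gradient tracking during inference
    img = Image.open('train/cat.22.png').convert("RGB")
    batch = eval_transform(img).unsqueeze(0).to(device)
    out = model(batch)
    print(out)  # the output is now the same on every run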

ResNet object has no attribute 'predict'

I have trained a CNN model in PyTorch to detect skin diseases in 6 different classes. My model came out with an accuracy of 92% and I saved it in a .pth file. I wish to use this model for predictions but I don't know how to do so. If anyone can aid me in the necessary steps, I will be grateful.
I have tried just taking the image input straight from the folder, resizing it, and then running it through the model for predictions. The error I face is a ModuleAttributeError which says there is no attribute named predict. I do not understand where I went wrong; I know this is a simple task for most, but I was hoping for some guidance in this regard. The dataset I used is the Skin Cancer MNIST: HAM10000 dataset from Kaggle, and I trained it on ResNet18. If anyone has any pointers on fine-tuning the model, I would greatly appreciate it.
TLDR: I get an error called ModuleAttributeError that says the 'ResNet' module has no attribute 'predict'.
The image is preprocessed here as follows:
import os, cv2,itertools
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
from tqdm import tqdm
from glob import glob
from PIL import Image
# pytorch libraries
import torch
from torch import optim,nn
from torch.autograd import Variable
from torch.utils.data import DataLoader,Dataset
from torchvision import models,transforms
# sklearn libraries
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
np.random.seed(10)
torch.manual_seed(10)
torch.cuda.manual_seed(10)
print(os.listdir("/content/drive/My Drive/input"))
from google.colab import drive
drive.mount('/content/drive')
"""**Data analysis and preprocessing**"""
data_dir = '/content/drive/My Drive/input'
all_image_path = glob(os.path.join(data_dir, '*', '*.jpg'))
imageid_path_dict = {os.path.splitext(os.path.basename(x))[0]: x for x in all_image_path}
lesion_type_dict = {
'nv': 'Melanocytic nevi',
'mel': 'Melanoma',
'bkl': 'Benign keratosis-like lesions ',
'bcc': 'Basal cell carcinoma',
'akiec': 'Actinic keratoses',
'vasc': 'Vascular lesions',
'df': 'Dermatofibroma'
}
def compute_img_mean_std(image_paths):
"""
computing the mean and std of three channel on the whole dataset,
first we should normalize the image from 0-255 to 0-1
"""
img_h, img_w = 224, 224
imgs = []
means, stdevs = [], []
for i in tqdm(range(len(image_paths))):
img = cv2.imread(image_paths[i])
img = cv2.resize(img, (img_h, img_w))
imgs.append(img)
imgs = np.stack(imgs, axis=3)
print(imgs.shape)
imgs = imgs.astype(np.float32) / 255.
for i in range(3):
pixels = imgs[:, :, i, :].ravel() # resize to one row
means.append(np.mean(pixels))
stdevs.append(np.std(pixels))
means.reverse() # BGR --> RGB
stdevs.reverse()
print("normMean = {}".format(means))
print("normStd = {}".format(stdevs))
return means,stdevs
# norm_mean,norm_std = compute_img_mean_std(all_image_path)
norm_mean = (0.763035, 0.54564625, 0.5700399)
norm_std = (0.1409281, 0.15261264, 0.16997051)
df_original = pd.read_csv(os.path.join(data_dir, 'HAM10000_metadata.csv'))
df_original['path'] = df_original['image_id'].map(imageid_path_dict.get)
df_original['cell_type'] = df_original['dx'].map(lesion_type_dict.get)
df_original['cell_type_idx'] = pd.Categorical(df_original['cell_type']).codes
df_original.head()
# this will tell us how many images are associated with each lesion_id
df_undup = df_original.groupby('lesion_id').count()
# now we filter out lesion_id's that have only one image associated with it
df_undup = df_undup[df_undup['image_id'] == 1]
df_undup.reset_index(inplace=True)
df_undup.head()
# here we identify lesion_id's that have duplicate images and those that have only one image.
def get_duplicates(x):
unique_list = list(df_undup['lesion_id'])
if x in unique_list:
return 'unduplicated'
else:
return 'duplicated'
# create a new colum that is a copy of the lesion_id column
df_original['duplicates'] = df_original['lesion_id']
# apply the function to this new column
df_original['duplicates'] = df_original['duplicates'].apply(get_duplicates)
df_original.head()
df_original['duplicates'].value_counts()
# now we filter out images that don't have duplicates
df_undup = df_original[df_original['duplicates'] == 'unduplicated']
df_undup.shape
# now we create a val set using df because we are sure that none of these images have augmented duplicates in the train set
y = df_undup['cell_type_idx']
_, df_val = train_test_split(df_undup, test_size=0.2, random_state=101, stratify=y)
df_val.shape
df_val['cell_type_idx'].value_counts()
# This set will be df_original excluding all rows that are in the val set
# This function identifies if an image is part of the train or val set.
def get_val_rows(x):
# create a list of all the lesion_id's in the val set
val_list = list(df_val['image_id'])
if str(x) in val_list:
return 'val'
else:
return 'train'
# identify train and val rows
# create a new colum that is a copy of the image_id column
df_original['train_or_val'] = df_original['image_id']
# apply the function to this new column
df_original['train_or_val'] = df_original['train_or_val'].apply(get_val_rows)
# filter out train rows
df_train = df_original[df_original['train_or_val'] == 'train']
print(len(df_train))
print(len(df_val))
df_train['cell_type_idx'].value_counts()
df_val['cell_type'].value_counts()
# Copy fewer class to balance the number of 7 classes
data_aug_rate = [15,10,5,50,0,40,5]
for i in range(7):
if data_aug_rate[i]:
df_train=df_train.append([df_train.loc[df_train['cell_type_idx'] == i,:]]*(data_aug_rate[i]-1), ignore_index=True)
df_train['cell_type'].value_counts()
# # We can split the test set again in a validation set and a true test set:
# df_val, df_test = train_test_split(df_val, test_size=0.5)
df_train = df_train.reset_index()
df_val = df_val.reset_index()
# df_test = df_test.reset_index()
Here is where I build the model:
# feature_extract is a boolean that defines if we are finetuning or feature extracting.
# If feature_extract = False, the model is finetuned and all model parameters are updated.
# If feature_extract = True, only the last layer parameters are updated, the others remain fixed.
def set_parameter_requires_grad(model, feature_extracting):
if feature_extracting:
for param in model.parameters():
param.requires_grad = False
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
# Initialize these variables which will be set in this if statement. Each of these
# variables is model specific.
model_ft = None
input_size = 0
if model_name == "resnet":
""" Resnet18, resnet34, resnet50, resnet101
"""
model_ft = models.resnet18(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, num_classes)
input_size = 224
elif model_name == "vgg":
""" VGG11_bn
"""
model_ft = models.vgg11_bn(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.classifier[6].in_features
model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
input_size = 224
elif model_name == "densenet":
""" Densenet121
"""
model_ft = models.densenet121(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
num_ftrs = model_ft.classifier.in_features
model_ft.classifier = nn.Linear(num_ftrs, num_classes)
input_size = 224
elif model_name == "inception":
""" Inception v3
"""
model_ft = models.inception_v3(pretrained=use_pretrained)
set_parameter_requires_grad(model_ft, feature_extract)
# Handle the auxilary net
num_ftrs = model_ft.AuxLogits.fc.in_features
model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
# Handle the primary net
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs,num_classes)
input_size = 299
else:
print("Invalid model name, exiting...")
exit()
return model_ft, input_size
# resnet,vgg,densenet,inception
model_name = 'resnet'
num_classes = 7
feature_extract = False
# Initialize the model for this run
model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True)
# Define the device:
device = torch.device('cuda:0')
# Put the model on the device:
model = model_ft.to(device)
# norm_mean = (0.49139968, 0.48215827, 0.44653124)
# norm_std = (0.24703233, 0.24348505, 0.26158768)
# define the transformation of the train images.
train_transform = transforms.Compose([transforms.Resize((input_size,input_size)),transforms.RandomHorizontalFlip(),
transforms.RandomVerticalFlip(),transforms.RandomRotation(20),
transforms.ColorJitter(brightness=0.1, contrast=0.1, hue=0.1),
transforms.ToTensor(), transforms.Normalize(norm_mean, norm_std)])
# define the transformation of the val images.
val_transform = transforms.Compose([transforms.Resize((input_size,input_size)), transforms.ToTensor(),
transforms.Normalize(norm_mean, norm_std)])
# Define a pytorch dataloader for this dataset
class HAM10000(Dataset):
def __init__(self, df, transform=None):
self.df = df
self.transform = transform
def __len__(self):
return len(self.df)
def __getitem__(self, index):
# Load data and get label
X = Image.open(self.df['path'][index])
y = torch.tensor(int(self.df['cell_type_idx'][index]))
if self.transform:
X = self.transform(X)
return X, y
# Define the training set using the table train_df and using our defined transitions (train_transform)
training_set = HAM10000(df_train, transform=train_transform)
train_loader = DataLoader(training_set, batch_size=64, shuffle=True, num_workers=4)
# Same for the validation set:
validation_set = HAM10000(df_val, transform=train_transform)
val_loader = DataLoader(validation_set, batch_size=64, shuffle=False, num_workers=4)
# we use Adam optimizer, use cross entropy loss as our loss function
optimizer = optim.Adam(model.parameters(), lr=1e-5)
criterion = nn.CrossEntropyLoss().to(device)
Lastly, is the training process with a prediction function:
# this function is used during training process, to calculation the loss and accuracy
class AverageMeter(object):
def __init__(self):
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
def update(self, val, n=1):
self.val = val
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
total_loss_train, total_acc_train = [],[]
def train(train_loader, model, criterion, optimizer, epoch):
model.train()
train_loss = AverageMeter()
train_acc = AverageMeter()
curr_iter = (epoch - 1) * len(train_loader)
for i, data in enumerate(train_loader):
images, labels = data
N = images.size(0)
# print('image shape:',images.size(0), 'label shape',labels.size(0))
images = Variable(images).to(device)
labels = Variable(labels).to(device)
optimizer.zero_grad()
outputs = model(images)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
prediction = outputs.max(1, keepdim=True)[1]
train_acc.update(prediction.eq(labels.view_as(prediction)).sum().item()/N)
train_loss.update(loss.item())
curr_iter += 1
if (i + 1) % 100 == 0:
print('[epoch %d], [iter %d / %d], [train loss %.5f], [train acc %.5f]' % (
epoch, i + 1, len(train_loader), train_loss.avg, train_acc.avg))
total_loss_train.append(train_loss.avg)
total_acc_train.append(train_acc.avg)
return train_loss.avg, train_acc.avg
def validate(val_loader, model, criterion, optimizer, epoch):
model.eval()
val_loss = AverageMeter()
val_acc = AverageMeter()
with torch.no_grad():
for i, data in enumerate(val_loader):
images, labels = data
N = images.size(0)
images = Variable(images).to(device)
labels = Variable(labels).to(device)
outputs = model(images)
prediction = outputs.max(1, keepdim=True)[1]
val_acc.update(prediction.eq(labels.view_as(prediction)).sum().item()/N)
val_loss.update(criterion(outputs, labels).item())
print('------------------------------------------------------------')
print('[epoch %d], [val loss %.5f], [val acc %.5f]' % (epoch, val_loss.avg, val_acc.avg))
print('------------------------------------------------------------')
return val_loss.avg, val_acc.avg
import cv2
from PIL import Image, ImageOps
import numpy as np
model = model_ft
model.load_state_dict(torch.load("/content/drive/MyDrive/input/trainbest.pth"))
model.eval()
def import_and_predict(image_data, model):
    size = (224, 224)
    image = ImageOps.fit(image_data, size, Image.ANTIALIAS)
    img = np.asarray(image)
    image_reshape = img[np.newaxis, ...]
    prediction = model.predict(img_reshape)
    return prediction
image = Image.open('/content/0365-0596-abd-88-05-0712-gf03.jpg')
# st.image(image, use_column_width = True)
predictions = import_and_predict(image, model)
class_names = ["Melanocytic nevi", "dermatofibroma", "Benign keratosis-like lesions", "Basal cell carcinoma", "Actinic keratoses", "Vascular lesions", "Dermatofibroma"]
string = "It is: " + class_names[np.argmax(predictions)]
print(string)
Here is the error that comes immediately after this is executed.
---------------------------------------------------------------------------
ModuleAttributeError Traceback (most recent call last)
<ipython-input-219-d563271b78c6> in <module>()
32 image = Image.open('/content/0365-0596-abd-88-05-0712-gf03.jpg')
33 # st.image(image, use_column_width = True)
---> 34 predictions = import_and_predict(image, model)
35 class_names = ["Melanocytic nevi", "dermatofibroma", "Benign keratosis-like lesions", "Basal cell carcinoma", "Actinic keratoses", "Vascular lesions", "Dermatofibroma"]
36 string = "It is: " + class_names[np.argmax(predictions)]
1 frames
<ipython-input-219-d563271b78c6> in import_and_predict(image_data, model)
27 img = np.asarray(image)
28 image_reshape = img[np.newaxis,...]
---> 29 prediction = model.predict(img_reshape)
30 return prediction
31
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __getattr__(self, name)
777 return modules[name]
778 raise ModuleAttributeError("'{}' object has no attribute '{}'".format(
--> 779 type(self).__name__, name))
780
781 def __setattr__(self, name: str, value: Union[Tensor, 'Module']) -> None:
ModuleAttributeError: 'ResNet' object has no attribute 'predict'
If anyone can help me fix the issue and get this to work as a classifier for skin diseases, I would be ever so thankful.
nn.Module doesn't have a predict function; just call the object for inference:
prediction = model(img_reshape)
This will call the object's __call__ method, which in turn calls the model's forward method.
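For context, here is a minimal sketch of what the prediction path could look like in plain PyTorch. The model must be fed a normalized tensor batch rather than a raw NumPy array; norm_mean, norm_std, model and device are the names already defined in the question's code, and the resize value matches the ResNet input_size of 224 used there. Map the predicted index back to your class names using the same ordering you trained with:

import torch
from PIL import Image
from torchvision import transforms

# Preprocessing should match what the model was trained with
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

model.eval()
with torch.no_grad():
    image = Image.open('/content/0365-0596-abd-88-05-0712-gf03.jpg').convert("RGB")
    batch = preprocess(image).unsqueeze(0).to(device)  # (1, 3, 224, 224) tensor, not a numpy array
    logits = model(batch)                              # forward pass instead of .predict
    pred_idx = logits.argmax(dim=1).item()
    print("Predicted class index:", pred_idx)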
