I'm trying to run this code in PyCharm; it uses the pneumonia dataset from Kaggle.
import torch
import torchvision
from torchvision import transforms
import torchmetrics
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
def load_file(path):
return np.load(path).astype(np.float32)
train_transforms = transforms.Compose([
transforms.ToTensor(), # Convert numpy array to tensor
transforms.Normalize(0.49, 0.248), # Use mean and std from preprocessing notebook
transforms.RandomAffine( # Data Augmentation
degrees=(-5, 5), translate=(0, 0.05), scale=(0.9, 1.1)),
transforms.RandomResizedCrop((224, 224), scale=(0.35, 1))
])
val_transforms = transforms.Compose([
transforms.ToTensor(), # Convert numpy array to tensor
transforms.Normalize([0.49], [0.248]), # Use mean and std from preprocessing notebook
])
train_dataset = torchvision.datasets.DatasetFolder(
"Processed/train/",
loader=load_file, extensions="npy", transform=train_transforms)
val_dataset = torchvision.datasets.DatasetFolder(
"Processed/val/",
loader=load_file, extensions="npy", transform=val_transforms)
fig, axis = plt.subplots(2, 2, figsize=(9, 9))
for i in range(2):
for j in range(2):
random_index = np.random.randint(0, 20000)
x_ray, label = train_dataset[random_index]
axis[i][j].imshow(x_ray[0], cmap="bone")
axis[i][j].set_title(f"Label:{label}, id:{random_index}")
plt.show()
batch_size = 64  # TODO
num_workers = 8  # TODO
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True, pin_memory=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=False, pin_memory=True)
print(f"There are {len(train_dataset)} train images and {len(val_dataset)} val images")
print(np.unique(train_dataset.targets, return_counts=True), np.unique(val_dataset.targets, return_counts=True))
class PneumoniaModel(pl.LightningModule):
def __init__(self, weight=1):
super().__init__()
self.model = torchvision.models.resnet18()
# change conv1 from 3 to 1 input channels
self.model.conv1 = torch.nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
# change out_feature of the last fully connected layer (called fc in resnet18) from 1000 to 1
self.model.fc = torch.nn.Linear(in_features=512, out_features=1)
self.optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-4)
self.loss_fn = torch.nn.BCEWithLogitsLoss(pos_weight=torch.tensor([weight]))
# simple accuracy computation
self.train_acc = torchmetrics.Accuracy()
self.val_acc = torchmetrics.Accuracy()
def forward(self, data):
pred = self.model(data)
return pred
def training_step(self, batch, batch_idx):
x_ray, label = batch
label = label.float() # Convert label to float (just needed for loss computation)
pred = self(x_ray)[:, 0] # Prediction: Make sure prediction and label have same shape
loss = self.loss_fn(pred, label) # Compute the loss
# Log loss and batch accuracy
self.log("Train Loss", loss)
self.log("Step Train Acc", self.train_acc(torch.sigmoid(pred), label.int()))
return loss
def training_epoch_end(self, outs):
# After one epoch compute the whole train_data accuracy
self.log("Train Acc", self.train_acc.compute())
def validation_step(self, batch, batch_idx):
# Same steps as in the training_step
x_ray, label = batch
label = label.float()
pred = self(x_ray)[:, 0] # make sure prediction and label have same shape
loss = self.loss_fn(pred, label)
# Log validation metrics
self.log("Val Loss", loss)
self.log("Step Val Acc", self.val_acc(torch.sigmoid(pred), label.int()))
return loss
def validation_epoch_end(self, outs):
self.log("Val Acc", self.val_acc.compute())
def configure_optimizers(self):
# Caution! You always need to return a list here (just pack your optimizer into one :))
return [self.optimizer]
model = PneumoniaModel() # Instantiate the model
# Create the checkpoint callback
checkpoint_callback = ModelCheckpoint(
monitor='Val Acc',
save_top_k=10,
mode='max')
# Create the trainer
# Change the gpus parameter to the number of available gpus on your system. Use 0 for CPU training
gpus = 1 #TODO
trainer = pl.Trainer(gpus=gpus, logger=TensorBoardLogger(save_dir="./logs"), log_every_n_steps=1,
callbacks=checkpoint_callback,
max_epochs=35)
trainer.fit(model, train_loader, val_loader)
If I train the network with num_workers set to 0, it works without problems but takes around 2 hours to train. I have read that increasing the number of workers can speed up training, but whenever I use a number larger than one I get the following error.
RuntimeError:
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.
This probably means that you are not using fork to start your
child processes and you have forgotten to use the proper idiom
in the main module:
if __name__ == '__main__':
freeze_support()
...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable.
I have tried to add the if __name__ == '__main__': guard at the end, but it still gives me errors. Any help?
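For reference, my understanding of the idiom the error message refers to, applied to this script, looks roughly like the following (a simplified sketch, not my exact code):
# Keep dataset/model definitions at module level, but build the DataLoaders and call
# trainer.fit() only under the main guard, so worker processes can re-import this
# module safely when the spawn start method is used (e.g. on Windows).
def main():
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                               num_workers=num_workers, shuffle=True, pin_memory=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size,
                                             num_workers=num_workers, shuffle=False, pin_memory=True)
    trainer = pl.Trainer(gpus=gpus, logger=TensorBoardLogger(save_dir="./logs"), log_every_n_steps=1,
                         callbacks=checkpoint_callback, max_epochs=35)
    trainer.fit(model, train_loader, val_loader)

if __name__ == '__main__':
    main()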
Thank you
Related
Being new to deep learning, I will open this post with a reproducible code example using MNIST, to fully understand how to improve the training speed.
I'm using Ubuntu 20.04 LTS and have an RTX 3080. When I don't use batch training and just train on the whole 60,000 samples at once, as shown below, it takes about 6-7 seconds to finish training, with GPU usage at 99-100%.
import torch
import torch.nn as nn
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import random
from matplotlib import pyplot as plt
from tqdm.auto import tqdm
import timeit
# Set Device function (to GPU)
def set_device():
device = "cuda" if torch.cuda.is_available() else "cpu"
if device != "cuda":
print("GPU is not enabled")
else:
print("GPU is enabled")
return device
DEVICE = set_device()
# set seed function
def set_seed(seed=None, seed_torch=True):
if seed is None:
seed = np.random.choice(2 ** 32)
random.seed(seed)
np.random.seed(seed)
if seed_torch:
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
print(f'Random seed {seed} has been set.')
SEED = 2021
# for DataLoader
def seed_worker(worker_id):
worker_seed = torch.initial_seed() % 2**32
np.random.seed(worker_seed)
random.seed(worker_seed)
# Download Mnist datasets
train_data = datasets.MNIST(
root='data',
train=True,
transform=ToTensor(),
download=True,
)
test_data = datasets.MNIST(
root='data',
train=False,
transform=ToTensor()
)
X = train_data.data.reshape(60000, -1).float()
y = train_data.train_labels
X_test = test_data.data.reshape(10000, -1).float()
y_test = test_data.train_labels
# Simple Neural Net
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
# define layers
self.layers = nn.Sequential(
nn.Linear(784, 600),
nn.ReLU(),
nn.Linear(600, 300),
nn.ReLU(),
nn.Linear(300, 100),
nn.ReLU(),
nn.Linear(100, 10)
)
def forward(self, x):
return self.layers(x)
def predict(self, x):
return torch.argmax(self.forward(x), 1)
# simple train
X = X.to(DEVICE)
y = y.to(DEVICE)
X_test = X_test.to(DEVICE)
y_test = y_test.to(DEVICE)
SEED = 2021
set_seed(SEED)
model = Net().to(DEVICE)
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9)
loss_list = []
logits = model.forward(X)
loss = loss_function(logits, y)
start1 = timeit.default_timer()
for epoch in range(500):
logits = model.forward(X)
loss = loss_function(logits, y)
loss_list.append(loss.item())
loss.backward()
optimizer.step()
optimizer.zero_grad()
if epoch % 20 == 0:
print(f"epoch {epoch + 1}: loss: {loss:.5f},"
f"train_accuracy: {torch.sum(model.predict(X) == y) / 60000:.3f},"
f"test_accuracy:{torch.sum(model.predict(X_test) == y_test) / 10000:.3f}")
end1 = timeit.default_timer()
print(f"Time: {end1 - start1:.2f} seconds")
But when I use batch training like below, the speed drops significantly: with num_workers=0 it takes 176 seconds to finish the training, and with num_workers=4 it takes 216 seconds. In both scenarios, the GPU usage hovers around 20-30% and sometimes even lower. So my question is: is it normal to expect this time increase when using batch training, and if so, why should we use batch training? Is it to improve the test accuracy?
Secondly, why does increasing num_workers make training take longer? Is there anything fundamentally wrong in the code? And is it normal for GPU usage to be low when doing batch training?
X = train_data.data.reshape(60000, -1).float()
y = train_data.train_labels
X_test = test_data.data.reshape(10000, -1).float()
y_test = test_data.train_labels
# Dataloader
g_seed = torch.Generator()
g_seed.manual_seed(SEED)
batch_size = 300
test_data = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_data, batch_size=batch_size,
shuffle=False, num_workers=8,
worker_init_fn=seed_worker,
generator=g_seed)
train_data = TensorDataset(X, y)
train_loader = DataLoader(train_data, batch_size=batch_size, drop_last=True,
shuffle=True, num_workers=8,
worker_init_fn=seed_worker,
generator=g_seed)
def train_test_classification(net, criterion, optimizer, train_loader,
test_loader, num_epochs=1, verbose=True,
training_plot=True, device='cuda'):
net.train()
training_losses = []
for epoch in tqdm(range(num_epochs)): # loop over the dataset multiple times
running_loss = 0.0
for (i, data) in enumerate(train_loader, 0):
# get the inputs; data is a list of [inputs, labels]
inputs, labels = data
inputs = inputs.to(device).float()
labels = labels.to(device).long()
# zero the parameter gradients
optimizer.zero_grad()
# forward + backward + optimize
outputs = net(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
# print statistics
if verbose:
training_losses += [loss.item()]
net.eval()
def test(data_loader):
correct = 0
total = 0
for data in data_loader:
inputs, labels = data
inputs = inputs.to(device).float()
labels = labels.to(device).long()
outputs = net(inputs)
_, predicted = torch.max(outputs, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
acc = 100 * correct / total
return total, acc
train_total, train_acc = test(train_loader)
test_total, test_acc = test(test_loader)
if verbose:
print(f"Accuracy on the {train_total} training samples: {train_acc:0.2f}")
print(f"Accuracy on the {test_total} testing samples: {test_acc:0.2f}")
if training_plot:
plt.plot(training_losses)
plt.xlabel('Batch')
plt.ylabel('Training loss')
plt.show()
return train_acc, test_acc
set_seed(SEED)
net = Net().to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.005, momentum=0.9)
num_epochs = 500
start = timeit.default_timer()
_, _ = train_test_classification(net, criterion, optimizer, train_loader,
test_loader, num_epochs=num_epochs,
training_plot=True, device=DEVICE)
end = timeit.default_timer()
print(f"Time: {end-start:.2f}")
Low GPU usage can sometimes be due to slow data transfer. Having a large number of workers does not always help, though.
Consider using pin_memory=True in the DataLoader definition. This should speed up the data transfer between CPU and GPU. There is a thread on the PyTorch forum if you want more details.
Another option is to pass non_blocking=True to the to() call.
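For illustration, a minimal sketch using the loader and device names from the question (simplified; the seeding arguments are omitted and the worker count is arbitrary):
# Sketch: pin host memory in the DataLoader, then copy each batch to the GPU asynchronously
train_loader = DataLoader(train_data, batch_size=300, shuffle=True,
                          num_workers=4, pin_memory=True)
for inputs, labels in train_loader:
    inputs = inputs.to(DEVICE, non_blocking=True)   # overlaps the copy with computation
    labels = labels.to(DEVICE, non_blocking=True)
    # ... forward / backward pass as before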
I have the code below for binary classification, and it works fine, but I would like to modify the nn.Sequential part and add a BiLSTM layer:
class BertClassifier(nn.Module):
def __init__(self, freeze_bert=False):
super(BertClassifier, self).__init__()
# Specify hidden size of BERT, hidden size of our classifier, and number of labels
D_in, H, D_out = 768, 50, 2
# Instantiate BERT model
self.bert = BertModel.from_pretrained('bert-base-multilingual-uncased')
# Instantiate a one-layer feed-forward classifier
self.classifier = nn.Sequential(nn.Linear(D_in, H),nn.ReLU(),nn.Linear(H, D_out))
# Freeze the BERT model
if freeze_bert:
for param in self.bert.parameters():
param.requires_grad = False
def forward(self, input_ids, attention_mask):
# Feed input to BERT
outputs = self.bert(input_ids=input_ids,attention_mask=attention_mask)
# Extract the last hidden state of the token `[CLS]` for classification task
last_hidden_state_cls = outputs[0][:, 0, :]
# Feed input to classifier to compute logits
logits = self.classifier(last_hidden_state_cls)
return logits
I have tried to modify the Sequential like this: self.classifier = nn.Sequential(nn.LSTM(D_in, H, batch_first=True, bidirectional=True), nn.ReLU(), nn.Linear(H, D_out)), but then it throws the error RuntimeError: input must have 3 dimensions, got 2 on the line logits = self.classifier(last_hidden_state_cls). I found that I can use nn.ModuleDict instead of nn.Sequential, and I made the following:
self.classifier = nn.ModuleDict({
'lstm': nn.LSTM(input_size=D_in, hidden_size=H,batch_first=True, bidirectional=True ),
'linear': nn.Linear(in_features=H,out_features=D_out)})
But now I'm having issues writing the forward function for this. Can someone advise how I can properly modify the forward function?
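For context, nn.ModuleDict entries are indexed by key inside forward. Below is a standalone toy sketch of that pattern; the dummy tensor stands in for the BERT output, and the 2*H input to the linear layer is my assumption, since bidirectional=True doubles the LSTM's output features:
import torch
import torch.nn as nn

D_in, H, D_out = 768, 50, 2
classifier = nn.ModuleDict({
    'lstm': nn.LSTM(input_size=D_in, hidden_size=H, batch_first=True, bidirectional=True),
    # a bidirectional LSTM emits 2*H features per time step
    'linear': nn.Linear(in_features=2 * H, out_features=D_out)})

sequence_output = torch.randn(4, 64, D_in)            # stand-in for BERT's last hidden states
lstm_out, _ = classifier['lstm'](sequence_output)     # submodules are accessed by key
logits = classifier['linear'](lstm_out[:, -1])        # use the final time step
print(logits.shape)                                   # torch.Size([4, 2])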
Update: I also installed CUDA, and now when I run the code it returns the error CUDA out of memory. Tried to allocate 16.00 MiB. I tried to lower the batch size, but that doesn't fix the problem. I also tried the snippet below, but that didn't resolve it either. Any advice, please?
import torch, gc
gc.collect()
torch.cuda.empty_cache()
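A quick way to watch GPU memory while debugging this kind of error (just a sketch):
import torch
# how much memory the current process has actually allocated vs. reserved (cached)
print(f"{torch.cuda.memory_allocated() / 1024**2:.1f} MiB allocated")
print(f"{torch.cuda.memory_reserved() / 1024**2:.1f} MiB reserved")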
Update with the code:
MAX_LEN = 64
# For fine-tuning BERT, the authors recommend a batch size of 16 or 32.
batch_size = 32
VALID_BATCH_SIZE = 4
file1 = open('MH.txt', 'r')
list_com = []
list_label = []
for line in file1:
possible_labels = 'positive|negative'
label = re.findall(possible_labels, line)
line = re.sub(possible_labels, ' ', line)
line = re.sub('\n', ' ', line)
list_com.append(line)
list_label.append(label[0])
list_tuples = list(zip(list_com, list_label))
file1.close()
labels = ['positive', 'negative']
df = pd.DataFrame(list_tuples, columns=['text', 'label'])
df['label'] = df['label'].map({'positive': 1, 'negative': 0})
for i in range(0,len(df['label'])):
list_label[i] = df['label'][i]
#print(df)
#print(df['label'].value_counts())
X = df.text.values
y = df.label.values
X_train, X_val, y_train, y_val =\
train_test_split(X, y, test_size=0.1, random_state=2020)
def text_preprocessing(text):
# Remove '#name'
text = re.sub(r'(#.*?)[\s]', ' ', text)
# Replace '&amp;' with '&'
text = re.sub(r'&amp;', '&', text)
# Remove trailing whitespace
text = re.sub(r'\s+', ' ', text).strip()
return text
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-uncased', do_lower_case=True)
# Create a function to tokenize a set of texts
def preprocessing_for_bert(data):
input_ids = []
attention_masks = []
for sent in data:
encoded_sent = tokenizer.encode_plus(
text=text_preprocessing(sent), # Preprocess sentence
add_special_tokens=True, # Add `[CLS]` and `[SEP]`
max_length=MAX_LEN, # Max length to truncate/pad
pad_to_max_length=True, # Pad sentence to max length
# return_tensors='pt', # Return PyTorch tensor
return_attention_mask=True # Return attention mask
)
# Add the outputs to the lists
input_ids.append(encoded_sent.get('input_ids'))
attention_masks.append(encoded_sent.get('attention_mask'))
# Convert lists to tensors
input_ids = torch.tensor(input_ids)
attention_masks = torch.tensor(attention_masks)
return input_ids, attention_masks
train_inputs, train_masks = preprocessing_for_bert(X_train)
val_inputs, val_masks = preprocessing_for_bert(X_val)
# Convert other data types to torch.Tensor
train_labels = torch.tensor(y_train)
val_labels = torch.tensor(y_val)
# Create the DataLoader for our training set
train_data = TensorDataset(train_inputs, train_masks, train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)
# Create the DataLoader for our validation set
val_data = TensorDataset(val_inputs, val_masks, val_labels)
val_sampler = SequentialSampler(val_data)
val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=batch_size)
# Create the BertClassfier class
class BertClassifier(nn.Module):
"""Bert Model for Classification Tasks."""
def __init__(self, freeze_bert=False):
"""
#param bert: a BertModel object
#param classifier: a torch.nn.Module classifier
#param freeze_bert (bool): Set `False` to fine-tune the BERT model
"""
super(BertClassifier, self).__init__()
# Specify hidden size of BERT, hidden size of our classifier, and number of labels
D_in, H, D_out = 768, 50, 2
# Instantiate BERT model
self.bert = BertModel.from_pretrained('bert-base-multilingual-uncased')
# Instantiate a one-layer feed-forward classifier
self.classifier = nn.ModuleDict({
'lstm': nn.LSTM(input_size=D_in, hidden_size=H, batch_first=True, bidirectional=True),
'linear': nn.Linear(in_features=H, out_features=D_out)})
# Freeze the BERT model
if freeze_bert:
for param in self.bert.parameters():
param.requires_grad = False
def forward(self, input_ids, attention_mask):
outputs = self.bert(input_ids=input_ids,attention_mask=attention_mask)
sequence_output = outputs[0]
sequence_output, _ = self.lstm(sequence_output)
linear_output = self.linear(sequence_output[:, -1])
return linear_output
def initialize_model(epochs=4):
# Instantiate Bert Classifier
bert_classifier = BertClassifier(freeze_bert=False)
print(bert_classifier)
# Tell PyTorch to run the model on GPU
bert_classifier.to(device)
# Create the optimizer
optimizer = AdamW(bert_classifier.parameters(), lr=5e-5)
# Total number of training steps
total_steps = len(train_dataloader) * epochs
# Set up the learning rate scheduler
scheduler = get_linear_schedule_with_warmup(optimizer,num_warmup_steps=0,num_training_steps=total_steps)
return bert_classifier, optimizer, scheduler
# Specify loss function
loss_fn = nn.CrossEntropyLoss()
def set_seed(seed_value=42):
"""Set seed for reproducibility."""
random.seed(seed_value)
np.random.seed(seed_value)
torch.manual_seed(seed_value)
torch.cuda.manual_seed_all(seed_value)
def train(model, train_dataloader, val_dataloader=None, epochs=4, evaluation=False):
"""Train the BertClassifier model."""
# Start training loop
print("Start training...\n")
for epoch_i in range(epochs):
# Print the header of the result table
print(f"{'Epoch':^7} | {'Batch':^7} | {'Train Loss':^12} | {'Val Loss':^10} | {'Val Acc':^9} | {'Elapsed':^9}")
print("-" * 70)
# Measure the elapsed time of each epoch
t0_epoch, t0_batch = time.time(), time.time()
# Reset tracking variables at the beginning of each epoch
total_loss, batch_loss, batch_counts = 0, 0, 0
# Put the model into the training mode
model.train()
# For each batch of training data...
for step, batch in enumerate(train_dataloader):
batch_counts += 1
# Load batch to GPU
b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)
# Zero out any previously calculated gradients
model.zero_grad()
# Perform a forward pass. This will return logits.
logits = model(b_input_ids, b_attn_mask)
# Compute loss and accumulate the loss values
loss = loss_fn(logits, b_labels)
batch_loss += loss.item()
total_loss += loss.item()
# Perform a backward pass to calculate gradients
loss.backward()
# Clip the norm of the gradients to 1.0 to prevent "exploding gradients"
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
# Update parameters and the learning rate
optimizer.step()
scheduler.step()
# Print the loss values and time elapsed for every 20 batches
if (step % 20 == 0 and step != 0) or (step == len(train_dataloader) - 1):
# Calculate time elapsed for 20 batches
time_elapsed = time.time() - t0_batch
# Print training results
print(
f"{epoch_i + 1:^7} | {step:^7} | {batch_loss / batch_counts:^12.6f} | {'-':^10} | {'-':^9} | {time_elapsed:^9.2f}")
# Reset batch tracking variables
batch_loss, batch_counts = 0, 0
t0_batch = time.time()
# Calculate the average loss over the entire training data
avg_train_loss = total_loss / len(train_dataloader)
print("-" * 70)
#Evaluation
if evaluation == True:
# After the completion of each training epoch, measure the model's performance
# on our validation set.
val_loss, val_accuracy = evaluate(model, val_dataloader)
# Print performance over the entire training data
time_elapsed = time.time() - t0_epoch
print(
f"{epoch_i + 1:^7} | {'-':^7} | {avg_train_loss:^12.6f} | {val_loss:^10.6f} | {val_accuracy:^9.2f} | {time_elapsed:^9.2f}")
print("-" * 70)
print("\n")
print("Training complete!")
def evaluate(model, val_dataloader):
"""After the completion of each training epoch, measure the model's performance
on our validation set.
"""
# Put the model into the evaluation mode. The dropout layers are disabled during
# the test time.
model.eval()
# Tracking variables
val_accuracy = []
val_loss = []
# For each batch in our validation set...
for batch in val_dataloader:
# Load batch to GPU
b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)
# Compute logits
with torch.no_grad():
logits = model(b_input_ids, b_attn_mask)
# Compute loss
loss = loss_fn(logits, b_labels)
val_loss.append(loss.item())
# Get the predictions
preds = torch.argmax(logits, dim=1).flatten()
# Calculate the accuracy rate
accuracy = (preds == b_labels).cpu().numpy().mean() * 100
val_accuracy.append(accuracy)
# Compute the average accuracy and loss over the validation set.
val_loss = np.mean(val_loss)
val_accuracy = np.mean(val_accuracy)
return val_loss, val_accuracy
def accuracy(probs, y_true):
"""
- Print AUC and accuracy on the test set
#params probs (np.array): an array of predicted probabilities with shape (len(y_true), 2)
#params y_true (np.array): an array of the true values with shape (len(y_true),)
fpr, tpr, threshold = roc_curve(y_true, preds)
roc_auc = auc(fpr, tpr)
print(f'AUC: {roc_auc:.4f}')
"""
preds = probs[:, 1]
# Get accuracy over the test set
y_pred = np.where(preds >= 0.5, 1, 0)
accuracy = accuracy_score(y_true, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')
def bert_predict(model, test_dataloader):
"""Perform a forward pass on the trained BERT model to predict probabilities on the test set."""
# Put the model into the evaluation mode. The dropout layers are disabled during the test time.
model.eval()
all_logits = []
# For each batch in our test set...
for batch in test_dataloader:
# Load batch to GPU
b_input_ids, b_attn_mask = tuple(t.to(device) for t in batch)[:2]
# Compute logits
with torch.no_grad():
logits = model(b_input_ids, b_attn_mask)
all_logits.append(logits)
# Concatenate logits from each batch
all_logits = torch.cat(all_logits, dim=0)
# Apply softmax to calculate probabilities
probs = F.softmax(all_logits, dim=1).cpu().numpy()
return probs
set_seed(42) # Set seed for reproducibility
bert_classifier, optimizer, scheduler = initialize_model(epochs=3)
# start training
train(bert_classifier, train_dataloader, val_dataloader, epochs=3, evaluation=True)
# Compute predicted probabilities on the test set
probs = bert_predict(bert_classifier, val_dataloader)
# Evaluate the Bert classifier
accuracy(probs, y_val)
I am learning PyTorch, and I want to do a basic linear regression on data created this way:
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
x, y = make_regression(n_samples=100, n_features=1, noise=15, random_state=42)
y = y.reshape(-1, 1)
print(x.shape, y.shape)
plt.scatter(x, y)
I know that using TensorFlow this code solves it:
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(units=1, activation='linear', input_shape=(x.shape[1], )))
model.compile(optimizer=tf.keras.optimizers.SGD(lr=0.05), loss='mse')
hist = model.fit(x, y, epochs=15, verbose=0)
but I need to know what the PyTorch equivalent would look like. What I tried to do was this:
# Model Class
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.linear = nn.Linear(1,1)
def forward(self, x):
x = self.linear(x)
return x
def predict(self, x):
return self.forward(x)
model = Net()
loss_fn = F.mse_loss
opt = torch.optim.SGD(model.parameters(), lr=0.05)
# Training function
def fit(num_epochs, model, loss_fn, opt, train_dl):
# Repeat for given number of epochs
for epoch in range(num_epochs):
# Train with batches of data
for xb, yb in train_dl:
# 1. Generate predictions
pred = model(xb)
# 2. Calculate Loss
loss = loss_fn(pred, yb)
# 3. Compute gradients
loss.backward()
# 4. Update parameters using gradients
opt.step()
# 5. Reset the gradients to zero
opt.zero_grad()
# Print the progress
if (epoch+1) % 10 == 0:
print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))
# Training
fit(200, model, loss_fn, opt, data_loader)
But the model doesn't learn anything, and I don't know what else to try.
The input/output dimensions are (1/1).
Dataset
First of all, you should define a torch.utils.data.Dataset:
import torch
from sklearn.datasets import make_regression
class RegressionDataset(torch.utils.data.Dataset):
def __init__(self):
data = make_regression(n_samples=100, n_features=1, noise=0.1, random_state=42)
self.x = torch.from_numpy(data[0]).float()
self.y = torch.from_numpy(data[1]).float()
def __len__(self):
return len(self.x)
def __getitem__(self, index):
return self.x[index], self.y[index]
It converts NumPy data to PyTorch tensors inside __init__ and casts the data to float (NumPy uses double by default, while PyTorch's default is float, in order to use less memory).
Apart from that, it simply returns a tuple of features and the respective regression targets.
Fit
Almost there, but you have to flatten the output from the model (described below). torch.nn.Linear will return tensors of shape (batch, 1), while your targets are of shape (batch,). flatten() will remove the unnecessary 1 dimension.
# 2. Calculate Loss
loss = criterion(pred.flatten(), yb)
Model
That is all you need actually:
model = torch.nn.Linear(1, 1)
Any layer can be called directly; there is no need for forward and inheritance for simple models.
Calling
The rest is almost okay; you just have to create a torch.utils.data.DataLoader and pass an instance of our dataset. What DataLoader does is call the dataset's __getitem__ multiple times and collate the results into a batch of the specified size (there is some other funny business going on, but that's the idea):
dataset = RegressionDataset()
dataloader = torch.utils.data.DataLoader(dataset, batch_size=32)
model = torch.nn.Linear(1, 1)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=3e-4)
fit(5000, model, criterion, optimizer, dataloader)
Also notice I've used torch.nn.MSELoss(); since we are passing an object, it reads better than a plain function in this case.
Whole code
To make it easier:
import torch
from sklearn.datasets import make_regression
class RegressionDataset(torch.utils.data.Dataset):
def __init__(self):
data = make_regression(n_samples=100, n_features=1, noise=0.1, random_state=42)
self.x = torch.from_numpy(data[0]).float()
self.y = torch.from_numpy(data[1]).float()
def __len__(self):
return len(self.x)
def __getitem__(self, index):
return self.x[index], self.y[index]
# Training function
def fit(num_epochs, model, criterion, optimizer, train_dl):
# Repeat for given number of epochs
for epoch in range(num_epochs):
# Train with batches of data
for xb, yb in train_dl:
# 1. Generate predictions
pred = model(xb)
# 2. Calculate Loss
loss = criterion(pred.flatten(), yb)
# 3. Compute gradients
loss.backward()
# 4. Update parameters using gradients
optimizer.step()
# 5. Reset the gradients to zero
optimizer.zero_grad()
# Print the progress
if (epoch + 1) % 10 == 0:
print(
"Epoch [{}/{}], Loss: {:.4f}".format(epoch + 1, num_epochs, loss.item())
)
dataset = RegressionDataset()
dataloader = torch.utils.data.DataLoader(dataset, batch_size=32)
model = torch.nn.Linear(1, 1)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=3e-4)
fit(5000, model, criterion, optimizer, dataloader)
You should get a loss of around 0.053 or so; vary the noise or other params for a harder/easier regression task.
I am new to model training in Python. I have installed PyTorch using:
pip install torch==1.4.0+cpu torchvision==0.5.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
Now my model is giving me this error:
RuntimeError: Cannot initialize CUDA without ATen_cuda library. PyTorch splits its backend into two shared libraries: a CPU library and a CUDA library; this error has occurred because you are trying to use some CUDA functionality, but the CUDA library has not been loaded by the dynamic linker for some reason. The CUDA library MUST be loaded, EVEN IF you don't directly use any symbols from the CUDA library! One common culprit is a lack of -Wl,--no-as-needed in your link arguments; many dynamic linkers will delete dynamic library dependencies if you don't depend on any of their symbols. You can check if this has occurred by using ldd on your binary to see if there is a dependency on *_cuda.so library.
I know it's a CUDA problem, but I have defined in my code that if CUDA is not available, the CPU should be used. My code is:
# import the usual resources
import matplotlib.pyplot as plt
import numpy as np
# import utilities to keep workspaces alive during model training
#from workspace_utils import active_session
# watch for any changes in model.py, if it changes, re-load it automatically
#%load_ext autoreload
#%autoreload 2
## TODO: Define the Net in models.py
import torch
import torch.nn as nn
import torch.nn.functional as F
import datetime
## TODO: Once you've defined the network, you can instantiate it
# one example conv layer has been provided for you
from torch.utils.data import Dataset, DataLoader
import torch
from torchvision import transforms, utils
from models import Net
#-------------------------------------------------------------------------------
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
device = torch.device("cuda:0")
else:
device = torch.device("cpu")
#-------------------------------------------------------------------------------
print(device)
print(torch.__version__)
net = Net().to(device)
print(net)
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
# the dataset we created in Notebook 1 is copied in the helper file `data_load.py`
from data_load import FacialKeypointsDataset
# the transforms we defined in Notebook 1 are in the helper file `data_load.py`
from data_load import Rescale, RandomCrop, Normalize, ToTensor
## TODO: define the data_transform using transforms.Compose([all tx's, . , .])
# order matters! i.e. rescaling should come before a smaller crop
# testing that you've defined a transform
data_transform = transforms.Compose(
[Rescale(250), RandomCrop(224),
Normalize(), ToTensor()])
assert (data_transform is not None), 'Define a data_transform'
# create the transformed dataset
transformed_dataset = FacialKeypointsDataset(
csv_file=
"person_keypoints_train2017_foot_v1\\NEWOUT.csv",
root_dir=
"person_keypoints_train2017_foot_v1\\out\\",
transform=data_transform)
print('Number of images: ', len(transformed_dataset))
# iterate through the transformed dataset and print some stats about the first few samples
for i in range(4):
sample = transformed_dataset[i]
print(i, sample['image'].size(), sample['keypoints'].size())
# load training data in batches
batch_size = 32
train_loader = DataLoader(
transformed_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
# load in the test data, using the dataset class
# AND apply the data_transform you defined above
# create the test dataset
test_dataset = FacialKeypointsDataset(
csv_file=
"person_keypoints_val2017_foot_v1\\NEWOUT.csv",
root_dir=
"person_keypoints_val2017_foot_v1\\ready_val_out\\",
transform=data_transform)
test_loader = DataLoader(
test_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
# test the model on a batch of test images
def net_sample_output():
# iterate through the test dataset
for i, sample in enumerate(test_loader):
# get sample data: images and ground truth keypoints
images = sample['image']
key_pts = sample['keypoints']
# convert images to FloatTensors
images = images.type(torch.cuda.FloatTensor)
# forward pass to get net output
output_pts = net(images)
# reshape to batch_size x 68 x 2 pts ##CHANGED
output_pts = output_pts.view(output_pts.size()[0], 6, -1)
# break after first image is tested
if i == 0:
return images, output_pts, key_pts
test_images, test_outputs, gt_pts = net_sample_output()
# print out the dimensions of the data to see if they make sense
print(test_images.data.size())
print(test_outputs.data.size())
print(gt_pts.size())
def show_all_keypoints(image, predicted_key_pts, gt_pts=None):
"""Show image with predicted keypoints"""
# image is grayscale
plt.imshow(image, cmap='gray')
plt.scatter(
predicted_key_pts[:, 0],
predicted_key_pts[:, 1],
s=20,
marker='.',
c='m')
# plot ground truth points as green pts
if gt_pts is not None:
plt.scatter(gt_pts[:, 0], gt_pts[:, 1], s=20, marker='.', c='g')
# visualize the output
# by default this shows a batch of 10 images
def visualize_output(test_images, test_outputs, gt_pts=None, batch_size=4):
fig = plt.figure()
iimg = 1
sp = 1
for i in range(batch_size):
iimg += 1
if i % 2 == 0:
sp += 1
iimg = 1
fig.add_subplot(sp, batch_size, iimg)
# un-transform the image data
image = test_images.cpu(
)[i].data # get the image from it's Variable wrapper
image = image.numpy() # convert to numpy array from a Tensor
image = np.transpose(
image, (1, 2, 0)) # transpose to go from torch to numpy image
# un-transform the predicted key_pts data
predicted_key_pts = test_outputs.cpu()[i].data
predicted_key_pts = predicted_key_pts.numpy()
# undo normalization of keypoints
predicted_key_pts = predicted_key_pts * 50.0 + 100
# plot ground truth points for comparison, if they exist
ground_truth_pts = None
if gt_pts is not None:
ground_truth_pts = gt_pts[i]
ground_truth_pts = ground_truth_pts * 50.0 + 100
# call show_all_keypoints
show_all_keypoints(
np.squeeze(image), predicted_key_pts, ground_truth_pts)
plt.axis('off')
plt.show()
# call it
#visualize_output(test_images, test_outputs, gt_pts)
## TODO: Define the loss and optimization
import torch.optim as optim
#criterion = nn.CrossEntropyLoss()
criterion = nn.MSELoss()
#optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
optimizer = optim.Adam(params=net.parameters(), lr=0.001)
def train_net(n_epochs):
print("Training...")
# prepare the net for training
net.train()
for epoch in range(n_epochs): # loop over the dataset multiple times
running_loss = 0.0
# train on batches of data, assumes you already have train_loader
for batch_i, data in enumerate(train_loader):
# get the input images and their corresponding labels
images = data['image']
key_pts = data['keypoints']
# flatten pts
key_pts = key_pts.view(key_pts.size(0), -1)
# convert variables to floats for regression loss
key_pts = key_pts.type(torch.cuda.FloatTensor)
images = images.type(torch.cuda.FloatTensor)
# forward pass to get outputs
output_pts = net(images)
#output_pts = output_pts.type(torch.cuda.FloatTensor)
#print(output_pts.type)
#print(key_pts.type)
# calculate the loss between predicted and target keypoints
loss = criterion(output_pts, key_pts)
# zero the parameter (weight) gradients
optimizer.zero_grad()
# backward pass to calculate the weight gradients
loss.backward()
# update the weights
optimizer.step()
# print loss statistics
running_loss += loss.item()
if batch_i % 32 == 31: # print every 32 batches
print(datetime.datetime.now())
print('Epoch: {}, Batch: {}, Avg. Loss: {}'.format(
epoch + 1, batch_i + 1, running_loss / 1000))
running_loss = 0.0
print('Finished Training')
# train your network
n_epochs = 500 # start small, and increase when you've decided on your model structure and hyperparams
# this is a Workspaces-specific context manager to keep the connection
# alive while training your model, not part of pytorch
#with active_session():
#train_net(n_epochs)
## TODO: change the name to something unique for each new model
model_dir = 'D:\\Users\\Tsvetan\\FootDataset\\'
model_name = 'keypoints.pt'
cpnt = torch.load(model_dir + "500" + model_name)
print()
print()
for key in cpnt.keys():
print(key + " {}".format(cpnt[key]))
print()
print()
net.load_state_dict(torch.load(model_dir + "500" + model_name))
train_net(10)
net.eval()
# after training, save your model parameters in the dir 'saved_models'
#torch.save(net.state_dict(), model_dir + model_name)
# Get the weights in the first conv layer, "conv1"
# if necessary, change this to reflect the name of your first conv layer
weights1 = net.conv1.weight.data.cpu()
w = weights1.numpy()
filter_index = 0
print(w[filter_index][0])
print(w[filter_index][0].shape)
# display the filter weights
#plt.imshow(w[filter_index][0], cmap='gray')
####
test_images, test_outputs, gt_pts = net_sample_output()
visualize_output(test_images, test_outputs, gt_pts)
I have rewritten the condition, but it is still giving me the error.
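For reference, a device-agnostic version of the casts used above would look roughly like this (a sketch, not the code I am currently running):
# Sketch: move batches to whichever device was detected, instead of forcing CUDA tensors
images = images.float().to(device)
key_pts = key_pts.float().to(device)
output_pts = net(images)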
I'm trying to create a contractive autoencoder in PyTorch. I found this thread and tried to follow it. This is the snippet I wrote based on the mentioned thread:
import datetime
import numpy as np
import torch
import torchvision
from torchvision import datasets, transforms
from torchvision.utils import save_image, make_grid
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
%matplotlib inline
dataset_train = datasets.MNIST(root='MNIST',
train=True,
transform = transforms.ToTensor(),
download=True)
dataset_test = datasets.MNIST(root='MNIST',
train=False,
transform = transforms.ToTensor(),
download=True)
batch_size = 128
num_workers = 2
dataloader_train = torch.utils.data.DataLoader(dataset_train,
batch_size = batch_size,
shuffle=True,
num_workers = num_workers,
pin_memory=True)
dataloader_test = torch.utils.data.DataLoader(dataset_test,
batch_size = batch_size,
num_workers = num_workers,
pin_memory=True)
def view_images(imgs, labels, rows = 4, cols =11):
imgs = imgs.detach().cpu().numpy().transpose(0,2,3,1)
fig = plt.figure(figsize=(8,4))
for i in range(imgs.shape[0]):
ax = fig.add_subplot(rows, cols, i+1, xticks=[], yticks=[])
ax.imshow(imgs[i].squeeze(), cmap='Greys_r')
ax.set_title(labels[i].item())
# now let's view some
imgs, labels = next(iter(dataloader_train))
view_images(imgs, labels,13,10)
class Contractive_AutoEncoder(nn.Module):
def __init__(self):
super().__init__()
self.encoder = nn.Linear(784, 512)
self.decoder = nn.Linear(512, 784)
def forward(self, input):
# flatten the input
shape = input.shape
input = input.view(input.size(0), -1)
output_e = F.relu(self.encoder(input))
output = F.sigmoid(self.decoder(output_e))
output = output.view(*shape)
return output_e, output
def loss_function(output_e, outputs, imgs, device):
output_e.backward(torch.ones(output_e.size()).to(device), retain_graph=True)
criterion = nn.MSELoss()
assert outputs.shape == imgs.shape ,f'outputs.shape : {outputs.shape} != imgs.shape : {imgs.shape}'
imgs.grad.requires_grad = True
loss1 = criterion(outputs, imgs)
print(imgs.grad)
loss2 = torch.mean(pow(imgs.grad,2))
loss = loss1 + loss2
return loss
epochs = 50
interval = 2000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Contractive_AutoEncoder().to(device)
optimizer = optim.Adam(model.parameters(), lr =0.001)
for e in range(epochs):
for i, (imgs, labels) in enumerate(dataloader_train):
imgs = imgs.to(device)
labels = labels.to(device)
outputs_e, outputs = model(imgs)
loss = loss_function(outputs_e, outputs, imgs,device)
optimizer.zero_grad()
loss.backward()
optimizer.step()
if i%interval:
print('')
print(f'epoch/epochs: {e}/{epochs} loss: {loss.item():.4f}')
For the sake of brevity I just used one layer for the encoder and the decoder; it should work regardless of the number of layers in either of them, obviously!
But the catch here is that, aside from the fact that I don't know if this is the correct way of doing this (calculating gradients with respect to the input), I get an error which makes the former approach wrong/not applicable.
That is:
imgs.grad.requires_grad = True
produces the error:
AttributeError: 'NoneType' object has no attribute 'requires_grad'
I also tried the second method suggested in that thread which is as follows:
class Contractive_Encoder(nn.Module):
def __init__(self):
super().__init__()
self.encoder = nn.Linear(784, 512)
def forward(self, input):
# flatten the input
input = input.view(input.size(0), -1)
output_e = F.relu(self.encoder(input))
return output_e
class Contractive_Decoder(nn.Module):
def __init__(self):
super().__init__()
self.decoder = nn.Linear(512, 784)
def forward(self, input):
# flatten the input
output = F.sigmoid(self.decoder(input))
output = output.view(-1,1,28,28)
return output
epochs = 50
interval = 2000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_enc = Contractive_Encoder().to(device)
model_dec = Contractive_Decoder().to(device)
optimizer = optim.Adam([{"params":model_enc.parameters()},
{"params":model_dec.parameters()}], lr =0.001)
optimizer_cond = optim.Adam(model_enc.parameters(), lr = 0.001)
criterion = nn.MSELoss()
for e in range(epochs):
for i, (imgs, labels) in enumerate(dataloader_train):
imgs = imgs.to(device)
labels = labels.to(device)
outputs_e = model_enc(imgs)
outputs = model_dec(outputs_e)
loss_rec = criterion(outputs, imgs)
optimizer.zero_grad()
loss_rec.backward()
optimizer.step()
imgs.requires_grad_(True)
y = model_enc(imgs)
optimizer_cond.zero_grad()
y.backward(torch.ones(imgs.view(-1,28*28).size()))
imgs.grad.requires_grad = True
loss = torch.mean([pow(imgs.grad,2)])
optimizer_cond.zero_grad()
loss.backward()
optimizer_cond.step()
if i%interval:
print('')
print(f'epoch/epochs: {e}/{epochs} loss: {loss.item():.4f}')
but I face the error:
RuntimeError: invalid gradient at index 0 - got [128, 784] but expected shape compatible with [128, 512]
How should I go about this in PyTorch?
Summary
The final implementation for contractive loss that I wrote is as follows:
def loss_function(output_e, outputs, imgs, lamda = 1e-4, device=torch.device('cuda')):
criterion = nn.MSELoss()
assert outputs.shape == imgs.shape ,f'outputs.shape : {outputs.shape} != imgs.shape : {imgs.shape}'
loss1 = criterion(outputs, imgs)
output_e.backward(torch.ones(output_e.size()).to(device), retain_graph=True)
# Frobenious norm, the square root of sum of all elements (square value)
# in a jacobian matrix
loss2 = torch.sqrt(torch.sum(torch.pow(imgs.grad,2)))
imgs.grad.data.zero_()
loss = loss1 + (lamda*loss2)
return loss
and inside the training loop you need to do:
for e in range(epochs):
for i, (imgs, labels) in enumerate(dataloader_train):
imgs = imgs.to(device)
labels = labels.to(device)
imgs.requires_grad_(True)
imgs.retain_grad()
outputs_e, outputs = model(imgs)
loss = loss_function(outputs_e, outputs, imgs, lamda=1e-4, device=device)
imgs.requires_grad_(False)
optimizer.zero_grad()
loss.backward()
optimizer.step()
print(f'epoch/epochs: {e}/{epochs} loss: {loss.item():.4f}')
Full explanation
As it turns out, and as @akshayk07 rightly pointed out in the comments, the implementation found on the PyTorch forum was wrong in multiple places. Most notably, it wasn't implementing the actual contractive loss introduced in the paper Contractive Auto-Encoders: Explicit Invariance During Feature Extraction! Aside from that, the implementation wouldn't work at all, for obvious reasons that will be explained in a moment.
The changes are obvious, so I'll try to explain what's going on here. First of all, note that imgs is not a leaf node, so the gradients would not be retained in the image's .grad attribute.
In order to retain gradients for non-leaf nodes, you should use retain_grad(); by default, .grad is only populated for leaf tensors. Also, imgs.retain_grad() should be called before doing the forward() pass, as it instructs autograd to store grads for non-leaf nodes.
Update
Thanks to @Michael for pointing out that the correct calculation of the Frobenius norm is actually (from ScienceDirect):
the square root of the sum of the squares of all the matrix entries
and not
the square root of the sum of the absolute values of all the matrix entries, as explained here.
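As a quick sanity check (a small sketch with a stand-in matrix), the corrected expression is exactly the built-in Frobenius norm:
import torch

J = torch.randn(128, 784)                              # stand-in Jacobian
frob_manual = torch.sqrt(torch.sum(torch.pow(J, 2)))   # square root of the sum of squares
frob_builtin = torch.norm(J, p='fro')                  # PyTorch's Frobenius norm
print(torch.allclose(frob_manual, frob_builtin))       # True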
In PyTorch 1.5.0, a high-level torch.autograd.functional.jacobian API was added. This should make the contractive objective easier to implement for an arbitrary encoder. For torch>=1.5.0, the contractive loss would look like this:
contractive_loss = torch.norm(torch.autograd.functional.jacobian(self.encoder, imgs, create_graph=True))
The create_graph argument makes the jacobian differentiable.
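A minimal self-contained sketch of how that line could be combined with the reconstruction term; the encoder, shapes, and the 1e-4 weight below are illustrative assumptions, not the code from the question:
import torch
import torch.nn as nn

encoder = nn.Linear(784, 512)
decoder = nn.Linear(512, 784)
imgs = torch.rand(8, 784)                    # dummy batch of flattened images

code = encoder(imgs)
recon = torch.sigmoid(decoder(code))
recon_loss = nn.MSELoss()(recon, imgs)

# Frobenius norm of the encoder's Jacobian w.r.t. the input; create_graph keeps it differentiable
jacobian = torch.autograd.functional.jacobian(encoder, imgs, create_graph=True)
contractive_loss = torch.norm(jacobian)

loss = recon_loss + 1e-4 * contractive_loss
loss.backward()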
The main challenge in implementing the contractive autoencoder is in calculating the Frobenius norm of the Jacobian, which is the gradient of the code or bottleneck layer (vector) with respect to the input layer (vector). This is the regularization term in the loss function. Fortunately, you have done the hard work in solving this for me. Thank you! You are using MSE loss for the first term. Cross entropy loss is sometimes used instead. It's worth considering. I think you are almost there with the Frobenius norm, except that you need to take the square root of the sum of the squares of the Jacobian, where you are calculating the square root of the sum of the absolute values. Here's how I'd define the loss function (sorry I changed notation a little to keep myself straight):
def cae_loss_fcn(code, img_out, img_in, lamda=1e-4, device=torch.device('cuda')):
# First term in the loss function, for ensuring representational fidelity
criterion=nn.MSELoss()
assert img_out.shape == img_in.shape, f'img_out.shape : {img_out.shape} != img_in.shape : {img_in.shape}'
loss1 = criterion(img_out, img_in)
# Second term in the loss function, for enforcing contraction of representation
code.backward(torch.ones(code.size()).to(device), retain_graph=True)
# Frobenius norm of Jacobian of code with respect to input image
loss2 = torch.sqrt(torch.sum(torch.pow(img_in.grad, 2))) # THE CORRECTION
img_in.grad.data.zero_()
# Total loss, the sum of the two loss terms, with weight applied to second term
loss = loss1 + (lamda*loss2)
return loss
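Usage would mirror the training loop shown earlier, something along these lines (a sketch reusing the names from above):
# Sketch: inside the batch loop, with img_in requiring grad and retain_grad() called beforehand
code, img_out = model(img_in)
loss = cae_loss_fcn(code, img_out, img_in, lamda=1e-4, device=device)
optimizer.zero_grad()
loss.backward()
optimizer.step()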