Inconsistent shapes between outputs and targets - python

I have the following code to train a transformer-based model (huggingface) for a multi-head regression task (I call it multi-head because the model predicts multiple output scores, not only one).
# select device
device = 'cuda' if cuda.is_available() else 'cpu'
print("DEVICE: ", device)
MAX_LEN = 512
TRAIN_BATCH_SIZE = 8
VALID_BATCH_SIZE = 8
TRAIN_EPOCHS = 1
TEST_EPOCHS = 1
LEARNING_RATE = 2e-05
REG_DROPOUT = 0.1
DISPLAY_STEP_THRESHOLD = 100
MODEL_NAME_CHECKPOINT = 'bert-base-uncased'
MODEL_FOLDER = 'autotune_multihead_regression_model'
# *************** DATA *********************
# load data
train_data = pd.read_csv(f"derived_data/traindev_fold{args.fold}.csv")
test_data = pd.read_csv(f"derived_data/test_fold{args.fold}.csv")
train_data['labels'] = train_data[train_data.columns[2:]].values.tolist()
test_data['labels'] = test_data[test_data.columns[2:]].values.tolist()
print("train data shape: ", train_data.shape)
print("test data shape: ", test_data.shape)
# make datasets
train_dataset = Dataset.from_pandas(train_data)
test_dataset = Dataset.from_pandas(test_data)
dataset = DatasetDict({
'train': train_dataset,
'test': test_dataset
})
# initialize tokenizer
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME_CHECKPOINT)
def preprocess_function(examples):
return tokenizer(examples["full_text"], truncation=True, padding="max_length")
# apply the preprocessing on the entire dataset
encoded_dataset = dataset.map(preprocess_function, batched=True)
# ******************** FINETUNING ********************#
class BERTClass(torch.nn.Module):
def __init__(self):
super(BERTClass, self).__init__()
self.l0 = transformers.BertModel.from_pretrained(MODEL_NAME_CHECKPOINT)
self.l1 = torch.nn.Linear(768, 6)
def forward(self, input_ids, attention_mask, labels):
""""Override the function forward. Note that keys not appearing here will be removed by trainer.
It does not matter if trainer is not used.
"""
# _, output_0 = self.l0(ids, attention_mask=mask, token_type_ids=token_type_ids, return_dict=False)
_, output_0 = self.l0(input_ids, attention_mask=attention_mask, return_dict=False)
output = self.l1(output_0)
return output
def model_init():
model = BERTClass()
return model
args = TrainingArguments(
MODEL_FOLDER,
evaluation_strategy="epoch",
save_strategy="epoch",
learning_rate=2e-5,
per_device_train_batch_size=TRAIN_BATCH_SIZE,
per_device_eval_batch_size=VALID_BATCH_SIZE,
num_train_epochs=5,
weight_decay=0.01,
load_best_model_at_end=True,
# this is an advantage in the sense that the last model is not necessarily the best one.
metric_for_best_model="mean_rmse",
logging_strategy="steps",
logging_steps=100,
push_to_hub=False
)
def mean_rmse(outputs, targets):
""""
:param
outputs: 2D list
targets: 2D list
:returns
a scalar real number
"""
delta = outputs - targets
delta = torch.sqrt((delta ** 2).mean(axis=0))
return delta.mean()
def compute_metrics(eval_pred):
predictions, labels = eval_pred
return {"mean_rmse": mean_rmse(predictions, labels)}
class RegressionTrainer(Trainer):
def compute_loss(self, model, inputs, return_outputs=False):
labels = inputs.get("labels")
outputs = model(**inputs)
loss = torch.nn.MSELoss()(outputs.squeeze(), labels.squeeze())
return (loss, outputs) if return_outputs else loss
temp_dataset = encoded_dataset["train"].select(range(100))
trainer = RegressionTrainer(
model_init(),
args,
train_dataset=encoded_dataset["train"],
eval_dataset=temp_dataset,
tokenizer=tokenizer,
compute_metrics=compute_metrics
)
trainer.train()
In the code, I use a customized model because I want to have the flexibility of the head. Also, I use Trainer to train the model because I want to use hyperparameter_search.
The target (labels) of the dataset is a (row) vector of 6 variables.
Now the training seems going well when I can see the loss decreasing.
However, the code crashes when it starts the evaluation.
In the code above, I use part of the training set for evaluation, and it throws:
***** Running Evaluation *****
Num examples = 100
Batch size = 8
Traceback (most recent call last):██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 13.35it/s]
File "autotune_multiregression_head_bert.py", line 152, in <module>
trainer.train()
File "/home/ubuntu/anaconda3/envs/pytorch_p37/lib/python3.7/site-packages/transformers/trainer.py", line 1504, in train
ignore_keys_for_eval=ignore_keys_for_eval,
File "/home/ubuntu/anaconda3/envs/pytorch_p37/lib/python3.7/site-packages/transformers/trainer.py", line 1834, in _inner_training_loop
self._maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_keys_for_eval)
File "/home/ubuntu/anaconda3/envs/pytorch_p37/lib/python3.7/site-packages/transformers/trainer.py", line 2052, in _maybe_log_save_evaluate
metrics = self.evaluate(ignore_keys=ignore_keys_for_eval)
File "/home/ubuntu/anaconda3/envs/pytorch_p37/lib/python3.7/site-packages/transformers/trainer.py", line 2781, in evaluate
metric_key_prefix=metric_key_prefix,
File "/home/ubuntu/anaconda3/envs/pytorch_p37/lib/python3.7/site-packages/transformers/trainer.py", line 3059, in evaluation_loop
metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels))
File "autotune_multiregression_head_bert.py", line 130, in compute_metrics
return {"mcrmse": mcrmse_fn(predictions, labels)}
File "autotune_multiregression_head_bert.py", line 123, in mcrmse_fn
delta = outputs - targets
ValueError: operands could not be broadcast together with shapes (87,6) (100,6)
20%|
I got the same error when evaluating on the test set. I assume some examples got failed when evaluating and are not added to the final result, hence the final shape of outputs is not consistent with targets? (I debugged and see that outputs has 87 examples while targets has 100).
What has gone wrong?

Related

How to implement Laplace Posteriori Approximation on BERT in PyTorch?

I'm trying to implement the Laplace Posteriori Approximation on the last layer for the classification results obtained by BERT model. I get an error regarding input size, and after I fix it by extracting just embeddings and class labels from BERT to feed them into Laplace, I get another bunch of errors regarding input dimensions that I don't know how to debug.
As this is something I didn't find on the internet, and includes relatively new libraries, I will post here just the first error I got, code that might help in debugging and useful links.
I will update post if needed.
Of course, if someone knows how to implement Laplace Posteriori Approximation with BERT in some other library like Scikit or Trax, it would be helpful. Also, some other Transformer classification model with some other confidence approximation will be useful for me. Any help is appreciated!
Code:
# Import
import pandas as pd
import torch
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
from torch import nn
from transformers import BertTokenizer
from transformers import BertModel
from transformers import BertForSequenceClassification
from sklearn.model_selection import train_test_split
import time
import os
#Toy Data
data_a_b_c = ["""category a. This is category a. In category a we talk about animals.
This category includes lions, fish, tigers, birds, elephants, mouses, dogs, cats, and all other animals."""] * 60 \
+ ["""category b. This is category b. In category b we talk about people. This category members are
Abraham Maslow, John Lennon, Drazen Petrovic, Nikola Tesla, Slavoljub Penkala, Nenad Bakic and Larry Page."""] * 60 \
+ ["""category c. This is category c. Category c is dedicated to car brands like Lamborgini, Rimac-Buggati, BMW, Mercedes,
Honda, Opel, Wolkswagen, and etc."""] * 60
label_0_1_2 = [0] * 60 + [1] * 60 + [2] * 60
d = {'text': data_a_b_c, 'labels': label_0_1_2}
df = pd.DataFrame(data=d)
print(df.head(3))
print(df.tail(3))
print(df.info())
# Parameters
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
batch_size = 2
learning_rate = 3e-4
epochs = 3
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
labels = pd.Series(df.labels.values).to_dict()
num_classes = 3
print(f'Tokenizer: {tokenizer}, Batch size:{batch_size}, Learning rate:{learning_rate}, Epochs:{epochs}')
print('Device: ', device)
print('Number of possible classes: ', num_classes)
# Model Architecture
class TransformerModel(nn.Module):
def __init__(self, num_classes, dropout=0.5):
super(TransformerModel, self).__init__()
self.bert = BertModel.from_pretrained('bert-base-multilingual-cased')
self.dropout = nn.Dropout(dropout)
self.linear = nn.Linear(768, num_classes)
self.relu = nn.ReLU()
def forward(self, input_id, mask):
_, pooled_output = self.bert(input_ids=input_id, attention_mask=mask, return_dict=False)
dropout_output = self.dropout(pooled_output)
linear_output = self.linear(dropout_output)
final_layer = self.relu(linear_output)
return final_layer
# Prepare Data Function
def prepare_data(data, labels):
texts = tokenizer(data, padding='max_length', max_length=512, truncation=True, return_tensors="pt")
input_ids = texts['input_ids']
attention_mask = texts['attention_mask']
train_dataset = TensorDataset(input_ids, attention_mask, torch.LongTensor(labels))
dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
return dataloader
#Run Training Function
def run_training(train_dataloader, val_dataloader, epochs=epochs, lr=learning_rate):
def train(dataloader):
model.train()
total_acc, total_count = 0, 0
log_interval = 128
start_time = time.time()
for idx, (input_id, mask, label) in enumerate(train_dataloader):
# print(idx)
mask = mask.to(device)
input_id = input_id.to(device)
label = label.type(torch.LongTensor).to(device)
output = model(input_id, mask)
optimizer.zero_grad()
loss = criterion(output, label)
loss.backward()
# torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
optimizer.step()
total_acc += (output.argmax(1) == label).sum().item()
total_count += label.size(0)
if idx % log_interval == 0 and idx > 0:
elapsed = time.time() - start_time
print('| epoch {:3d} | {:5d}/{:5d} batches '
'| accuracy {:8.3f}'.format(epoch, idx, len(dataloader),
total_acc / total_count))
total_acc, total_count = 0, 0
start_time = time.time()
def evaluate(dataloader):
model.eval()
total_acc, total_count = 0, 0
with torch.no_grad():
for idx, (input_id, mask, label) in enumerate(dataloader):
mask = mask.to(device)
input_id = input_id.to(device)
label = label.to(device)
output = model(input_id, mask)
total_acc += (output.argmax(1) == label).sum().item()
total_count += label.size(0)
return total_acc / total_count
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.1)
cuda = torch.cuda.is_available()
device = torch.device("cuda" if cuda else "cpu")
device = 'cuda'
model.to(device)
total_accu = None
for epoch in range(1, epochs + 1):
epoch_start_time = time.time()
train(train_dataloader)
accu_val = evaluate(val_dataloader)
if total_accu is not None and total_accu > accu_val:
scheduler.step()
else:
total_accu = accu_val
print('-' * 59)
print('| end of epoch {:3d} | time: {:5.2f}s | '
'valid accuracy {:8.3f} '.format(epoch,
time.time() - epoch_start_time,
accu_val))
print('-' * 59)
# Data Split And Preparation
X_train, X_test, y_train, y_test = train_test_split(df.text.values.tolist(), df.labels.values.tolist(), test_size=0.2, random_state=2)
train_dataloader = prepare_data(X_train, y_train)
val_dataloader = prepare_data(X_test, y_test)
# Run The Model
model = TransformerModel(num_classes)
run_training(train_dataloader, val_dataloader)
print('finished')
# Save And Load The Model (if needed)
PATH = ".../Torch_BERT_model"
torch.save(model, os.path.join(PATH, "Toy_Data_BERT.pth"))
model = torch.load(os.path.join(PATH, "Toy_Data_BERT.pth"))
print(model)
# Laplace
from laplace import Laplace
la = Laplace(model, 'classification', subset_of_weights='last_layer', hessian_structure='full')
la.fit(train_dataloader)
Error I get:
--------------------------------------------------------------------------- ValueError Traceback (most recent call
last) ~\AppData\Local\Temp\ipykernel_7144\3779742208.py in <cell line:
2>()
1 la = Laplace(model, 'classification', subset_of_weights='last_layer', hessian_structure='full')
----> 2 la.fit(train_dataloader)
~\anaconda3\lib\site-packages\laplace\lllaplace.py in fit(self,
train_loader, override)
98
99 if self.model.last_layer is None:
--> 100 X, _ = next(iter(train_loader))
101 with torch.no_grad():
102 try:
ValueError: too many values to unpack (expected 2)
Useful link for Laplace implementation with examples:
https://aleximmer.github.io/Laplace/#full-example-optimization-of-the-marginal-likelihood-and-prediction
Code that might help in debugging:
for x in train_dataloader:
print("The length of batch is:", len(x))
print()
print("The batch looks like:", x)
print()
print("The length of the first element in the batch is:") #embedding
print(len(x[0]))
print("The length of the second element in the batch is:") #1 if place is filled with word, 0 if it's empty?
print(len(x[1]))
print("The length of the third element in the batch is:") #category
print(len(x[2]))
print()
print("The lengths of the first tensor and second tensor in the first element in the batch is:")
print(len(x[0][0]), len(x[0][1])) # = max_length (512)
print("The lengths of the first tensor and second tensor in the second element in the batch is:")
print(len(x[1][0]), len(x[1][1])) # = max_length (512)
print()
print()
The laplace library expects that the dataloader returns two parameters (X,y) and that the model requires exactly one argument to make its prediction (code). But your model forward pass requires two arguments, namely input_id and mask, and your dataloader returns three arguments input_id, mask, and labels.
There are several ways to work around this limitation (e.g. return a dict with input_ids and attention_mask). The way that requires the least understanding of the internals of the laplace library is to generate the attention mask at runtime in the forward pass (not great for the performance):
class TransformerModel(nn.Module):
def __init__(self, num_classes, pad_id, dropout=0.5):
super(TransformerModel, self).__init__()
self.bert = BertModel.from_pretrained('bert-base-multilingual-cased')
self.dropout = nn.Dropout(dropout)
self.linear = nn.Linear(768, num_classes)
self.relu = nn.ReLU()
self.pad_id = pad_id
def forward(self, input_id):
mask = (input_ids!=self.pad_id).type(input_ids.dtype)
_, pooled_output = self.bert(input_ids=input_id, attention_mask=mask, return_dict=False)
dropout_output = self.dropout(pooled_output)
linear_output = self.linear(dropout_output)
final_layer = self.relu(linear_output)
return final_layer
model = TransformerModel(num_classes, tokenizer.pad_token_id)

Python ERROR => h5py objects cannot be pickled

Can any one help me
I am facing this error "h5py objects cannot be pickled" while running (train.py) on https://github.com/RoyalSkye/Image-Caption
(my OS is Window 10)
train.py
#!/usr/bin/env python3
import h5py
import time
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
from torch import nn
from torch.nn.utils.rnn import pack_padded_sequence
from models import *
from transformer import *
from datasets import *
from utils import *
from nltk.translate.bleu_score import corpus_bleu
import argparse
import codecs
import numpy as np
def train(args, train_loader, encoder, decoder, criterion, encoder_optimizer, decoder_optimizer, epoch):
"""
Performs one epoch's training.
:param train_loader: DataLoader for training data
:param encoder: encoder model
:param decoder: decoder model
:param criterion: loss layer
:param encoder_optimizer: optimizer to update encoder's weights (if fine-tuning)
:param decoder_optimizer: optimizer to update decoder's weights
:param epoch: epoch number
"""
decoder.train() # train mode (dropout and batchnorm is used)
encoder.train()
batch_time = AverageMeter() # forward prop. + back prop. time
data_time = AverageMeter() # data loading time
losses = AverageMeter() # loss (per word decoded)
top5accs = AverageMeter() # top5 accuracy
start = time.time()
# Batches
for i, (imgs, caps, caplens) in enumerate(train_loader):
data_time.update(time.time() - start)
# Move to GPU, if available
imgs = imgs.to(device)
caps = caps.to(device)
caplens = caplens.to(device)
# Forward prop.
imgs = encoder(imgs)
# imgs: [batch_size, 14, 14, 2048]
# caps: [batch_size, 52]
# caplens: [batch_size, 1]
scores, caps_sorted, decode_lengths, alphas, sort_ind = decoder(imgs, caps, caplens)
# Since we decoded starting with <start>, the targets are all words after <start>, up to <end>
targets = caps_sorted[:, 1:]
# Remove timesteps that we didn't decode at, or are pads
# pack_padded_sequence is an easy trick to do this
scores = pack_padded_sequence(scores, decode_lengths, batch_first=True).data
targets = pack_padded_sequence(targets, decode_lengths, batch_first=True).data
# print(scores.size())
# print(targets.size())
# Calculate loss
loss = criterion(scores, targets)
# Add doubly stochastic attention regularization
# Second loss, mentioned in paper "Show, Attend and Tell: Neural Image Caption Generation with Visual Attention"
# https://arxiv.org/abs/1502.03044
# In section 4.2.1 Doubly stochastic attention regularization: We know the weights sum to 1 at a given timestep.
# But we also encourage the weights at a single pixel p to sum to 1 across all timesteps T.
# This means we want the model to attend to every pixel over the course of generating the entire sequence.
# Therefore, we want to minimize the difference between 1 and the sum of a pixel's weights across all timesteps.
if args.decoder_mode == "lstm":
loss += args.alpha_c * ((1. - alphas.sum(dim=1)) ** 2).mean()
elif args.decoder_mode == "transformer":
dec_alphas = alphas["dec_enc_attns"]
alpha_trans_c = args.alpha_c / (args.n_heads * args.decoder_layers)
for layer in range(args.decoder_layers): # args.decoder_layers = len(dec_alphas)
cur_layer_alphas = dec_alphas[layer] # [batch_size, n_heads, 52, 196]
for h in range(args.n_heads):
cur_head_alpha = cur_layer_alphas[:, h, :, :]
loss += alpha_trans_c * ((1. - cur_head_alpha.sum(dim=1)) ** 2).mean()
# Back prop.
decoder_optimizer.zero_grad()
if encoder_optimizer is not None:
encoder_optimizer.zero_grad()
loss.backward()
# Clip gradients
if args.grad_clip is not None:
clip_gradient(decoder_optimizer, args.grad_clip)
if encoder_optimizer is not None:
clip_gradient(encoder_optimizer, args.grad_clip)
# Update weights
decoder_optimizer.step()
if encoder_optimizer is not None:
encoder_optimizer.step()
# Keep track of metrics
top5 = accuracy(scores, targets, 5)
losses.update(loss.item(), sum(decode_lengths))
top5accs.update(top5, sum(decode_lengths))
batch_time.update(time.time() - start)
start = time.time()
if i % args.print_freq == 0:
print("Epoch: {}/{} step: {}/{} Loss: {} AVG_Loss: {} Top-5 Accuracy: {} Batch_time: {}s".format(epoch+1, args.epochs, i+1, len(train_loader), losses.val, losses.avg, top5accs.val, batch_time.val))
def validate(args, val_loader, encoder, decoder, criterion):
"""
Performs one epoch's validation.
:param val_loader: DataLoader for validation data.
:param encoder: encoder model
:param decoder: decoder model
:param criterion: loss layer
:return: score_dict {'Bleu_1': 0., 'Bleu_2': 0., 'Bleu_3': 0., 'Bleu_4': 0., 'METEOR': 0., 'ROUGE_L': 0., 'CIDEr': 1.}
"""
decoder.eval() # eval mode (no dropout or batchnorm)
if encoder is not None:
encoder.eval()
batch_time = AverageMeter()
losses = AverageMeter()
top5accs = AverageMeter()
start = time.time()
references = list() # references (true captions) for calculating BLEU-4 score
hypotheses = list() # hypotheses (predictions)
# explicitly disable gradient calculation to avoid CUDA memory error
with torch.no_grad():
# Batches
for i, (imgs, caps, caplens, allcaps) in enumerate(val_loader):
# Move to device, if available
imgs = imgs.to(device)
caps = caps.to(device)
caplens = caplens.to(device)
# Forward prop.
if encoder is not None:
imgs = encoder(imgs)
scores, caps_sorted, decode_lengths, alphas, sort_ind = decoder(imgs, caps, caplens)
# Since we decoded starting with <start>, the targets are all words after <start>, up to <end>
targets = caps_sorted[:, 1:]
# Remove timesteps that we didn't decode at, or are pads
# pack_padded_sequence is an easy trick to do this
scores_copy = scores.clone()
scores = pack_padded_sequence(scores, decode_lengths, batch_first=True).data
targets = pack_padded_sequence(targets, decode_lengths, batch_first=True).data
# Calculate loss
loss = criterion(scores, targets)
# Add doubly stochastic attention regularization
if args.decoder_mode == "lstm":
loss += args.alpha_c * ((1. - alphas.sum(dim=1)) ** 2).mean()
elif args.decoder_mode == "transformer":
dec_alphas = alphas["dec_enc_attns"]
alpha_trans_c = args.alpha_c / (args.n_heads * args.decoder_layers)
for layer in range(args.decoder_layers): # args.decoder_layers = len(dec_alphas)
cur_layer_alphas = dec_alphas[layer] # [batch_size, n_heads, 52, 196]
for h in range(args.n_heads):
cur_head_alpha = cur_layer_alphas[:, h, :, :]
loss += alpha_trans_c * ((1. - cur_head_alpha.sum(dim=1)) ** 2).mean()
# Keep track of metrics
losses.update(loss.item(), sum(decode_lengths))
top5 = accuracy(scores, targets, 5)
top5accs.update(top5, sum(decode_lengths))
batch_time.update(time.time() - start)
start = time.time()
# Store references (true captions), and hypothesis (prediction) for each image
# If for n images, we have n hypotheses, and references a, b, c... for each image, we need -
# references = [[ref1a, ref1b, ref1c], [ref2a, ref2b], ...], hypotheses = [hyp1, hyp2, ...]
# References
allcaps = allcaps[sort_ind] # because images were sorted in the decoder
for j in range(allcaps.shape[0]):
img_caps = allcaps[j].tolist()
img_captions = list(
map(lambda c: [w for w in c if w not in {word_map['<start>'], word_map['<pad>']}],
img_caps)) # remove <start> and pads
references.append(img_captions)
# Hypotheses
_, preds = torch.max(scores_copy, dim=2)
preds = preds.tolist()
temp_preds = list()
for j, p in enumerate(preds):
temp_preds.append(preds[j][:decode_lengths[j]]) # remove pads
preds = temp_preds
hypotheses.extend(preds)
assert len(references) == len(hypotheses)
# Calculate BLEU-1~4 scores
# metrics = {}
# weights = (1.0 / 1.0,)
# metrics["bleu1"] = corpus_bleu(references, hypotheses, weights)
# weights = (1.0/2.0, 1.0/2.0,)
# metrics["bleu2"] = corpus_bleu(references, hypotheses, weights)
# weights = (1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0,)
# metrics["bleu3"] = corpus_bleu(references, hypotheses, weights)
# metrics["bleu4"] = corpus_bleu(references, hypotheses)
# Calculate BLEU1~4, METEOR, ROUGE_L, CIDEr scores
metrics = get_eval_score(references, hypotheses)
print("EVA LOSS: {} TOP-5 Accuracy {} BLEU-1 {} BLEU2 {} BLEU3 {} BLEU-4 {} METEOR {} ROUGE_L {} CIDEr {}".format
(losses.avg, top5accs.avg, metrics["Bleu_1"], metrics["Bleu_2"], metrics["Bleu_3"], metrics["Bleu_4"],
metrics["METEOR"], metrics["ROUGE_L"], metrics["CIDEr"]))
return metrics
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Image_Captioning')
# Data parameters
parser.add_argument('--data_folder', default="./dataset/generated_data",
help='folder with data files saved by create_input_files.py.')
parser.add_argument('--data_name', default="coco_5_cap_per_img_5_min_word_freq",
help='base name shared by data files.')
# Model parameters
parser.add_argument('--emb_dim', type=int, default=300, help='dimension of word embeddings.')
parser.add_argument('--attention_dim', type=int, default=512, help='dimension of attention linear layers.')
parser.add_argument('--decoder_dim', type=int, default=512, help='dimension of decoder RNN.')
parser.add_argument('--n_heads', type=int, default=8, help='Multi-head attention.')
parser.add_argument('--dropout', type=float, default=0.1, help='dropout')
parser.add_argument('--decoder_mode', default="transformer", help='which model does decoder use?') # lstm or transformer
parser.add_argument('--attention_method', default="ByPixel", help='which attention method to use?') # ByPixel or ByChannel
parser.add_argument('--encoder_layers', type=int, default=2, help='the number of layers of encoder in Transformer.')
parser.add_argument('--decoder_layers', type=int, default=6, help='the number of layers of decoder in Transformer.')
# Training parameters
parser.add_argument('--epochs', type=int, default=100,
help='number of epochs to train for (if early stopping is not triggered).')
parser.add_argument('--stop_criteria', type=int, default=25, help='training stop if epochs_since_improvement == stop_criteria')
parser.add_argument('--batch_size', type=int, default=32, help='batch_size')
parser.add_argument('--print_freq', type=int, default=100, help='print training/validation stats every __ batches.')
parser.add_argument('--workers', type=int, default=1, help='for data-loading; right now, only 1 works with h5pys.')
parser.add_argument('--encoder_lr', type=float, default=1e-4, help='learning rate for encoder if fine-tuning.')
parser.add_argument('--decoder_lr', type=float, default=1e-4, help='learning rate for decoder.')
parser.add_argument('--grad_clip', type=float, default=5., help='clip gradients at an absolute value of.')
parser.add_argument('--alpha_c', type=float, default=1.,
help='regularization parameter for doubly stochastic attention, as in the paper.')
parser.add_argument('--fine_tune_encoder', type=bool, default=False, help='whether fine-tune encoder or not')
parser.add_argument('--fine_tune_embedding', type=bool, default=False, help='whether fine-tune word embeddings or not')
parser.add_argument('--checkpoint', default=None, help='path to checkpoint, None if none.')
parser.add_argument('--embedding_path', default=None, help='path to pre-trained word Embedding.')
args = parser.parse_args()
# load checkpoint, these parameters can't be modified
final_args = {"emb_dim": args.emb_dim,
"attention_dim": args.attention_dim,
"decoder_dim": args.decoder_dim,
"n_heads": args.n_heads,
"dropout": args.dropout,
"decoder_mode": args.decoder_mode,
"attention_method": args.attention_method,
"encoder_layers": args.encoder_layers,
"decoder_layers": args.decoder_layers}
start_epoch = 0
best_bleu4 = 0. # BLEU-4 score right now
epochs_since_improvement = 0 # keeps track of number of epochs since there's been an improvement in validation BLEU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # sets device for model and PyTorch tensors
cudnn.benchmark = True # set to true only if inputs to model are fixed size; otherwise lot of computational overhead
print(device)
# Read word map
word_map_file = os.path.join(args.data_folder, 'WORDMAP_' + args.data_name + '.json')
with open(word_map_file, 'r') as j:
word_map = json.load(j)
# Initialize / load checkpoint
if args.checkpoint is None:
encoder = CNN_Encoder(attention_method=args.attention_method)
encoder.fine_tune(args.fine_tune_encoder)
encoder_optimizer = torch.optim.Adam(params=filter(lambda p: p.requires_grad, encoder.parameters()),
lr=args.encoder_lr) if args.fine_tune_encoder else None
if args.decoder_mode == "lstm":
decoder = DecoderWithAttention(attention_dim=args.attention_dim,
embed_dim=args.emb_dim,
decoder_dim=args.decoder_dim,
vocab_size=len(word_map),
dropout=args.dropout)
elif args.decoder_mode == "transformer":
decoder = Transformer(vocab_size=len(word_map), embed_dim=args.emb_dim, encoder_layers=args.encoder_layers,
decoder_layers=args.decoder_layers, dropout=args.dropout,
attention_method=args.attention_method, n_heads=args.n_heads)
decoder_optimizer = torch.optim.Adam(params=filter(lambda p: p.requires_grad, decoder.parameters()),
lr=args.decoder_lr)
# load pre-trained word embedding
if args.embedding_path is not None:
all_word_embeds = {}
for i, line in enumerate(codecs.open(args.embedding_path, 'r', 'utf-8')):
s = line.strip().split()
all_word_embeds[s[0]] = np.array([float(i) for i in s[1:]])
# change emb_dim
args.emb_dim = list(all_word_embeds.values())[-1].size
word_embeds = np.random.uniform(-np.sqrt(0.06), np.sqrt(0.06), (len(word_map), args.emb_dim))
for w in word_map:
if w in all_word_embeds:
word_embeds[word_map[w]] = all_word_embeds[w]
elif w.lower() in all_word_embeds:
word_embeds[word_map[w]] = all_word_embeds[w.lower()]
else:
# <pad> <start> <end> <unk>
embedding_i = torch.ones(1, args.emb_dim)
torch.nn.init.xavier_uniform_(embedding_i)
word_embeds[word_map[w]] = embedding_i
word_embeds = torch.FloatTensor(word_embeds).to(device)
decoder.load_pretrained_embeddings(word_embeds)
decoder.fine_tune_embeddings(args.fine_tune_embedding)
print('Loaded {} pre-trained word embeddings.'.format(len(word_embeds)))
else:
checkpoint = torch.load(args.checkpoint, map_location=str(device))
start_epoch = checkpoint['epoch'] + 1
epochs_since_improvement = checkpoint['epochs_since_improvement']
best_bleu4 = checkpoint['metrics']["Bleu_4"]
encoder = checkpoint['encoder']
encoder_optimizer = checkpoint['encoder_optimizer']
decoder = checkpoint['decoder']
decoder_optimizer = checkpoint['decoder_optimizer']
decoder.fine_tune_embeddings(args.fine_tune_embedding)
# load final_args from checkpoint
final_args = checkpoint['final_args']
for key in final_args.keys():
args.__setattr__(key, final_args[key])
if args.fine_tune_encoder is True and encoder_optimizer is None:
print("Encoder_Optimizer is None, Creating new Encoder_Optimizer!")
encoder.fine_tune(args.fine_tune_encoder)
encoder_optimizer = torch.optim.Adam(params=filter(lambda p: p.requires_grad, encoder.parameters()),
lr=args.encoder_lr)
# Move to GPU, if available
decoder = decoder.to(device)
encoder = encoder.to(device)
print("encoder_layers {} decoder_layers {} n_heads {} dropout {} attention_method {} encoder_lr {} "
"decoder_lr {} alpha_c {}".format(args.encoder_layers, args.decoder_layers, args.n_heads, args.dropout,
args.attention_method, args.encoder_lr, args.decoder_lr, args.alpha_c))
print(encoder)
print(decoder)
# Loss function
criterion = nn.CrossEntropyLoss().to(device)
# Custom dataloaders
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# normalize = transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
# pin_memory: If True, the data loader will copy Tensors into CUDA pinned memory before returning them.
# If your data elements are a custom type, or your collate_fn returns a batch that is a custom type.
train_loader = torch.utils.data.DataLoader(
CaptionDataset(args.data_folder, args.data_name, 'TRAIN', transform=transforms.Compose([normalize])),
batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True)
val_loader = torch.utils.data.DataLoader(
CaptionDataset(args.data_folder, args.data_name, 'VAL', transform=transforms.Compose([normalize])),
batch_size=args.batch_size, shuffle=True, num_workers=args.workers, pin_memory=True)
# Epochs
for epoch in range(start_epoch, args.epochs):
# Decay learning rate if there is no improvement for 5 consecutive epochs, and terminate training after 25
# 8 20
if epochs_since_improvement == args.stop_criteria:
print("the model has not improved in the last {} epochs".format(args.stop_criteria))
break
if epochs_since_improvement > 0 and epochs_since_improvement % 5 == 0:
adjust_learning_rate(decoder_optimizer, 0.8)
if args.fine_tune_encoder and encoder_optimizer is not None:
print(encoder_optimizer)
adjust_learning_rate(encoder_optimizer, 0.8)
# One epoch's training
train(args, train_loader=train_loader, encoder=encoder, decoder=decoder, criterion=criterion,
encoder_optimizer=encoder_optimizer, decoder_optimizer=decoder_optimizer, epoch=epoch)
# One epoch's validation
metrics = validate(args, val_loader=val_loader, encoder=encoder, decoder=decoder, criterion=criterion)
recent_bleu4 = metrics["Bleu_4"]
# Check if there was an improvement
is_best = recent_bleu4 > best_bleu4
best_bleu4 = max(recent_bleu4, best_bleu4)
if not is_best:
epochs_since_improvement += 1
print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))
else:
epochs_since_improvement = 0
# Save checkpoint
save_checkpoint(args.data_name, epoch, epochs_since_improvement, encoder, decoder, encoder_optimizer,
decoder_optimizer, metrics, is_best, final_args)
Traceback (most recent call last):
File D:\COCO\imge_captioning_transform_github\3\Image-Caption-master\train.py:394 in
train(args, train_loader=train_loader, encoder=encoder, decoder=decoder, criterion=criterion,
File D:\COCO\imge_captioning_transform_github\3\Image-Caption-master\train.py:44 in train
for i, (imgs, caps, caplens) in enumerate(train_loader):
File ~\anaconda3\envs\my_envir_gpu\lib\site-packages\torch\utils\data\dataloader.py:368 in iter
return self._get_iterator()
File ~\anaconda3\envs\my_envir_gpu\lib\site-packages\torch\utils\data\dataloader.py:314 in _get_iterator
return _MultiProcessingDataLoaderIter(self)
File ~\anaconda3\envs\my_envir_gpu\lib\site-packages\torch\utils\data\dataloader.py:927 in init
w.start()
File ~\anaconda3\envs\my_envir_gpu\lib\multiprocessing\process.py:121 in start
self._popen = self._Popen(self)
File ~\anaconda3\envs\my_envir_gpu\lib\multiprocessing\context.py:224 in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File ~\anaconda3\envs\my_envir_gpu\lib\multiprocessing\context.py:327 in _Popen
return Popen(process_obj)
File ~\anaconda3\envs\my_envir_gpu\lib\multiprocessing\popen_spawn_win32.py:93 in init
reduction.dump(process_obj, to_child)
File ~\anaconda3\envs\my_envir_gpu\lib\multiprocessing\reduction.py:60 in dump
ForkingPickler(file, protocol).dump(obj)
File ~\anaconda3\envs\my_envir_gpu\lib\site-packages\h5py_hl\base.py:368 in getnewargs
raise TypeError("h5py objects cannot be pickled")
TypeError: h5py objects cannot be pickled
2022-06-30 17:24:41.206091: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-06-30 17:24:41.525476: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3497 MB memory: -> device: 0, name: NVIDIA GeForce RTX 3060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6
2022-06-30 17:24:44.486920: W tensorflow/core/common_runtime/forward_type_inference.cc:231] Type inference failed. This indicates an invalid graph that escaped type checking. Error message: INVALID_ARGUMENT: expected compatible input types, but input 1:
type_id: TFT_OPTIONAL
args {
type_id: TFT_PRODUCT
args {
type_id: TFT_TENSOR
args {
type_id: TFT_LEGACY_VARIANT
}
}
}
is neither a subtype nor a supertype of the combined inputs preceding it:
type_id: TFT_OPTIONAL
args {
type_id: TFT_PRODUCT
args {
type_id: TFT_TENSOR
args {
type_id: TFT_INT32
}
}
}
while inferring type of node 'cond_40/output/_25'
2022-06-30 17:24:45.077383: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
Traceback (most recent call last):
File "", line 1, in
File "C:\Users\MSI\anaconda3\envs\my_envir_gpu\lib\multiprocessing\spawn.py", line 116, in spawn_main
exitcode = _main(fd, parent_sentinel)
File "C:\Users\MSI\anaconda3\envs\my_envir_gpu\lib\multiprocessing\spawn.py", line 126, in _main
self = reduction.pickle.load(from_parent)
EOFError: Ran out of input
I am using Python 3.9, PyTorch 1.10 with Cuda 11.3 (WINDOWS 10)
Thanks,
I am trying ( num_workers=0 ) , but still same error

Pytorch TypeError: scatter_add() takes from 2 to 5 positional arguments but 6 were given

Could someone please explain how to fix the situation where I take an example straight from the Pytorch documentation here:
import torch
from torch_geometric.datasets import TUDataset
from torch_geometric.data import Data, Dataset,DataLoader
dataset = TUDataset(root='data/TUDataset', name='MUTAG')
print()
print(f'Dataset: {dataset}:')
print('====================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')
data = dataset[0] # Get the first graph object.
print()
print(data)
print('=============================================================')
# Gather some statistics about the first graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
#print(f'Has isolated nodes: {data.has_isolated_nodes()}')
#print(f'Has self-loops: {data.has_self_loops()}')
#print(f'Is undirected: {data.is_undirected()}')
torch.manual_seed(12345)
dataset = dataset.shuffle()
train_dataset = dataset[:150]
test_dataset = dataset[150:]
print(f'Number of training graphs: {len(train_dataset)}')
print(f'Number of test graphs: {len(test_dataset)}')
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
for step, data in enumerate(train_loader):
print(f'Step {step + 1}:')
print('=======')
print(f'Number of graphs in the current batch: {data.num_graphs}')
print(data)
print()
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool
class GCN(torch.nn.Module):
def __init__(self, hidden_channels):
super(GCN, self).__init__()
torch.manual_seed(12345)
self.conv1 = GCNConv(dataset.num_node_features, hidden_channels)
self.conv2 = GCNConv(hidden_channels, hidden_channels)
self.conv3 = GCNConv(hidden_channels, hidden_channels)
self.lin = Linear(hidden_channels, dataset.num_classes)
def forward(self, x, edge_index, batch):
# 1. Obtain node embeddings
x = self.conv1(x, edge_index)
x = x.relu()
x = self.conv2(x, edge_index)
x = x.relu()
x = self.conv3(x, edge_index)
# 2. Readout layer
x = global_mean_pool(x, batch) # [batch_size, hidden_channels]
# 3. Apply a final classifier
x = F.dropout(x, p=0.5, training=self.training)
x = self.lin(x)
return x
model = GCN(hidden_channels=64)
print(model)
model = GCN(hidden_channels=64)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()
def train():
model.train()
for data in train_loader: # Iterate in batches over the training dataset.
out = model(data.x, data.edge_index, data.batch) # Perform a single forward pass.
loss = criterion(out, data.y) # Compute the loss.
loss.backward() # Derive gradients.
optimizer.step() # Update parameters based on gradients.
optimizer.zero_grad() # Clear gradients.
def test(loader):
model.eval()
correct = 0
for data in loader: # Iterate in batches over the training/test dataset.
out = model(data.x, data.edge_index, data.batch)
pred = out.argmax(dim=1) # Use the class with highest probability.
correct += int((pred == data.y).sum()) # Check against ground-truth labels.
return correct / len(loader.dataset) # Derive ratio of correct predictions.
for epoch in range(1, 171):
train()
train_acc = test(train_loader)
test_acc = test(test_loader)
print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')
I get the error:
out = model(data.x, data.edge_index, data.batch) # Perform a single forward pass.
File "/root/miniconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "base_test.py", line 67, in forward
x = self.conv1(x, edge_index)
File "/root/miniconda3/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/root/miniconda3/lib/python3.7/site-packages/torch_geometric/nn/conv/gcn_conv.py", line 103, in forward
return self.propagate(edge_index, x=x, norm=norm)
File "/root/miniconda3/lib/python3.7/site-packages/torch_geometric/nn/conv/message_passing.py", line 127, in propagate
out = scatter_(self.aggr, out, edge_index[i], dim, dim_size=size[i])
File "/root/miniconda3/lib/python3.7/site-packages/torch_geometric/utils/scatter.py", line 34, in scatter_
out = op(src, index, dim, None, dim_size, fill_value)
TypeError: scatter_add() takes from 2 to 5 positional arguments but 6 were given
I am using:
torch 1.11.0
torch-cluster 1.6.0
torch-geometric 1.3.2
torch-scatter 2.0.9
torch-sparse 0.6.13
torchmetrics 0.9.1
Just to mention, the answer was just to uninstall torch-scatter and then re-install the exact same version.

timm: AssertionError: Batch size should be even when using this

I am implementing simple example for creating a model from scratch using timm. My batch is even but still i am getting an error and I am following below tutorials
https://gist.github.com/Chris-hughes10/a9e5ec2cd7e7736c651bf89b5484b4a9
import argparse
from pathlib import Path
import timm
import timm.data
import timm.loss
import timm.optim
import timm.utils
import torch
import torchmetrics
from timm.scheduler import CosineLRScheduler
from pytorch_accelerated.callbacks import SaveBestModelCallback
from pytorch_accelerated.trainer import Trainer, DEFAULT_CALLBACKS
def create_datasets(image_size, data_mean, data_std, train_path, val_path):
train_transforms = timm.data.create_transform(
input_size=image_size,
is_training=True,
mean=data_mean,
std=data_std,
auto_augment="rand-m7-mstd0.5-inc1",
)
eval_transforms = timm.data.create_transform(
input_size=image_size, mean=data_mean, std=data_std
)
train_dataset = timm.data.dataset.ImageDataset(
train_path, transform=train_transforms
)
eval_dataset = timm.data.dataset.ImageDataset(val_path, transform=eval_transforms)
return train_dataset, eval_dataset
class TimmMixupTrainer(Trainer):
def __init__(self, eval_loss_fn, mixup_args, num_classes, *args, **kwargs):
super().__init__(*args, **kwargs)
self.eval_loss_fn = eval_loss_fn
self.num_updates = None
self.mixup_fn = timm.data.Mixup(**mixup_args)
self.accuracy = torchmetrics.Accuracy(num_classes=num_classes)
self.ema_accuracy = torchmetrics.Accuracy(num_classes=num_classes)
self.ema_model = None
def create_scheduler(self):
return timm.scheduler.CosineLRScheduler(
self.optimizer,
t_initial=self.run_config.num_epochs,
cycle_decay=0.5,
lr_min=1e-6,
t_in_epochs=True,
warmup_t=3,
warmup_lr_init=1e-4,
cycle_limit=1,
)
def training_run_start(self):
# Model EMA requires the model without a DDP wrapper and before sync batchnorm conversion
self.ema_model = timm.utils.ModelEmaV2(
self._accelerator.unwrap_model(self.model), decay=0.9
)
if self.run_config.is_distributed:
self.model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(self.model)
def train_epoch_start(self):
super().train_epoch_start()
self.num_updates = self.run_history.current_epoch * len(self._train_dataloader)
def calculate_train_batch_loss(self, batch):
xb, yb = batch
mixup_xb, mixup_yb = self.mixup_fn(xb, yb)
return super().calculate_train_batch_loss((mixup_xb, mixup_yb))
def train_epoch_end(
self,
):
self.ema_model.update(self.model)
self.ema_model.eval()
if hasattr(self.optimizer, "sync_lookahead"):
self.optimizer.sync_lookahead()
def scheduler_step(self):
self.num_updates += 1
if self.scheduler is not None:
self.scheduler.step_update(num_updates=self.num_updates)
def calculate_eval_batch_loss(self, batch):
with torch.no_grad():
xb, yb = batch
outputs = self.model(xb)
val_loss = self.eval_loss_fn(outputs, yb)
self.accuracy.update(outputs.argmax(-1), yb)
ema_model_preds = self.ema_model.module(xb).argmax(-1)
self.ema_accuracy.update(ema_model_preds, yb)
return {"loss": val_loss, "model_outputs": outputs, "batch_size": xb.size(0)}
def eval_epoch_end(self):
super().eval_epoch_end()
if self.scheduler is not None:
self.scheduler.step(self.run_history.current_epoch + 1)
self.run_history.update_metric("accuracy", self.accuracy.compute().cpu())
self.run_history.update_metric(
"ema_model_accuracy", self.ema_accuracy.compute().cpu()
)
self.accuracy.reset()
self.ema_accuracy.reset()
def main(data_path):
# Set training arguments, hardcoded here for clarity
image_size = (224, 224)
lr = 5e-3
smoothing = 0.1
mixup = 0.2
cutmix = 1.0
batch_size = 32
bce_target_thresh = 0.2
num_epochs = 40
data_path = Path(data_path)
train_path = data_path / "train"
val_path = data_path / "val"
num_classes = len(list(train_path.iterdir()))
mixup_args = dict(
mixup_alpha=mixup,
cutmix_alpha=cutmix,
label_smoothing=smoothing,
num_classes=num_classes,
)
# Create model using timm
model = timm.create_model(
"resnet50d", pretrained=False, num_classes=num_classes, drop_path_rate=0.05
)
# Load data config associated with the model to use in data augmentation pipeline
data_config = timm.data.resolve_data_config({}, model=model, verbose=True)
data_mean = data_config["mean"]
data_std = data_config["std"]
# Create training and validation datasets
train_dataset, eval_dataset = create_datasets(
train_path=train_path,
val_path=val_path,
image_size=image_size,
data_mean=data_mean,
data_std=data_std,
)
# Create optimizer
optimizer = timm.optim.create_optimizer_v2(
model, opt="lookahead_AdamW", lr=lr, weight_decay=0.01
)
# As we are using Mixup, we can use BCE during training and CE for evaluation
train_loss_fn = timm.loss.BinaryCrossEntropy(
target_threshold=bce_target_thresh, smoothing=smoothing
)
validate_loss_fn = torch.nn.CrossEntropyLoss()
# Create trainer and start training
trainer = TimmMixupTrainer(
model=model,
optimizer=optimizer,
loss_func=train_loss_fn,
eval_loss_fn=validate_loss_fn,
mixup_args=mixup_args,
num_classes=num_classes,
callbacks=[
*DEFAULT_CALLBACKS,
SaveBestModelCallback(watch_metric="accuracy", greater_is_better=True),
],
)
trainer.train(
per_device_batch_size=batch_size,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
num_epochs=num_epochs,
create_scheduler_fn=trainer.create_scheduler,
)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Simple example of training script using timm.")
parser.add_argument("--data_dir", required=True, help="The data folder on disk.")
args = parser.parse_args()
main(args.data_dir)
Traceback
100%|█████████▉| 333/334 [00:37<00:00, 9.09it/s]Traceback (most recent call last):
File "/home/cvpr/PycharmProjects/timm_tutorials/scratch_model.py", line 201, in <module>
main(args.data_dir)
File "/home/cvpr/PycharmProjects/timm_tutorials/scratch_model.py", line 188, in main
trainer.train(
File "/home/cvpr/anaconda3/envs/timm_tutorials/lib/python3.8/site-packages/pytorch_accelerated/trainer.py", line 437, in train
self._run_training()
File "/home/cvpr/anaconda3/envs/timm_tutorials/lib/python3.8/site-packages/pytorch_accelerated/trainer.py", line 641, in _run_training
self._run_train_epoch(self._train_dataloader)
File "/home/cvpr/anaconda3/envs/timm_tutorials/lib/python3.8/site-packages/pytorch_accelerated/trainer.py", line 704, in _run_train_epoch
batch_output = self.calculate_train_batch_loss(batch)
File "/home/cvpr/PycharmProjects/timm_tutorials/scratch_model.py", line 78, in calculate_train_batch_loss
mixup_xb, mixup_yb = self.mixup_fn(xb, yb)
File "/home/cvpr/anaconda3/envs/timm_tutorials/lib/python3.8/site-packages/timm/data/mixup.py", line 210, in __call__
assert len(x) % 2 == 0, 'Batch size should be even when using this'
AssertionError: Batch size should be even when using this
100%|█████████▉| 333/334 [00:37<00:00, 8.93it/s]
As you can see the model works perfectly until the last batch of the epoch. It is because for the final batch, the loader get the remaining images and put them together in this batch. Unfortunately this final batch seems to have odd size.

Getting ValueError without having Numpy arrays

I am using keras to build a model to caption images(basically give them a description).But when I am executing this, I get an error right before the training starts. I am using tensorflow_gpu(2.0) and the latest keras version.This is the error I get(A little shortened)==>
Epoch 1/1
Traceback (most recent call last):
File "C:\Users\neelg\Documents\Atom_projects\Main\Img_cap.py", line 165, in <module>
model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\keras\engine\training.py", line 1732, in fit_generator
initial_epoch=initial_epoch)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\keras\engine\training_generator.py", line 220, in fit_generator
reset_metrics=False)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\keras\engine\training.py", line 1514, in train_on_batch
outputs = self.train_function(ins)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\tensorflow_core\python\keras\backend.py", line 3734, in __call__
value = ops.convert_to_tensor(value, dtype=tensor.dtype)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1184, in convert_to_tensor
return convert_to_tensor_v2(value, dtype, preferred_dtype, name)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1242, in convert_to_tensor_v2
as_ref=False)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\tensorflow_core\python\framework\ops.py", line 1296, in internal_convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\tensorflow_core\python\framework\tensor_conversion_registry.py", line 52, in _default_conversion_function
return constant_op.constant(value, dtype, name=name)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\tensorflow_core\python\framework\constant_op.py", line 227, in constant
allow_broadcast=True)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\tensorflow_core\python\framework\constant_op.py", line 235, in _constant_impl
t = convert_to_eager_tensor(value, ctx, dtype)
File "C:\Users\neelg\TF2_GPU\lib\site-packages\tensorflow_core\python\framework\constant_op.py", line 96, in convert_to_eager_tensor
return ops.EagerTensor(value, ctx.device_name, dtype)
ValueError: setting an array element with a sequence.
The main issue is that only one line of the code is addressed here, rest other lines are inside the Tensorflow libraries.
BTW The line addressed by the error is:-
model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
I think this may be a possible bug. If anybody has any requests of some extra information required, I can edit out the question. When googling, There are mentions that the 'Numpy array' is not structured or has a data_type not specified. However, My code contains no Numpy arrays so I am unsure How to proceed.
Thanx in advance
Here is the code(as requested by makis)Note that I am using a Flickr8k dataset which cannot be uploaded:=====>
#This an Image Captioning Model developed by Neel Gupta :)
# IMPORTS GOES HERE -----------------
#import tensorflow as tf # Even tho we don't need it, It activates CUDA Functionality
from numpy import array
from pickle import load
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.utils import plot_model
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Embedding
from keras.layers import Dropout
from keras.layers.merge import add
from keras.callbacks import ModelCheckpoint
from os import path
def load_doc(filename):
file = open(filename, 'r') #Opening the file.
text = file.read()
file.close()
return text
def load_set(filename):
doc = load_doc(filename) #Loading the document
dataset = list()
for line in doc.split('/n'): #Weeding out the empty lines
if len(line) < 1:
continue
identifier = line.split('.')[0]
dataset.append(identifier)
return set(dataset)
def load_clean_descriptions(filename, dataset):
doc = load_doc(filename)
descriptions = dict()
for line in doc.split('\n'):
# split line by white space
tokens = line.split()
# split id from description
image_id, image_desc = tokens[0], tokens[1:]
# skip images not in the set
if image_id in dataset:
# create list
if image_id not in descriptions:
descriptions[image_id] = list()
# wrap description in tokens
desc = '#Start# ' + ' '.join(image_desc) + ' #End#'
descriptions[image_id].append(desc)
return descriptions
def load_photo_features(filename, dataset):
all_features = load(open(filename, 'rb'))
features = {k: all_features[k] for k in dataset}
return features
# convert a dictionary of clean descriptions to a list of descriptions
def to_lines(descriptions):
all_desc = list()
for key in dict.keys(descriptions):
[all_desc.append(d) for d in descriptions[key]]
return all_desc
def create_tokenizer(descriptions):
lines = to_lines(descriptions) #Fitting tokenizer
tokenizer = Tokenizer()
tokenizer.fit_on_texts(lines)
return tokenizer
def max_length(descriptions):
lines = to_lines(descriptions)
return max(len(d.split()) for d in lines)
def create_sequences(tokenizer, max_length, desc_list, photo, vocab_size):
X1, X2, y = list(), list(), list()
for desc in desc_list:
# encode the sequence
seq = tokenizer.texts_to_sequences([desc])[0]
# split one sequence into multiple X,y pairs
for i in range(1, len(seq)):
# split into input and output pair
in_seq, out_seq = seq[:i], seq[i]
# pad input sequence
in_seq = pad_sequences([in_seq], maxlen=None)[0] #Removed maxlen argument
# encode output sequence
out_seq = to_categorical([out_seq], num_classes=vocab_size)[0]
# store
X1.append(photo)
X2.append(in_seq)
y.append(out_seq)
return array(X1), array(X2), array(y)
def define_model(vocab_size, max_length):
# feature extractor model
inputs1 = Input(shape=(4096,))
fe1 = Dropout(0.5)(inputs1)
fe2 = Dense(256, activation='relu')(fe1)
# sequence model
inputs2 = Input(shape=(1,)) #remove shape
se1 = Embedding(vocab_size, 256, mask_zero=True)(inputs2)
se2 = Dropout(0.5)(se1)
se3 = LSTM(256)(se2)
# decoder model
decoder1 = add([fe2, se3])
decoder2 = Dense(256, activation='relu')(decoder1)
outputs = Dense(vocab_size, activation='softmax')(decoder2)
# tie it together [image, seq] [word]
model = Model(inputs=[inputs1, inputs2], outputs=outputs)
# compile model
model.compile(loss='categorical_crossentropy', optimizer='adam')
# summarize model
model.summary()
#Uncomment the line below to deactivate/activate a graph being constructed==>
#plot_model(model, to_file='model.png', show_shapes=True)
return model
def data_generator(descriptions, photos, tokenizer, max_length, vocab_size):
# loop for ever over images
#This function is for saving ur RAM from utter destruction
while 1:
for key, desc_list in descriptions.items():
# retrieve the photo feature
photo = photos[key][0]
in_img, in_seq, out_word = create_sequences(tokenizer, max_length, desc_list, photo, vocab_size)
yield [[in_img, in_seq], out_word]
# HERE GOESETH THE IMPORTANT STUFF:-----------
filename = 'C:/Users/neelg/Documents/Atom_projects/Main/Flickr8k_text/Flickr_8k.trainImages.txt'
# Be sure to replace the file-name with ur own!!!
train = load_set(filename)
#print('Dataset:%d' % len (train))
train_descriptions = load_clean_descriptions('C:/Users/neelg/Documents/Atom_projects/Main/descriptions.txt', train) #File name of clean descriptions
#print('Descriptions: train=%d' % len(train_descriptions))
#photo features
train_features = load_photo_features('C:/Users/neelg/Documents/Atom_projects/Main/features.pkl', train)
#print('Photos: train=%d' % len(train_features))
print("Loaded photo features!")
#Setting up the Tokenizer--
tokenizer = create_tokenizer(train_descriptions)
vocab_size = len(tokenizer.word_index) + 1
#print('Vocabulary Size: %d' % vocab_size)
print('\n', "Created tokenizers")
#max_length = max_length(descriptions) #Getting the max_length
#THE MODEL
model = define_model(vocab_size, max_length)
print('\n', "model ready for some action!")
# train the model, run epochs manually and save after each epoch
epochs = 20
steps = len(train_descriptions)
# test the data generator
print("Giving the Data generator a workout :)")
generator = data_generator(train_descriptions, train_features, tokenizer, max_length, vocab_size)
inputs, outputs = next(generator)
print(inputs[0].shape)
print(inputs[1].shape)
print(outputs.shape)
for i in range(epochs):
generator = data_generator(train_descriptions, train_features, tokenizer, max_length, vocab_size)
model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
print('Starting the training.....')
# save model
model.save('model_' + str(i) + '.h5')
In create_sequences, X2 has elements of different shapes. When you try to convert it to a Numpy array here.
return array(X1), array(X2), array(y)
it throws
ValueError: setting an array element with a sequence
Just returning X2, without converting it to Numpy arrays, should fix it.

Categories

Resources