CUDA batch out of memory - python

I have a small dataset and running a script called LightXML which is on a git:https://github.com/kongds/LightXML
I am getting this error:
torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 192.00 MiB (GPU 0; 6.00 GiB total capacity; 4.71 GiB already allocated; 0 bytes free; 4.82 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
I have found multiple posts stating that I should reduce the batch size but I can't seem to find it defined.
The script is halting at:
train_loss = model.one_epoch(epoch, trainloader, optimizer, mode='train',
eval_loader=validloader if args.valid else testloader,
eval_step=args.eval_step, log=LOG)
The one_epoch method is the following and the script is stalling at outputs = self(**inputs)
def one_epoch(self, epoch, dataloader, optimizer,
mode='train', eval_loader=None, eval_step=20000, log=None):
bar = tqdm.tqdm(total=len(dataloader))
p1, p3, p5 = 0, 0, 0
g_p1, g_p3, g_p5 = 0, 0, 0
total, acc1, acc3, acc5 = 0, 0, 0, 0
g_acc1, g_acc3, g_acc5 = 0, 0, 0
train_loss = 0
if mode == 'train':
self.train()
else:
self.eval()
if self.use_swa and epoch == self.swa_warmup_epoch and mode == 'train':
self.swa_init()
if self.use_swa and mode == 'eval':
self.swa_swap_params()
pred_scores, pred_labels = [], []
bar.set_description(f'{mode}-{epoch}')
with torch.set_grad_enabled(mode == 'train'):
for step, data in enumerate(dataloader):
batch = tuple(t for t in data)
have_group = len(batch) > 4
check_memory()
inputs = {'input_ids': batch[0].cuda(),
'attention_mask': batch[1].cuda(),
'token_type_ids': batch[2].cuda()}
if mode == 'train':
inputs['labels'] = batch[3].cuda()
if self.group_y is not None:
inputs['group_labels'] = batch[4].cuda()
inputs['candidates'] = batch[5].cuda()
print("------------------------------------------------------------")
outputs = self(**inputs)
bar.update(1)
if mode == 'train':
loss = outputs[1]
loss /= self.update_count
train_loss += loss.item()
with amp.scale_loss(loss, optimizer) as scaled_loss:
scaled_loss.backward()
if step % self.update_count == 0:
optimizer.step()
self.zero_grad()
if step % eval_step == 0 and eval_loader is not None and step != 0:
results = self.one_epoch(epoch, eval_loader, optimizer, mode='eval')
p1, p3, p5 = results[3:6]
g_p1, g_p3, g_p5 = results[:3]
if self.group_y is not None:
log.log(f'{epoch:>2} {step:>6}: {p1:.4f}, {p3:.4f}, {p5:.4f}'
f' {g_p1:.4f}, {g_p3:.4f}, {g_p5:.4f}')
else:
log.log(f'{epoch:>2} {step:>6}: {p1:.4f}, {p3:.4f}, {p5:.4f}')
# NOTE: we don't reset model to train mode and keep model in eval mode
# which means all dropout will be remove after `eval_step` in every epoch
# this tricks makes LightXML converge fast
# self.train()
if self.use_swa and step % self.swa_update_step == 0:
self.swa_step()
bar.set_postfix(loss=loss.item())
elif self.group_y is None:
logits = outputs
if mode == 'eval':
labels = batch[3]
_total, _acc1, _acc3, _acc5 = self.get_accuracy(None, logits, labels.cpu().numpy())
total += _total; acc1 += _acc1; acc3 += _acc3; acc5 += _acc5
p1 = acc1 / total
p3 = acc3 / total / 3
p5 = acc5 / total / 5
bar.set_postfix(p1=p1, p3=p3, p5=p5)
elif mode == 'test':
pred_scores.append(logits.detach().cpu())
else:
group_logits, candidates, logits = outputs
if mode == 'eval':
labels = batch[3]
group_labels = batch[4]
_total, _acc1, _acc3, _acc5 = self.get_accuracy(candidates, logits, labels.cpu().numpy())
total += _total; acc1 += _acc1; acc3 += _acc3; acc5 += _acc5
p1 = acc1 / total
p3 = acc3 / total / 3
p5 = acc5 / total / 5
_, _g_acc1, _g_acc3, _g_acc5 = self.get_accuracy(None, group_logits, group_labels.cpu().numpy())
g_acc1 += _g_acc1; g_acc3 += _g_acc3; g_acc5 += _g_acc5
g_p1 = g_acc1 / total
g_p3 = g_acc3 / total / 3
g_p5 = g_acc5 / total / 5
bar.set_postfix(p1=p1, p3=p3, p5=p5, g_p1=g_p1, g_p3=g_p3, g_p5=g_p5)
elif mode == 'test':
_scores, _indices = torch.topk(logits.detach().cpu(), k=100)
_labels = torch.stack([candidates[i][_indices[i]] for i in range(_indices.shape[0])], dim=0)
pred_scores.append(_scores.cpu())
pred_labels.append(_labels.cpu())
if self.use_swa and mode == 'eval':
self.swa_swap_params()
bar.close()
if mode == 'eval':
return g_p1, g_p3, g_p5, p1, p3, p5
elif mode == 'test':
return torch.cat(pred_scores, dim=0).numpy(), torch.cat(pred_labels, dim=0).numpy() if len(pred_labels) != 0 else None
elif mode == 'train':
return train_loss
I am also including the forward function which is stopping at outs=self.bert()
def forward(self, input_ids, attention_mask, token_type_ids,
labels=None, group_labels=None, candidates=None):
is_training = labels is not None
outs = self.bert(
input_ids,
attention_mask=attention_mask,
token_type_ids=token_type_ids
)[-1]
out = torch.cat([outs[-i][:, 0] for i in range(1, self.feature_layers+1)], dim=-1)
out = self.drop_out(out)
group_logits = self.l0(out)
if self.group_y is None:
logits = group_logits
if is_training:
loss_fn = torch.nn.BCEWithLogitsLoss()
loss = loss_fn(logits, labels)
return logits, loss
else:
return logits
if is_training:
l = labels.to(dtype=torch.bool)
target_candidates = torch.masked_select(candidates, l).detach().cpu()
target_candidates_num = l.sum(dim=1).detach().cpu()
groups, candidates, group_candidates_scores = self.get_candidates(group_logits,
group_gd=group_labels if is_training else None)
if is_training:
bs = 0
new_labels = []
for i, n in enumerate(target_candidates_num.numpy()):
be = bs + n
c = set(target_candidates[bs: be].numpy())
c2 = candidates[i]
new_labels.append(torch.tensor([1.0 if i in c else 0.0 for i in c2 ]))
if len(c) != new_labels[-1].sum():
s_c2 = set(c2)
for cc in list(c):
if cc in s_c2:
continue
for j in range(new_labels[-1].shape[0]):
if new_labels[-1][j].item() != 1:
c2[j] = cc
new_labels[-1][j] = 1.0
break
bs = be
labels = torch.stack(new_labels).cuda()
candidates, group_candidates_scores = torch.LongTensor(candidates).cuda(), torch.Tensor(group_candidates_scores).cuda()
emb = self.l1(out)
embed_weights = self.embed(candidates) # N, sampled_size, H
emb = emb.unsqueeze(-1)
logits = torch.bmm(embed_weights, emb).squeeze(-1)
if is_training:
loss_fn = torch.nn.BCEWithLogitsLoss()
loss = loss_fn(logits, labels) + loss_fn(group_logits, group_labels)
return logits, loss
else:
candidates_scores = torch.sigmoid(logits)
candidates_scores = candidates_scores * group_candidates_scores
return group_logits, candidates, candidates_scores

Yes, probably the problem is in the batch_size. Usually batch_size is defined in the DataLoader. In the main.py you can see:
trainloader = DataLoader(train_d, batch_size=args.batch, num_workers=5,
shuffle=True)
You can define it at the moment of the script running - args.batch.

Related

How to resolve the problem in google collab?

def cw_l2_attack(model, images, labels, targeted=False, c=1e-4, kappa=0, max_iter=1000, learning_rate=0.01) :
images = images.to(device)
labels = labels.to(device)
Define f-function
def f(x) :
outputs = model(x)
one_hot_labels = torch.eye(len(outputs[0]))[labels].to(device)
i, _ = torch.max((1-one_hot_labels)*outputs, dim=1)
j = torch.masked_select(outputs, one_hot_labels.byte())
If targeted, optimize for making the other class most likely
if targeted :
return torch.clamp(i-j, min=-kappa)
If untargeted, optimize for making the other class most likely
else :
return torch.clamp(j-i, min=-kappa)
w = torch.zeros_like(images, requires_grad=True).to(device)
optimizer = optim.Adam([w], lr=learning_rate)
prev = 1e10
for step in range(max_iter) :
a = 1/2*(nn.Tanh()(w) + 1)
loss1 = nn.MSELoss(reduction='sum')(a, images)
loss2 = torch.sum(c*f(a))
cost = loss1 + loss2
optimizer.zero_grad()
cost.backward()
optimizer.step()
Early Stop when loss does not converge.
if step % (max_iter//10) == 0 :
if cost > prev :
print('Attack Stopped due to CONVERGENCE....')
return a
prev = cost
print('- Learning Progress : %2.2f %% ' %((step+1)/max_iter*100), end='\r')
attack_images = 1/2*(nn.Tanh()(w) + 1)
return attack_images
print("Attack Image & Predicted Label")
model.eval()
correct = 0
total = 0
for images, labels in normal_loader:
images = cw_l2_attack(model, images, labels, targeted=False, c=0.1)
labels = labels.to(device)
outputs = model(images)
pre = torch.max(outputs.data, 1)
total += 1
correct += (pre == labels).sum()
imshow(torchvision.utils.make_grid(images.cpu().data, normalize=True), [normal_data.classes[i] for i in pre])
print('Accuracy of test text: %f %%' % (100 * float(correct) / total))
RuntimeError: indices should be either on cpu or on the same device as the indexed tensor (cpu)

for loop sending wrong data to list

Below is the code, i am running a for loop to train on different training sizes. The first loop works correctly, where when training begins, the training and validation accuracy are sent to a list, then a frame then finally a csv. But on the subsequent loops, a data generator is sent to the list. Can anyone see where the issue is, because I cant find it.
Also if you have a better way of doing this (data compiling for analysis), I'm all ears.
The first block is the code snippet, the second block is the full code. The for loop starts about halfway down.
for i in range(1,6):
training_loader, validation_loader, training_ones, training_zeros, validation_ones, validation_zeros = switcher().sets(case)
train_accuracy = []
val_accuracy = []
start_time = time.time()
for epoch in tqdm(range(1, epochs + 1), total=epochs):
train()
train_acc = test(training_loader)
train_accuracy.append(train_acc)
val_acc = test(validation_loader)
val_accuracy.append(val_acc)
accuracy = pd.DataFrame()
accuracy['train_acc'] = train_accuracy
accuracy['val_acc'] = val_accuracy
accuracy.to_csv(f'C:\\Users\\Anthony Sirico\\Documents\\GitHub\\PyGeo_Circuit_exp\\PyGeo_Circuit_exp\\imbalance_exp\\csv files\\accuracy_{i}.csv')
import sys
sys.path.insert(0, 'C:\\Users\\user\\Desktop\\imbalance_exp\\imbalance_exp\\imbalance_exp')
import torch
from torch_geometric.loader import DataLoader
import imb_dataset as imb
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GraphConv
from torch_geometric.nn import global_mean_pool
import neptune.new as neptune
import pandas as pd
from sklearn.metrics import confusion_matrix, matthews_corrcoef
import seaborn as sns
from neptune.new.types import File
from tqdm import tqdm
import time
known = imb.ImbalanceDataset(root='imb_50v2', set='known', split=0.5)
unknown = imb.ImbalanceDataset(root='imb_50v2', set='unknown', split=0.5)
all_data = imb.ImbalanceDataset(root='imb_50v2', set='All', split=None)
torch.manual_seed(12345)
known = known.shuffle()
lr = 0.001
training_perc = 0.9
N = len(known)
mini_batch_size = 32
epochs = 600
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
case = 2
class switcher:
def sets(self, case):
default = known
return getattr(self, 'case_' + str(case), lambda: default)()
def case_1(self):
training_set = known[:int(training_perc*len(known))]
validation_set = known[int(training_perc*len(known)):]
training_loader = DataLoader(training_set, batch_size=mini_batch_size, shuffle=True)
validation_loader = DataLoader(validation_set, batch_size=mini_batch_size, shuffle=False)
training_ones = []
training_zeros = []
validation_ones = []
validation_zeros = []
for i in range(len(training_set)):
if training_set[i].y == 1:
training_ones.append(training_set[i])
else:
training_zeros.append(training_set[i])
for i in range(len(validation_set)):
if validation_set[i].y == 1:
validation_ones.append(validation_set[i])
else:
validation_zeros.append(validation_set[i])
return training_loader, validation_loader, training_ones, training_zeros, validation_ones, validation_zeros
def case_2(self):
one_index = round(len(known) * 0.25)
known_ones = known[:one_index].copy()
known_ones.shuffle()
known_zeros = known[one_index:].copy()
known_zeros.shuffle()
training_ones = known_ones[:int(training_perc*len(known_ones))]
training_zeros = known_zeros[:len(training_ones)]
training_set = torch.utils.data.ConcatDataset([training_ones, training_zeros])
validation_ones = known_ones[int(training_perc*len(known_ones)):]
validation_zeros = known_zeros[len(training_ones):]
validation_set = torch.utils.data.ConcatDataset([validation_ones, validation_zeros])
training_loader = DataLoader(training_set, batch_size=mini_batch_size, shuffle=True)
validation_loader = DataLoader(validation_set, batch_size=mini_batch_size, shuffle=False)
training_ones = []
training_zeros = []
validation_ones = []
validation_zeros = []
for i in range(len(training_set)):
if training_set[i].y == 1:
training_ones.append(training_set[i])
else:
training_zeros.append(training_set[i])
for i in range(len(validation_set)):
if validation_set[i].y == 1:
validation_ones.append(validation_set[i])
else:
validation_zeros.append(validation_set[i])
return training_loader, validation_loader, training_ones, training_zeros, validation_ones, validation_zeros
class GCN(torch.nn.Module):
def __init__(self, hidden_channels):
super(GCN, self).__init__()
torch.manual_seed(12345)
self.conv1 = GraphConv(known.num_node_features, hidden_channels)
self.conv2 = GraphConv(hidden_channels, hidden_channels)
self.conv3 = GraphConv(hidden_channels, hidden_channels)
self.lin = Linear(hidden_channels, known.num_classes)
def forward(self, x, edge_index, batch):
# 1. Obtain node embeddings
x = self.conv1(x, edge_index)
x = x.relu()
x = self.conv2(x, edge_index)
x = x.relu()
x = self.conv3(x, edge_index)
# 2. Readout layer
x = global_mean_pool(x, batch) # [batch_size, hidden_channels]
# 3. Apply a final classifier
x = F.dropout(x, p=0.5, training=self.training)
x = self.lin(x)
return x
model = GCN(hidden_channels=64).to(device)
print(model)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = torch.nn.CrossEntropyLoss()
def train():
model.train()
total_loss = 0
for data in training_loader: # Iterate in batches over the training dataset.
data = data.to(device)
out = model(data.x, data.edge_index, data.batch) # Perform a single forward pass.
loss = criterion(out, data.y) # Compute the loss solely based on the training nodes.
loss.backward() # Derive gradients.
optimizer.step() # Update parameters based on gradients.
optimizer.zero_grad() # Clear gradients.
def test(loader):
model.eval()
correct = 0
for data in loader: # Iterate in batches over the training/test dataset.
data = data.to(device)
out = model(data.x, data.edge_index, data.batch)
pred = out.argmax(dim=1) # Use the class with highest probability.
correct += int((pred == data.y).sum()) # Check against ground-truth labels.
return correct / len(loader.dataset) # Derive ratio of correct predictions.
output_frame = pd.DataFrame(columns=['epoch', 'lr', 'known', 'unknown', 'train_ones', 'train_zeros', 'val_ones', 'val_zeros', 'tn_all', 'fp_all', 'fn_all', 'tp_all', 'tn_known', 'fp_known', 'fn_known', 'tp_known', 'precision_all', 'recall_all', 'f1_all', 'accuracy_all', 'mcc_all', 'precision_known', 'recall_known', 'f1_known', 'accuracy_known', 'mcc_known', 'time_elapsed'])
for i in range(1,6):
training_loader, validation_loader, training_ones, training_zeros, validation_ones, validation_zeros = switcher().sets(case)
train_accuracy = []
val_accuracy = []
start_time = time.time()
for epoch in tqdm(range(1, epochs + 1), total=epochs):
train()
train_acc = test(training_loader)
train_accuracy.append(train_acc)
val_acc = test(validation_loader)
val_accuracy.append(val_acc)
accuracy = pd.DataFrame()
accuracy['train_acc'] = train_accuracy
accuracy['val_acc'] = val_accuracy
accuracy.to_csv(f'C:\\Users\\Anthony Sirico\\Documents\\GitHub\\PyGeo_Circuit_exp\\PyGeo_Circuit_exp\\imbalance_exp\\csv files\\accuracy_{i}.csv')
unknown_loader = DataLoader(unknown, batch_size=1, shuffle=False)
predictions = []
all_correct = 0
known_correct = 0
for test in unknown_loader:
test = test.to(device)
out = model(test.x, test.edge_index, test.batch)
pred = out.argmax(dim=1)
predictions.append(pred)
all_correct += int((pred == test.y_all).sum())
known_correct += int((pred == test.y_known).sum())
pred_df = pd.DataFrame()
pred_df['y_all_true'] = [i.item() for i in unknown.data.y_all]
pred_df['y_known_true'] = [i.item() for i in unknown.data.y_known]
pred_df['y_pred'] = [i.item() for i in predictions]
pred_df.to_csv(f'C:\\Users\\Anthony Sirico\\Documents\\GitHub\\PyGeo_Circuit_exp\\PyGeo_Circuit_exp\\imbalance_exp\\csv files\\pred_df_{i}.csv')
cf_matrix_all = confusion_matrix(pred_df['y_all_true'], pred_df['y_pred'])
ax = sns.heatmap(cf_matrix_all, annot=True, fmt='g', cmap='Blues')
ax.title.set_text('Confusion Matrix based on all data')
tn_all, fp_all, fn_all, tp_all = cf_matrix_all.ravel()
end_time = time.time()
time_elapsed = end_time - start_time
precision_all = tp_all / (tp_all + fp_all)
recall_all = tp_all / (tp_all + fn_all)
f1_all = 2 * (precision_all * recall_all) / (precision_all + recall_all)
accuracy_all = (tp_all + tn_all) / (tp_all + tn_all + fp_all + fn_all)
mcc_all = matthews_corrcoef(pred_df['y_all_true'], pred_df['y_pred'])
cf_matrix_known = confusion_matrix(pred_df['y_known_true'], pred_df['y_pred'])
ax = sns.heatmap(cf_matrix_known, annot=True, fmt='g', cmap='Blues')
ax.title.set_text('Confusion Matrix based on known data')
tn_known, fp_known, fn_known, tp_known = cf_matrix_known.ravel()
precision_known = tp_known / (tp_known + fp_known)
recall_known = tp_known / (tp_known + fn_known)
f1_known = 2 * (precision_known * recall_known) / (precision_known + recall_known)
accuracy_known = (tp_known + tn_known) / (tp_known + tn_known + fp_known + fn_known)
mcc_known = matthews_corrcoef(pred_df['y_known_true'], pred_df['y_pred'])
#'epoch', 'lr', 'known', 'unknown', 'train_ones', 'train_zeros', 'val_ones', 'val_zeros', 'tn_all', 'fp_all', 'fn_all', 'tp_all', 'tn_known', 'fp_known', 'fn_known', 'tp_known
output_frame.loc[i] = [epochs, lr, len(known), len(unknown), len(training_ones), len(training_zeros), len(validation_ones), len(validation_zeros), tn_all, fp_all, fn_all, tp_all, tn_known, fp_known, fn_known, tp_known, precision_all, recall_all, f1_all, accuracy_all, mcc_all, precision_known, recall_known, f1_known, accuracy_known, mcc_known, time_elapsed]
output_frame.to_csv('C:\\Users\\Anthony Sirico\\Documents\\GitHub\\PyGeo_Circuit_exp\\PyGeo_Circuit_exp\\imbalance_exp\\csv files\\final_output.csv')
training_perc -= 0.2

Can keeping channels more than '3' in images crash CNNs?

I have an encoder model which was working fine with single channel 1024,1024 images, I'm trying to patch the original images (mega pixel images) to 256, 256, 64 images. I've changed my encoder input to match the images input that the model will get. The model call function is working fine, loss is getting calculated fine, but I'm getting the following error with tape.gradient:
2023-01-29 17:11:01.868555: F tensorflow/stream_executor/cuda/cuda_dnn.cc:593] Check failed: cudnnSetTensorNdDescriptor(handle_.get(), elem_type, nd, dims.data(), strides.data()) == CUDNN_STATUS_SUCCESS (3 vs. 0)batch_descriptor: {count: 10 feature_map_count: 64 spatial: 0 0 value_min: 0.000000 value_max: 0.000000 layout: BatchYXDepth} C:\ProgramData\Anaconda3\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py:318: UserWarning: resource_tracker: There appear to be 2 leaked folder objects to clean up at shutdown warnings.warn('resource_tracker: There appear to be %d ' C:\ProgramData\Anaconda3\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py:333: UserWarning: resource_tracker: C:\Users\kjhan\AppData\Local\Temp\joblib_memmapping_folder_12248_772bbeeeccff43089fa0e6d75271eebd_97f2f7c6edd04b468a4360bf96b91b84: FileNotFoundError(2, 'The system cannot find the path specified') warnings.warn('resource_tracker: %s: %r' % (name, e)) C:\ProgramData\Anaconda3\lib\site-packages\joblib\externals\loky\backend\resource_tracker.py:333: UserWarning: resource_tracker: C:\Users\kjhan\AppData\Local\Temp\joblib_memmapping_folder_12248_29db2f1e8ff54416b9a78c6f69dcff23_40a85063390f46d38d15c1877f99acc8: FileNotFoundError(2, 'The system cannot find the path specified') warnings.warn('resource_tracker: %s: %r' % (name, e)) [I 17:11:10.131 NotebookApp] KernelRestarter: restarting kernel (1/5), keep random ports kernel 286d1cc6-8ddd-46f9-baf7-5e1b05a2d033 restarted
My code is as below
class encoder(tf.keras.layers.Layer):
def __init__(self,size:tuple):
super(encoder, self).__init__()
#encoder Module
self.input_cnn = keras.layers.InputLayer(input_shape=(size[0],size[1],size[2]))
# Ex0panding features for computation
self.conv_1 = keras.layers.Conv2D(input_shape=(size[0],size[1],size[2]),filters=16,kernel_size=(3,3),padding='same',activation='relu')
# 1/4 size reduction
self.conv_2 = keras.layers.MaxPool2D(pool_size=(2,2),strides=(2,2))
self.conv_3 = keras.layers.Conv2D(filters = 16,kernel_size=(4,4),strides=(2,2),padding='same',activation='relu')
self.conv_4 = keras.layers.Conv2D(filters = 32,kernel_size=(4,4),strides=(4,4),padding='same',activation='relu')
self.conv_5 = keras.layers.BatchNormalization()
# 1/2 size reduction
self.conv_6 = keras.layers.MaxPool2D(pool_size=(2,2),strides=(2,2))
# 3x3 Filter third application
self.conv_7 = keras.layers.Conv2D(filters = 64,kernel_size=(8,8),strides=(8,8),padding='same',activation='relu')
# 1/4 size reduction
self.conv_8 = keras.layers.MaxPool2D(pool_size=(2,2),strides=(2,2))
# 3x3 Filter third application
self.conv_9 = keras.layers.BatchNormalization()
self.conv_10 = keras.layers.Conv2D(filters = 1 ,kernel_size=(3,3),strides=(1,1),padding='same',activation='relu')
def call(self,inputs,training = True):
x = self.input_cnn(inputs)
x = self.conv_1(x)
x = self.conv_2(x)
x = self.conv_3(x)
x = self.conv_4(x)
if training == True:
x = self.conv_5(x,training = True)
else:
x = self.conv_5(x,training = False)
x = self.conv_6(x)
x = self.conv_7(x)
x = self.conv_8(x)
if training == True:
x = self.conv_9(x,training = True)
else:
x = self.conv_9(x,training = False)
x = self.conv_10(x)
return x
size 0 is 256
size 1 is 256
size 2 is 64
Train_step from main model:
def __init__(self, size: tuple, optimizer = keras.optimizers.Adam(learning_rate=1e-3),loss_fn = keras.losses.BinaryCrossentropy(from_logits=False),metric = tf.keras.metrics.Accuracy()):
super(BCDClassifier, self).__init__()
#Input for catagorical data
self.input_cat = keras.layers.InputLayer(input_shape = (2,))
#Encoder Layer for each view
self.encode = encoder(size)
#flatten encoded output
self.flatten = keras.layers.Flatten()
#Concatenate Layer
self.concat = keras.layers.Concatenate(axis = 1)
#Classifier layer
self.classify = classifier(32)
#deffine model parameters
self.optimizer = optimizer
self.loss_fn = loss_fn
self.loss_tracker = keras.metrics.Mean(name="loss")
self.acc_tracker = metric
self.f1_tracker = tfa.metrics.F1Score(num_classes=2, threshold=0.5, average = 'micro')
self.sk_metric_acc = accuracy_score
self.sk_metric_f1 = f1_score
self.acc_history = []
self.loss_history = []
self.f1_history = []
# Forward pass of model - order does matter.
def call(self, cat_batch, view_batch, images_batch, training = True):
x1 = self.encode(images_batch,training)
x2 = self.input_cat(cat_batch)
x1 = self.flatten(x1)
x12 = self.concat([x1,x2])
x12 = self.classify(x12)
return x12
def train_step(self,cat_batch, views_batch, images_batch, target_batch, training = True):
with tf.GradientTape() as tape:
logits = self(cat_batch, views_batch, images_batch,training)
loss_value = self.loss_fn(target_batch, logits)
grads = tape.gradient(loss_value, self.trainable_weights)
self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
self.loss_tracker.update_state(loss_value)
pred = []
target = []
threshold = 0.5
for val in logits.numpy():
if isinstance(val,np.ndarray):
for v_1 in val:
if isinstance(v_1,np.ndarray):
for v_2 in v_1:
if v_2 > threshold:
pred.append(1.0)
else:
pred.append(0.0)
else:
if v_1 > threshold:
pred.append(1.0)
else:
pred.append(0.0)
else:
if val > threshold:
pred.append(1.0)
else:
pred.append(0.0)
for val in target_batch:
if isinstance(val,np.ndarray):
for v_1 in val:
if isinstance(v_1,np.ndarray):
for v_2 in v_1:
target.append(v_2)
else:
target.append(v_1)
else:
target.append(val)
acc = self.sk_metric_acc(target,pred)
f1 = self.sk_metric_f1(target,pred)
self.f1_tracker.update_state(target_batch,logits)
return {"Loss": self.loss_tracker.result(), "Accuracy": acc, 'F1-score':f1}

How can I save DDPG model?

I try to save the model using the saver method (I use the save function in the DDPG class to save), but when restoring the model, the result is far from the one I saved (I save the model when the episodic award is zero, the restor method in the code is commented out ) My code is below with all the features. I use Python 3.7, gym 0.16.0 and TensorFlow version 1.13.1
import tensorflow as tf
import numpy as np
import gym
epsiode_steps = 500
# learning rate for actor
lr_a = 0.001
# learning rate for critic
lr_c = 0.002
gamma = 0.9
alpha = 0.01
memory = 10000
batch_size = 32
render = True
class DDPG(object):
def __init__(self, no_of_actions, no_of_states, a_bound, ):
self.memory = np.zeros((memory, no_of_states * 2 + no_of_actions + 1), dtype=np.float32)
# initialize pointer to point to our experience buffer
self.pointer = 0
self.sess = tf.Session()
# initialize the variance for OU process for exploring policies
self.noise_variance = 3.0
self.no_of_actions, self.no_of_states, self.a_bound = no_of_actions, no_of_states, a_bound,
self.state = tf.placeholder(tf.float32, [None, no_of_states], 's')
self.next_state = tf.placeholder(tf.float32, [None, no_of_states], 's_')
self.reward = tf.placeholder(tf.float32, [None, 1], 'r')
with tf.variable_scope('Actor'):
self.a = self.build_actor_network(self.state, scope='eval', trainable=True)
a_ = self.build_actor_network(self.next_state, scope='target', trainable=False)
with tf.variable_scope('Critic'):
q = self.build_crtic_network(self.state, self.a, scope='eval', trainable=True)
q_ = self.build_crtic_network(self.next_state, a_, scope='target', trainable=False)
self.ae_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Actor/eval')
self.at_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Actor/target')
self.ce_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Critic/eval')
self.ct_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='Critic/target')
# update target value
self.soft_replace = [
[tf.assign(at, (1 - alpha) * at + alpha * ae), tf.assign(ct, (1 - alpha) * ct + alpha * ce)]
for at, ae, ct, ce in zip(self.at_params, self.ae_params, self.ct_params, self.ce_params)]
q_target = self.reward + gamma * q_
# compute TD error i.e actual - predicted values
td_error = tf.losses.mean_squared_error(labels=(self.reward + gamma * q_), predictions=q)
# train the critic network with adam optimizer
self.ctrain = tf.train.AdamOptimizer(lr_c).minimize(td_error, name="adam-ink", var_list=self.ce_params)
a_loss = - tf.reduce_mean(q)
# train the actor network with adam optimizer for minimizing the loss
self.atrain = tf.train.AdamOptimizer(lr_a).minimize(a_loss, var_list=self.ae_params)
tf.summary.FileWriter("logs2", self.sess.graph)
# initialize all variables
self.sess.run(tf.global_variables_initializer())
# saver
self.saver = tf.train.Saver()
# self.saver.restore(self.sess, "Pendulum/nn.ckpt")
def choose_action(self, s):
a = self.sess.run(self.a, {self.state: s[np.newaxis, :]})[0]
a = np.clip(np.random.normal(a, self.noise_variance), -2, 2)
return a
def learn(self):
# soft target replacement
self.sess.run(self.soft_replace)
indices = np.random.choice(memory, size=batch_size)
batch_transition = self.memory[indices, :]
batch_states = batch_transition[:, :self.no_of_states]
batch_actions = batch_transition[:, self.no_of_states: self.no_of_states + self.no_of_actions]
batch_rewards = batch_transition[:, -self.no_of_states - 1: -self.no_of_states]
batch_next_state = batch_transition[:, -self.no_of_states:]
self.sess.run(self.atrain, {self.state: batch_states})
self.sess.run(self.ctrain, {self.state: batch_states, self.a: batch_actions, self.reward: batch_rewards,
self.next_state: batch_next_state})
def store_transition(self, s, a, r, s_):
trans = np.hstack((s, a, [r], s_))
index = self.pointer % memory
self.memory[index, :] = trans
self.pointer += 1
if self.pointer > memory:
self.noise_variance *= 0.99995
self.learn()
def build_actor_network(self, s, scope, trainable):
# Actor DPG
with tf.variable_scope(scope):
l1 = tf.layers.dense(s, 30, activation=tf.nn.tanh, name='l1', trainable=trainable)
a = tf.layers.dense(l1, self.no_of_actions, activation=tf.nn.tanh, name='a', trainable=trainable)
return tf.multiply(a, self.a_bound, name="scaled_a")
def build_crtic_network(self, s, a, scope, trainable):
with tf.variable_scope(scope):
n_l1 = 30
w1_s = tf.get_variable('w1_s', [self.no_of_states, n_l1], trainable=trainable)
w1_a = tf.get_variable('w1_a', [self.no_of_actions, n_l1], trainable=trainable)
b1 = tf.get_variable('b1', [1, n_l1], trainable=trainable)
net = tf.nn.tanh(tf.matmul(s, w1_s) + tf.matmul(a, w1_a) + b1)
q = tf.layers.dense(net, 1, trainable=trainable)
return q
def save(self):
self.saver.save(self.sess, "Pendulum/nn.ckpt")
env = gym.make("Pendulum-v0")
env = env.unwrapped
env.seed(1)
no_of_states = env.observation_space.shape[0]
no_of_actions = env.action_space.shape[0]
a_bound = env.action_space.high
ddpg = DDPG(no_of_actions, no_of_states, a_bound)
total_reward = []
# set the number of episodes
no_of_episodes = 300
for i in range(no_of_episodes):
# initialize the environment
s = env.reset()
ep_reward = 0
for j in range(epsiode_steps):
env.render()
# select action by adding noise through OU process
a = ddpg.choose_action(s)
# peform the action and move to the next state s
s_, r, done, info = env.step(a)
# store the the transition to our experience buffer
# sample some minibatch of experience and train the network
ddpg.store_transition(s, a, r, s_)
# update current state as next state
s = s_
# add episodic rewards
ep_reward += r
if int(ep_reward) == 0 and i > 150:
ddpg.save()
print("save")
quit()
if j == epsiode_steps - 1:
total_reward.append(ep_reward)
print('Episode:', i, ' Reward: %i' % int(ep_reward))
break
I solved this problem completely by rewriting the code and adding the learning function in a separate session

Cartpole-v0 loss increasing using DQN

Hi I'm trying to train a DQN to solve gym's Cartpole problem.
For some reason the Loss looks like this (orange line). Can y'all take a look at my code and help with this? I've played around with the hyperparameters a decent bit so I don't think they're the issue here.
class DQN(nn.Module):
def __init__(self, input_dim, output_dim):
super(DQN, self).__init__()
self.linear1 = nn.Linear(input_dim, 16)
self.linear2 = nn.Linear(16, 32)
self.linear3 = nn.Linear(32, 32)
self.linear4 = nn.Linear(32, output_dim)
def forward(self, x):
x = F.relu(self.linear1(x))
x = F.relu(self.linear2(x))
x = F.relu(self.linear3(x))
return self.linear4(x)
final_epsilon = 0.05
initial_epsilon = 1
epsilon_decay = 5000
global steps_done
steps_done = 0
def select_action(state):
global steps_done
sample = random.random()
eps_threshold = final_epsilon + (initial_epsilon - final_epsilon) * \
math.exp(-1. * steps_done / epsilon_decay)
if sample > eps_threshold:
with torch.no_grad():
state = torch.Tensor(state)
steps_done += 1
q_calc = model(state)
node_activated = int(torch.argmax(q_calc))
return node_activated
else:
node_activated = random.randint(0,1)
steps_done += 1
return node_activated
class ReplayMemory(object): # Stores [state, reward, action, next_state, done]
def __init__(self, capacity):
self.capacity = capacity
self.memory = [[],[],[],[],[]]
def push(self, data):
"""Saves a transition."""
for idx, point in enumerate(data):
#print("Col {} appended {}".format(idx, point))
self.memory[idx].append(point)
def sample(self, batch_size):
rows = random.sample(range(0, len(self.memory[0])), batch_size)
experiences = [[],[],[],[],[]]
for row in rows:
for col in range(5):
experiences[col].append(self.memory[col][row])
return experiences
def __len__(self):
return len(self.memory[0])
input_dim, output_dim = 4, 2
model = DQN(input_dim, output_dim)
target_net = DQN(input_dim, output_dim)
target_net.load_state_dict(model.state_dict())
target_net.eval()
tau = 2
discount = 0.99
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
memory = ReplayMemory(65536)
BATCH_SIZE = 128
def optimize_model():
if len(memory) < BATCH_SIZE:
return 0
experiences = memory.sample(BATCH_SIZE)
state_batch = torch.Tensor(experiences[0])
action_batch = torch.LongTensor(experiences[1]).unsqueeze(1)
reward_batch = torch.Tensor(experiences[2])
next_state_batch = torch.Tensor(experiences[3])
done_batch = experiences[4]
pred_q = model(state_batch).gather(1, action_batch)
next_state_q_vals = torch.zeros(BATCH_SIZE)
for idx, next_state in enumerate(next_state_batch):
if done_batch[idx] == True:
next_state_q_vals[idx] = -1
else:
# .max in pytorch returns (values, idx), we only want vals
next_state_q_vals[idx] = (target_net(next_state_batch[idx]).max(0)[0]).detach()
better_pred = (reward_batch + next_state_q_vals).unsqueeze(1)
loss = F.smooth_l1_loss(pred_q, better_pred)
optimizer.zero_grad()
loss.backward()
for param in model.parameters():
param.grad.data.clamp_(-1, 1)
optimizer.step()
return loss
points = []
losspoints = []
#save_state = torch.load("models/DQN_target_11.pth")
#model.load_state_dict(save_state['state_dict'])
#optimizer.load_state_dict(save_state['optimizer'])
env = gym.make('CartPole-v0')
for i_episode in range(5000):
observation = env.reset()
episode_loss = 0
if episode % tau == 0:
target_net.load_state_dict(model.state_dict())
for t in range(1000):
#env.render()
state = observation
action = select_action(observation)
observation, reward, done, _ = env.step(action)
if done:
next_state = [0,0,0,0]
else:
next_state = observation
memory.push([state, action, reward, next_state, done])
episode_loss = episode_loss + float(optimize_model(i_episode))
if done:
points.append((i_episode, t+1))
print("Episode {} finished after {} timesteps".format(i_episode, t+1))
print("Avg Loss: ", episode_loss / (t+1))
losspoints.append((i_episode, episode_loss / (t+1)))
if (i_episode % 100 == 0):
eps = final_epsilon + (initial_epsilon - final_epsilon) * \
math.exp(-1. * steps_done / epsilon_decay)
print(eps)
if ((i_episode+1) % 5001 == 0):
save = {'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}
torch.save(save, "models/DQN_target_" + str(i_episode // 5000) + ".pth")
break
env.close()
x = [coord[0] * 100 for coord in points]
y = [coord[1] for coord in points]
x2 = [coord[0] * 100 for coord in losspoints]
y2 = [coord[1] for coord in losspoints]
plt.plot(x, y)
plt.plot(x2, y2)
plt.show()
I basically followed the tutorial pytorch has, except using the state returned by the env rather than the pixels. I also changed the replay memory because I was having issues there. Other than that, I left everything else pretty much the same.
Edit:
I tried overfitting on a small batch and the Loss looks like this without updating the target net and this when updating it
Edit 2:
This is definitely an issue with the target net, I tried removing it and loss seemed to not increase exponentially
Your tau value is too small, small target network update cause DQN traning unstable. You can try to use 1000 (OpenAI Baseline's DQN example) or 10000 (Deepmind's Nature paper).
In Deepmind's 2015 Nature paper, it states that:
The second modification to online Q-learning aimed at further improving the stability of our method with neural networks is to use a separate network for generating the traget yj in the Q-learning update. More precisely, every C updates we clone the network Q to obtain a target network Q' and use Q' for generating the Q-learning targets yj for the following C updates to Q.
This modification makes the algorithm more stable compared to standard online Q-learning, where an update that increases Q(st,at) often also increases Q(st+1, a) for all a and hence also increases the target yj, possibly leading to oscillations or divergence of the policy. Generating the targets using the older set of parameters adds a delay between the time an update to Q is made and the time the update affects the targets yj, making divergence or oscillations much more unlikely.
Human-level control through deep reinforcement
learning, Mnih et al., 2015
I've run your code with settings of tau=2, tau=10, tau=100, tau=1000 and tau=10000. The update frequency of tau=100 solves the problem (reach maximum steps of 200).
tau=2
tau=10
tau=100
tau=1000
tau=10000
Below is the modified version of your code.
import random
import math
import matplotlib.pyplot as plt
import torch
from torch import nn
import torch.nn.functional as F
import gym
class DQN(nn.Module):
def __init__(self, input_dim, output_dim):
super(DQN, self).__init__()
self.linear1 = nn.Linear(input_dim, 16)
self.linear2 = nn.Linear(16, 32)
self.linear3 = nn.Linear(32, 32)
self.linear4 = nn.Linear(32, output_dim)
def forward(self, x):
x = F.relu(self.linear1(x))
x = F.relu(self.linear2(x))
x = F.relu(self.linear3(x))
return self.linear4(x)
final_epsilon = 0.05
initial_epsilon = 1
epsilon_decay = 5000
global steps_done
steps_done = 0
def select_action(state):
global steps_done
sample = random.random()
eps_threshold = final_epsilon + (initial_epsilon - final_epsilon) * \
math.exp(-1. * steps_done / epsilon_decay)
if sample > eps_threshold:
with torch.no_grad():
state = torch.Tensor(state)
steps_done += 1
q_calc = model(state)
node_activated = int(torch.argmax(q_calc))
return node_activated
else:
node_activated = random.randint(0,1)
steps_done += 1
return node_activated
class ReplayMemory(object): # Stores [state, reward, action, next_state, done]
def __init__(self, capacity):
self.capacity = capacity
self.memory = [[],[],[],[],[]]
def push(self, data):
"""Saves a transition."""
for idx, point in enumerate(data):
#print("Col {} appended {}".format(idx, point))
self.memory[idx].append(point)
def sample(self, batch_size):
rows = random.sample(range(0, len(self.memory[0])), batch_size)
experiences = [[],[],[],[],[]]
for row in rows:
for col in range(5):
experiences[col].append(self.memory[col][row])
return experiences
def __len__(self):
return len(self.memory[0])
input_dim, output_dim = 4, 2
model = DQN(input_dim, output_dim)
target_net = DQN(input_dim, output_dim)
target_net.load_state_dict(model.state_dict())
target_net.eval()
tau = 100
discount = 0.99
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
memory = ReplayMemory(65536)
BATCH_SIZE = 128
def optimize_model():
if len(memory) < BATCH_SIZE:
return 0
experiences = memory.sample(BATCH_SIZE)
state_batch = torch.Tensor(experiences[0])
action_batch = torch.LongTensor(experiences[1]).unsqueeze(1)
reward_batch = torch.Tensor(experiences[2])
next_state_batch = torch.Tensor(experiences[3])
done_batch = experiences[4]
pred_q = model(state_batch).gather(1, action_batch)
next_state_q_vals = torch.zeros(BATCH_SIZE)
for idx, next_state in enumerate(next_state_batch):
if done_batch[idx] == True:
next_state_q_vals[idx] = -1
else:
# .max in pytorch returns (values, idx), we only want vals
next_state_q_vals[idx] = (target_net(next_state_batch[idx]).max(0)[0]).detach()
better_pred = (reward_batch + next_state_q_vals).unsqueeze(1)
loss = F.smooth_l1_loss(pred_q, better_pred)
optimizer.zero_grad()
loss.backward()
for param in model.parameters():
param.grad.data.clamp_(-1, 1)
optimizer.step()
return loss
points = []
losspoints = []
#save_state = torch.load("models/DQN_target_11.pth")
#model.load_state_dict(save_state['state_dict'])
#optimizer.load_state_dict(save_state['optimizer'])
env = gym.make('CartPole-v0')
for i_episode in range(5000):
observation = env.reset()
episode_loss = 0
if i_episode % tau == 0:
target_net.load_state_dict(model.state_dict())
for t in range(1000):
#env.render()
state = observation
action = select_action(observation)
observation, reward, done, _ = env.step(action)
if done:
next_state = [0,0,0,0]
else:
next_state = observation
memory.push([state, action, reward, next_state, done])
episode_loss = episode_loss + float(optimize_model())
if done:
points.append((i_episode, t+1))
print("Episode {} finished after {} timesteps".format(i_episode, t+1))
print("Avg Loss: ", episode_loss / (t+1))
losspoints.append((i_episode, episode_loss / (t+1)))
if (i_episode % 100 == 0):
eps = final_epsilon + (initial_epsilon - final_epsilon) * \
math.exp(-1. * steps_done / epsilon_decay)
print(eps)
if ((i_episode+1) % 5001 == 0):
save = {'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}
torch.save(save, "models/DQN_target_" + str(i_episode // 5000) + ".pth")
break
env.close()
x = [coord[0] * 100 for coord in points]
y = [coord[1] for coord in points]
x2 = [coord[0] * 100 for coord in losspoints]
y2 = [coord[1] for coord in losspoints]
plt.plot(x, y)
plt.plot(x2, y2)
plt.show()
Here's the result of your plotting code.
tau=100
tau=10000

Categories

Resources