How to resolve the problem in Google Colab? - python

def cw_l2_attack(model, images, labels, targeted=False, c=1e-4, kappa=0, max_iter=1000, learning_rate=0.01):

    images = images.to(device)
    labels = labels.to(device)

    # Define f-function
    def f(x):
        outputs = model(x)
        one_hot_labels = torch.eye(len(outputs[0]))[labels].to(device)

        i, _ = torch.max((1-one_hot_labels)*outputs, dim=1)
        j = torch.masked_select(outputs, one_hot_labels.byte())

        # If targeted, optimize for making the other class most likely
        if targeted:
            return torch.clamp(i-j, min=-kappa)
        # If untargeted, optimize for making the correct class least likely
        else:
            return torch.clamp(j-i, min=-kappa)

    w = torch.zeros_like(images, requires_grad=True).to(device)
    optimizer = optim.Adam([w], lr=learning_rate)
    prev = 1e10

    for step in range(max_iter):
        a = 1/2*(nn.Tanh()(w) + 1)

        loss1 = nn.MSELoss(reduction='sum')(a, images)
        loss2 = torch.sum(c*f(a))
        cost = loss1 + loss2

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        # Early stop when loss does not converge
        if step % (max_iter//10) == 0:
            if cost > prev:
                print('Attack Stopped due to CONVERGENCE....')
                return a
            prev = cost

        print('- Learning Progress : %2.2f %%' % ((step+1)/max_iter*100), end='\r')

    attack_images = 1/2*(nn.Tanh()(w) + 1)
    return attack_images
print("Attack Image & Predicted Label")
model.eval()
correct = 0
total = 0
for images, labels in normal_loader:
images = cw_l2_attack(model, images, labels, targeted=False, c=0.1)
labels = labels.to(device)
outputs = model(images)
pre = torch.max(outputs.data, 1)
total += 1
correct += (pre == labels).sum()
imshow(torchvision.utils.make_grid(images.cpu().data, normalize=True), [normal_data.classes[i] for i in pre])
print('Accuracy of test text: %f %%' % (100 * float(correct) / total))
RuntimeError: indices should be either on cpu or on the same device as the indexed tensor (cpu)
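The error comes from this line inside f(x):

one_hot_labels = torch.eye(len(outputs[0]))[labels].to(device)

torch.eye creates its tensor on the CPU, and indexing it with the CUDA labels tensor is exactly the cpu/cuda mismatch the message complains about. A minimal fix (one possible variant) is to build the identity matrix directly on the right device before indexing, after which the trailing .to(device) becomes redundant:

# Build the one-hot matrix on the same device as `labels`,
# so indexing with a CUDA tensor no longer mixes devices.
one_hot_labels = torch.eye(len(outputs[0]), device=labels.device)[labels]
i, _ = torch.max((1 - one_hot_labels) * outputs, dim=1)
# Recent PyTorch versions also expect a bool (not byte) mask here
j = torch.masked_select(outputs, one_hot_labels.bool())

The rest of the attack loop can stay as-is.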

Related

CUDA batch out of memory

I have a small dataset and am running a script called LightXML, which is on GitHub: https://github.com/kongds/LightXML
I am getting this error:
torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 192.00 MiB (GPU 0; 6.00 GiB total capacity; 4.71 GiB already allocated; 0 bytes free; 4.82 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
I have found multiple posts stating that I should reduce the batch size, but I can't seem to find where it is defined.
The script is halting at:
train_loss = model.one_epoch(epoch, trainloader, optimizer, mode='train',
                             eval_loader=validloader if args.valid else testloader,
                             eval_step=args.eval_step, log=LOG)
The one_epoch method is the following; the script is stalling at outputs = self(**inputs):
def one_epoch(self, epoch, dataloader, optimizer,
              mode='train', eval_loader=None, eval_step=20000, log=None):
    bar = tqdm.tqdm(total=len(dataloader))
    p1, p3, p5 = 0, 0, 0
    g_p1, g_p3, g_p5 = 0, 0, 0
    total, acc1, acc3, acc5 = 0, 0, 0, 0
    g_acc1, g_acc3, g_acc5 = 0, 0, 0
    train_loss = 0

    if mode == 'train':
        self.train()
    else:
        self.eval()

    if self.use_swa and epoch == self.swa_warmup_epoch and mode == 'train':
        self.swa_init()

    if self.use_swa and mode == 'eval':
        self.swa_swap_params()

    pred_scores, pred_labels = [], []
    bar.set_description(f'{mode}-{epoch}')

    with torch.set_grad_enabled(mode == 'train'):
        for step, data in enumerate(dataloader):
            batch = tuple(t for t in data)
            have_group = len(batch) > 4
            check_memory()
            inputs = {'input_ids':      batch[0].cuda(),
                      'attention_mask': batch[1].cuda(),
                      'token_type_ids': batch[2].cuda()}
            if mode == 'train':
                inputs['labels'] = batch[3].cuda()
                if self.group_y is not None:
                    inputs['group_labels'] = batch[4].cuda()
                    inputs['candidates'] = batch[5].cuda()
            print("------------------------------------------------------------")
            outputs = self(**inputs)

            bar.update(1)

            if mode == 'train':
                loss = outputs[1]
                loss /= self.update_count
                train_loss += loss.item()

                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()

                if step % self.update_count == 0:
                    optimizer.step()
                    self.zero_grad()

                if step % eval_step == 0 and eval_loader is not None and step != 0:
                    results = self.one_epoch(epoch, eval_loader, optimizer, mode='eval')
                    p1, p3, p5 = results[3:6]
                    g_p1, g_p3, g_p5 = results[:3]
                    if self.group_y is not None:
                        log.log(f'{epoch:>2} {step:>6}: {p1:.4f}, {p3:.4f}, {p5:.4f}'
                                f' {g_p1:.4f}, {g_p3:.4f}, {g_p5:.4f}')
                    else:
                        log.log(f'{epoch:>2} {step:>6}: {p1:.4f}, {p3:.4f}, {p5:.4f}')
                    # NOTE: we don't reset the model to train mode and keep it in eval mode,
                    # which means all dropout will be removed after `eval_step` in every epoch.
                    # This trick makes LightXML converge fast.
                    # self.train()

                if self.use_swa and step % self.swa_update_step == 0:
                    self.swa_step()

                bar.set_postfix(loss=loss.item())
            elif self.group_y is None:
                logits = outputs
                if mode == 'eval':
                    labels = batch[3]
                    _total, _acc1, _acc3, _acc5 = self.get_accuracy(None, logits, labels.cpu().numpy())
                    total += _total; acc1 += _acc1; acc3 += _acc3; acc5 += _acc5
                    p1 = acc1 / total
                    p3 = acc3 / total / 3
                    p5 = acc5 / total / 5
                    bar.set_postfix(p1=p1, p3=p3, p5=p5)
                elif mode == 'test':
                    pred_scores.append(logits.detach().cpu())
            else:
                group_logits, candidates, logits = outputs
                if mode == 'eval':
                    labels = batch[3]
                    group_labels = batch[4]
                    _total, _acc1, _acc3, _acc5 = self.get_accuracy(candidates, logits, labels.cpu().numpy())
                    total += _total; acc1 += _acc1; acc3 += _acc3; acc5 += _acc5
                    p1 = acc1 / total
                    p3 = acc3 / total / 3
                    p5 = acc5 / total / 5

                    _, _g_acc1, _g_acc3, _g_acc5 = self.get_accuracy(None, group_logits, group_labels.cpu().numpy())
                    g_acc1 += _g_acc1; g_acc3 += _g_acc3; g_acc5 += _g_acc5
                    g_p1 = g_acc1 / total
                    g_p3 = g_acc3 / total / 3
                    g_p5 = g_acc5 / total / 5
                    bar.set_postfix(p1=p1, p3=p3, p5=p5, g_p1=g_p1, g_p3=g_p3, g_p5=g_p5)
                elif mode == 'test':
                    _scores, _indices = torch.topk(logits.detach().cpu(), k=100)
                    _labels = torch.stack([candidates[i][_indices[i]] for i in range(_indices.shape[0])], dim=0)
                    pred_scores.append(_scores.cpu())
                    pred_labels.append(_labels.cpu())

    if self.use_swa and mode == 'eval':
        self.swa_swap_params()
    bar.close()

    if mode == 'eval':
        return g_p1, g_p3, g_p5, p1, p3, p5
    elif mode == 'test':
        return torch.cat(pred_scores, dim=0).numpy(), torch.cat(pred_labels, dim=0).numpy() if len(pred_labels) != 0 else None
    elif mode == 'train':
        return train_loss
I am also including the forward function, which is stopping at outs = self.bert(...):
def forward(self, input_ids, attention_mask, token_type_ids,
            labels=None, group_labels=None, candidates=None):
    is_training = labels is not None

    outs = self.bert(
        input_ids,
        attention_mask=attention_mask,
        token_type_ids=token_type_ids
    )[-1]

    out = torch.cat([outs[-i][:, 0] for i in range(1, self.feature_layers+1)], dim=-1)
    out = self.drop_out(out)
    group_logits = self.l0(out)
    if self.group_y is None:
        logits = group_logits
        if is_training:
            loss_fn = torch.nn.BCEWithLogitsLoss()
            loss = loss_fn(logits, labels)
            return logits, loss
        else:
            return logits

    if is_training:
        l = labels.to(dtype=torch.bool)
        target_candidates = torch.masked_select(candidates, l).detach().cpu()
        target_candidates_num = l.sum(dim=1).detach().cpu()
    groups, candidates, group_candidates_scores = self.get_candidates(group_logits,
                                                                      group_gd=group_labels if is_training else None)
    if is_training:
        bs = 0
        new_labels = []
        for i, n in enumerate(target_candidates_num.numpy()):
            be = bs + n
            c = set(target_candidates[bs: be].numpy())
            c2 = candidates[i]
            new_labels.append(torch.tensor([1.0 if i in c else 0.0 for i in c2]))
            if len(c) != new_labels[-1].sum():
                s_c2 = set(c2)
                for cc in list(c):
                    if cc in s_c2:
                        continue
                    for j in range(new_labels[-1].shape[0]):
                        if new_labels[-1][j].item() != 1:
                            c2[j] = cc
                            new_labels[-1][j] = 1.0
                            break
            bs = be
        labels = torch.stack(new_labels).cuda()
    candidates, group_candidates_scores = torch.LongTensor(candidates).cuda(), torch.Tensor(group_candidates_scores).cuda()

    emb = self.l1(out)
    embed_weights = self.embed(candidates)  # N, sampled_size, H
    emb = emb.unsqueeze(-1)
    logits = torch.bmm(embed_weights, emb).squeeze(-1)

    if is_training:
        loss_fn = torch.nn.BCEWithLogitsLoss()
        loss = loss_fn(logits, labels) + loss_fn(group_logits, group_labels)
        return logits, loss
    else:
        candidates_scores = torch.sigmoid(logits)
        candidates_scores = candidates_scores * group_candidates_scores
        return group_logits, candidates, candidates_scores
Yes, the problem is probably the batch size. batch_size is usually defined in the DataLoader. In main.py you can see:

trainloader = DataLoader(train_d, batch_size=args.batch, num_workers=5,
                         shuffle=True)
You can set it when launching the script, via args.batch.
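For example, a hedged sketch of the invocation (the flag name --batch is an assumption based on args.batch; check the argparse definitions in main.py to confirm):

# halve the batch size repeatedly until the OOM disappears
python main.py --batch 8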

[Theano]TypeError: cost must be a scalar

I am working on a research project that requires me to write a regularizer for a DNN.
import lasagne
from lasagne.nonlinearities import leaky_rectify, softmax
import theano, theano.tensor as T
import numpy as np
import sklearn.datasets, sklearn.preprocessing, sklearn.model_selection
import matplotlib.pyplot as plt
from tabulate import tabulate
import time
import math

# psi function that will be used in the penalty function
def psi(g, l):
    m = g.shape[1]
    C = (1/T.pow(2, m)) * (1/T.pow(math.pi, ((m-1)/2))) / (T.gamma((m+1)/2))
    logDens = T.log(C) + m*T.log(l) - l*T.sqrt(T.sum(g**2))
    dens = T.exp(logDens)
    return dens

# pStar function that will be used in the penalty function
def pStar(g, lambda1, lambda0, theta):
    psi1 = psi(g, lambda1)
    psi0 = psi(g, lambda0)
    # if a coefficient is really large then both of these will numerically be zero
    if theta*psi1 == 0 and (1-theta)*psi0 == 0:
        p = 1
    else:
        p = (theta*psi1) / (theta*psi1 + (1 - theta)*psi0)
    return p

# Separable penalty
def pen_S(l):
    theta = 0.5
    lambda1 = 1
    lambda0 = 12
    for j in range(len(l)):
        t = l[j]
        m = t.shape[1]
        n = t.shape[0].eval()
        cost = T.zeros((1, 1))
        for i in range(n):
            g = t[i]
            temp = -lambda1*T.sum(g**2) + T.log(pStar(T.zeros((1, m)), lambda1, lambda0, theta)/pStar(g, lambda1, lambda0, theta))
            cost = cost + temp
    return cost
# Number of simulations
N_runs = 1
# Maximum number of epochs
max_epochs = 1500
# Define number of layers and number of neurons
H_layers = np.asarray([40, 20])
# Minibatch size
batch_size = 300
# Lasagne Regularizers to be tested
regularizers = [pen_S]
# Define the regularization factors for each algorithm
reg_factors = [10**-3.5]
# Define the names (for display purposes)
names = ['SSGL_Sep']

# Load the dataset (DIGITS)
digits = sklearn.datasets.load_digits()
X = digits.data
y = digits.target

# MNIST
# mnist = sklearn.datasets.fetch_mldata('MNIST original', data_home='C:/Users/ISPAMM/Downloads')
# X = mnist.data
# y = mnist.target

# Preprocessing (input)
scaler = sklearn.preprocessing.MinMaxScaler()
X = scaler.fit_transform(X)

# Output structures
tr_errors = np.zeros((len(regularizers), N_runs))
tst_errors = np.zeros((len(regularizers), N_runs))
tr_times = np.zeros((len(regularizers), N_runs))
tr_obj = np.zeros((len(regularizers), N_runs, max_epochs))
sparsity_weights = np.zeros((len(regularizers), N_runs, len(H_layers)+1))
sparsity_neurons = np.zeros((len(regularizers), N_runs, len(H_layers)+1))

# Define the input and output symbolic variables
input_var = T.matrix(name='X')
target_var = T.ivector(name='y')

# Utility function for minibatches
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]

for k in np.arange(0, N_runs):
    print("Run ", k+1, " of ", N_runs, "...\n", end="")

    # Split the data
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size=0.25)

    # Define the network structure
    network = lasagne.layers.InputLayer((None, X.shape[1]), input_var)
    for h in H_layers:
        network = lasagne.layers.DenseLayer(network, h, nonlinearity=leaky_rectify, W=lasagne.init.GlorotNormal())
    network = lasagne.layers.DenseLayer(network, len(np.unique(y)), nonlinearity=softmax, W=lasagne.init.GlorotNormal())
    params_original = lasagne.layers.get_all_param_values(network)
    params = lasagne.layers.get_all_params(network, trainable=True)

    # Define the loss function
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)

    # Define the test function
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)
    test_fn = theano.function([input_var, target_var], test_acc, allow_input_downcast=True)

    for r in np.arange(0, len(regularizers)):
        # Set to original parameters
        lasagne.layers.set_all_param_values(network, params_original)

        # Define the regularized loss function
        loss_reg = loss.mean() + reg_factors[r] * lasagne.regularization.regularize_network_params(network, regularizers[r])

        # Update function
        # updates_reg = lasagne.updates.nesterov_momentum(loss_reg, params, learning_rate=0.01)
        updates_reg = lasagne.updates.adam(loss_reg, params)

        # Training function
        train_fn = theano.function([input_var, target_var], loss_reg, updates=updates_reg, allow_input_downcast=True)

        # Train network
        print("\tTraining with ", names[r], " regularization, epoch: ", end="")
        start = time.time()
        for epoch in range(max_epochs):
            loss_epoch = 0
            batches = 0
            if np.mod(epoch, 10) == 0:
                print(epoch, "... ", end="")
            for batch in iterate_minibatches(X_train, y_train, batch_size, shuffle=True):
                input_batch, target_batch = batch
                loss_epoch += train_fn(input_batch, target_batch)
                batches += 1
            tr_obj[r,k,epoch] = loss_epoch/batches
        end = time.time()
        tr_times[r,k] = end - start
        print(epoch, ".")

        # Final test with accuracy
        print("\tTesting the network with ", names[r], " regularization...")
        tr_errors[r,k] = test_fn(X_train, y_train)
        tst_errors[r,k] = test_fn(X_test, y_test)

        # Check sparsity
        params_trained = lasagne.layers.get_all_param_values(network, trainable=True)
        sparsity_weights[r,k,:] = [1-(x.round(decimals=3).ravel().nonzero()[0].shape[0]/x.size) for x in params_trained[0::2]]
        sparsity_neurons[r,k,:] = [x.round(decimals=3).sum(axis=1).nonzero()[0].shape[0] for x in params_trained[0::2]]

tr_obj_mean = np.mean(tr_obj, axis=1)

# Plot the average loss
plt.figure()
plt.title('Training objective')
for r in np.arange(0, len(regularizers)):
    plt.semilogy(tr_obj_mean[r, :], label=names[r])
plt.legend()

# Print the results
print(tabulate([['Tr. accuracy [%]'] + np.mean(tr_errors, axis=1).round(decimals=4).tolist(),
                ['Test. accuracy [%]'] + np.mean(tst_errors, axis=1).round(decimals=4).tolist(),
                ['Tr. times [secs.]'] + np.mean(tr_times, axis=1).round(decimals=4).tolist(),
                ['Sparsity [%]'] + np.mean(sparsity_weights, axis=1).round(decimals=4).tolist(),
                ['Neurons'] + np.mean(sparsity_neurons, axis=1).round(decimals=4).tolist()],
               headers=['']+names))
Here is my defined regularizer pen_S(l), but when I run the code to train the network, I am prompted with 'TypeError: cost must be a scalar.' But I think the output of pen_S is already a scalar.
Can anyone help me with this?
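One likely culprit, for what it's worth: T.zeros((1, 1)) is a 1x1 matrix, not a 0-d scalar, so the cost that pen_S accumulates and returns has ndim 2, and theano.grad raises exactly this TypeError whenever the cost is not 0-dimensional. (Note also that cost is re-initialized inside the for j loop, discarding the penalty of all but the last tensor.) A minimal sketch of a fix, assuming pStar and the rest of the loop stay as written:

# Sketch: accumulate into a 0-d symbolic scalar, initialized once outside the loops
def pen_S(l):
    theta = 0.5
    lambda1 = 1
    lambda0 = 12
    cost = T.constant(0.0)  # 0-d scalar accumulator
    for j in range(len(l)):
        t = l[j]
        m = t.shape[1]
        n = t.shape[0].eval()
        for i in range(n):
            g = t[i]
            cost = cost + (-lambda1*T.sum(g**2)
                           + T.log(pStar(T.zeros((1, m)), lambda1, lambda0, theta)
                                   / pStar(g, lambda1, lambda0, theta)))
    return cost

Alternatively, keeping the original accumulator and returning cost.sum() would also produce a 0-d variable.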

Memory keeps accumulating while training with PyTorch

I am training a deep learning model using PyTorch. For unknown reasons, memory keeps accumulating, which kills the session within 30 epochs and leaves the model underfitting.
Some thoughts here:
- Wondering if it's caused by matplotlib, so I added plt.close('all'); didn't work.
- Added gc.collect(); didn't work.
- Wondering if it's caused by cv2.imwrite(), but I don't know how to inspect this. Any suggestions? (A memory-logging sketch follows the training loop below.)
- A PyTorch issue?
- Others...
model.train()
for epo in range(epoch):
    for i, data in enumerate(trainloader, 0):
        inputs = data
        inputs = Variable(inputs)
        optimizer.zero_grad()
        top = model.upward(inputs + white(inputs))
        outputs = model.downward(top, shortcut=True)
        loss = criterion(inputs, outputs)
        loss.backward()
        optimizer.step()

        # Save generated pictures every 100 iters
        if i % 100 == 0:
            inn = inputs[0].view(128, 128).detach().numpy() * 255
            cv2.imwrite("/home/tk/Documents/recover/" + str(epo) + "_" + str(i) + ".png", inn)
            out = outputs[0].view(128, 128).detach().numpy() * 255
            cv2.imwrite("/home/tk/Documents/recover/" + str(epo) + "_" + str(i) + "_re.png", out)

        # Print loss every 50 iters
        if i % 50 == 0:
            print('[%d, %5d] loss: %.3f' % (epo, i, loss.item()))

    gc.collect()
    plt.close("all")
===================================================================
20181222 Update
Datasets & DataLoader
class MSourceDataSet(Dataset):
    def __init__(self, clean_dir):
        for i in cleanfolder:
            with open(clean_dir + '{}'.format(i)) as f:
                clean_list.append(torch.Tensor(json.load(f)))
        cleanblock = torch.cat(clean_list, 0)
        self.spec = cleanblock

    def __len__(self):
        return self.spec.shape[0]

    def __getitem__(self, index):
        spec = self.spec[index]
        return spec

trainset = MSourceDataSet(clean_dir)
trainloader = torch.utils.data.DataLoader(dataset=trainset,
                                          batch_size=4,
                                          shuffle=True)
The model is really complicated and long, and the memory accumulation issue didn't happen before with the same model, so I will not post it here...

Multilayer Perceptron Fails to Converge

This is a simple MLP I am writing for binary image classification, with backpropagation:
class MLP:
    def __init__(self, size, epochs=1000, learning_rate=1):
        self.l1weights = numpy.random.random((size + 1, 3))
        self.l2weights = numpy.random.random(3)
        self.epochs = epochs
        self.learning_rate = learning_rate

    def predict(self, _input_):
        # Append bias at the beginning of input
        l1output = self.sigmoid(numpy.dot(numpy.append([1], _input_), self.l1weights))
        l2output = self.sigmoid(numpy.dot(l1output, self.l2weights))
        return l1output, l2output

    def train(self, training_set, training_goal):
        for epoch in range(self.epochs):
            l1squared_error = 0
            l2squarederror = 0
            for set_index in range(training_goal.shape[0]):
                set = training_set[set_index]
                l1output, l2output = self.predict(set)
                l2error = training_goal[set_index] - l2output
                l1error = l2error * self.dsigmoid(l2output) * self.l2weights
                self.l1weights[0] = self.l1weights[0] + self.learning_rate * l1error
                for index in range(len(self.l1weights) - 1):
                    self.l1weights[index + 1] += self.learning_rate * l1error * self.dsigmoid(l1output)
                for index in range(len(self.l2weights)):
                    self.l2weights[index] += self.learning_rate * l2error * self.dsigmoid(l2output)
                l1squared_error += sum(l1error ** 2)
                l2squarederror += l2error ** 2
            print("Squared error at epoch " + str(epoch) + " : " + str(l1squared_error) + ", " + str(l2squarederror))

    def sigmoid(self, _input_):
        # Sigmoid activation function
        return 1 / (1 + numpy.exp(-_input_))

    def dsigmoid(self, _input_):
        return _input_ * (1 - _input_)
When run, sometimes all outputs converge to 1; in the relatively more successful runs, the predictions for 0 converge to 0.5 while the predictions for 1 stay near 0.75, and the layer-2 error stops changing after ~1000 epochs. This is from testing 2x2 image classification with the code below:
def image_class(input):
    return 1 if input >= 2 else 0

training_set = ((numpy.arange(2**4)[:,None] & (1 << numpy.arange(4))) != 0)
training_goals = numpy.array([image_class(sum(i)) for i in training_set])
mlp = MLP(size=4)
mlp.train(training_set, training_goals)
I could solve this by adding a layer right after the output layer with step activation instead of sigmoid and training it separately from the initial network, at least for 2x2 recognition.
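For reference, and not from the original post: textbook backpropagation for this architecture also multiplies the hidden-layer gradient by the corresponding bias-augmented input, which the update in train above omits. A minimal sketch of one standard gradient step, reusing the class's names:

def backprop_step(mlp, x_raw, target):
    # One plain-SGD step for the two-layer net above (mlp is an MLP instance,
    # x_raw is a single length-`size` example, target is 0 or 1).
    x = numpy.append([1], x_raw)                                # bias-augmented input
    l1output, l2output = mlp.predict(x_raw)
    l2delta = (target - l2output) * mlp.dsigmoid(l2output)      # output delta (scalar)
    l1delta = l2delta * mlp.l2weights * mlp.dsigmoid(l1output)  # hidden deltas, shape (3,)
    mlp.l2weights += mlp.learning_rate * l2delta * l1output
    mlp.l1weights += mlp.learning_rate * numpy.outer(x, l1delta)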

Speed up network training using Dataset API in tensorflow

I have a small dataset that fits nicely in GPU RAM.
My goal is to better utilize my GPU (currently at around 70% utilization) and thus decrease training time, using the new Dataset API in TensorFlow v1.4.
I would like to increase the GPU utilization without adding more layers or increasing the batch size. How is this possible with the Dataset API?
Below is a simplified example of my current implementation:
import numpy as np
from time import time
import tensorflow as tf

"""
Simple regression example with Dataset API.
The training and val sets are small enough to fit in GPU ram.
"""

TRAIN_SET_SIZE = 130000
VAL_SET_SIZE = 30000
TRAIN_BATCH_SIZE = 100
VAL_BATCH_SIZE = 1000
TRAIN_PREFETCH = 200
VAL_PREFETCH = 1
INPUT_FEATURES = 120
LAYERS = [500, 500, 500, 500, 1]  # last layer size should be 1

def fc_layer(in_tensor, in_dim, out_dim, name, act_fun=tf.nn.relu):
    with tf.variable_scope(name):
        sd = 1.0 / np.sqrt(in_dim)
        W_fc = tf.Variable(tf.truncated_normal([in_dim, out_dim], stddev=sd), name='weights')
        b_fc = tf.Variable(tf.truncated_normal([out_dim], stddev=sd), name='bias')
        z_fc = tf.matmul(in_tensor, W_fc) + b_fc
        if act_fun is None:
            return z_fc
        else:
            return act_fun(z_fc)

# Create dummy data
train_set_x = np.random.uniform(low=-1, high=1, size=(TRAIN_SET_SIZE, INPUT_FEATURES)).astype(np.float32)
train_set_y = np.random.uniform(low=-1, high=2, size=(TRAIN_SET_SIZE)).astype(np.float32)
val_set_x = np.random.uniform(low=-1, high=1, size=(VAL_SET_SIZE, INPUT_FEATURES)).astype(np.float32)
val_set_y = np.random.uniform(low=-1, high=2, size=(VAL_SET_SIZE)).astype(np.float32)

# Reset graph
tf.reset_default_graph()

with tf.device('/gpu:0'):
    # Dummy train data
    train_set = tf.data.Dataset.from_tensor_slices((train_set_x, train_set_y))
    # TODO First batch and then prefetch, or inverse the order?
    # TODO TRAIN_PREFETCH value?
    train_set = train_set.shuffle(buffer_size=1000).batch(TRAIN_BATCH_SIZE).prefetch(TRAIN_PREFETCH)

    # Dummy val data
    val_set = tf.data.Dataset.from_tensor_slices((val_set_x, val_set_y))
    # TODO VAL_PREFETCH value?
    val_set = val_set.batch(VAL_BATCH_SIZE).prefetch(VAL_PREFETCH)

    # Iterator
    iterator = tf.data.Iterator.from_structure(train_set.output_types, train_set.output_shapes)
    train_init_op = iterator.make_initializer(train_set)
    val_init_op = iterator.make_initializer(val_set)
    x, truth = iterator.get_next()

    # Build graph
    activations = []
    activations.append(fc_layer(x,
                                INPUT_FEATURES,
                                LAYERS[0],
                                name='fc0'))
    for layer_ix in range(1, len(LAYERS) - 1):
        activations.append(fc_layer(activations[-1],
                                    LAYERS[layer_ix - 1],
                                    LAYERS[layer_ix],
                                    name='fc' + str(layer_ix)))
    activations.append(fc_layer(activations[-1],
                                LAYERS[-2],
                                LAYERS[-1],
                                act_fun=None,
                                name='fc' + str(len(LAYERS) - 1)))
    prediction = activations[-1]
    loss = tf.reduce_mean(tf.square(truth - prediction))
    global_step = tf.Variable(0, name='global_step', trainable=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    train_step = optimizer.minimize(loss, global_step=global_step, name='train_step')

sess = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True),
                                        log_device_placement=True,
                                        allow_soft_placement=True))
sess.run(tf.global_variables_initializer())

for e in range(1, 6):  # epochs
    epoch_start_time = time()

    # Train set
    sess.run(train_init_op)
    print('\nTrain init op time: %.4f' % (time() - epoch_start_time))
    while True:
        try:
            batch_start_time = time()
            batch_loss, step, _ = sess.run([loss, global_step, train_step])
            # if step % 1000 == 0:
            #     print('Step: %5d Loss: %.2f, Batch Time : %.5f sec' % (step, batch_loss, time() - batch_start_time))
        except tf.errors.OutOfRangeError:
            break
    # print('Epoch time (without computing val set loss): %.2f' % (time() - epoch_start_time))

    # Val set
    sess.run(val_init_op)
    pred_err = np.ndarray([VAL_SET_SIZE])
    ix = 0
    while True:
        try:
            p, t = sess.run([prediction, truth])
            pred_err[ix:ix + VAL_BATCH_SIZE] = p.reshape([-1]) - t
            ix += VAL_BATCH_SIZE
        except tf.errors.OutOfRangeError:
            val_loss = np.mean(pred_err ** 2)
            print('Epoch: %2d, Loss: %.2f, Epoch time: %.2f sec' % (e, val_loss, time() - epoch_start_time))
            break
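A hedged note on the two TODOs (a suggestion, not a measured result): prefetch() buffers whatever element type precedes it in the pipeline, so calling it after batch() buffers whole batches, and a buffer of one or two is usually enough; a TRAIN_PREFETCH of 200 batches mostly costs memory. With data this small, caching the elements also avoids refilling the pipeline from host memory each epoch, assuming your TF build exposes Dataset.cache in tf.data:

train_set = (tf.data.Dataset.from_tensor_slices((train_set_x, train_set_y))
             .cache()                              # keep elements in memory across epochs
             .shuffle(buffer_size=TRAIN_SET_SIZE)  # full-dataset shuffle
             .batch(TRAIN_BATCH_SIZE)
             .prefetch(1))                         # one ready batch ahead is typically enough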
