I am trying to create a denoising autoencoder for 1D cyclic signals such as cos(x).
To create the dataset, I pass a list of cyclic functions, and for each generated example random coefficients are rolled for every function in the list, so each generated signal is different yet cyclic, e.g. 0.856cos(x) - 1.3cos(0.1x).
Then I add noise and normalize the signal to the range [0, 1).
Next, I train my autoencoder on this data, but it learns to output a constant (usually 0.5). My guess is that this happens because 0.5 is the typical mean value of the normalized signals, but that is not the result I am aiming for at all.
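As a sanity check on that guess (a toy computation of mine, independent of the training code): under MSE, the best constant prediction is the mean of the targets, so an output collapsing to ~0.5 is consistent with the model learning only the target mean:

import numpy as np

y = np.random.rand(100000)              # stand-in for normalized target values in [0, 1)
cs = np.linspace(0.0, 1.0, 101)         # candidate constant predictions
mses = [np.mean((y - c) ** 2) for c in cs]
print(cs[int(np.argmin(mses))])         # ~0.5, i.e. the mean of the targets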
I am providing the code I wrote for the autoencoder, the data generator, and the training loop, as well as two pictures depicting the problem I am having.
first example:
second example:
Linear autoencoder:
class LinAutoencoder(nn.Module):
    def __init__(self, in_channels, K, B, z_dim, out_channels):
        super(LinAutoencoder, self).__init__()
        self.in_channels = in_channels
        self.K = K  # number of samples per 2pi interval
        self.B = B  # how many intervals
        self.out_channels = out_channels

        encoder_layers = []
        decoder_layers = []

        encoder_layers += [
            nn.Linear(in_channels * K * B, 2 * z_dim, bias=True),
            nn.ReLU(),
            nn.Linear(2 * z_dim, z_dim, bias=True),
            nn.ReLU(),
            nn.Linear(z_dim, z_dim, bias=True),
            nn.ReLU()
        ]

        decoder_layers += [
            nn.Linear(z_dim, z_dim, bias=True),
            nn.ReLU(),
            nn.Linear(z_dim, 2 * z_dim, bias=True),
            nn.ReLU(),
            nn.Linear(2 * z_dim, out_channels * K * B, bias=True),
            nn.Tanh()
        ]

        self.encoder = nn.Sequential(*encoder_layers)
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        batch_size = x.shape[0]
        x_flat = torch.flatten(x, start_dim=1)
        enc = self.encoder(x_flat)
        dec = self.decoder(enc)
        res = dec.view((batch_size, self.out_channels, self.K * self.B))
        return res
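For reference, the training loop below uses lin_model without showing its construction; given the parameters listed at the end of the question (the mapping of L to in_channels/out_channels is my assumption), it would be instantiated like:

lin_model = LinAutoencoder(in_channels=1, K=512, B=2, z_dim=64, out_channels=1)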
The data generator:
def lincomb_generate_data(batch_size, intervals, sample_length, functions, noise_type="gaussian", **kwargs) -> torch.Tensor:
    channels = 1
    mul_term = 2 * np.pi / sample_length
    positions = np.arange(0, sample_length * intervals)
    x_axis = positions * mul_term
    X = np.tile(x_axis, (channels, 1))
    y = X
    Y = np.repeat(y[np.newaxis, :], batch_size, axis=0)

    if noise_type == "gaussian":
        # defaults to 0, 0.4
        noise_mean = kwargs.get("noise_mean", 0)
        noise_std = kwargs.get("noise_std", 0.4)
        noise = np.random.normal(noise_mean, noise_std, Y.shape)
    if noise_type == "uniform":
        # defaults to 0, 1
        noise_low = kwargs.get("noise_low", 0)
        noise_high = kwargs.get("noise_high", 1)
        noise = np.random.uniform(noise_low, noise_high, Y.shape)

    coef_lo = -2
    coef_hi = 2
    coef_mat = np.random.uniform(coef_lo, coef_hi, (batch_size, len(functions)))  # matrix of random coefficients
    coef_mat = np.where(np.abs(coef_mat) < 10**-1, 0, coef_mat)  # zero out near-zero coefficients

    for i in range(batch_size):
        curr_res = np.zeros((channels, sample_length * intervals))
        for func_id, curr_func in enumerate(functions):
            curr_coef = coef_mat[i][func_id]
            curr_res += curr_coef * curr_func(Y[i, :, :])
        Y[i, :, :] = curr_res

    clean = Y
    noisy = clean + noise

    # Normalizing to [0, 1)
    clean -= clean.min(axis=2, keepdims=True)
    clean /= clean.max(axis=2, keepdims=True) + 1e-5  # avoiding zero division
    noisy -= noisy.min(axis=2, keepdims=True)
    noisy /= noisy.max(axis=2, keepdims=True) + 1e-5  # avoiding zero division

    clean = torch.from_numpy(clean)
    noisy = torch.from_numpy(noisy)

    return x_axis, clean, noisy
Training loop:
functions = [lambda x: np.cos(0.1 * x),
             lambda x: np.cos(x),
             lambda x: np.cos(3 * x)]

num_epochs = 200
lin_loss_list = []

criterion = torch.nn.MSELoss()
lin_optimizer = torch.optim.SGD(lin_model.parameters(), lr=0.01, momentum=0.9)

_, val_clean, val_noisy = util.lincomb_generate_data(batch_size, B, K, functions, noise_type="gaussian")

print("STARTED TRAINING")
for epoch in range(num_epochs):
    # generate_data returns the x-axis used for plotting as well as the clean and noisy data
    _, t_clean, t_noisy = util.lincomb_generate_data(batch_size, B, K, functions, noise_type="gaussian")

    # ===================forward=====================
    lin_output = lin_model(t_noisy.float())
    lin_loss = criterion(lin_output.float(), t_clean.float())
    lin_loss_list.append(lin_loss.item())

    # ===================backward====================
    lin_optimizer.zero_grad()
    lin_loss.backward()
    lin_optimizer.step()

    val_lin_loss = F.mse_loss(lin_model(val_noisy.float()), val_clean.float())
print("DONE TRAINING")
Edit: added the parameters that were requested:
L = 1
K = 512
B = 2
batch_size = 64
z_dim = 64
noise_mean = 0
noise_std = 0.4
The problem was that I didn't use nn.BatchNorm1d in my model, so I guess something went wrong during training (probably vanishing gradients).
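For completeness, here is a sketch of the fix (the placement of the batch-norm layers is my choice: after each Linear, before its ReLU; sizes as in the encoder above):

encoder_layers += [
    nn.Linear(in_channels * K * B, 2 * z_dim, bias=True),
    nn.BatchNorm1d(2 * z_dim),  # normalizes activations, keeping gradients healthy
    nn.ReLU(),
    nn.Linear(2 * z_dim, z_dim, bias=True),
    nn.BatchNorm1d(z_dim),
    nn.ReLU(),
    nn.Linear(z_dim, z_dim, bias=True),
    nn.BatchNorm1d(z_dim),
    nn.ReLU()
]

The decoder gets the same treatment, with no batch norm after the final Tanh.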
Related
Let's say I have the following (relatively) small LSTM model:
First, let's create some pseudo input/target data:
import torch
# create pseudo input data (features)
features = torch.rand(size = (64, 24, 3)) # of shape (batch_size, num_time_steps, num_features)
# create pseudo target data
targets = torch.ones(size = (64, 24, 1)) # of shape (batch_size, num_time_steps, num_targets)
# store num. of time steps
num_time_steps = features.shape[1]
Now, let's define a simple LSTM model:
# create a simple lstm model with lstm_cell
class SmallModel(torch.nn.Module):
    def __init__(self):
        super().__init__()  # initialize the parent class
        # define the layers
        self.lstm_cell = torch.nn.LSTMCell(input_size=features.shape[2], hidden_size=16)
        self.fc = torch.nn.Linear(in_features=16, out_features=targets.shape[2])

    def forward(self, features):
        # initialise states
        hx = torch.randn(64, 16)
        cx = torch.randn(64, 16)

        # empty lists to collect final preds
        a_s = []
        b_s = []
        c_s = []

        for t in range(num_time_steps):  # loop through each time step
            # select features at the current time step t
            features_t = features[:, t, :]
            # forward computation at the current time step t
            hx, cx = self.lstm_cell(features_t, (hx, cx))
            out_t = torch.relu(self.fc(hx))
            # do some computation with the output
            a = out_t * 0.8 + 20
            b = a * 2
            c = b * 0.9
            a_s.append(a)
            b_s.append(b)
            c_s.append(c)

        a_s = torch.stack(a_s, dim=1)  # of shape (batch_size, num_time_steps, num_targets)
        b_s = torch.stack(b_s, dim=1)
        c_s = torch.stack(c_s, dim=1)

        return a_s, b_s, c_s
Instantiating the model, loss function and optimizer:
# instantiate the model
model = SmallModel()
# loss function
loss_fn = torch.nn.MSELoss()
# optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
Now, during the training loop, I want to print the gradients of the intermediate outputs (a_s.grad, b_s.grad) at each epoch:
# number of epochs
n_epoch = 10

# training loop
for epoch in range(n_epoch):  # loop through each epoch
    # zero out the grads because pytorch accumulates them
    optimizer.zero_grad()
    # make predictions
    a_s, b_s, c_s = model(features)

    # retain the gradients of intermediate outputs
    a_s.retain_grad()
    b_s.retain_grad()
    c_s.retain_grad()

    # compute loss
    loss = loss_fn(c_s, targets)
    # backward computation
    loss.backward()

    # print gradients of outputs at each epoch
    print(a_s.grad)
    print(b_s.grad)

    # update the weights
    optimizer.step()
But I get the following:
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
None
How can I get the actual gradients of the intermediate outputs?
The problem is that c_s is not a function of a_s and b_s.
In your code:
loss = func(c_s, *)
c_s = func(a, b)
# c_s = func(a_s, b_s) is not true
Hence, during the backward pass no gradient is computed for a_s and b_s.
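A minimal illustration of the same effect (my own toy example, not taken from the question's code): retain_grad only populates .grad for tensors that actually lie on the path from the loss back to the leaves:

import torch

a = torch.tensor([2.0], requires_grad=True)
b = a * 3            # on the path to the loss
c = a * 3            # same values, but a separate graph node
b.retain_grad()
c.retain_grad()

loss = (b ** 2).sum()  # loss depends on b only
loss.backward()

print(b.grad)  # tensor([12.])
print(c.grad)  # None - c was never used to compute the loss

In the question, the stacked a_s and b_s are exactly such off-path nodes: the loss is computed from c_s, which was built from the per-step a and b, not from the stacked tensors.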
Try this modified forward function to get gradients for a_s and b_s where c_s = func(a_s, b_s):
def forward(self, features):
    # initialise states
    hx = torch.randn(64, 16)
    cx = torch.randn(64, 16)

    # empty list to collect final preds
    a_s = []
    b_s = []
    c_s = []

    for t in range(num_time_steps):  # loop through each time step
        # select features at the current time step t
        features_t = features[:, t, :]
        # forward computation at the current time step t
        hx, cx = self.lstm_cell(features_t, (hx, cx))
        out_t = torch.relu(self.fc(hx))
        # do some computation with the output
        a = out_t * 0.8 + 20
        # b = a * 2
        # c = b * 0.9
        a_s.append(a)
        # b_s.append(b)
        # c_s.append(c)

    a_s = torch.stack(a_s, dim=1)  # of shape (batch_size, num_time_steps, num_targets)

    ##########################################
    ## c_s = func(a_s, b_s)
    ##########################################
    b_s = a_s * 2
    c_s = b_s * 0.9
    ##########################################
    ##########################################

    return a_s, b_s, c_s
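With this version, the training loop above prints real tensors for a_s.grad and b_s.grad, since both stacked tensors now sit on the path from the loss back to the parameters.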
I am trying to implement a VAE, and I am having trouble calculating the gradient for the model. I believe this is happening in the decoder. The exact error message is: Function AddmmBackward returned an invalid gradient at index 1 - got [10, 32] but expected shape compatible with [10, 1024]. Here is the decoder model.
class decoderNW(nn.Module):
    def __init__(self):
        super(decoderNW, self).__init__()
        channels = 32
        kernelSize = 4
        padding = (2, 0)
        stride = (2, 2)
        outputpadding = (1, 0)

        self.FC1 = nn.Linear(channels, 1024)
        self.FC2 = nn.Linear(channels, 10656)

        self.deConv3x301 = nn.ConvTranspose2d(channels, 64, kernel_size=kernelSize, stride=stride, output_padding=outputpadding)
        nn.init.xavier_uniform_(self.deConv3x301.weight)
        self.deConv3x302 = nn.ConvTranspose2d(64, 128, kernel_size=kernelSize, stride=stride, output_padding=outputpadding)
        nn.init.xavier_uniform_(self.deConv3x302.weight)
        self.deConv3x303 = nn.ConvTranspose2d(128, 64, kernel_size=kernelSize, stride=stride, output_padding=outputpadding)
        nn.init.xavier_uniform_(self.deConv3x303.weight)
        self.deConv3x304 = nn.ConvTranspose2d(64, 3, kernel_size=kernelSize, stride=stride)
        nn.init.xavier_uniform_(self.deConv3x304.weight)

        self.bn1 = nn.BatchNorm1d(1024)
        self.bn2 = nn.BatchNorm2d(64)
        self.bn3 = nn.BatchNorm2d(128)
        self.bn4 = nn.BatchNorm2d(64)

        self.ReLU = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.FC1(x)
        x = self.bn1(x)
        x = self.ReLU(x)
        # Shape of x => 10x1024
        x = self.FC2(x)
        # Shape of x => 10x10656
        # Reshape x to 10x32x9x37
        x = x.view(x.size(0), 32, 9, 37)
        x = self.deConv3x301(x)
        x = self.bn2(x)
        x = self.ReLU(x)
        x = self.deConv3x302(x)
        x = self.bn3(x)
        x = self.ReLU(x)
        x = self.deConv3x303(x)
        x = self.bn4(x)
        x = self.ReLU(x)
        x = self.deConv3x304(x)
        x = self.sigmoid(x)
        return x
I believe it's happening when I reshape the tensor into a 2D tensor (like an image) going from the FC layers to the deconv layers.
I have tried using the reshape function, but the same problem persists. I'm not sure where I am going wrong. Any help is greatly appreciated.
Thanks.
PS: I get this error when I run backward(). Here is the code snippet for that:
optimizerVAE.zero_grad()
variationalAE.train()
vaeT = vaeT.to('cuda')
mu, sigma, xHat, z = variationalAE(srcClrT)
loss = vaeLoss(srcClrT, mu, sigma, xHat, z)
loss.backward()
Edit 1: added the code for my VAE loss.
class getVAELoss(torch.nn.Module):
    def __init__(self):
        super(getVAELoss, self).__init__()

    def forward(self, x, mu, sigma, xHat, z):
        # Calculate the ELBO
        # ELBO = KL divergence - reconstruction loss

        # Reconstruction loss:
        # compute the log-probability of x under an n-d distribution
        logScale = nn.parameter.Parameter(torch.Tensor([0.0]).to('cuda'))
        scale = torch.exp(logScale)
        dist = torch.distributions.Normal(xHat, scale)
        logProbXZ = dist.log_prob(x)
        logProbXZ = logProbXZ.sum(dim=(1, 2, 3))
        reconstructionLoss = logProbXZ

        # KL divergence:
        # create two distributions p and q, where p is the reference
        # distribution with zero mean and unit sigma
        p = torch.distributions.Normal(torch.zeros_like(mu), torch.ones_like(sigma))
        q = torch.distributions.Normal(mu, sigma)

        # calculate the log-probabilities of z under both
        logQZX = q.log_prob(z)
        logPz = p.log_prob(z)

        KL = logQZX - logPz
        KL = KL.sum(-1)

        elbo = KL - reconstructionLoss
        elbo = elbo.mean()
        return elbo
The VAE loss is very similar to the one shown here.
EDIT 2
Looking at several VAE network architectures, I realized that usually only one FC layer is used in the decoder, so removing the second FC layer and changing the size of the first one removed the error. But I don't understand why this is happening.
self.FC1 = nn.Linear(channels, 1024*4*13)
#self.FC2 = nn.Linear(channels, 10656)
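A guess at the root cause, consistent with the error message but not confirmed in the thread: FC2 was declared as nn.Linear(channels, 10656) with channels = 32, yet it receives the 1024-feature output of FC1, so its weight shape disagrees with its input, which matches the [10, 32] vs [10, 1024] mismatch in the error. A minimal repro of that inconsistency:

import torch
import torch.nn as nn

fc2 = nn.Linear(32, 10656)   # in_features=32, as in the original decoder
x = torch.randn(10, 1024)    # but the input coming from FC1/bn1 has 1024 features
fc2(x)                       # raises a shape-mismatch error in current PyTorch
                             # (the thread's version apparently only surfaced it in backward)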
I am trying to predict the center of my palm.
The structure of my neural network consists of two convolutional layers, each followed by max pooling, and a linear layer with two outputs, one for x and the other for y. The input is a 720x720 image.
class MyNeuralNetwork(torch.nn.Module):
    def __init__(self):
        super(MyNeuralNetwork, self).__init__()
        self.conv1 = torch.nn.Conv2d(4, 5, 5)
        self.conv2 = torch.nn.Conv2d(5, 5, 5)
        self.pool = torch.nn.MaxPool2d(3, 3)
        self.linear = torch.nn.Linear(5 * 78 * 78, 2)

    def forward(self, x):
        x = self.conv1(x)
        x = self.pool(x)
        x = self.conv2(x)
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        x = self.linear(x)
        return x
I have the pathnames of the images saved in a CSV file; the x and y coordinates are saved in a different CSV file. Here is the code for my Dataset:
class MyHand(Dataset):
    """Creating the proper dataset to feed my neural network"""

    def __init__(self, name_path, root_dir, results_path, transform=None):
        self.names = pd.read_csv(name_path)
        self.rootdir = root_dir
        self.transform = transform
        self.results = pd.read_csv(results_path)

    def __len__(self):
        length = len(self.names.columns)
        return length

    def __getitem__(self, index):
        img_path = os.path.join(self.rootdir, self.names.columns[index])
        image = pl.imread(img_path)
        x_top_left_corner = torch.tensor(self.results.iloc[index, 0])
        y_top_left_corner = torch.tensor(self.results.iloc[index, 1])
        width = torch.tensor(self.results.iloc[index, 2])
        height = torch.tensor(self.results.iloc[index, 3])

        # calculating the x and y center of my palm
        x_center = x_top_left_corner + width / 2
        y_center = y_top_left_corner - height / 2

        if self.transform:
            image = self.transform(image)
        return image, x_center, y_center
and the code for training the network is:
dataset = MyHand(name_path='path to the names of the images csv',
                 results_path='path to the results cvs',
                 transform=torchvision.transforms.ToTensor())
loader = DataLoader(dataset=dataset, batch_size=4)
model = MyNeuralNetwork()

criterion = torch.nn.MSELoss()
EPOCHS = 5
LEARNING_RATE = 0.001
optimizer = optim.SGD(model.parameters(), LEARNING_RATE)

for epoch in range(EPOCHS):
    print("epoch:", epoch)
    for data in dataset:
        pic, x, y = data
        model.zero_grad()
        outpout = model(pic[None, :, :, :])
        loss1 = criterion(outpout[0, 0], x)
        loss2 = criterion(outpout[0, 1], y)
        loss = loss1 + loss2
        loss.backward()
        print(loss)
But as you can see below, my loss has exactly the same values at each epoch and doesn't decrease at all. What can I do about that? I tried different values of the learning rate, but it's still the same.
Your loss values are extremely high, as you can see. I would propose that you normalize your outputs by using the sigmoid activation function. The coordinates are then in the range 0-1 and can later be mapped back to the image by multiplying them by 720. To compute the loss, you have to divide your target coordinates by 720 as well. Then you should get a nice and stable loss in the range 0-1.
Also:
either decay your learning rate or try a smaller one
scale your image down (I don't know what the images look like, but 720x720 is quite big)
use three convolutions with smaller kernels
add a second linear layer
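A sketch of the normalization idea (my rendering of the suggestion, reusing the names from the question's code):

# forward: squash the two coordinate outputs into (0, 1)
def forward(self, x):
    x = self.conv1(x)
    x = self.pool(x)
    x = self.conv2(x)
    x = self.pool(x)
    x = x.view(x.size(0), -1)
    return torch.sigmoid(self.linear(x))

# training: scale the targets into the same range ...
loss1 = criterion(outpout[0, 0], x / 720)
loss2 = criterion(outpout[0, 1], y / 720)

# ... and map predictions back to pixel coordinates when using them
x_pred_pixels = outpout[0, 0] * 720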
I am following a tutorial on RNNs in TensorFlow, but I have a question concerning the input formats.
They take raw_x (a one-hot vector) and basically first cut it up into pieces of length 200 (batch_size) to form data_x. That is good.
Then they further cut up data_x into pieces of length 5 (num_steps, or graph width) with:
for i in range(epoch_size):
    x = data_x[:, i * num_steps:(i + 1) * num_steps]
    y = data_y[:, i * num_steps:(i + 1) * num_steps]
    yield (x, y)
However, if I look at the data, the slices of x do not match data_x. The first one does, but then they diverge.
Am I misunderstanding the above code? I would like to understand how x is being created, or what it is supposed to look like.
I had expected the second item to be 0 1 0 1 0.
Also, I thought an epoch is when you go through the data completely; from this it seems that they split the data up into 1000 parts (epoch_size)?
If it helps, this is my full code. I am trying to figure out what is going on with x at line 48:
import numpy as np
import tensorflow as tf
# %matplotlib inline
import matplotlib.pyplot as plt

# Global config variables
num_steps = 5  # number of truncated backprop steps ('n' in the discussion above)
batch_size = 200
num_classes = 2
state_size = 4
learning_rate = 0.1

def gen_data(size=1000000):
    print('generating data')
    X = np.array(np.random.choice(2, size=(size,)))
    Y = []
    for i in range(size):
        threshold = 0.5
        if X[i-3] == 1:
            threshold += 0.5
        if X[i-8] == 1:
            threshold -= 0.25
        if np.random.rand() > threshold:
            Y.append(0)
        else:
            Y.append(1)
    return X, np.array(Y)

# adapted from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/rnn/ptb/reader.py
def gen_batch(raw_data, batch_size, num_steps):
    print('generating batches')
    raw_x, raw_y = raw_data
    data_length = len(raw_x)

    # partition raw data into batches and stack them vertically in a data matrix
    batch_partition_length = data_length // batch_size
    data_x = np.zeros([batch_size, batch_partition_length], dtype=np.int32)
    data_y = np.zeros([batch_size, batch_partition_length], dtype=np.int32)
    for i in range(batch_size):
        data_x[i] = raw_x[batch_partition_length * i:batch_partition_length * (i + 1)]
        data_y[i] = raw_y[batch_partition_length * i:batch_partition_length * (i + 1)]

    # further divide batch partitions into num_steps for truncated backprop
    epoch_size = batch_partition_length // num_steps
    for i in range(epoch_size):
        x = data_x[:, i * num_steps:(i + 1) * num_steps]
        y = data_y[:, i * num_steps:(i + 1) * num_steps]
        yield (x, y)

def gen_epochs(n, num_steps):
    for i in range(n):
        yield gen_batch(gen_data(), batch_size, num_steps)

"""
Placeholders
"""
x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')
init_state = tf.zeros([batch_size, state_size])

"""
RNN Inputs
"""
# Turn our x placeholder into a list of one-hot tensors:
# rnn_inputs is a list of num_steps tensors with shape [batch_size, num_classes]
x_one_hot = tf.one_hot(x, num_classes)
rnn_inputs = tf.unstack(x_one_hot, axis=1)

"""
Definition of rnn_cell

This is very similar to the __call__ method on Tensorflow's BasicRNNCell. See:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn_cell.py
"""
with tf.variable_scope('rnn_cell'):
    W = tf.get_variable('W', [num_classes + state_size, state_size])
    b = tf.get_variable('b', [state_size], initializer=tf.constant_initializer(0.0))

def rnn_cell(rnn_input, state):
    with tf.variable_scope('rnn_cell', reuse=True):
        W = tf.get_variable('W', [num_classes + state_size, state_size])
        b = tf.get_variable('b', [state_size], initializer=tf.constant_initializer(0.0))
    return tf.tanh(tf.matmul(tf.concat(axis=1, values=[rnn_input, state]), W) + b)

"""
Adding rnn_cells to graph

This is a simplified version of the "rnn" function from Tensorflow's api. See:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn.py
"""
state = init_state
rnn_outputs = []
for rnn_input in rnn_inputs:
    state = rnn_cell(rnn_input, state)
    rnn_outputs.append(state)
final_state = rnn_outputs[-1]

"""
Predictions, loss, training step

Losses and total_loss are similar to the "sequence_loss_by_example" and "sequence_loss"
functions, respectively, from Tensorflow's api. See:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/seq2seq.py
"""
# logits and predictions
with tf.variable_scope('softmax'):
    W = tf.get_variable('W', [state_size, num_classes])
    b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
logits = [tf.matmul(rnn_output, W) + b for rnn_output in rnn_outputs]
predictions = [tf.nn.softmax(logit) for logit in logits]

# Turn our y placeholder into a list of labels
y_as_list = [tf.squeeze(i, axis=[1]) for i in tf.split(axis=1, num_or_size_splits=num_steps, value=y)]

# losses and train_step
losses = [tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit, labels=label) for
          logit, label in zip(logits, y_as_list)]
total_loss = tf.reduce_mean(losses)
train_step = tf.train.AdagradOptimizer(learning_rate).minimize(total_loss)

"""
Function to train the network
"""
def train_network(num_epochs, num_steps, state_size=4, verbose=True):
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        training_losses = []
        for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps)):
            training_loss = 0
            training_state = np.zeros((batch_size, state_size))
            if verbose:
                print("\nEPOCH", idx)
            for step, (X, Y) in enumerate(epoch):
                tr_losses, training_loss_, training_state, _ = \
                    sess.run([losses,
                              total_loss,
                              final_state,
                              train_step],
                             feed_dict={x: X, y: Y, init_state: training_state})
                training_loss += training_loss_
                if step % 100 == 0 and step > 0:
                    if verbose:
                        print("Average loss at step", step,
                              "for last 250 steps:", training_loss/100)
                    training_losses.append(training_loss/100)
                    training_loss = 0
    return training_losses

training_losses = train_network(1, num_steps)
plt.plot(training_losses)
It seems the batches are actually transposed.
The first row of the x matrix (200 x 5) matches the first 5 elements of raw_x.
Only in the next iteration do elements 5-10 of raw_x land in the first row (again) of x.
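A small toy version of gen_batch's slicing makes this concrete (the shrunken sizes are mine, for readability):

import numpy as np

raw_x = np.arange(20)                               # toy sequence
batch_size, num_steps = 4, 2
batch_partition_length = len(raw_x) // batch_size   # 5

# row i holds raw_x[5*i : 5*(i+1)], exactly like data_x in gen_batch
data_x = raw_x.reshape(batch_size, batch_partition_length)

print(data_x[:, 0:num_steps])
# [[ 0  1]
#  [ 5  6]
#  [10 11]
#  [15 16]]
# Consecutive raw_x values continue across iterations within each row
# (row 0 yields [2, 3] on the next slice), not down the rows of one batch.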
This question already has answers here:
Save Tensorflow graph for viewing in Tensorboard without summary operations
I am trying to use TensorBoard to analyse a graph in TensorFlow with a SummaryWriter. However, TensorFlow is not outputting a 'graph' folder with the information. Perhaps I am missing a command, or it is not in the right place?
writer = tf.train.SummaryWriter(logs_path, graph=tf.get_default_graph())
This is what I used. I think this may no longer work in TensorFlow 1.0 (just the SummaryWriter command).
import numpy as np
import tensorflow as tf
# %matplotlib inline
import matplotlib.pyplot as plt

# Global config variables
num_steps = 5  # number of truncated backprop steps ('n' in the discussion above)
batch_size = 200
num_classes = 2
state_size = 4
learning_rate = 0.1
logs_path = "./graph"

def gen_data(size=1000000):
    X = np.array(np.random.choice(2, size=(size,)))
    Y = []
    for i in range(size):
        threshold = 0.5
        if X[i-3] == 1:
            threshold += 0.5
        if X[i-8] == 1:
            threshold -= 0.25
        if np.random.rand() > threshold:
            Y.append(0)
        else:
            Y.append(1)
    return X, np.array(Y)

# adapted from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/models/rnn/ptb/reader.py
def gen_batch(raw_data, batch_size, num_steps):
    raw_x, raw_y = raw_data
    data_length = len(raw_x)

    # partition raw data into batches and stack them vertically in a data matrix
    batch_partition_length = data_length // batch_size
    data_x = np.zeros([batch_size, batch_partition_length], dtype=np.int32)
    data_y = np.zeros([batch_size, batch_partition_length], dtype=np.int32)
    for i in range(batch_size):
        data_x[i] = raw_x[batch_partition_length * i:batch_partition_length * (i + 1)]
        data_y[i] = raw_y[batch_partition_length * i:batch_partition_length * (i + 1)]

    # further divide batch partitions into num_steps for truncated backprop
    epoch_size = batch_partition_length // num_steps
    for i in range(epoch_size):
        x = data_x[:, i * num_steps:(i + 1) * num_steps]
        y = data_y[:, i * num_steps:(i + 1) * num_steps]
        yield (x, y)

def gen_epochs(n, num_steps):
    for i in range(n):
        yield gen_batch(gen_data(), batch_size, num_steps)

"""
Placeholders
"""
x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')
init_state = tf.zeros([batch_size, state_size])

"""
Inputs
"""
x_one_hot = tf.one_hot(x, num_classes)
rnn_inputs = tf.unstack(x_one_hot, axis=1)

"""
RNN
"""
cell = tf.contrib.rnn.BasicRNNCell(state_size)
rnn_outputs, final_state = tf.contrib.rnn.static_rnn(cell, rnn_inputs, initial_state=init_state)

"""
Predictions, loss, training step
"""
with tf.variable_scope('softmax'):
    W = tf.get_variable('W', [state_size, num_classes])
    b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
logits = [tf.matmul(rnn_output, W) + b for rnn_output in rnn_outputs]
predictions = [tf.nn.softmax(logit) for logit in logits]

y_as_list = [tf.squeeze(i, axis=[1]) for i in tf.split(axis=1, num_or_size_splits=num_steps, value=y)]

loss_weights = [tf.ones([batch_size]) for i in range(num_steps)]
losses = tf.contrib.legacy_seq2seq.sequence_loss_by_example(logits, y_as_list, loss_weights)
tf.scalar_summary("losses", losses)
total_loss = tf.reduce_mean(losses)
train_step = tf.train.AdagradOptimizer(learning_rate).minimize(total_loss)

# Not sure why this is not outputting a graph for tensorboard
writer = tf.train.SummaryWriter(logs_path, graph=tf.get_default_graph())

"""
Function to train the network
"""
def train_network(num_epochs, num_steps, state_size=4, verbose=True):
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        training_losses = []
        saved = gen_epochs(num_epochs, num_steps)
        for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps)):
            training_loss = 0
            training_state = np.zeros((batch_size, state_size))
            if verbose:
                print("\nEPOCH", idx)
            for step, (X, Y) in enumerate(epoch):
                tr_losses, training_loss_, training_state, _ = \
                    sess.run([losses,
                              total_loss,
                              final_state,
                              train_step],
                             feed_dict={x: X, y: Y, init_state: training_state})
                training_loss += training_loss_
                if step % 100 == 0 and step > 0:
                    if verbose:
                        print("Average loss at step", step,
                              "for last 250 steps:", training_loss/100)
                    training_losses.append(training_loss/100)
                    training_loss = 0
    return training_losses

training_losses = train_network(1, num_steps)
plt.plot(training_losses)
# tensorboard --logdir="my_graph"
This worked for me:
writer = tf.summary.FileWriter(logdir='logdir', graph=tf.get_default_graph())
writer.flush()
In TensorFlow 1.0, tf.train.SummaryWriter was removed; its replacement is tf.summary.FileWriter, which can take the graph directly at construction. Point TensorBoard at the same directory (tensorboard --logdir=logdir) to see the graph.