I have the following code
flags = tf.flags
logging = tf.logging
flags.DEFINE_string('model', 'small',
'A type of model. Possible options are: small, medium, large.'
)
flags.DEFINE_string('data_path', None, 'data_path')
flags.DEFINE_string('checkpoint_dir', 'ckpt', 'checkpoint_dir')
flags.DEFINE_bool('use_fp16', False,
'Train using 16-bit floats instead of 32bit floats')
flags.DEFINE_bool('train', False, 'should we train or test')
FLAGS = flags.FLAGS
def data_type():
return tf.float16 if FLAGS.use_fp16 else tf.float32
class PTBModel(object):
"""The PTB model."""
def __init__(self, is_training, config):
self.batch_size = batch_size = config.batch_size
self.num_steps = num_steps = config.num_steps
size = config.hidden_size
vocab_size = config.vocab_size
self._input_data = tf.placeholder(tf.float32, [batch_size,
num_steps])
self._targets = tf.placeholder(tf.int32, [batch_size,
num_steps])
# Slightly better results can be obtained with forget gate biases
# initialized to 1 but the hyperparameters of the model would need to be
# different than reported in the paper.
lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(size, forget_bias=0.0,
state_is_tuple=True)
if is_training and config.keep_prob < 1:
lstm_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell,
output_keep_prob=config.keep_prob)
cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers, state_is_tuple=True)
self._initial_state = cell.zero_state(batch_size, data_type())
with tf.device('/cpu:0'):
embedding = tf.get_variable('embedding', [vocab_size,
size], dtype=data_type())
inputs = tf.nn.embedding_lookup(embedding, self._input_data)
if is_training and config.keep_prob < 1:
inputs = tf.nn.dropout(inputs, config.keep_prob)
# Simplified version of tensorflow.models.rnn.rnn.py's rnn().
# This builds an unrolled LSTM for tutorial purposes only.
# In general, use the rnn() or state_saving_rnn() from rnn.py.
#
# The alternative version of the code below is:
#
# from tensorflow.models.rnn import rnn
inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(inputs, num_steps, axis=1)]
(outputs, state) = tf.nn.rnn(cell, inputs, initial_state=self._initial_state)
# outputs = []
# state = self._initial_state
# with tf.variable_scope("RNN"):
# for time_step in range(num_steps):
# if time_step > 0: tf.get_variable_scope().reuse_variables()
# (cell_output, state) = cell(inputs[:, time_step, :], state)
# outputs.append(cell_output)
output = tf.reshape(tf.concat(outputs, axis=1), [-1, size])
softmax_w = tf.get_variable('softmax_w', [size, vocab_size],
dtype=data_type())
softmax_b = tf.get_variable('softmax_b', [vocab_size],
dtype=data_type())
logits = tf.matmul(output, softmax_w) + softmax_b
loss = tf.nn.seq2seq.sequence_loss_by_example(
    [logits],
    [tf.reshape(self._targets, [-1])],
    [tf.ones([batch_size * num_steps], dtype=data_type())])
self._cost = cost = tf.reduce_sum(loss) / batch_size
self._final_state = state
# RANI
self.logits = logits
if not is_training:
return
self._lr = tf.Variable(0.0, trainable=False)
tvars = tf.trainable_variables()
(grads, _) = tf.clip_by_global_norm(tf.gradients(cost, tvars),
config.max_grad_norm)
optimizer = tf.train.GradientDescentOptimizer(self._lr)
self._train_op = optimizer.apply_gradients(zip(grads, tvars))
self._new_lr = tf.placeholder(tf.float32, shape=[],
name='new_learning_rate')
self._lr_update = tf.assign(self._lr, self._new_lr)
def assign_lr(self, session, lr_value):
session.run(self._lr_update, feed_dict={self._new_lr: lr_value})
...
However, when I run it, I get the following error:
File "ptb_word_lm.py", line 349, in <module>
tf.app.run()
File "C:\Users\Josh Goldman\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\platform\app.py", line 48, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough))
File "ptb_word_lm.py", line 299, in main
m = PTBModel(is_training=True, config=config)
File "ptb_word_lm.py", line 60, in __init__
inputs = tf.nn.embedding_lookup(embedding, self._input_data)
File "C:\Users\Josh Goldman\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\embedding_ops.py", line 122, in embedding_lookup
return maybe_normalize(_do_gather(params[0], ids, name=name))
File "C:\Users\Josh Goldman\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\embedding_ops.py", line 42, in _do_gather
return array_ops.gather(params, ids, name=name)
File "C:\Users\Josh Goldman\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 1179, in gather
validate_indices=validate_indices, name=name)
File "C:\Users\Josh Goldman\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 589, in apply_op
param_name=input_name)
File "C:\Users\Josh Goldman\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 60, in _SatisfiesTypeConstraint
", ".join(dtypes.as_dtype(x).name for x in allowed_list)))
TypeError: Value passed to parameter 'indices' has DataType float32 not in list of allowed values: int32, int64
Someone, please help me. I have all my packages upgraded to the newest version. I'm using the correct interpreter. I'm sorry if the error is very simple; I'm only 13 and very new to programming. By the way, this code is not mine; I got it from GitHub.
The error is due to your TensorFlow version: the syntax of tf.split changed in newer versions, and there is the same problem with tf.concat. The TypeError itself comes from the dtype of your input placeholder, fixed in the second snippet below.
# replace this line
inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(1, num_steps, inputs)]
# with the following one (works with `tensorflow >= 1.0.0`)
inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(inputs, num_steps, axis=1)]
# Also, the embedding indices must be integers, not floats -- this is
# what the TypeError in your traceback is about:
self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
# replace this line
output = tf.reshape(tf.concat(1, outputs), [-1, size])
# with this one
output = tf.reshape(tf.concat(outputs, axis=1), [-1, size])
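Putting the pieces together, here is a minimal sketch of the corrected input pipeline (shapes are illustrative, TF 1.x API):
import tensorflow as tf

batch_size, num_steps, vocab_size, size = 20, 35, 10000, 200
# embedding indices must be integers -- this is what the TypeError complains about
input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
embedding = tf.get_variable('embedding', [vocab_size, size], dtype=tf.float32)
inputs = tf.nn.embedding_lookup(embedding, input_data)  # float32, [batch, steps, size]
# tensorflow >= 1.0.0 argument order: tf.split(value, num_or_size_splits, axis)
inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(inputs, num_steps, axis=1)]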
I am trying to create a GNN that models a protein. However, I am running into an error with GraphConv (I get the same error with GCNConv). I do not understand why I am getting this error when the shapes should be able to be multiplied. I think the error must have something to do with the custom dataset I created, but I am not 100% sure. Please let me know if you have had a similar issue or know how to fix this. Thank you.
EDIT: Even if I change embedding_size to 1479, I still get: RuntimeError: mat1 and mat2 shapes cannot be multiplied (1479x1 and 1479x1479).
Custom dataset:
class ProteinDataset(geom_data.Dataset):
def __init__(self, root, transform=None, pre_transform=None):
# root = where data set is stored
super(ProteinDataset, self).__init__(root, transform, pre_transform)
self.root = root
@property
def raw_file_names(self):
return os.listdir(f'{self.root}/raw')
@property
def processed_file_names(self):
inxs = []
for pdb in self.raw_paths:
inxs.append(pdb.split('/')[-1].split('.p')[0])
return [f'{i}.pt' for i in inxs]
def download(self):
pass
def process(self):
for pdb in self.raw_paths:
try:
mol_obj = Chem.rdmolfiles.MolFromPDBFile(pdb)
except AttributeError:
os.remove(pdb)
continue
# Get node features
node_feats = self._get_node_features(mol_obj).reshape([-1,1])
# Get edge features
edge_feats = self._get_edge_features(mol_obj).reshape([-1,1])
# Get adjacency info
edge_index = self._get_adjacency_info(mol_obj)
label = self._get_labels(pdb)
# Create Data object
data = geom_data.Data(x=node_feats,
edge_index=edge_index,
edge_attr=edge_feats,
y=label)
i = pdb.split('/')[-1].split('.p')[0]
torch.save(data, os.path.join(self.processed_dir,f'{i}.pt'))
def _get_node_features(self, mol):
all_node_feats = []
for atom in mol.GetAtoms():
all_node_feats.append(atom.GetMass())
all_node_feats = np.asarray(all_node_feats)
return torch.tensor(all_node_feats, dtype=torch.float)
def _get_edge_features(self, mol):
all_edge_feats = []
dists = Chem.rdmolops.Get3DDistanceMatrix(mol)
# CA-CA Distances
for bond in mol.GetBonds():
begin = bond.GetBeginAtomIdx()
end = bond.GetEndAtomIdx()
all_edge_feats.append(dists[begin,end])
all_edge_feats = np.asarray(all_edge_feats)
return torch.tensor(all_edge_feats, dtype=torch.float)
def _get_adjacency_info(self, mol):
adj_matrix = Chem.rdmolops.GetAdjacencyMatrix(mol)
row, col = np.where(adj_matrix)
coo = np.array(list(zip(row, col)))
coo = np.reshape(coo, (2, -1))
return torch.tensor(coo, dtype=torch.long)
def _get_labels(self, fn):
with open(fn, 'r') as f:
label = float(f.readline())
f.close()
label = np.asarray([label])
return torch.tensor(label, dtype=torch.float)
def len(self):
return len(self.raw_paths)
def get(self, inx):
data = torch.load(self.processed_paths[inx])
return data
Model:
class GNN(torch.nn.Module):
def __init__(self, feature_size):
super(GNN, self).__init__()
embedding_size = 1024
# GNN Layers
self.conv1 = GraphConv(feature_size, embedding_size)
self.head1 = Linear(embedding_size*3, embedding_size)
self.pool1 = TopKPooling(embedding_size, ratio=0.8)
self.conv2 = GraphConv(embedding_size, embedding_size)
self.head2 = Linear(embedding_size*3, embedding_size)
self.pool2 = TopKPooling(embedding_size, ratio=0.5)
self.conv3 = GraphConv(embedding_size, embedding_size)
self.head3 = Linear(embedding_size*3, embedding_size)
self.pool3 = TopKPooling(embedding_size, ratio=0.2)
# Linear Layers
self.fc1 = Linear(embedding_size*2, 1024)
self.fc2 = Linear(1024, 128)
self.fc3 = Linear(128, 1)
def forward(self, x, edge_attr, edge_index, batch_index):
# First block
x = self.conv1(x, edge_index).relu()
x = self.head1(x)
x, edge_index, edge_attr, batch_index, _, _ = self.pool1(x,
edge_index,
None,
batch_index)
x1 = torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1)
# Second block
x = self.conv2(x, edge_index).relu()
x = self.head2(x)
x, edge_index, edge_attr, batch_index, _, _ = self.pool2(x,
edge_index,
None,
batch_index)
x2 = torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1)
# Third block
x = self.conv3(x, edge_index).relu()
x = self.head3(x)
x, edge_index, edge_attr, batch_index, _, _ = self.pool3(x,
edge_index,
None,
batch_index)
x3 = torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1)
# Sum the pooled vectors
x = x1 + x2 + x3
# Apply Linear Layers
x = self.fc1(x).relu()
x = self.fc2(x).relu()
x = self.fc3(x)
return x
Training:
device = torch.device('cuda')
def count_parameters(model):
return sum(p.numel() for p in model.parameters() if p.requires_grad)
# Loading the dataset
train_set = ProteinDataset(root='data/lys50_2/train')
test_set = ProteinDataset(root='data/lys50_2/test')
print('Shape of input:', train_set[0].x.shape[0])
# Loading the model
model = GNN(feature_size=train_set[0].x.shape[0])
model = model.to(device)
print(f'Number of parameters: {count_parameters(model)}')
print(model)
# Loss and Optimizer
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.00005)
print(optimizer)
# Prepare for training
train_loader = DataLoader(train_set, batch_size=1, shuffle=True)
test_loader = DataLoader(test_set, batch_size=1, shuffle=False)
def train(m,opt):
loss_sum = 0.0
for _, batch in enumerate(train_loader):
# Use GPU
batch.to(device)
# Reset grad
opt.zero_grad()
# Pass node features and connections
pred = m(batch.x.float(),
batch.edge_attr.float(),
batch.edge_index,
batch.batch)
# Calculate loss and gradients
loss = loss_fn(pred, batch.y)
loss.backward()
loss_sum += loss.item()
# Update using the gradients
opt.step()
return loss_sum / len(train_loader)
def validate(m):
loss_sum = 0.0
for _, batch in enumerate(test_loader):
# Use GPU
batch.to(device)
# No grad
with torch.no_grad():
pred = m(batch.x.float(),
batch.edge_attr.float(),
batch.edge_index,
batch.batch)
# Calculate loss and gradients
loss = loss_fn(pred, batch.y)
loss_sum += loss.item()
return loss_sum / len(test_loader)
model.zero_grad()
optimizer.zero_grad()
# Loop for training
for i in range(101):
loss = train(model,optimizer)
if (i%10==0):
loss_v = validate(model)
print(i, loss, loss_v)
else:
print(i, loss)
Error when running training:
Traceback (most recent call last):
File "/home/spencer/sh3/gnn/./train.py", line 79, in <module>
loss = train(model,optimizer)
File "/home/spencer/sh3/gnn/./train.py", line 44, in train
pred = m(batch.x.float(),
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/feig/s1/spencer/sh3/gnn/model2.py", line 32, in forward
x = self.conv1(x, edge_index).relu()
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch_geometric/nn/conv/graph_conv.py", line 71, in forward
out = self.lin_rel(out)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch_geometric/nn/dense/linear.py", line 109, in forward
return F.linear(x, self.weight, self.bias)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/functional.py", line 1848, in linear
return torch._C._nn.linear(input, weight, bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1479x1 and 1479x1024)
The error tells you that the input shapes don't match.
You could reshape the input in the forward method, e.g. x = x.view(1, 1479), but make sure that is really what you need: this error usually indicates a wrongly shaped dataset or passing the wrong input.
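Here, mat1 is (num_nodes x num_features) = (1479 x 1): each node carries a single scalar feature (the atom mass), so the first GraphConv should be built with an input size of 1, not 1479. A sketch of the likely fix, assuming those shapes:
# x has shape [num_nodes, num_features] = [1479, 1]
print(train_set[0].x.shape)                        # torch.Size([1479, 1])
# pass the feature dimension, not the node count:
model = GNN(feature_size=train_set[0].x.shape[1])  # 1 instead of 1479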
I have built a model that I can only train with a custom loss, which I am trying to debug.
For this I have this simple loop here:
for (mel_specs, pred_inp), labels in train_dataset:
enc_predictions = model((mel_specs, pred_inp)) # <--- Returns a Tensor, not an EagerTensor
input_lengths = get_padded_length(mel_specs[:, :, 0])
label_lengths = get_padded_length(labels)
print(enc_predictions)
loss_value = rnnt_loss(enc_predictions, labels, input_lengths, label_lengths)
print(loss_value)
The model is just:
model = tf.keras.Model(
inputs=[mel_specs, pred_inp],
outputs=[outputs]
)
The problem is that model((mel_specs, pred_inp)) just gives me a regular Tensor but not an EagerTensor, and I don't understand why. mel_specs and pred_inp are EagerTensors coming from train_dataset, which is a tf.data.Dataset.
What am I missing here?
Environment
$ pip freeze | grep tensorflow
tensorflow==2.2.0
tensorflow-addons==0.10.0
tensorflow-datasets==3.1.0
tensorflow-estimator==2.2.0
tensorflow-metadata==0.22.2
warprnnt-tensorflow==0.1
Update: MVCE
I was able to boil it down to the encoder part of the model. If I run this it will fail and print:
Calling model(x) didn't return EagerTensor
Traceback (most recent call last):
...
return loss_value, tape.gradient(loss_value, model.trainable_variables)
File "/home/sfalk/miniconda3/envs/asr2/lib/python3.8/site-packages/tensorflow/python/eager/backprop.py", line 1042, in gradient
flat_grad = imperative_grad.imperative_grad(
File "/home/sfalk/miniconda3/envs/asr2/lib/python3.8/site-packages/tensorflow/python/eager/imperative_grad.py", line 71, in imperative_grad
return pywrap_tfe.TFE_Py_TapeGradient(
File "/home/sfalk/miniconda3/envs/asr2/lib/python3.8/site-packages/tensorflow/python/eager/backprop.py", line 157, in _gradient_function
return grad_fn(mock_op, *out_grads)
File "/home/sfalk/miniconda3/envs/asr2/lib/python3.8/site-packages/tensorflow/python/ops/math_grad.py", line 252, in _MeanGrad
sum_grad = _SumGrad(op, grad)[0]
File "/home/sfalk/miniconda3/envs/asr2/lib/python3.8/site-packages/tensorflow/python/ops/math_grad.py", line 211, in _SumGrad
output_shape_kept_dims = math_ops.reduced_shape(input_shape,
File "/home/sfalk/miniconda3/envs/asr2/lib/python3.8/site-packages/tensorflow/python/ops/math_ops.py", line 3735, in reduced_shape
input_shape = input_shape.numpy()
AttributeError: 'Tensor' object has no attribute 'numpy'
The code:
import numpy as np
import tensorflow as tf
from tensorflow.python.framework.ops import EagerTensor
class TimeReduction(tf.keras.layers.Layer):
def __init__(self,
reduction_factor,
batch_size=None,
**kwargs):
super(TimeReduction, self).__init__(**kwargs)
self.reduction_factor = reduction_factor
self.batch_size = batch_size
def call(self, inputs):
input_shape = tf.shape(inputs)
batch_size = self.batch_size
if batch_size is None:
batch_size = input_shape[0]
max_time = input_shape[1]
num_units = inputs.get_shape().as_list()[-1]
outputs = inputs
paddings = [[0, 0], [0, tf.math.floormod(max_time, self.reduction_factor)], [0, 0]]
outputs = tf.pad(outputs, paddings)
return tf.reshape(outputs, (batch_size, -1, num_units * self.reduction_factor))
def make_encoder_model(
input_shape: tuple,
out_dim: int,
num_layers: int,
d_model: int,
proj_size,
initializer=None,
dtype=tf.float32,
stateful: bool = False,
dropout=0.5,
reduction_index=1,
reduction_factor=2,
):
def lstm_cell():
return tf.compat.v1.nn.rnn_cell.LSTMCell(
d_model,
num_proj=proj_size,
initializer=initializer,
dtype=dtype
)
batch_size = None if not stateful else 1
inputs = tf.keras.Input(
shape=input_shape,
batch_size=batch_size,
dtype=tf.float32
)
x = tf.keras.layers.BatchNormalization()(inputs)
for i in range(num_layers):
rnn_layer = tf.keras.layers.RNN(lstm_cell(), return_sequences=True, stateful=stateful)
x = rnn_layer(x)
x = tf.keras.layers.Dropout(dropout)(x)
x = tf.keras.layers.LayerNormalization(dtype=dtype)(x)
if i == reduction_index:
x = TimeReduction(reduction_factor, batch_size=batch_size)(x)
outputs = tf.keras.layers.Dense(out_dim)(x)
return tf.keras.Model(
inputs=[inputs],
outputs=[outputs],
name='encoder'
)
def gradient(model, loss, inputs, y_true):
y_true = tf.transpose(y_true, perm=(0, 2, 1))
with tf.GradientTape() as tape:
y_pred = model(inputs, training=True)
loss_value = loss(y_true=y_true, y_pred=y_pred)
return loss_value, tape.gradient(loss_value, model.trainable_variables)
def main():
X, Y = [
np.random.rand(100, 512),
np.random.rand(100, 512)
], [[[0]*50], [[1]*50]]
# assert len(X) == len(Y)
encoder_model = make_encoder_model(
input_shape=(None, 512),
out_dim=1,
num_layers=2,
d_model=10,
proj_size=23,
dropout=0.5,
reduction_index=1,
reduction_factor=2
)
enc_dataset = tf.data.Dataset.from_generator(
lambda: zip(X, Y),
output_types=(tf.float32, tf.int32),
output_shapes=([None, 512], [None, None]),
).batch(2)
loss = tf.keras.losses.MeanSquaredError()
for x, y in enc_dataset:
from_predict = encoder_model.predict(x)
from_call = encoder_model(x)
if not isinstance(from_predict, np.ndarray):
print("Calling model.predict(x) didn't return np.ndarray")
if not isinstance(from_call, EagerTensor):
print("Calling model(x) didn't return EagerTensor")
loss_value, gradients = gradient(encoder_model, loss, x, y)
print(loss_value)
print(gradients)
print('All done.')
if __name__ == '__main__':
main()
Why do you use the LSTM cell from compat.v1? I would imagine this leads to compatibility issues.
Most importantly, those "pure TensorFlow" RNN cells are not made to be used with the Keras RNN layer anyway -- they were used with tf.nn.dynamic_rnn, for example, which is now deprecated and also found only in the compat.v1 module.
I would recommend that you simply use tf.keras.layers.LSTM directly, as it's much faster anyway -- it allows for the use of highly optimized GPU kernels. Alternatively, you can replace the compat.v1 LSTMCell with a tf.keras.layers.LSTMCell and put that into the RNN.
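For example, a sketch of what the swap inside make_encoder_model could look like (untested; note that tf.keras.layers.LSTMCell has no num_proj equivalent, so the projection would need to be dropped or replaced with a Dense layer):
# option 1: keep the cell/RNN structure, but with the Keras cell
def lstm_cell():
    return tf.keras.layers.LSTMCell(d_model)
# option 2: drop the cell entirely and use the fused layer in the loop
x = tf.keras.layers.LSTM(d_model, return_sequences=True, stateful=stateful)(x)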
No idea what's happening in my case:
Error message:
Traceback (most recent call last):
File "plot_parametric_pytorch.py", line 127, in <module>
ops = opfun(X_train[smpl])
File "plot_parametric_pytorch.py", line 81, in <lambda>
opfun = lambda X: model.forward(Variable(torch.from_numpy(X)))
File "/mnt_home/klee/LBSBGenGapSharpnessResearch/vgg.py", line 43, in forward
x = self.features(x).to(device)
File "/home/klee/anaconda3/envs/sharpenv/lib/python3.7/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "/home/klee/anaconda3/envs/sharpenv/lib/python3.7/site-packages/torch/nn/modules/container.py", line 100, in forward
input = module(input)
File "/home/klee/anaconda3/envs/sharpenv/lib/python3.7/site-packages/torch/nn/modules/module.py", line 550, in __call__
result = self.forward(*input, **kwargs)
File "/home/klee/anaconda3/envs/sharpenv/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 349, in forward
return self._conv_forward(input, self.weight)
File "/home/klee/anaconda3/envs/sharpenv/lib/python3.7/site-packages/torch/nn/modules/conv.py", line 346, in _conv_forward
self.padding, self.dilation, self.groups)
RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same
Source code:
[import statements]
cudnn.benchmark = True
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
X_train = X_train.astype('float32')
X_train = np.transpose(X_train, axes=(0, 3, 1, 2))
X_test = X_test.astype('float32')
X_test = np.transpose(X_test, axes=(0, 3, 1, 2))
X_train /= 255
X_test /= 255
device = torch.device('cuda:0')
# This is where you can load any model of your choice.
# I stole PyTorch Vision's VGG network and modified it to work on CIFAR-10.
# You can take this line out and add any other network and the code
# should run just fine.
model = vgg.vgg11_bn()
model.to(device)
# Forward pass
opfun = lambda X: model.forward(Variable(torch.from_numpy(X))) <------
# Forward pass through the network given the input
predsfun = lambda op: np.argmax(op.data.numpy(), 1)
# Do the forward pass, then compute the accuracy
accfun = lambda op, y: np.mean(np.equal(predsfun(op), y.squeeze()))*100
# Initial point
x0 = deepcopy(model.state_dict())
# Number of epochs to train for
# Choose a large value since LB training needs higher values
# Changed from 150 to 30
nb_epochs = 30
batch_range = [25, 40, 50, 64, 80, 128, 256, 512, 625, 1024, 1250, 1750, 2048, 2500, 3125, 4096, 5000]
# parametric plot (i.e., don't train the network)
hotstart = False
if not hotstart:
for batch_size in batch_range:
optimizer = torch.optim.Adam(model.parameters())
model.load_state_dict(x0)
model.to(device)
average_loss_over_epoch = '-'
print('Optimizing the network with batch size %d' % batch_size)
np.random.seed(1337) #So that both networks see same sequence of batches
for e in range(nb_epochs):
model.eval()
print('Epoch:', e, ' of ', nb_epochs, 'Average loss:', average_loss_over_epoch)
average_loss_over_epoch = 0
# Checkpoint the model every epoch
torch.save(model.state_dict(), "./models/30EpochC3ExperimentBatchSize" + str(batch_size) + ".pth")
array = np.random.permutation(range(X_train.shape[0]))
slices = X_train.shape[0] // batch_size
beginning = 0
end = 1
# Training loop!
for _ in range(slices):
start_index = batch_size * beginning
end_index = batch_size * end
smpl = array[start_index:end_index]
model.train()
optimizer.zero_grad()
ops = opfun(X_train[smpl]) <-----
tgts = Variable(torch.from_numpy(y_train[smpl]).long().squeeze())
loss_fn = F.nll_loss(ops, tgts)
average_loss_over_epoch += loss_fn.data.numpy() / fractions_of_dataset
loss_fn.backward()
optimizer.step()
beginning += 1
end += 1
And here is where the model is constructed (and the forward method) [vgg.py]
import torch
import torch.nn as nn
F = nn.functional
import torch.utils.model_zoo as model_zoo
import math
__all__ = [
'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
'vgg19_bn', 'vgg19',
]
model_urls = {
'vgg11': 'https://s3.amazonaws.com/pytorch/models/vgg11-fb7e83b2.pth',
'vgg13': 'https://s3.amazonaws.com/pytorch/models/vgg13-58758d87.pth',
'vgg16': 'https://s3.amazonaws.com/pytorch/models/vgg16-82412952.pth',
'vgg19': 'https://s3.amazonaws.com/pytorch/models/vgg19-341d7465.pth',
}
class VGG(nn.Module):
def __init__(self, features):
super(VGG, self).__init__()
self.features = features.cuda()
self.classifier = nn.Sequential(
nn.Dropout(),
nn.Linear(512, 4096),
nn.ReLU(True),
nn.Dropout(),
nn.Linear(4096, 4096),
nn.ReLU(True),
nn.Linear(4096, 10),
)
self._initialize_weights()
def forward(self, x):
device = torch.device('cuda:0')
x.cuda()
x.to(device)
x = self.features(x)
x = x.view(x.size(0), -1)
x = self.classifier(x)
return F.log_softmax(x)
My full source code is here https://github.com/kristyelee/LBSBGenGapSharpnessResearch/blob/master/plot_parametric_pytorch.py
https://github.com/kristyelee/LBSBGenGapSharpnessResearch/blob/master/vgg.py
I read this for some information: RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same, but I'm not sure how to put the input tensors on the GPU. I kind of tried to do this with x.cuda(), but I'm not sure if that's right. I really want to put my model on the GPU so it trains faster (currently I'm just using the CPU, which is slow).
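From what I read there, Tensor.cuda() and Tensor.to() return a new tensor rather than moving the tensor in place, so I guess the result would have to be assigned back, something like this (not sure if it's right):
# in plot_parametric_pytorch.py: move the inputs to the GPU
opfun = lambda X: model.forward(torch.from_numpy(X).to(device))

# in vgg.py: assign the result back instead of discarding it
def forward(self, x):
    x = x.to(torch.device('cuda:0'))  # was: x.cuda() / x.to(device) with the result ignored
    x = self.features(x)
    x = x.view(x.size(0), -1)
    x = self.classifier(x)
    return F.log_softmax(x, dim=1)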
I have around 550K samples, each sample being 200x50x1. The size of this dataset is around 57GB.
I want to train a network on this set but I am having trouble reading it.
batch_size=8
def _read_py_function(filename,labels_slice):
with h5py.File(filename, 'r') as f:
data_slice = np.asarray(f['feats'])
print(data_slice.shape)
return data_slice, labels_slice
placeholder_files = tf.placeholder(tf.string, [None])
placeholder_labels = tf.placeholder(tf.int32, [None])
dataset = tf.data.Dataset.from_tensor_slices((placeholder_files,placeholder_labels))
dataset = dataset.map(
lambda filename, label: tuple(tf.py_func(
_read_py_function, [filename,label], [tf.uint8, tf.int32])))
dataset = dataset.shuffle(buffer_size=50000)
dataset = dataset.batch(batch_size)
iterator = tf.data.Iterator.from_structure(dataset.output_types, dataset.output_shapes)
data_X, data_y = iterator.get_next()
data_y = tf.cast(data_y, tf.int32)
net = conv_layer(inputs=data_X,num_outputs=8, kernel_size=3, stride=2, scope='rcl_0')
net = pool_layer(inputs=net,kernel_size=2,scope='pl_0')
net = dropout_layer(inputs=net,scope='dl_0')
net = flatten_layer(inputs=net,scope='flatten_0')
net = dense_layer(inputs=net,num_outputs=256,scope='dense_0')
net = dense_layer(inputs=net,num_outputs=64,scope='dense_1')
out = dense_layer(inputs=net,num_outputs=10,scope='dense_2')
And I run the session using :
sess.run(train_iterator, feed_dict = {placeholder_files: filenames, placeholder_labels: ytrain})
try:
while True:
_, loss, acc = sess.run([train_op, loss_op, accuracy_op])
train_loss += loss
train_accuracy += acc
except tf.errors.OutOfRangeError:
pass
But I am getting the error even before running the session:
Traceback (most recent call last):
File "SFCC-trial-134.py", line 297, in <module>
net = rcnn_layer(inputs=data_X,num_outputs=8, kernel_size=3, stride=2, scope='rcl_0')
File "SFCC-trial-134.py", line 123, in rcnn_layer
reuse=False)
File "SFCC-trial-134.py", line 109, in conv_layer
reuse = reuse
File "/home/priyam.jain/tensorflow-gpu-python3/lib/python3.5/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 183, in func_with_args
return func(*args, **current_args)
File "/home/priyam.jain/tensorflow-gpu-python3/lib/python3.5/site-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1154, in convolution2d
conv_dims=2)
File "/home/priyam.jain/tensorflow-gpu-python3/lib/python3.5/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 183, in func_with_args
return func(*args, **current_args)
File "/home/priyam.jain/tensorflow-gpu-python3/lib/python3.5/site-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1025, in convolution
(conv_dims + 2, input_rank))
TypeError: %d format: a number is required, not NoneType
I thought about using TFRecords but had a hard time creating them; I couldn't find a good post explaining how to create them for my kind of dataset.
conv_layer is defined as follows:
def conv_layer(inputs, num_outputs, kernel_size, stride, normalizer_fn=None, activation_fn=nn.relu, trainable=True, scope='noname', reuse=False):
net = slim.conv2d(inputs = inputs,
num_outputs = num_outputs,
kernel_size = kernel_size,
stride = stride,
normalizer_fn = normalizer_fn,
activation_fn = activation_fn,
trainable = trainable,
scope = scope,
reuse = reuse
)
return net
Do not pass tf.py_func inside your map function. You can read the file by passing the function name directly inside your map function. I am posting only the relevant parts of the code.
def _read_py_function(filename, label):
return tf.zeros((224, 224, 3), dtype=tf.float32), tf.ones((1,), dtype=tf.int32)
dataset = dataset.map(lambda filename, label: _read_py_function(filename, label))
Another change: your network will expect floating-point input, so you will have to change your tf.uint8 output type to float.
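Something like this, for example (a sketch; the cast is what matters if your features really come out as tf.uint8):
dataset = dataset.map(lambda filename, label: _read_py_function(filename, label))
# cast the uint8 features to float before they reach the conv layers
dataset = dataset.map(lambda feats, label: (tf.cast(feats, tf.float32), label))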
I have written a simple bidirectional LSTM for sentence classification, but it keeps giving me a "You must feed a value for placeholder tensor 'train_x'" error, and it seems to come from the variable initialization step.
data = load_data(FLAGS.data)
model = RNNClassifier(FLAGS)
init = tf.initialize_all_variables()
with tf.Session() as sess:
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
sess.run(init)
print("Graph initialized..")
print()
np.random.seed(FLAGS.random_state)
for epoch in range(FLAGS.max_max_epoch):
loss = sess.run(model.cost, feed_dict={model.train_x: data.train_x, model.train_y: data.train_y,
model.embedding_placeholder: data.glove_vec})
print("Epoch {:2d}: Loss = {:.6f} = {:.5f}".format(epoch+1, loss))
coord.request_stop()
coord.join(threads)
And the RNNClassifier class code (in a different directory):
class RNNClassifier:
def __init__(self, FLAGS):
self.params = FLAGS
with tf.device("/cpu:0"):
self.train_x = tf.placeholder(tf.int32, [6248, 42], name='train_x')
self.train_y = tf.placeholder(tf.int32, [6248, 3], name='train_y')
self.embedding_placeholder = tf.placeholder(tf.float32, [1193515, 100])
with tf.variable_scope('forward_lstm'):
lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(num_units=self.params.num_hidden, use_peepholes=False,
activation=tf.nn.relu, forget_bias=0.0,
state_is_tuple=True)
with tf.variable_scope('backward_lstm'):
lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(num_units=self.params.num_hidden, use_peepholes=False,
activation=tf.nn.relu, forget_bias=0.0,
state_is_tuple=True)
fw_initial_state = lstm_fw_cell.zero_state(self.params.batch_size, tf.float32)
bw_initial_state = lstm_bw_cell.zero_state(self.params.batch_size, tf.float32)
self._initial_state = [fw_initial_state, bw_initial_state]
with tf.device("/cpu:0"), tf.variable_scope('softmax'):
self.W = tf.get_variable('W', [self.params.num_hidden*2, self.params.num_classes])
self.b = tf.get_variable('b', [self.params.num_classes], initializer=tf.constant_initializer(0.0))
batched_inputs, batched_labels = self.batch_data()
embed_inputs = self.use_embedding(batched_inputs)
rnn_outputs, output_state_fw, output_state_bw = tf.nn.bidirectional_rnn(
cell_fw=lstm_fw_cell,
cell_bw=lstm_bw_cell,
inputs=embed_inputs,
initial_state_fw=fw_initial_state,
initial_state_bw=bw_initial_state
)
logits = tf.matmul(rnn_outputs[-1], self.W) + self.b
self._cost = cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, tf.cast(batched_labels, tf.float32)))
optimizer = tf.train.AdamOptimizer(learning_rate=0.05).minimize(cost)
def batch_data(self):
# inputs = tf.convert_to_tensor(train_x, dtype=tf.int32)
# labels = tf.convert_to_tensor(train_y, dtype=tf.int32)
batched_inputs, batched_labels = tf.train.batch(
tensors=[self._train_x, self._train_y],
batch_size=self.params.batch_size,
dynamic_pad=True,
enqueue_many=True,
name='batching'
)
return batched_inputs, batched_labels
def use_embedding(self, batched_inputs):
with tf.device("/cpu:0"), tf.name_scope("input_embedding"):
embedding = tf.get_variable("embedding", shape=[1193515, 100], trainable=False)
embedding_init = embedding.assign(self.embedding_placeholder)
embed_inputs = tf.split(1, self.params.seq_len, tf.nn.embedding_lookup(embedding_init, batched_inputs))
embed_inputs = [tf.squeeze(input_, [1]) for input_ in embed_inputs]
return embed_inputs
@property
def cost(self):
return self._cost
The output (including the error):
I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:925] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
I tensorflow/core/common_runtime/gpu/gpu_init.cc:102] Found device 0 with properties:
name: GeForce GTX 750 Ti
major: 5 minor: 0 memoryClockRate (GHz) 1.0845
pciBusID 0000:01:00.0
Total memory: 2.00GiB
Free memory: 1.41GiB
I tensorflow/core/common_runtime/gpu/gpu_init.cc:126] DMA: 0
I tensorflow/core/common_runtime/gpu/gpu_init.cc:136] 0: Y
I tensorflow/core/common_runtime/gpu/gpu_device.cc:839] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 750 Ti, pci bus id: 0000:01:00.0)
E tensorflow/core/client/tensor_c_api.cc:485] You must feed a value for placeholder tensor 'train_x' with dtype int32 and shape [6248,42]
[[Node: train_x = Placeholder[dtype=DT_INT32, shape=[6248,42], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Graph initialized..
W tensorflow/core/framework/op_kernel.cc:936] Out of range: PaddingFIFOQueue '_0_batching/padding_fifo_queue' is closed and has insufficient elements (requested 50, current size 0)
[[Node: batching = QueueDequeueMany[_class=["loc:#batching/padding_fifo_queue"], component_types=[DT_INT32, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](batching/padding_fifo_queue, batching/n)]]
W tensorflow/core/framework/op_kernel.cc:936] Out of range: PaddingFIFOQueue '_0_batching/padding_fifo_queue' is closed and has insufficient elements (requested 50, current size 0)
[[Node: batching = QueueDequeueMany[_class=["loc:#batching/padding_fifo_queue"], component_types=[DT_INT32, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](batching/padding_fifo_queue, batching/n)]]
E tensorflow/core/client/tensor_c_api.cc:485] PaddingFIFOQueue '_0_batching/padding_fifo_queue' is closed and has insufficient elements (requested 50, current size 0)
[[Node: batching = QueueDequeueMany[_class=["loc:#batching/padding_fifo_queue"], component_types=[DT_INT32, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](batching/padding_fifo_queue, batching/n)]]
[[Node: batching/_9 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/gpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=1, tensor_name="edge_1191_batching", tensor_type=DT_INT32, _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
Traceback (most recent call last):
File "train_lstm.py", line 66, in <module>
model.embedding_placeholder: data.glove_vec})
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 382, in run
run_metadata_ptr)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 655, in _run
feed_dict_string, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 723, in _do_run
target_list, options, run_metadata)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 743, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.OutOfRangeError: PaddingFIFOQueue '_0_batching/padding_fifo_queue' is closed and has insufficient elements (requested 50, current size 0)
[[Node: batching = QueueDequeueMany[_class=["loc:#batching/padding_fifo_queue"], component_types=[DT_INT32, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](batching/padding_fifo_queue, batching/n)]]
[[Node: batching/_9 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/gpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=1, tensor_name="edge_1191_batching", tensor_type=DT_INT32, _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
Caused by op u'batching', defined at:
File "train_lstm.py", line 49, in <module>
model = RNNClassifier(FLAGS)
File "/home/ccrmad/Code/TDLSTM/models/rnn_classifier.py", line 34, in __init__
batched_inputs, batched_labels = self.batch_data()
File "/home/ccrmad/Code/TDLSTM/models/rnn_classifier.py", line 74, in batch_data
name='batching'
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/input.py", line 595, in batch
dequeued = queue.dequeue_many(batch_size, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/data_flow_ops.py", line 435, in dequeue_many
self._queue_ref, n=n, component_types=self._dtypes, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 867, in _queue_dequeue_many
timeout_ms=timeout_ms, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 703, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2310, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1232, in __init__
self._traceback = _extract_stack()
I have tried moving the train_x and train_y placeholder initialization before init = tf.initialize_all_variables() and feeding them to RNNClassifier() as two args, but it still gives the same error. Why?
It looks like the problem is in this method, in RNNClassifier:
def batch_data(self):
# ...
batched_inputs, batched_labels = tf.train.batch(
tensors=[self._train_x, self._train_y],
batch_size=self.params.batch_size,
dynamic_pad=True,
enqueue_many=True,
name='batching')
The two tensor arguments to tf.train.batch() are self._train_x and self._train_y. In the RNNClassifier constructor, it seems like you create these as tf.placeholder() tensors:
def __init__(self, FLAGS):
# ...
with tf.device("/cpu:0"):
self.train_x = tf.placeholder(tf.int32, [6248, 42], name='train_x')
self.train_y = tf.placeholder(tf.int32, [6248, 3], name='train_y')
...though I'm assuming that the difference between self._train_x and self.train_x is a copy-paste error, because self._train_x doesn't seem to be defined anywhere else.
Now, one of the surprising things about tf.train.batch() is that it consumes its inputs in a completely separate thread, called a "queue runner", which is started when you call tf.train.start_queue_runners(). This thread calls Session.run() on a subgraph that depends on your placeholders, but it doesn't know how to feed these placeholders, so it is this call that fails, leading to the error you are seeing.
How then should you fix it? One option would be to use a "feeding queue runner", for which there is experimental support. A simpler option would be to use tf.train.slice_input_producer() to produce slices from your input data, as follows:
def batch_data(self, train_x, train_y):
input_slice, label_slice = tf.train.slice_input_producer([train_x, train_y])
batched_inputs, batched_labels = tf.train.batch(
tensors=[input_slice, label_slice],
batch_size=self.params.batch_size,
dynamic_pad=False, # All rows are the same shape.
enqueue_many=False, # tf.train.slice_input_producer() produces one row at a time.
name='batching')
return batched_inputs, batched_labels
# ...
# Create batches from the entire training data, where `array_input` and
# `array_labels` are two NumPy arrays.
batched_inputs, batched_labels = model.batch_data(array_input, array_labels)
This is what I did:
I could change the way I initialised my input variables as:
data = load_data(FLAGS.data)
model = RNNClassifier(FLAGS, data)
init = tf.initialize_all_variables()
with tf.Session() as sess:
sess.run(init)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
for epoch in range(FLAGS.max_max_epoch):
sess.run(model.train_step)
loss, acc = sess.run([model.mean_cost, model.accuracy])
print("Epoch {:2d}: Loss = {:.6f}; Training Accuracy = {:.5f}".format(epoch+1, loss, acc))
print()
coord.request_stop()
coord.join(threads)
And in RNNClassifier class I could replace
self.train_x = tf.placeholder(tf.int32, [6248, 42], name='train_x')
self.train_y = tf.placeholder(tf.int32, [6248, 3], name='train_y')
self.embedding_placeholder = tf.placeholder(tf.float32, [1193515, 100])
(and remove use_embedding()) to
def __init__(self, FLAGS, data):
self._train_x = tf.convert_to_tensor(data.train_x, dtype=tf.int32)
self._train_y = tf.convert_to_tensor(data.train_y, dtype=tf.int32)
embedding = tf.get_variable("embedding", shape=self.embedding_shape, trainable=False)
self.embedding_init = embedding.assign(data.glove_vec)
This way everything is initialised with RNNClassifier(FLAGS, data) before making the call to the queue runners.
I still don't know why the previous way, using placeholders, didn't work. I have checked the dtype and data shape; they all match. This post seems to have a similar problem, and the "answer" suggests replacing the placeholder with tf.Variable. I have tried this: input values can now be fed, but obviously it doesn't behave the way placeholders do and only feeds the initialised values.
UPDATE: I think the error might have been caused by doing the input batching inside the RNNClassifier class, which somehow affected how the input data was fed into the graph (again, please correct me if you know the cause of the error, thanks!). I have instead relocated my batching and embedding functions into my main code, before initialising the session; this way I think all inputs are defined in the main pipeline, and batching & prefetching are handled inside the TF graph (by not using placeholders).
Here's my code:
def batch(x, y):
with tf.device("/cpu:0"):
x = tf.convert_to_tensor(x, dtype=tf.int32)
y = tf.convert_to_tensor(y, dtype=tf.int32)
batched_x, batched_y = tf.train.batch(
tensors=[x, y],
batch_size=50,
dynamic_pad=True,
enqueue_many=True,
name='batching'
)
return (batched_x, batched_y)
def embed(df):
with tf.device("/cpu:0"), tf.variable_scope("embed"):
embedding = tf.get_variable("embedding", shape=df.embed_shape, trainable=False)
embedding_init = embedding.assign(df.embed_vec)
return embedding_init
data = load_data(FLAGS.data)
pretrained_embed = embed(data)
batched_train_x, batched_train_y = batch(data.train_x, data.train_y)
batched_test_x, batched_test_y = batch(data.test_x, data.test_y)
model = RNNClassifier(FLAGS, pretrained_embed)
logits = model.inference(batched_train_x)
test_pred = model.inference(batched_test_x, reuse=True)
loss = model.loss(logits, batched_train_y)
test_loss = model.loss(test_pred, batched_test_y)
train_op = model.training(loss[0])
init = tf.group(tf.initialize_all_variables(),
tf.initialize_local_variables())
with tf.Session() as sess:
t0 = time.time()
sess.run(init)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
np.random.seed(FLAGS.random_state)
for epoch in range(FLAGS.max_max_epoch):
_, output = sess.run([train_op, loss])
loss_value, acc = output
print("Epoch {:2d}: Loss = {:.6f}; Training Accuracy = {:.5f}".format(epoch+1, loss_value, acc))
print()
coord.request_stop()
coord.join(threads)
And the RNNClassifier:
class RNNClassifier:
def __init__(self, FLAGS, embedding_init):
self.batch_size = FLAGS.batch_size
self.num_hidden = FLAGS.num_hidden
self.num_classes = FLAGS.num_classes
self.seq_len = FLAGS.seq_len
self.embedding_init = embedding_init
def inference(self, batched_inputs, reuse=None):
embed_inputs = tf.nn.embedding_lookup(self.embedding_init, tf.transpose(batched_inputs))
with tf.variable_scope('hidden', reuse=reuse):
with tf.variable_scope('forward_lstm'):
lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(num_units=self.num_hidden, use_peepholes=False,
activation=tf.nn.relu, forget_bias=0.0,
initializer=tf.random_uniform_initializer(-1.0, 1.0),
state_is_tuple=True)
lstm_fw_cell = tf.nn.rnn_cell.DropoutWrapper(cell=lstm_fw_cell, input_keep_prob=0.7)
with tf.variable_scope('backward_lstm'):
lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(num_units=self.num_hidden, use_peepholes=False,
activation=tf.nn.relu, forget_bias=0.0,
initializer=tf.random_uniform_initializer(-1.0, 1.0),
state_is_tuple=True)
lstm_bw_cell = tf.nn.rnn_cell.DropoutWrapper(cell=lstm_bw_cell, input_keep_prob=0.7)
fw_initial_state = lstm_fw_cell.zero_state(self.batch_size, tf.float32)
bw_initial_state = lstm_bw_cell.zero_state(self.batch_size, tf.float32)
rnn_outputs, output_state_fw, output_state_bw = tf.nn.bidirectional_rnn(
cell_fw=lstm_fw_cell,
cell_bw=lstm_bw_cell,
inputs=tf.unpack(embed_inputs),
# sequence_length=self.seq_len,
initial_state_fw=fw_initial_state,
initial_state_bw=bw_initial_state
)
with tf.variable_scope('output', reuse=reuse):
with tf.variable_scope('softmax'):
W = tf.get_variable('W', [self.num_hidden*2, self.num_classes],
initializer=tf.truncated_normal_initializer(stddev=0.1))
b = tf.get_variable('b', [self.num_classes], initializer=tf.constant_initializer(0.1))
logits = tf.matmul(rnn_outputs[-1], W) + b
return logits
def loss(self, logits, labels):
cost = tf.nn.softmax_cross_entropy_with_logits(logits, tf.cast(labels, tf.float32))
# self.total_cost = tf.reduce_sum(self.cost)
mean_cost = tf.reduce_mean(cost)
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
return mean_cost, accuracy
def training(self, cost):
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
train_op = optimizer.minimize(cost)
return train_op
On a side note, I wish the TensorFlow community on Stack Overflow were more active.