Related
I am trying to implement a custom modified ReLU in Tensorflow 1, in which I use two learnable parameters. But the parameters are not getting learnt even after running 1000 training steps, as suggested by printing their values before and after training. I have observed that inside the function, when I do not split x, i.e. execute the commented lines, then the coefficients are learnt. Could anyone suggest why splitting the input results in the trainable coefficients not being learnt and how this can be resolved?
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()
def weight_variable(shape,vari_name):
initial = tf.truncated_normal(shape, stddev=0.1,dtype=tf.float32)
return tf.Variable(initial,name = vari_name)
def init_Prelu_coefficient(var1, var2):
coeff = tf.truncated_normal(([1]), stddev=0.1,dtype=tf.float32)
coeff1 = tf.truncated_normal(([1]), stddev=0.1,dtype=tf.float32)
return tf.Variable(coeff, trainable=True, name=var1), tf.Variable(coeff1, trainable=True, name=var2)
def Prelu(x, coeff, coeff1):
s = int(x.shape[-1])
sop = x[:,:,:,:s//2]*coeff+x[:,:,:,s//2:]*coeff1
sop1 = x[:,:,:,:s//2]*coeff-x[:,:,:,s//2:]*coeff1
copied_variable = tf.concat([sop, sop1], axis=-1)
copied_variable = tf.math.maximum(copied_variable,0)/copied_variable
# copied_variable = tf.identity(x)
# copied_variable = tf.math.maximum(copied_variable*coeff+copied_variable*coeff1,0)/copied_variable
# copied_variable = tf.multiply(copied_variable,x)
return copied_variable
def conv2d_dilate(x, W, dilate_rate):
return tf.nn.convolution(x, W,padding='VALID',dilation_rate = [1,dilate_rate])
matr = np.random.rand(1, 60, 40, 8)
target = np.random.rand(1, 58, 36, 8)
def learning(sess):
# define placeholder for inputs to network
Input = tf.placeholder(tf.float32, [1, 60, 40, 8])
input_Target = tf.placeholder(tf.float32, [1, 58, 36, 8])
kernel = weight_variable([3, 3, 8, 8],'G1')
coeff, coeff1 = init_Prelu_coefficient('alpha', 'alpha1')
conv = Prelu(conv2d_dilate(Input, kernel , 2), coeff, coeff1)
error_norm = 1*tf.norm(input_Target - conv)
print("MOMENTUM LEARNING")
train_step = tf.train.MomentumOptimizer(learning_rate=0.001,momentum=0.9,use_nesterov=False).minimize(error_norm)
if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
init = tf.initialize_all_variables()
else:
init = tf.global_variables_initializer()
sess.run(init)
print("INIT coefficient ", sess.run(coeff), sess.run(coeff1))
init_var = tf.trainable_variables()
error_prev = 1 # initial error, we set 1 and it began to decrease.
for i in range(1000):
sess.run(train_step, feed_dict={Input: matr, input_Target: target})
if i % 100 == 0:
error_now=sess.run(error_norm,feed_dict={Input : matr, input_Target: target})
print('The',i,'th iteration gives an error',error_now)
error = sess.run(error_norm,feed_dict={Input: matr, input_Target: target})
print(sess.run(kernel))
print("LEARNT coefficient ", sess.run(coeff), sess.run(coeff1))
sess = tf.Session()
learning(sess)
I am trying to create a GNN that models a protein. However, I am running into an error with GraphConv (I get the same error with GCNConv). I do not understand why I am getting this error when the shapes should be able to be multiplied. I think the error must have something to do with the custom dataset I created, but I am not 100% sure. Please let me know if you have had a similar issue or know how to fix this. Thank you.
EDIT: Even if I change embedding_size to 1479, I still get: RuntimeError: mat1 and mat2 shapes cannot be multiplied (1479x1 and 1479x1479).
Custom dataset:
class ProteinDataset(geom_data.Dataset):
def __init__(self, root, transform=None, pre_transform=None):
# root = where data set is stored
super(ProteinDataset, self).__init__(root, transform, pre_transform)
self.root = root
#property
def raw_file_names(self):
return os.listdir(f'{self.root}/raw')
#property
def processed_file_names(self):
inxs = []
for pdb in self.raw_paths:
inxs.append(pdb.split('/')[-1].split('.p')[0])
return [f'{i}.pt' for i in inxs]
def download(self):
pass
def process(self):
for pdb in self.raw_paths:
try:
mol_obj = Chem.rdmolfiles.MolFromPDBFile(pdb)
except AttributeError:
os.remove(pdb)
continue
# Get node features
node_feats = self._get_node_features(mol_obj).reshape([-1,1])
# Get edge features
edge_feats = self._get_edge_features(mol_obj).reshape([-1,1])
# Get adjacency info
edge_index = self._get_adjacency_info(mol_obj)
label = self._get_labels(pdb)
# Create Data object
data = geom_data.Data(x=node_feats,
edge_index=edge_index,
edge_attr=edge_feats,
y=label)
i = pdb.split('/')[-1].split('.p')[0]
torch.save(data, os.path.join(self.processed_dir,f'{i}.pt'))
def _get_node_features(self, mol):
all_node_feats = []
for atom in mol.GetAtoms():
all_node_feats.append(atom.GetMass())
all_node_feats = np.asarray(all_node_feats)
return torch.tensor(all_node_feats, dtype=torch.float)
def _get_edge_features(self, mol):
all_edge_feats = []
dists = Chem.rdmolops.Get3DDistanceMatrix(mol)
# CA-CA Distances
for bond in mol.GetBonds():
begin = bond.GetBeginAtomIdx()
end = bond.GetEndAtomIdx()
all_edge_feats.append(dists[begin,end])
all_edge_feats = np.asarray(all_edge_feats)
return torch.tensor(all_edge_feats, dtype=torch.float)
def _get_adjacency_info(self, mol):
adj_matrix = Chem.rdmolops.GetAdjacencyMatrix(mol)
row, col = np.where(adj_matrix)
coo = np.array(list(zip(row, col)))
coo = np.reshape(coo, (2, -1))
return torch.tensor(coo, dtype=torch.long)
def _get_labels(self, fn):
with open(fn, 'r') as f:
label = float(f.readline())
f.close()
label = np.asarray([label])
return torch.tensor(label, dtype=torch.float)
def len(self):
return len(self.raw_paths)
def get(self, inx):
data = torch.load(self.processed_paths[inx])
return data
Model:
class GNN(torch.nn.Module):
def __init__(self, feature_size):
super(GNN, self).__init__()
embedding_size = 1024
# GNN Layers
self.conv1 = GraphConv(feature_size, embedding_size)
self.head1 = Linear(embedding_size*3, embedding_size)
self.pool1 = TopKPooling(embedding_size, ratio=0.8)
self.conv2 = GraphConv(embedding_size, embedding_size)
self.head2 = Linear(embedding_size*3, embedding_size)
self.pool2 = TopKPooling(embedding_size, ratio=0.5)
self.conv3 = GraphConv(embedding_size, embedding_size)
self.head3 = Linear(embedding_size*3, embedding_size)
self.pool3 = TopKPooling(embedding_size, ratio=0.2)
# Linear Layers
self.fc1 = Linear(embedding_size*2, 1024)
self.fc2 = Linear(1024, 128)
self.fc3 = Linear(128, 1)
def forward(self, x, edge_attr, edge_index, batch_index):
# First block
x = self.conv1(x, edge_index).relu()
x = self.head1(x)
x, edge_index, edge_attr, batch_index, _, _ = self.pool1(x,
edge_index,
None,
batch_index)
x1 = torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1)
# Second block
x = self.conv2(x, edge_index).relu()
x = self.head2(x)
x, edge_index, edge_attr, batch_index, _, _ = self.pool2(x,
edge_index,
None,
batch_index)
x2 = torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1)
# Third block
x = self.conv3(x, edge_index).relu()
x = self.head3(x)
x, edge_index, edge_attr, batch_index, _, _ = self.pool3(x,
edge_index,
None,
batch_index)
x3 = torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1)
# Concat pooled vectors
x = x1 + x2 + x3
# Apply Linear Layers
x = self.fc1(x).relu()
x = self.fc2(x).relu()
x = self.fc3(x)
return x
Training:
device = torch.device('cuda')
def count_parameters(model):
return sum(p.numel() for p in model.parameters() if p.requires_grad)
# Loading the dataset
train_set = ProteinDataset(root='data/lys50_2/train')
test_set = ProteinDataset(root='data/lys50_2/test')
print('Shape of input:', train_set[0].x.shape[0])
# Loading the model
model = GNN(feature_size=train_set[0].x.shape[0])
model = model.to(device)
print(f'Number of parameters: {count_parameters(model)}')
print(model)
# Loss and Optimizer
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.00005)
print(optimizer)
# Prepare for training
train_loader = DataLoader(train_set, batch_size=1, shuffle=True)
test_loader = DataLoader(test_set, batch_size=1, shuffle=False)
def train(m,opt):
loss_sum = 0.0
for _, batch in enumerate(train_loader):
# Use GPU
batch.to(device)
# Reset grad
opt.zero_grad()
# Pass node features and connections
pred = m(batch.x.float(),
batch.edge_attr.float(),
batch.edge_index,
batch.batch)
# Calculate loss and gradients
loss = loss_fn(pred, batch.y)
loss.backward()
loss_sum += loss.item()
# Update using the gradients
opt.step()
return loss_sum / len(train_loader)
def validate(m):
loss_sum = 0.0
for _, batch in enumerate(test_loader):
for _, batch in enumerate(test_loader):
# Use GPU
batch.to(device)
# No grad
with torch.no_grad():
pred = m(batch.x.float(),
batch.edge_attr.float(),
batch.edge_index,
batch.batch)
# Calculate loss and gradients
loss = loss_fn(pred, batch.y)
loss_sum += loss.item()
return loss_sum / len(test_loader)
model.zero_grad()
optimizer.zero_grad()
# Loop for training
for i in range(101):
loss = train(model,optimizer)
if (i%10==0):
loss_v = validate(model)
print(i, loss, loss_v)
else:
print(i, loss)
Error when running training:
Traceback (most recent call last):
File "/home/spencer/sh3/gnn/./train.py", line 79, in <module>
loss = train(model,optimizer)
File "/home/spencer/sh3/gnn/./train.py", line 44, in train
pred = m(batch.x.float(),
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/feig/s1/spencer/sh3/gnn/model2.py", line 32, in forward
x = self.conv1(x, edge_index).relu()
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch_geometric/nn/conv/graph_conv.py", line 71, in forward
out = self.lin_rel(out)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch_geometric/nn/dense/linear.py", line 109, in forward
return F.linear(x, self.weight, self.bias)
File "/home/spencer/miniconda3/lib/python3.9/site-packages/torch/nn/functional.py", line 1848, in linear
return torch._C._nn.linear(input, weight, bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (1479x1 and 1479x1024)
The error tells you that input shapes don't match.
You can reshape the input in the forward method like this: x = x.view(1, 1479) but make sure that this is what you need - this error usually indicates wrongly shaped dataset or passing the wrong input.
I am trying to build an implicit quantile network. I build a custom loss function but do not get it working. I get the error 'no gradients available' but I belief I only use functions that should provide gradients, like tf.tile and stuff. I dont explicityly cast something in my loss_kv_iq() function.
Below I provide the code for my custom layer ( IQNlayer ) , the network I use (IQN), and my custom loss function. Also a small piece of code in the main that should be able to reproduce the error.
TF version: 2.1.0
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
class IQN(keras.Model):
def __init__(self, quantile_dims, fc_dims, n_actions, n_quantiles):
super(IQN, self).__init__()
self.n_quantiles = n_quantiles
initializer = keras.initializers.he_uniform()
self.iq = IQNlayer(quantile_dims, n_quantiles)
self.dense = keras.layers.Dense(fc_dims, activation='relu', kernel_initializer = initializer)
self.out = keras.layers.Dense(n_actions, activation = None)
def call(self, state, tau):
batch_size, state_size = state.shape
x = self.iq(state, tau)
x = self.dense(x)
x = self.out(x)
x = tf.transpose(tf.split(x, batch_size, axis=0), perm=[0, 2, 1])
return x
class IQNlayer(keras.layers.Layer):
def __init__(self, quantile_dims, n_quantiles):
super(IQNlayer, self).__init__()
self.quantile_dims = quantile_dims
self.n_quantiles = n_quantiles
self.fc1 = keras.layers.Dense(self.quantile_dims, activation = tf.nn.selu)
self.fc2 = keras.layers.Dense(self.quantile_dims, activation = tf.nn.relu)
def call(self, state, tau):
batch_size, state_size = state.shape
state_tile = tf.tile(state, [1, self.n_quantiles])
state_reshape = tf.reshape(state_tile, [-1, state_size])
state_net = self.fc1(state_reshape)
tau = tf.reshape(tau, [-1, 1])
pi_mtx = tf.constant(np.expand_dims(np.pi * np.arange(0, 64), axis=0), dtype=tf.float32)
cos_tau = tf.cos(tf.matmul(tau, pi_mtx))
phi = self.fc2(cos_tau)
net = tf.multiply(state_net, phi)
return net
def loss_kv_iq(x, tau, action_hot, theta_target):
expand_dim_action = tf.expand_dims(action_hot, -1)
main_support = tf.reduce_sum(x * expand_dim_action, axis=1)
theta_loss_tile = tf.tile(tf.expand_dims(main_support, axis=2), [1, 1, N_QUANTILES])
logit_valid_tile = tf.tile(tf.expand_dims(theta_target, axis=1), [1, N_QUANTILES, 1])
Huber_loss = hloss(logit_valid_tile, theta_loss_tile)
inv_tau = 1 - tau
tau = tf.tile(tf.expand_dims(tau, axis=1), [1, N_QUANTILES, 1])
inv_tau = tf.tile(tf.expand_dims(inv_tau, axis=1), [1, N_QUANTILES, 1])
error_loss = logit_valid_tile - theta_loss_tile
Loss = tf.where(tf.less(error_loss, 0.0), inv_tau * Huber_loss, tau * Huber_loss)
loss = tf.reduce_mean(tf.reduce_sum(tf.reduce_mean(Loss, axis=2), axis=1))
return loss
if __name__ == '__main__':
hloss = tf.keras.losses.Huber(reduction = tf.keras.losses.Reduction.NONE)
N_QUANTILES = 10
BATCH_SIZE = 2
ACTION_SIZE = 5
STATE_SIZE = 16
# FOR EXAMPLE: RANDOM BATCH
cs = np.random.rand(BATCH_SIZE,STATE_SIZE)
a = np.random.randint(0,5,size=(2))
r = np.random.randint(0,500,size=(2))
ns = np.random.rand(BATCH_SIZE,STATE_SIZE)
tau = np.random.uniform(size=(BATCH_SIZE, N_QUANTILES))
tau = tau.astype('float32')
iq = IQN(128,128,ACTION_SIZE,N_QUANTILES)
action_hot = np.zeros((BATCH_SIZE,ACTION_SIZE), dtype = np.float32)
action_hot[np.arange(BATCH_SIZE), a] = 1
Q = iq(ns, tau)
theta_target = np.random.rand(BATCH_SIZE,N_QUANTILES)
theta_target = theta_target.astype('float32')
optimizer = tf.keras.optimizers.Adam(lr = 1e-3)
with tf.GradientTape() as tape:
loss = loss_kv_iq(Q, tau, action_hot, theta_target)
grads = tape.gradient(loss, iq.trainable_weights)
optimizer.apply_gradients(zip(grads,iq.trainable_weights))
Error:
Traceback (most recent call last):
File "C:\Users\rensj\.spyder-py3\Thesis\test.py", line 106, in <module>
optimizer.apply_gradients(zip(grads,iq.trainable_weights))
File "C:\Users\rensj\Anaconda3\envs\tfnew\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 426, in apply_gradients
grads_and_vars = _filter_grads(grads_and_vars)
File "C:\Users\rensj\Anaconda3\envs\tfnew\lib\site-packages\tensorflow_core\python\keras\optimizer_v2\optimizer_v2.py", line 1039, in _filter_grads
([v.name for _, v in grads_and_vars],))
ValueError: No gradients provided for any variable: ['iqn_4/iq_nlayer_4/dense_16/kernel:0', 'iqn_4/iq_nlayer_4/dense_16/bias:0', 'iqn_4/iq_nlayer_4/dense_17/kernel:0', 'iqn_4/iq_nlayer_4/dense_17/bias:0', 'iqn_4/dense_18/kernel:0', 'iqn_4/dense_18/bias:0', 'iqn_4/dense_19/kernel:0', 'iqn_4/dense_19/bias:0'].
EDIT:
As mister Agrawal pointed out, I use numpy operation in pi_mtx. I changed these to their tensorflow counterparts, and together with some other small change to the same line, this becomes:
pi_mtx = tf.constant(tf.expand_dims(tf.constant(np.pi) * tf.range(0, 64, dtype=tf.float32), axis=0), dtype=tf.float32)
However, I keep having the same ValueError: No gradients provided
In the line
pi_mtx = tf.constant(np.expand_dims(np.pi * np.arange(0, 64), axis=0), dtype=tf.float32)
You're using numpy functions. Change them to their tensorflow counterparts.
np.expand_dims -> tf.expand_dims
np.arange -> tf.keras.backend.arange OR tf.range
You can use np.pi, since that is a constant, not an operation.
In the following code, I wanted to train a model in tensorflow. the model is a ResNet model, a deep one, hence the batch should be small for data/all activations to fit in memory. For this reason, I have implemented a custom optimizer that accumulates the gradients over the different fed mini-batchs, and finally apply the gradient descent once. In addition, I have used tf.data api to fetch data from tfrecords which I created. Please note that my input data are video frames; and the detected variable used indicated whether a face is detected in a certain frame or not. Hence, detected is used only for MSE (Just for clarification).
import tensorflow as tf
import numpy as np
import csv
import os
num_epoch = 100
latent_dim = 100
cell_size = 100
# for each input frame, I have 3 outputs.
num_classes = 3
common = "C:/Users/user/Documents/SEWA_db/tfrecords_db/"
filenames_train = []
filenames_dev = []
for i in range(1, 35):
filenames_train.append(common + "Train_DE_{num:02d}.tfrecords".format(num=i))
for i in range(1, 15):
filenames_dev.append(common + "Devel_DE_{num:02d}.tfrecords".format(num=i))
phase_train = tf.placeholder_with_default(True, shape=(), name='phase')
train_batch_size = 5
test_batch_size = 5
tf.set_random_seed(123)
mseed = 123
# this method is used within the model()...
def create_variables(name, shape, initializer=tf.contrib.layers.xavier_initializer(), weight_decay=0.0001):
'''
:param name: A string. The name of the new variable
:param shape: A list of dimensions
:param initializer: User Xavier as default.
:param is_fc_layer: Want to create fc layer variable? May use different weight_decay for fc
layers.
:return: The created variable
'''
## TODO: to allow different weight decay to fully connected layer and conv layer
regularizer = tf.contrib.layers.l2_regularizer(scale=weight_decay)
new_variables = tf.get_variable(name, shape=shape, initializer=initializer,
regularizer=regularizer)
return new_variables
def model(inputs, n):
....
# predictions shape: (batch_size, 3)
return predictions
# loss function:
summaries_while_testing = []
summaries_while_training = []
def loss(predictions, labels, detected, name_scope, train_test):
# MSE
with tf.name_scope(name_scope):
MSE = tf.square(tf.subtract(predictions, labels))
MSE = tf.boolean_mask(MSE, detected)
MSE = tf.reduce_mean(MSE)
if train_test == 'Train':
reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
MSE += tf.reduce_sum(reg_losses)
loss_s = tf.summary.scalar('MSE', MSE)
summaries_while_training.append(loss_s)
else:
loss_s = tf.summary.scalar('MSE', MSE)
summaries_while_testing.append(loss_s)
return MSE
# optimizer:
def optimize(mse):
with tf.name_scope('Optimizer'):
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
trainable_variables = tf.trainable_variables()
accum_vars = [tf.Variable(tf.zeros_like(single_tr_variable.value()), trainable=False)
for single_tr_variable in trainable_variables]
# This is used as a rest mode between different training iterations...
zero_ops = [tv.assign(tf.zeros_like(tv)) for tv in accum_vars]
grads_vars = optimizer.compute_gradients(mse, trainable_variables)
accum_ops = [accum_vars[i].assign_add(gv[0]) for i, gv in enumerate(grads_vars) if gv[0] is not None]
train_step = optimizer.apply_gradients([(accum_vars[i], gv[1]) for i, gv in enumerate(grads_vars)])
return train_step, accum_ops, zero_ops
# retrieve data section
def _parse_function(example_proto):
# The annotation contains the following features: timestamp; arousal; valence; liking
features = {
'height': tf.FixedLenFeature([], tf.int64),
'width': tf.FixedLenFeature([], tf.int64),
'image_raw': tf.FixedLenFeature([], tf.string),
'frame_number': tf.FixedLenFeature([1], tf.int64),
'detected': tf.FixedLenFeature([1], tf.int64),
'arousal': tf.FixedLenFeature([1], tf.float32),
'valence': tf.FixedLenFeature([1], tf.float32),
'liking': tf.FixedLenFeature([1], tf.float32)
}
parsed_features = tf.parse_single_example(example_proto, features)
# This is how we create one example, that is, extract one example from the database.
image = tf.decode_raw(parsed_features['image_raw'], tf.uint8)
# The height and the weights are used to
height = tf.cast(parsed_features['height'], tf.int32)
width = tf.cast(parsed_features['width'], tf.int32)
# The image is reshaped since when stored as a binary format, it is flattened. Therefore, we need the
# height and the weight to restore the original image back.
# Tensor("Reshape:0", shape=(112, 112, 3), dtype=uint8)
image = tf.reshape(image, [112, 112, 3])
detected = parsed_features['detected']
arousal = parsed_features['arousal']
valence = parsed_features['valence']
liking = parsed_features['liking']
return detected, arousal, valence, liking, image
############################### TRAINING ###################################
datasets_train_iterators = []
for file_name in filenames_train:
dataset_train = tf.data.TFRecordDataset(file_name).map(_parse_function).batch(train_batch_size)
datasets_train_iterators.append(dataset_train)
dataset_train_all = tf.data.Dataset.zip(tuple(datasets_train_iterators))
iterator_train_all = dataset_train_all.make_initializable_iterator()
def retrieve_inputs_train():
next_batch = iterator_train_all.get_next()
detected = []
arousal = []
valence = []
liking = []
images = []
for n in next_batch:
detected.append(n[0])
arousal.append(n[1])
valence.append(n[2])
liking.append(n[3])
images.append(n[4])
detected = tf.concat(detected, axis=0)
arousal = tf.concat(arousal, axis=0)
valence = tf.concat(valence, axis=0)
liking = tf.concat(liking, axis=0)
images = tf.concat(images, axis=0)
return detected, arousal, valence, liking, images
############################### TESTING ###################################
datasets_dev_iterators = []
for file_name in filenames_dev:
dataset_dev = tf.data.TFRecordDataset(file_name).map(_parse_function).batch(test_batch_size)
datasets_dev_iterators.append(dataset_dev)
dataset_dev_all = tf.data.Dataset.zip(tuple(datasets_dev_iterators))
iterator_dev_all = dataset_dev_all.make_initializable_iterator()
def retrieve_inputs_dev():
next_batch = iterator_dev_all.get_next()
detected = []
arousal = []
valence = []
liking = []
images = []
for n in next_batch:
detected.append(n[0])
arousal.append(n[1])
valence.append(n[2])
liking.append(n[3])
images.append(n[4])
detected = tf.concat(detected, axis=0)
arousal = tf.concat(arousal, axis=0)
valence = tf.concat(valence, axis=0)
liking = tf.concat(liking, axis=0)
images = tf.concat(images, axis=0)
return detected, arousal, valence, liking, images
# preparing model before training
detected, arousal, valence, liking, images = tf.cond(phase_train,
lambda: retrieve_inputs_train(),
lambda: retrieve_inputs_dev())
images_casted = tf.cast(images, tf.float32)
with tf.name_scope('image_normal'):
images_casted_normalized = tf.map_fn(lambda img: tf.image.per_image_standardization(img), images_casted)
# shape of predictions: (680, 3) -> 3 since we are outputing arousal, valence and liking
# the n parameter is for Resnet configuration... Not important for now
predictions = model(images_casted_normalized, n=[3, 4, 6, 3])
predicted_arousal = tf.slice(predictions, begin=[0, 0], size=[-1, 1], name='predicted_arousal')
predicted_valence = tf.slice(predictions, begin=[0, 1], size=[-1, 1], name='predicted_valence')
predicted_liking = tf.slice(predictions, begin=[0, 2], size=[-1, 1], name='predicted_liking')
MSE_a = tf.cond(phase_train,
lambda: loss(predicted_arousal, arousal, detected, 'MSE_arousal_Train', 'Train'),
lambda: loss(predicted_arousal, arousal, detected, 'MSE_arousal_Devel', 'Devel'))
MSE_v = tf.cond(phase_train,
lambda: loss(predicted_valence, valence, detected, 'MSE_valence_Train', 'Train'),
lambda: loss(predicted_valence, valence, detected, 'MSE_valence_Devel', 'Devel'))
MSE_l = tf.cond(phase_train,
lambda: loss(predicted_liking, liking, detected, 'MSE_liking_Train', 'Train'),
lambda: loss(predicted_liking, liking, detected, 'MSE_liking_Devel', 'Devel'))
MSE = MSE_a + MSE_v + MSE_l
train_step, accum_ops, zero_ops = optimize(MSE)
init_op = tf.global_variables_initializer()
model_path = "C:/Users/user/Documents/f24/model"
events_path = "C:/Users/user/Documents/f24/event_files/34_layers"
with tf.Session() as sess:
sess.run(init_op)
train_writer = tf.summary.FileWriter(events_path, sess.graph)
merged_train = tf.summary.merge(summaries_while_training)
merged_val = tf.summary.merge(summaries_while_testing)
sess.run(iterator_train_all.initializer)
sess.run(iterator_dev_all.initializer)
Finally, I am getting the following error:
FailedPreconditionError: Attempting to use uninitialized value conv3_1/conv2_in_block/conv
[[Node: conv3_1/conv2_in_block/conv/read = Identity[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](conv3_1/conv2_in_block/conv)]]
During handling of the above exception, another exception occurred:
FailedPreconditionError Traceback (most recent call last)
<ipython-input-11-dbe6d12c67ce> in <module>()
7
8 for v in accum_vars:
----> 9 sess.run(v.initializer)
10
11 sess.run(init_op)
...
File "<ipython-input-10-8d7d7b4aa814>", line 10, in <module>
predictions = model(images_casted_normalized, n=[3, 4, 6, 3])
File "<ipython-input-5-fae307f9536f>", line 25, in model
conv3 = residual_block(layers[-1], 256, is_training=phase_train)
File "<ipython-input-4-d8a2d1403f18>", line 97, in residual_block
conv2 = bn_relu_conv_layer(conv1, [3, 3, output_channel, output_channel], 1, is_training=is_training)
File "<ipython-input-4-d8a2d1403f18>", line 61, in bn_relu_conv_layer
filter = create_variables(name='conv', shape=filter_shape)
File "<ipython-input-4-d8a2d1403f18>", line 15, in create_variables
regularizer=regularizer)
File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 1317, in get_variable
constraint=constraint)
Now when I remove these 2 lines in the optimize(), my code works fine, but I know that this is wrong.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
Or, if I use the following code for the optimizer, my code runs fine.
def optimize(mse):
with tf.name_scope('Optimizer'):
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_step = optimizer.minimize(mse)
return train_step
To me, this is weird and strange. I would love to know the reason for the error I am getting.
Any help is much appreciated!!
I have the following code
flags = tf.flags
logging = tf.logging
flags.DEFINE_string('model', 'small',
'A type of model. Possible options are: small, medium, large.'
)
flags.DEFINE_string('data_path', None, 'data_path')
flags.DEFINE_string('checkpoint_dir', 'ckpt', 'checkpoint_dir')
flags.DEFINE_bool('use_fp16', False,
'Train using 16-bit floats instead of 32bit floats')
flags.DEFINE_bool('train', False, 'should we train or test')
FLAGS = flags.FLAGS
def data_type():
return tf.float16 if FLAGS.use_fp16 else tf.float32
class PTBModel(object):
"""The PTB model."""
def __init__(self, is_training, config):
self.batch_size = batch_size = config.batch_size
self.num_steps = num_steps = config.num_steps
size = config.hidden_size
vocab_size = config.vocab_size
self._input_data = tf.placeholder(tf.float32, [batch_size,
num_steps])
self._targets = tf.placeholder(tf.int32, [batch_size,
num_steps])
# Slightly better results can be obtained with forget gate biases
# initialized to 1 but the hyperparameters of the model would need to be
# different than reported in the paper.
lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(size, forget_bias=0.0,
state_is_tuple=True)
if is_training and config.keep_prob < 1:
lstm_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell,
output_keep_prob=config.keep_prob)
cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell]
* config.num_layers, state_is_tuple=True)
self._initial_state = cell.zero_state(batch_size, data_type())
with tf.device('/cpu:0'):
embedding = tf.get_variable('embedding', [vocab_size,
size], dtype=data_type())
inputs = tf.nn.embedding_lookup(embedding, self._input_data)
if is_training and config.keep_prob < 1:
inputs = tf.nn.dropout(inputs, config.keep_prob)
# Simplified version of tensorflow.models.rnn.rnn.py's rnn().
# This builds an unrolled LSTM for tutorial purposes only.
# In general, use the rnn() or state_saving_rnn() from rnn.py.
#
# The alternative version of the code below is:
#
# from tensorflow.models.rnn import rnn
inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(inputs, num_steps, axis=1)]
(outputs, state) = tf.nn.rnn(cell, inputs, initial_state=self._initial_state)
# outputs = []
# state = self._initial_state
# with tf.variable_scope("RNN"):
# for time_step in range(num_steps):
# if time_step > 0: tf.get_variable_scope().reuse_variables()
# (cell_output, state) = cell(inputs[:, time_step, :], state)
# outputs.append(cell_output)
output = tf.reshape(tf.concat(outputs, axis=1), [-1, size])
softmax_w = tf.get_variable('softmax_w', [size, vocab_size],
dtype=data_type())
softmax_b = tf.get_variable('softmax_b', [vocab_size],
dtype=data_type())
logits = tf.matmul(output, softmax_w) + softmax_b
loss = tf.nn.seq2seq.sequence_loss_by_example([logits],
[tf.reshape(self._targets, [-1])], [tf.ones([batch_size
* num_steps],
dtype=data_type())])
self._cost = cost = tf.reduce_sum(loss) / batch_size
self._final_state = state
# RANI
self.logits = logits
if not is_training:
return
self._lr = tf.Variable(0.0, trainable=False)
tvars = tf.trainable_variables()
(grads, _) = tf.clip_by_global_norm(tf.gradients(cost, tvars),
config.max_grad_norm)
optimizer = tf.train.GradientDescentOptimizer(self._lr)
self._train_op = optimizer.apply_gradients(zip(grads, tvars))
self._new_lr = tf.placeholder(tf.float32, shape=[],
name='new_learning_rate')
self._lr_update = tf.assign(self._lr, self._new_lr)
def assign_lr(self, session, lr_value):
session.run(self._lr_update, feed_dict={self._new_lr: lr_value})
...
However, When I run it, I get the following errors
File "ptb_word_lm.py", line 349, in <module>
tf.app.run()
File "C:\Users\Josh Goldman\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\platform\app.py", line 48, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough))
File "ptb_word_lm.py", line 299, in main
m = PTBModel(is_training=True, config=config)
File "ptb_word_lm.py", line 60, in __init__
inputs = tf.nn.embedding_lookup(embedding, self._input_data)
File "C:\Users\Josh Goldman\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\embedding_ops.py", line 122, in embedding_lookup
return maybe_normalize(_do_gather(params[0], ids, name=name))
File "C:\Users\Josh Goldman\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\embedding_ops.py", line 42, in _do_gather
return array_ops.gather(params, ids, name=name)
File "C:\Users\Josh Goldman\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 1179, in gather
validate_indices=validate_indices, name=name)
File "C:\Users\Josh Goldman\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 589, in apply_op
param_name=input_name)
File "C:\Users\Josh Goldman\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 60, in _SatisfiesTypeConstraint
", ".join(dtypes.as_dtype(x).name for x in allowed_list)))
TypeError: Value passed to parameter 'indices' has DataType float32 not in list of allowed values: int32, int64
Someone, please help me. I have all my packages upgraded to the newest version. I'm using the correct interpreter. I'm sorry if the error is very simple. I'm only 13 and am very new to programming. By the way, this code is not mine; I got it from Github.
The error is due to tensorflow version, syntax of tf.split is changed in the newer version. there is another same problem with tf.concat
# replace this line with the following one
inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(1, num_steps, inputs)]
# this support `tensorflow >= 1.0.0`
inputs = [tf.squeeze(input_, [1]) for input_ in tf.split(inputs, num_steps, axis=1)]
# Also use dtype float32 for inputs
self._input_data = tf.placeholder(tf.float32, [batch_size,
num_steps])
# replace this line
output = tf.reshape(tf.concat(1, outputs), [-1, size])
# with this one
output = tf.reshape(tf.concat(outputs, axis=1), [-1, size])