I am new to deep learning and TensorFlow. I have the following code. Whenever I run it, my system administrator notifies me that it is running on the CPU and not the GPU, even though we have a GPU in the system and I have only installed tensorflow-gpu. What changes should I make to my code so that it runs on the GPU instead of the CPU?
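For reference, a minimal check of whether TensorFlow can see the GPU at all (assuming TensorFlow 2.x) would be:
import tensorflow as tf
# An empty list here means TensorFlow will silently fall back to the CPU.
print(tf.config.list_physical_devices('GPU'))
print(tf.test.is_built_with_cuda())  # whether this TensorFlow build has CUDA support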
import math
import tempfile
import numpy as np
from tensorflow.python.keras.layers import BatchNormalization, Conv2D, Dense, Flatten, MaxPooling2D
from tensorflow.python.keras.models import Sequential
import fastestimator as fe
from fastestimator.dataset.data import cifair10
from fastestimator.architecture.tensorflow import WideResidualNetwork
from fastestimator.op.numpyop.meta import Sometimes
from fastestimator.op.numpyop.multivariate import HorizontalFlip, PadIfNeeded, RandomCrop
from fastestimator.op.numpyop.univariate import CoarseDropout, Normalize
from fastestimator.op.tensorop.loss import CrossEntropy, SuperLoss
from fastestimator.op.tensorop.model import ModelOp, UpdateOp
from fastestimator.trace.io import BestModelSaver
from fastestimator.trace.metric import MCC, Accuracy
from fastestimator.trace.xai import LabelTracker
#training parameters
epochs = 100
batch_size = 128
max_train_steps_per_epoch = None
max_eval_steps_per_epoch = None
save_dir = tempfile.mkdtemp()
train_data, eval_data = cifair10.load_data()
test_data = eval_data.split(0.5)
def corrupt_dataset(dataset, n_classes=10, corruption_fraction=0.4):
# Keep track of which samples were corrupted for visualization later
corrupted = [0 for _ in range(len(dataset))]
# Perform the actual label corruption
n_samples_per_class = len(dataset) // n_classes # dataset size 50000
# n_classes - 10
# n_samples_per_class - 5000
n_to_corrupt_per_class = math.floor(corruption_fraction * n_samples_per_class) # 2000
n_corrupted = [0] * n_classes
i = 0
while any([elem < n_to_corrupt_per_class for elem in n_corrupted]): # while any class is left to be corrupted
current_class = dataset[i]['y'].item()
if n_corrupted[current_class] < n_to_corrupt_per_class: # check whether the number of corrupted samples of this class has reached 2000 yet
dataset[i]['y'] = (dataset[i]['y'] + np.random.randint(1, n_classes)) % n_classes # change the y value of a dataset
n_corrupted[current_class] += 1
corrupted[i] = 1
i += 1
# Put the corruption labels into the dataset for visualization
dataset['data_labels'] = np.array(corrupted, dtype=np.int64).reshape((len(dataset), 1))
corrupt_dataset(train_data)
def get_wrn():
return WideResidualNetwork((32, 32, 3))
def build_estimator(loss_op):
pipeline = fe.Pipeline(train_data=train_data,
eval_data=eval_data,
test_data=test_data,
batch_size=batch_size,
ops=[Normalize(inputs="x", outputs="x", mean=(0.4914, 0.4822, 0.4465), std=(0.2471, 0.2435, 0.2616)),
PadIfNeeded(min_height=40, min_width=40, image_in="x", image_out="x", mode="train"),
RandomCrop(32, 32, image_in="x", image_out="x", mode="train"),
Sometimes(HorizontalFlip(image_in="x", image_out="x", mode="train")),
CoarseDropout(inputs="x", outputs="x", max_holes=1, mode="train"),
])
model = fe.build(model_fn=get_wrn, optimizer_fn='adam')
network = fe.Network(ops=[
ModelOp(model=model, inputs="x", outputs="y_pred"),
loss_op, # <<<----------------------------- This is where the secret sauce will go
UpdateOp(model=model, loss_name="ce")
])
traces = [
Accuracy(true_key="y", pred_key="y_pred"),
MCC(true_key="y", pred_key="y_pred"),
BestModelSaver(model=model, save_dir=save_dir, metric="mcc", save_best_mode="max", load_best_final=True),
# We will also visualize the difference between the normal and corrupted image confidence scores. You could follow this with an
# ImageViewer trace, but we will get the data out of the system summary instead later for viewing.
LabelTracker(metric="confidence", label="data_labels", label_mapping={"Normal": 0, "Corrupted": 1}, mode="train", outputs="label_confidence"),
]
estimator = fe.Estimator(pipeline=pipeline,
network=network,
epochs=epochs,
traces=traces,
train_steps_per_epoch=max_train_steps_per_epoch,
eval_steps_per_epoch=max_eval_steps_per_epoch,
log_steps=300)
return estimator
loss = SuperLoss(CrossEntropy(inputs=("y_pred", "y"), outputs="ce"), output_confidence="confidence") # The output_confidence arg is only needed if you want to visualize
estimator_super = build_estimator(loss)
superL = estimator_super.fit("SuperLoss")
print("before test")
summary = estimator_super.test()
print("after test")
print(summary.history["test"])
Here I have an LSTM autoencoder written in Keras. I want to convert the code to Chainer.
import numpy as np
from keras.layers import Input, GRU
from keras.models import Model
input_feat = Input(shape=(30, 2000))
l = GRU( 100, return_sequences=True, activation="tanh", recurrent_activation="hard_sigmoid")(input_feat)
l = GRU(2000, return_sequences=True, activation="tanh", recurrent_activation="hard_sigmoid")(l)
model = Model(input_feat, l)
model.compile(optimizer="RMSprop", loss="mean_squared_error")
feat = np.load("feat.npy")
model.fit(feat, feat[:, ::-1, :], epochs=200, batch_size=250)
feat is a NumPy array whose shape is (269, 30, 2000). I could run the above code and the result was reasonable. Then I wrote the Chainer code below.
import numpy as np
from chainer import Chain, Variable, optimizers
import chainer.functions as F
import chainer.links as L
class GRUAutoEncoder(Chain):
def __init__(self):
super().__init__()
with self.init_scope():
self.encode = L.GRU(2000, 100)
self.decode = L.GRU(100, 2000)
def __call__(self, h, mode):
if mode == "encode":
h = F.tanh(self.encode(h))
return h
if mode == "decode":
h = F.tanh(self.decode(h))
return h
def reset(self):
self.encode.reset_state()
self.decode.reset_state()
def main():
feat = np.load("feat.npy") #(269, 30, 2000)
gru_autoencoder = GRUAutoEncoder()
optimizer = optimizers.RMSprop(lr=0.01).setup(gru_autoencoder)
N = len(feat)
batch_size = 250
frame_rate = 30 # number of timesteps per sample
for epoch in range(200):
index = np.random.randint(0, N-batch_size+1)
input_splices = feat[index:index+batch_size] #(250, 30, 2000)
#Encoding
input_vector = np.zeros((30, batch_size, 2000), dtype="float32")
h = []
for i in range(frame_rate):
input_vector[i] = input_splices[:, i, :] #(250, 2000)
tmp = Variable(input_vector[i])
h.append(gru_autoencoder(tmp, "encode")) #(250, 100)
#Decoding
output_vector = []
for i in range(frame_rate):
tmp = h[i]
output_vector.append(gru_autoencoder(tmp, "decode"))
x = input_vector[0]
t = output_vector[0]
for i in range(len(output_vector)):
x = F.concat((x,input_vector[i]), axis=1)
t = F.concat((t,output_vector[i]), axis=1)
loss = F.mean_squared_error(x, t)
gru_autoencoder.cleargrads()
loss.backward()
optimizer.update()
gru_autoencoder.reset()
if __name__ == "__main__":
main()
But the result of the above code was not reasonable. I think the Chainer code has something wrong, but I cannot find where it is.
In the Keras code, the target is the reversed input sequence:
model.fit(feat, feat[:, ::-1, :])
So I tried to reverse output_vector in the Chainer code as well,
output_vector.reverse()
but the result was still not reasonable.
.. note: This answer is a translation of [Japanese SO](https://ja.stackoverflow.com/questions/52162/keras%E3%81%AE%E3%82%B3%E3%83%BC%E3%83%89%E3%82%92chainer%E3%81%AB%E6%9B%B8%E3%81%8D%E6%8F%9B%E3%81%88%E3%81%9F%E3%81%84lstm-autoencoder%E3%81%AE%E5%AE%9F%E8%A3%85/52213#52213).
You should avoid L.GRU and use L.NStepGRU instead, because with L.GRU you have to write "recurrence-aware" code: you apply L.GRU repeatedly along a single time series, so the batch dimension must be treated with great care. L.NStepGRU (with n_layers=1) wraps the batch processing for you, so it is much more user-friendly.
An instance of L.NStepGRU takes two input arguments: one is the initial state, and the other is a list of time series that together form a batch. Conventionally, the initial state is None.
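For instance, a minimal call looks like this (the array sizes are only illustrative):
import numpy as np
import chainer.links as L
# one batch = a Python list of per-sample arrays, each of shape (n_timesteps, in_size)
xs = [np.random.randn(30, 2000).astype(np.float32) for _ in range(4)]
gru = L.NStepGRU(n_layers=1, in_size=2000, out_size=100, dropout=0)
hy, ys = gru(hx=None, xs=xs)  # hy: final hidden states, ys: one output sequence per sample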
The full answer to your question is as follows.
### dataset.py
from chainer.dataset import DatasetMixin
import numpy as np
class MyDataset(DatasetMixin):
N_SAMPLES = 269
N_TIMESERIES = 30
N_DIMS = 2000
def __init__(self):
super().__init__()
self.data = np.random.randn(self.N_SAMPLES, self.N_TIMESERIES, self.N_DIMS) \
.astype(np.float32)
def __len__(self):
return self.N_SAMPLES
def get_example(self, i):
return self.data[i, :, :]
### model.py
import chainer
from chainer import links as L
from chainer import functions as F
from chainer.link import Chain
class MyModel(Chain):
N_IN_CHANNEL = 2000
N_HIDDEN_CHANNEL = 100
N_OUT_CHANNEL = 2000
def __init__(self):
super().__init__()
self.encoder = L.NStepGRU(n_layers=1, in_size=self.N_IN_CHANNEL, out_size=self.N_HIDDEN_CHANNEL, dropout=0)
self.decoder = L.NStepGRU(n_layers=1, in_size=self.N_HIDDEN_CHANNEL, out_size=self.N_OUT_CHANNEL, dropout=0)
def to_gpu(self, device=None):
self.encoder.to_gpu(device)
self.decoder.to_gpu(device)
def to_cpu(self):
self.encoder.to_cpu()
self.decoder.to_cpu()
@staticmethod
def flip_list(source_list):
return [F.flip(source, axis=1) for source in source_list]
def __call__(self, source_list):
"""
.. note:
This implementation makes use of "auto-encoding"
by avoiding redundant copy in GPU device.
In the typical implementation, this function should receive
both of ``source_list`` and ``target_list``.
"""
target_list = self.flip_list(source_list)
_, h_list = self.encoder(hx=None, xs=source_list)
_, predicted_list = self.decoder(hx=None, xs=h_list)
diff_list = [F.mean_squared_error(target, predicted).reshape((1,)) for target, predicted in zip(target_list, predicted_list)]
loss = F.sum(F.concat(diff_list, axis=0))
chainer.report({'loss': loss}, self)
return loss
### converter.py (following examples/seq2seq/seq2seq.py)
from chainer.dataset import to_device
def convert(batch, device):
"""
.. note:
batch must be list(batch_size) of array
"""
if device is None:
return batch
else:
return [to_device(device, x) for x in batch]
### train.py
from chainer.iterators import SerialIterator
from chainer.optimizers import RMSprop
from chainer.training.updaters import StandardUpdater
from chainer.training.trainer import Trainer
dataset = MyDataset()
BATCH_SIZE = 32
iterator = SerialIterator(dataset, BATCH_SIZE)
model = MyModel()
optimizer = RMSprop()
optimizer.setup(model)
updater = StandardUpdater(iterator, optimizer, convert, device=0)
trainer = Trainer(updater, (100, 'iteration'))
from chainer.training.extensions import snapshot_object
trainer.extend(snapshot_object(model, "model_iter_{.updater.iteration}"), trigger=(10, 'iteration'))
from chainer.training.extensions import LogReport, PrintReport, ProgressBar
trainer.extend(LogReport(['epoch', 'iteration', 'main/loss'], (1, 'iteration')))
trainer.extend(PrintReport(['epoch', 'iteration', 'main/loss']), trigger=(1, 'iteration'))
trainer.extend(ProgressBar(update_interval=1))
trainer.run()
I'm new to Python and TensorFlow. I'm now testing the Improved WGAN code from https://github.com/igul222/improved_wgan_training
After adjusting the code for Python 3.6, it still gives "NameError: name 'train_gen' is not defined" when I run it, although there was no warning from pylint.
Can anyone help me with it?
The version of Python I'm using is 3.6. There were many syntax differences from 2.7, and I've already changed a lot to make it work. I am running TensorFlow in a virtual environment. I still couldn't figure this one out.
import os, sys
sys.path.append(os.getcwd())
import time
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import sklearn.datasets
import tensorflow as tf
import tflib as lib
import tflib.ops.linear
import tflib.ops.conv2d
import tflib.ops.batchnorm
import tflib.ops.deconv2d
import tflib.save_images
import tflib.mnist
import tflib.plot
MODE = 'wgan-gp' # dcgan, wgan, or wgan-gp
DIM = 64 # Model dimensionality
BATCH_SIZE = 50 # Batch size
CRITIC_ITERS = 5 # For WGAN and WGAN-GP, number of critic iters per gen iter
LAMBDA = 10 # Gradient penalty lambda hyperparameter
ITERS = 200000 # How many generator iterations to train for
OUTPUT_DIM = 784 # Number of pixels in MNIST (28*28)
lib.print_model_settings(locals().copy())
def LeakyReLU(x, alpha=0.2):
return tf.maximum(alpha*x, x)
def ReLULayer(name, n_in, n_out, inputs):
output = lib.ops.linear.Linear(
name+'.Linear',
n_in,
n_out,
inputs,
initialization='he'
)
return tf.nn.relu(output)
def LeakyReLULayer(name, n_in, n_out, inputs):
output = lib.ops.linear.Linear(
name+'.Linear',
n_in,
n_out,
inputs,
initialization='he'
)
return LeakyReLU(output)
def Generator(n_samples, noise=None):
if noise is None:
noise = tf.random_normal([n_samples, 128])
output = lib.ops.linear.Linear('Generator.Input', 128, 4*4*4*DIM, noise)
if MODE == 'wgan':
output = lib.ops.batchnorm.Batchnorm('Generator.BN1', [0], output)
output = tf.nn.relu(output)
output = tf.reshape(output, [-1, 4*DIM, 4, 4])
output = lib.ops.deconv2d.Deconv2D('Generator.2', 4*DIM, 2*DIM, 5, output)
if MODE == 'wgan':
output = lib.ops.batchnorm.Batchnorm('Generator.BN2', [0,2,3], output)
output = tf.nn.relu(output)
output = output[:,:,:7,:7]
output = lib.ops.deconv2d.Deconv2D('Generator.3', 2*DIM, DIM, 5, output)
if MODE == 'wgan':
output = lib.ops.batchnorm.Batchnorm('Generator.BN3', [0,2,3], output)
output = tf.nn.relu(output)
output = lib.ops.deconv2d.Deconv2D('Generator.5', DIM, 1, 5, output)
output = tf.nn.sigmoid(output)
return tf.reshape(output, [-1, OUTPUT_DIM])
def Discriminator(inputs):
output = tf.reshape(inputs, [-1, 1, 28, 28])
output = lib.ops.conv2d.Conv2D('Discriminator.1',1,DIM,5,output,stride=2)
output = LeakyReLU(output)
output = lib.ops.conv2d.Conv2D('Discriminator.2', DIM, 2*DIM, 5, output, stride=2)
if MODE == 'wgan':
output = lib.ops.batchnorm.Batchnorm('Discriminator.BN2', [0,2,3], output)
output = LeakyReLU(output)
output = lib.ops.conv2d.Conv2D('Discriminator.3', 2*DIM, 4*DIM, 5, output, stride=2)
if MODE == 'wgan':
output = lib.ops.batchnorm.Batchnorm('Discriminator.BN3', [0,2,3], output)
output = LeakyReLU(output)
output = tf.reshape(output, [-1, 4*4*4*DIM])
output = lib.ops.linear.Linear('Discriminator.Output', 4*4*4*DIM, 1, output)
return tf.reshape(output, [-1])
real_data = tf.placeholder(tf.float32, shape=[BATCH_SIZE, OUTPUT_DIM])
fake_data = Generator(BATCH_SIZE)
disc_real = Discriminator(real_data)
disc_fake = Discriminator(fake_data)
gen_params = lib.params_with_name('Generator')
disc_params = lib.params_with_name('Discriminator')
if MODE == 'wgan':
gen_cost = -tf.reduce_mean(disc_fake)
disc_cost = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
gen_train_op = tf.train.RMSPropOptimizer(
learning_rate=5e-5
).minimize(gen_cost, var_list=gen_params)
disc_train_op = tf.train.RMSPropOptimizer(
learning_rate=5e-5
).minimize(disc_cost, var_list=disc_params)
clip_ops = []
for var in lib.params_with_name('Discriminator'):
clip_bounds = [-.01, .01]
clip_ops.append(
tf.assign(
var,
tf.clip_by_value(var, clip_bounds[0], clip_bounds[1])
)
)
clip_disc_weights = tf.group(*clip_ops)
elif MODE == 'wgan-gp':
gen_cost = -tf.reduce_mean(disc_fake)
disc_cost = tf.reduce_mean(disc_fake) - tf.reduce_mean(disc_real)
alpha = tf.random_uniform(
shape=[BATCH_SIZE,1],
minval=0.,
maxval=1.
)
differences = fake_data - real_data
interpolates = real_data + (alpha*differences)
gradients = tf.gradients(Discriminator(interpolates), [interpolates])[0]
slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
gradient_penalty = tf.reduce_mean((slopes-1.)**2)
disc_cost += LAMBDA*gradient_penalty
gen_train_op = tf.train.AdamOptimizer(
learning_rate=1e-4,
beta1=0.5,
beta2=0.9
).minimize(gen_cost, var_list=gen_params)
disc_train_op = tf.train.AdamOptimizer(
learning_rate=1e-4,
beta1=0.5,
beta2=0.9
).minimize(disc_cost, var_list=disc_params)
clip_disc_weights = None
elif MODE == 'dcgan':
gen_cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
disc_fake,
tf.ones_like(disc_fake)
))
disc_cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
disc_fake,
tf.zeros_like(disc_fake)
))
disc_cost += tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
disc_real,
tf.ones_like(disc_real)
))
disc_cost /= 2.
gen_train_op = tf.train.AdamOptimizer(
learning_rate=2e-4,
beta1=0.5
).minimize(gen_cost, var_list=gen_params)
disc_train_op = tf.train.AdamOptimizer(
learning_rate=2e-4,
beta1=0.5
).minimize(disc_cost, var_list=disc_params)
clip_disc_weights = None
# For saving samples
fixed_noise = tf.constant(np.random.normal(size=(128, 128)).astype('float32'))
fixed_noise_samples = Generator(128, noise=fixed_noise)
def generate_image(frame, true_dist):
samples = session.run(fixed_noise_samples)
lib.save_images.save_images(
samples.reshape((128, 28, 28)),
'samples_{}.png'.format(frame)
)
# Dataset iterator
train_gen, dev_gen, test_gen = lib.mnist.load(BATCH_SIZE, BATCH_SIZE)
def inf_train_gen():
while True:
for images, targets in train_gen():
yield images
# Train loop
with tf.Session() as session:
session.run(tf.initialize_all_variables())
gen = inf_train_gen()
for iteration in range(ITERS):
start_time = time.time()
if iteration > 0:
_ = session.run(gen_train_op)
if MODE == 'dcgan':
disc_iters = 1
else:
disc_iters = CRITIC_ITERS
for i in range(disc_iters):
_data = gen.__next__()
_disc_cost, _ = session.run(
[disc_cost, disc_train_op],
feed_dict={real_data: _data}
)
if clip_disc_weights is not None:
_ = session.run(clip_disc_weights)
lib.plot.plot('train disc cost', _disc_cost)
lib.plot.plot('time', time.time() - start_time)
# Calculate dev loss and generate samples every 100 iters
if iteration % 100 == 99:
dev_disc_costs = []
for images,_ in dev_gen():
_dev_disc_cost = session.run(
disc_cost,
feed_dict={real_data: images}
)
dev_disc_costs.append(_dev_disc_cost)
lib.plot.plot('dev disc cost', np.mean(dev_disc_costs))
generate_image(iteration, _data)
# Write logs every 100 iters
if (iteration < 5) or (iteration % 100 == 99):
lib.plot.flush()
lib.plot.tick()
This is the section containing the name from the error.
# Dataset iterator
train_gen, dev_gen, test_gen = lib.mnist.load(BATCH_SIZE, BATCH_SIZE)
def inf_train_gen():
while True:
for images, targets in train_gen():
yield images
And here is the error.
Traceback (most recent call last):
File "<stdin>", line 13, in <module>
File "<stdin>", line 3, in inf_train_gen
NameError: name 'train_gen' is not defined
Attempt 1:
I believe it's just because you are writing for images, targets in train_gen(): when you should be writing for images, targets in train_gen:
In a nutshell, the brackets mean you are calling a function, and Python raises NameError: name 'train_gen' is not defined because there is no function with the name train_gen defined at that point.
In the future, please keep your example minimal; you have pasted an enormous amount of code, which makes it very hard to debug and to see what you're doing.
Attempt 2:
Upon a second review of the code (this is a good reason why you need to make your examples as small as possible), I have realised it is possible that you are importing this code from somewhere else.
The first assignment to train_gen happens outside the function's scope. It is possible, then, that by the time you call the function, train_gen is no longer defined, which is why you get your error. This can occur for a number of reasons. Having reviewed the code a little, there are various issues I can see (mostly bad practice).
It is generally not a good idea to use global variables within a function, as you do in inf_train_gen; if a function needs a value to run properly, it should be passed as an argument (a sketch follows below). When a variable is passed explicitly, we can normally see where it comes from and what uses it, but if every function relies on a globally scoped variable, any number of functions could delete it, change it, etc.
Right now I have no idea what has happened to the variable train_gen. I would suggest printing out the variable at different points to see which call is causing the issue, and in the future staying away from globally scoped variables unless absolutely necessary; they make code nearly impossible to debug.
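A sketch of what I mean, reusing the names from your code:
def inf_train_gen(train_gen):
    while True:
        for images, targets in train_gen():  # or train_gen, per Attempt 1
            yield images

train_gen, dev_gen, test_gen = lib.mnist.load(BATCH_SIZE, BATCH_SIZE)
gen = inf_train_gen(train_gen)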
I am currently playing around with and learning about distributed TensorFlow.
I recently created a cluster with one GPU server (two cards) and one CPU server.
I was browsing through various articles, and in the TensorFlow distributed guide I saw that distribution across cards is done by explicitly addressing them by name:
https://github.com/tensorflow/models/blob/master/tutorials/image/cifar10/cifar10_multi_gpu_train.py
but there no cluster is being created.
Can I create a TensorFlow cluster and then specify which card the code should run on?
If yes, does the code below look correct?
In one GitHub issue, whose link I don't have right now but whose code resembles the code below, the card is specified inside with tf.device(replica_device_setter) (a rough sketch of that pattern follows below), but when I try to do that my code throws an error stating "Cannot assign a device for operation 'dummy_queue_Close_1': Could not satisfy explicit device specification '/job:ps/task:0/device:GPU:0' because no supported kernel for GPU devices is available."
Is this because I am assigning tasks which were supposed to happen on the CPU, but since I specified with tf.device('/gpu:0') it throws the error?
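The pattern I mean, roughly (the worker_device string here is only my illustration, not code copied from that issue):
with tf.device(tf.train.replica_device_setter(
        worker_device='/job:worker/task:%d/gpu:0' % FLAGS.task_index,
        cluster=cluster_spec)):
    # build the model here: variables still go to the ps job,
    # while the compute ops are pinned to this worker's first GPU
    ...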
Also, I can't share my official code, but it looks very similar to the code below, which I took as a reference.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy
import tensorflow as tf
tf.app.flags.DEFINE_string("ps_hosts", "localhost:2222", "...")
tf.app.flags.DEFINE_string("worker_hosts", "localhost:2223", "...")
tf.app.flags.DEFINE_string("job_name", "", "...")
tf.app.flags.DEFINE_integer("task_index", 0, "...")
tf.app.flags.DEFINE_integer('gpu_cards', 4, 'Number of GPU cards in a machine to use.')
FLAGS = tf.app.flags.FLAGS
def dense_to_one_hot(labels_dense, num_classes = 10) :
"""Convert class labels from scalars to one-hot vectors."""
num_labels = labels_dense.shape[0]
index_offset = numpy.arange(num_labels) * num_classes
labels_one_hot = numpy.zeros((num_labels, num_classes))
labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
return labels_one_hot
def run_training(server, cluster_spec, num_workers) :
is_chief = (FLAGS.task_index == 0)
with tf.Graph().as_default():
with tf.device(tf.train.replica_device_setter(cluster = cluster_spec)) :
with tf.device('/cpu:0') :
global_step = tf.get_variable('global_step', [],
initializer = tf.constant_initializer(0), trainable = False)
with tf.device('/gpu:%d' % (FLAGS.task_index % FLAGS.gpu_cards)) :
# Create the model
x = tf.placeholder("float", [None, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.nn.softmax(tf.matmul(x, W) + b)
# Define loss and optimizer
y_ = tf.placeholder("float", [None, 10])
cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
opt = tf.train.GradientDescentOptimizer(0.01)
opt = tf.train.SyncReplicasOptimizer(opt, replicas_to_aggregate = num_workers,
replica_id = FLAGS.task_index, total_num_replicas = num_workers)
train_step = opt.minimize(cross_entropy, global_step = global_step)
# Test trained model
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
init_token_op = opt.get_init_tokens_op()
chief_queue_runner = opt.get_chief_queue_runner()
init = tf.initialize_all_variables()
sv = tf.train.Supervisor(is_chief = is_chief,
init_op = init,
global_step = global_step)
# Create a session for running Ops on the Graph.
config = tf.ConfigProto(allow_soft_placement = True)
sess = sv.prepare_or_wait_for_session(server.target, config = config)
if is_chief:
sv.start_queue_runners(sess, [chief_queue_runner])
sess.run(init_token_op)
for i in range(100000):
source_data = numpy.random.normal(loc = 0.0, scale = 1.0, size = (100, 784))
labels_dense = numpy.clip(numpy.sum(source_data, axis = 1) / 5 + 5, 0, 9).astype(int)
labels_one_hot = dense_to_one_hot(labels_dense)
_, cost, acc, step = sess.run([train_step, cross_entropy, accuracy, global_step], feed_dict = { x: source_data, y_ : labels_one_hot })
print("[%d]: cost=%.2f, accuracy=%.2f" % (step, cost, acc))
def main(_) :
ps_hosts = FLAGS.ps_hosts.split(",")
worker_hosts = FLAGS.worker_hosts.split(",")
num_workers = len(worker_hosts)
print("gup_cards=%d; num_worders=%d" % (FLAGS.gpu_cards, num_workers))
cluster_spec = tf.train.ClusterSpec({ "ps":ps_hosts, "worker" : worker_hosts })
server = tf.train.Server(cluster_spec, job_name = FLAGS.job_name, task_index = FLAGS.task_index)
if FLAGS.job_name == "ps":
server.join()
elif FLAGS.job_name == "worker" :
run_training(server, cluster_spec, num_workers)
if __name__ == '__main__' :
tf.app.run()
I found a way to do this; it sounds very simple, and it is very simple.
I created the TensorFlow cluster in the same way, passed the n_workers parameter to the cluster, and launched the different instances of the code with different values of CUDA_VISIBLE_DEVICES.
CUDA_VISIBLE_DEVICES is an environment variable that restricts which cards TensorFlow (or any DL framework) can see.
Its value is a comma-separated list of GPU indices from 0 to n-1 (where n is the number of GPUs):
-1 (or an empty value) hides all cards
k exposes only the k-th card, and a list such as 0,1 exposes several cards
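I set it on the command line when launching each instance, but the same thing can be done from inside the script before TensorFlow initializes the GPU (a small sketch; the value shown is just an example):
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"  # expose only the second card to this process
import tensorflow as tf  # import (or create the session) only after setting the variable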
I hope someone who is looking for a similar answer can find this useful.
My following code works in TF 0.9.0, but in TF 1.1 it shows the following error:
tensorflow.python.framework.errors_impl.OutOfRangeError: RandomShuffleQueue '_1_input/batching_shuffling/random_shuffle_queue' is closed and has insufficient elements (requested 30, current size 0)
[[Node: input/batching_shuffling = QueueDequeueManyV2[component_types=[DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/cpu:0"](input/batching_shuffling/random_shuffle_queue, input/batching_shuffling/n)]]
I cannot figure out what I need to fix to make it work with TF 1.1. Can you please give me some suggestions?
Here is my code; sorry, it is a little bit long.
import numpy as np
import tensorflow as tf
import glob
import os
tf_record_dir = os.getcwd() + '/'
batch_size = 30
num_epochs = 100
filename = os.path.join(tf_record_dir, 'train.tfrecords')
def _int64_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def gen_tf_record_files():
print ('Writing', filename)
writer = tf.python_io.TFRecordWriter(filename)
for index in range(1000):
example = tf.train.Example(features=tf.train.Features(feature={'image_id': _int64_feature(index)
}))
writer.write(example.SerializeToString())
writer.close()
def inputs():
tfRec_files_list = glob.glob(filename)
with tf.name_scope('input'):
filename_queue = tf.train.string_input_producer(tfRec_files_list, num_epochs=num_epochs, name='string_input_producer')
im_id = _read_and_decode(filename_queue)
min_after_dequeue = 10
capacity = min_after_dequeue + 3 * batch_size
im_ids = tf.train.shuffle_batch([im_id], batch_size=batch_size, num_threads=3,
capacity=capacity, enqueue_many=False, min_after_dequeue=min_after_dequeue, name='batching_shuffling')
return im_ids
def _read_and_decode(filename_queue):
# All of the below are symbolic
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(serialized_example,
features={'image_id': tf.FixedLenFeature([], tf.int64)
})
im_id = tf.to_int32(features['image_id'], name='ToInt32')
return im_id
def main():
# initialize graph
gen_tf_record_files()
g = tf.Graph()
with g.as_default():
im_id = inputs()
if tf.__version__.split('.')[0] == "1":
init = tf.global_variables_initializer()
else:
init = tf.initialize_all_variables() #it is deprecated in TF 1.1
sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
with sess.as_default():
sess.run(init)
# Start the queue runners.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
im = sess.run(im_id)
main()