I was training a model in Colab, but, I shut down my computer and this training stoped. Every 5 epochs I save the weights. I think it is but I don't know how. How it's possible to continue the training with the weights previously saved?
Thanks.
When training a model in colab, training doesn't stop when you close you computer, it stops some time afterwards.
If you are saving the weights in colab, when colab closes everything is deleted.
If you have mounted your gdrive in colab and you save weights in gdrive, your weights will be there.
If your weights are in your gdrive you can continue training by loading your stored weights to your keras model simple by
model.load_weights('path_to_weights')
Thank you for your answer, #Ioannis Nasios. Yes, my weights are in 'gdrive'. I'm training a GAN network and I trying to figure out how to load these weights and continue the training. I saved the discriminator and generator weights and also gan_loss and discriminator_loss. Well, do I have to compile generator and discriminator networks, load weights and compile gan network with their loss? I think it could be a stupid question. It is my first time training a GAN network.
Here I post the code:
# Combined network
def get_gan_network(discriminator, shape, generator, optimizer, loss):
discriminator.trainable = False
gan_input = Input(shape=shape)
x = generator(gan_input)
gan_output = discriminator(x)
gan = Model(inputs=gan_input, outputs=[x,gan_output])
gan.compile(loss=[loss, "binary_crossentropy"],
loss_weights=[1., 1e-3],
optimizer=optimizer)
return gan
def train(x_train_lr, x_train_hr, x_test_lr, x_test_hr, epochs, batch_size, output_dir, model_save_dir, weights_save_dir):
loss = VGG_LOSS(image_shape)
batch_count = int(x_train_hr.shape[0] / batch_size)
#### SI LAS IMAGENES NO SON CUADRADAS ESTO DEBERIA CAMBIAR
shape_lr = (image_shape[0]//downscale_factor, image_shape[1]//downscale_factor, image_shape[2])
shape_hr = x_train_hr[0].shape
####
generator = Generator(shape_lr, shape_hr).generator()
discriminator = Discriminator(image_shape).discriminator()
optimizer = Utils_model.get_optimizer()
generator.compile(loss=loss.vgg_loss, optimizer=optimizer)
discriminator.compile(loss="binary_crossentropy", optimizer=optimizer)
gan = get_gan_network(discriminator, shape_lr, generator, optimizer, loss.vgg_loss)
loss_file = open(model_save_dir + '/losses.txt' , 'w+')
loss_file.close()
for e in range(1, epochs+1):
print ('-'*15, 'Epoch %d' % e, '-'*15)
for _ in tqdm(range(batch_count)):
rand_nums = np.random.randint(0, x_train_hr.shape[0], size=batch_size)
image_batch_hr = x_train_hr[rand_nums]
image_batch_lr = x_train_lr[rand_nums]
generated_images_sr = generator.predict(image_batch_lr)
real_data_Y = np.ones(batch_size) - np.random.random_sample(batch_size)*0.2
fake_data_Y = np.random.random_sample(batch_size)*0.2
discriminator.trainable = True
d_loss_real = discriminator.train_on_batch(image_batch_hr, real_data_Y)
d_loss_fake = discriminator.train_on_batch(generated_images_sr, fake_data_Y)
discriminator_loss = 0.5 * np.add(d_loss_fake, d_loss_real)
rand_nums = np.random.randint(0, x_train_hr.shape[0], size=batch_size)
image_batch_hr = x_train_hr[rand_nums]
image_batch_lr = x_train_lr[rand_nums]
gan_Y = np.ones(batch_size) - np.random.random_sample(batch_size)*0.2
discriminator.trainable = False
gan_loss = gan.train_on_batch(image_batch_lr, [image_batch_hr,gan_Y])
print("discriminator_loss : %f" % discriminator_loss)
print("gan_loss :", gan_loss)
gan_loss = str(gan_loss)
loss_file = open(model_save_dir + 'losses.txt' , 'a')
loss_file.write('epoch%d : gan_loss = %s ; discriminator_loss = %f\n' %(e, gan_loss, discriminator_loss) )
loss_file.close()
if e == 1 or e % 5 == 0:
Utils.plot_generated_images(output_dir, e, generator, x_test_hr, x_test_lr)
generator.save_weights(weights_save_dir + '%d_gen_weights.h5' % e)
discriminator.save_weights(weights_save_dir + '%d_dis_weights.h5' % e)
if e % 500 == 0 or e == epochs+1:
generator.save(model_save_dir + 'gen_model%d.h5' % e)
discriminator.save(model_save_dir + 'dis_model%d.h5' % e)
Related
I have a GAN that aims to reproduce the paper CycleGAN. However, even though everything is implemented correctly (as for some basic dataset it works), with the Facade Dataset the discriminator predicts the following:
print(f"Segm: {tf.reduce_mean(discriminator_segm(segmented_test[0:10])).numpy()}, {tf.reduce_mean(discriminator_segm(generator_real_to_segm(real_test[0:10]))).numpy()}")
print(f"Real: {tf.reduce_mean(discriminator_real(real_test[0:10])).numpy()}, {tf.reduce_mean(discriminator_real(generator_segm_to_real(segmented_test[0:10]))).numpy()}")
# Segm: 0.9463781714439392, 0.9564124941825867
# Real: 0.9635561108589172, 0.8240727782249451
In other words, it considers the generated output as good as the ground truth
This makes no sense, as the best guess, if the generated images were identical to the ground truth (which are not, are very far from being similar), should be to predict 0.5
At this point, I tried to train the discriminator many more times than the generator, with no difference (still get the accuracy near 90%)
If you need, even though it's a bit long, this is the training loop:
def train(EPOCHS = 400, batch_size = 4):
real_train_batched = real_train.reshape((-1, batch_size, *real_train.shape[1:]))
segmented_train_batched = segmented_train.reshape((-1, batch_size, *segmented_train.shape[1:]))
for e in range(EPOCHS):
print(f"Epoch: {e+1}/{EPOCHS}")
gen_perm = lambda : np.random.permutation(range(len(real_train_batched)))
perm = gen_perm()
real_train_batched_gen = real_train_batched[perm]
segmented_train_batched_gen = segmented_train_batched[perm]
real_train_batched_disc = real_train_batched[perm]
segmented_train_batched_disc = segmented_train_batched[perm]
pack = zip(real_train_batched_gen, segmented_train_batched_gen, real_train_batched_disc, segmented_train_batched_disc)
for i, (b_real_gen, b_segm_gen, b_real_disc, b_segm_disc) in enumerate(pack):
print(".", end="")
let_the_magic_happen(b_segm_gen, b_real_gen, b_segm_disc, b_real_disc, alpha=tf.constant(0.00000005))
#tf.function
def let_the_magic_happen_discriminator(batch_segmented_gen, batch_real_gen, batch_segmented_disc, batch_real_disc):
fake_real_images = generator_segm_to_real(batch_segmented_gen, training = False)
fake_segmented_images = generator_real_to_segm(batch_real_gen, training = False)
with tf.GradientTape(persistent=True) as disc_tape:
real_real_pred = discriminator_real(batch_real_disc, training=True)
real_fake_pred = discriminator_real(fake_real_images, training=True)
segm_real_pred = discriminator_segm(batch_segmented_disc, training=True)
segm_fake_pred = discriminator_segm(fake_segmented_images, training=True)
segm_disc_loss = discriminator_loss(segm_real_pred, segm_fake_pred)
real_disc_loss = discriminator_loss(real_real_pred, real_fake_pred)
segm_disc_grad = disc_tape.gradient(segm_disc_loss, discriminator_segm.trainable_weights)
real_disc_grad = disc_tape.gradient(real_disc_loss, discriminator_real.trainable_weights)
segm_disc_optimizer.apply_gradients(zip(segm_disc_grad , discriminator_segm.trainable_weights))
real_disc_optimizer.apply_gradients(zip(real_disc_grad , discriminator_real.trainable_weights))
#tf.function
def let_the_magic_happen_generator(batch_segmented_gen, batch_real_gen, alpha):
with tf.GradientTape(persistent=True) as gen_tape:
fake_real_images = generator_segm_to_real(batch_segmented_gen, training = True)
fake_segmented_images = generator_real_to_segm(batch_real_gen, training = True)
fake_real_images_pred = discriminator_real(fake_real_images, training=False)
fake_segmented_images_pred = discriminator_segm(fake_segmented_images, training=False)
batch_real_gen_reconstruction = generator_segm_to_real(fake_segmented_images, training=True)
batch_segmented_gen_reconstruction = generator_real_to_segm(fake_real_images, training=True)
reconstruction_loss = alpha * (
tf.keras.losses.MeanAbsoluteError()(batch_segmented_gen, batch_segmented_gen_reconstruction) +
tf.keras.losses.MeanAbsoluteError()(batch_real_gen, batch_real_gen_reconstruction)
)
# tf.print(reconstruction_loss)
# tf.print(generator_loss(fake_real_images_pred))
# tf.print(generator_loss(fake_segmented_images_pred))
segm_to_real_gen_loss = generator_loss(fake_real_images_pred) + reconstruction_loss
real_to_segm_gen_loss = generator_loss(fake_segmented_images_pred) + reconstruction_loss
real_to_segm_gen_grad = gen_tape.gradient(real_to_segm_gen_loss, generator_real_to_segm.trainable_weights)
segm_to_real_gen_grad = gen_tape.gradient(segm_to_real_gen_loss, generator_segm_to_real.trainable_weights)
real_to_segm_gen_optimizer.apply_gradients(zip(real_to_segm_gen_grad , generator_real_to_segm.trainable_weights))
segm_to_real_gen_optimizer.apply_gradients(zip(segm_to_real_gen_grad , generator_segm_to_real.trainable_weights))
def let_the_magic_happen(batch_segmented_gen, batch_real_gen, batch_segmented_disc, batch_real_disc, alpha):
let_the_magic_happen_discriminator(batch_segmented_gen, batch_real_gen, batch_segmented_disc, batch_real_disc)
if tf.random.uniform([1]) < 0.2 :
let_the_magic_happen_generator(batch_segmented_gen, batch_real_gen, alpha = alpha)
let_the_magic_happen_discriminator(batch_segmented_gen, batch_real_gen, batch_segmented_disc, batch_real_disc)
#tf.function
def generator_loss(fake_pred) :
return tf.keras.losses.BinaryCrossentropy()(tf.ones_like(fake_pred), fake_pred)
#tf.function
def discriminator_loss(real_pred, fake_pred):
return tf.keras.losses.BinaryCrossentropy()(
tf.concat((tf.ones_like(real_pred) - 5e-2, tf.zeros_like(fake_pred)),axis=0),
tf.concat((real_pred, fake_pred),axis=0),
)
The generators are 2 U-Net where the discriminator are 2 LeNet (also tried with PatchGAN, same thing happens)
I've also already tried with MAE instead of BCE
I made my windows 10 jupyter notebook as a server and running some trains on it.
I've installed CUDA 9.0 and cuDNN properly, and python detects the GPU. This is what I've got on the anaconda prompt.
>>> torch.cuda.get_device_name(0)
'GeForce GTX 1070'
And I also placed my model and tensors on cuda by .cuda()
model = LogPPredictor(1, 58, 64, 128, 1, 'gsc')
if torch.cuda.is_available():
torch.set_default_tensor_type(torch.cuda.DoubleTensor)
model.cuda()
else:
torch.set_default_tensor_type(torch.FloatTensor)
list_train_loss = list()
list_val_loss = list()
acc = 0
mse = 0
optimizer = args.optim(model.parameters(),
lr=args.lr,
weight_decay=args.l2_coef)
data_train = DataLoader(args.dict_partition['train'],
batch_size=args.batch_size,
pin_memory=True,
shuffle=args.shuffle)
data_val = DataLoader(args.dict_partition['val'],
batch_size=args.batch_size,
pin_memory=True,
shuffle=args.shuffle)
for epoch in tqdm_notebook(range(args.epoch), desc='Epoch'):
model.train()
epoch_train_loss = 0
for i, batch in enumerate(data_train):
list_feature = torch.tensor(batch[0]).cuda()
list_adj = torch.tensor(batch[1]).cuda()
list_logP = torch.tensor(batch[2]).cuda()
list_logP = list_logP.view(-1,1)
optimizer.zero_grad()
list_pred_logP = model(list_feature, list_adj)
list_pred_logP.require_grad = False
train_loss = args.criterion(list_pred_logP, list_logP)
epoch_train_loss += train_loss.item()
train_loss.backward()
optimizer.step()
list_train_loss.append(epoch_train_loss/len(data_train))
model.eval()
epoch_val_loss = 0
with torch.no_grad():
for i, batch in enumerate(data_val):
list_feature = torch.tensor(batch[0]).cuda()
list_adj = torch.tensor(batch[1]).cuda()
list_logP = torch.tensor(batch[2]).cuda()
list_logP = list_logP.view(-1,1)
list_pred_logP = model(list_feature, list_adj)
val_loss = args.criterion(list_pred_logP, list_logP)
epoch_val_loss += val_loss.item()
list_val_loss.append(epoch_val_loss/len(data_val))
data_test = DataLoader(args.dict_partition['test'],
batch_size=args.batch_size,
pin_memory=True,
shuffle=args.shuffle)
model.eval()
with torch.no_grad():
logP_total = list()
pred_logP_total = list()
for i, batch in enumerate(data_val):
list_feature = torch.tensor(batch[0]).cuda()
list_adj = torch.tensor(batch[1]).cuda()
list_logP = torch.tensor(batch[2]).cuda()
logP_total += list_logP.tolist()
list_logP = list_logP.view(-1,1)
list_pred_logP = model(list_feature, list_adj)
pred_logP_total += list_pred_logP.tolist()
mse = mean_squared_error(logP_total, pred_logP_total)
But on the Process Manager of Windows, whenever I start training, only CPU usage goes up to 25% and GPU usage remains 0. How can I fix this???
I had a similar problem with using PyTorch on Cuda. After looking for possible solutions, I found the following post by Soumith himself that found it very helpful.
https://discuss.pytorch.org/t/gpu-supposed-to-be-used-but-isnt/2883
The bottom line is, at least in my case, I could not put enough load on GPUs. There was a bottleneck in my application. Try another example, or increase batch size; it should be OK.
We are using tensorflow library for face recognition. Our code works fine for single images. But when we run it as an API, the prediction time increases for every subsequent request. This happens because it searches for previously predicted images as well which should ideally not happen. Please find below the code I am using.
def train:
with tf.Session(config=tf.ConfigProto(log_device_placement=False)) as sess:
test_set = _get_test_data(input_directory)
images, labels = _load_images_and_labels(test_set, image_size=160, batch_size=batch_size,
num_threads=4, num_epochs=1)
_load_model(model_filepath=model_path)
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
sess.run(init_op)
images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
embedding_layer = tf.get_default_graph().get_tensor_by_name("embeddings:0")
phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess,coord=coord)
emb_array, label_array = _create_embeddings(embedding_layer, images, labels, images_placeholder,
phase_train_placeholder, sess)
classifier_filename = classifier_output_path
class_name, prob = _evaluate_classifier(emb_array, label_array, classifier_filename)
coord.request_stop()
coord.join(threads)
def _create_embeddings(embedding_layer, images, labels, images_placeholder, phase_train_placeholder, sess):
emb_array = None
label_array = None
try:
i = 0
while True:
print("batch images")
batch_images, batch_labels = sess.run([images, labels])
print('Processing iteration {} batch of size: {}'.format(i, len(batch_labels)))
emb = sess.run(embedding_layer,
feed_dict={images_placeholder: batch_images, phase_train_placeholder: False})
emb_array = np.concatenate([emb_array, emb]) if emb_array is not None else emb
label_array = np.concatenate([label_array, batch_labels]) if label_array is not None else batch_labels
i += 1
except tf.errors.OutOfRangeError:
pass
return emb_array, label_array
It searches for previously predicted images at
`batch_images, batch_labels = sess.run([images, labels])`
in the create embedding function. I think this is the problem of some unclosed threads because of which sess.run runs for all queued threads. Can anyone help me with this
During debugging I found that previously predicted images information was in default graph which is picked during execution of following lines
images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
embedding_layer = tf.get_default_graph().get_tensor_by_name("embeddings:0")
so by resetting graph before start of session will solve the problem of scanning previously predicted images as
tf.reset_default_graph()
I have been using TensorFlow for a reasonable length of time now. and believed I had a thorough understanding of how a TensorFlow graph works and executes within a session. However, I have written all of my TensorFlow models in a script-like fashion as such:
import tensorflow as tf
import DataWorker
import Constants
x = tf.placeholder(tf.float32, [None, Constants.sequenceLength, DataWorker.numFeatures])
y = tf.placeholder(tf.float32, [None, 1])
xTensors = tf.unstack(x, axis=1) # [seqLength tensors of shape (batchSize, numFeatures)]
W = tf.Variable(tf.random_normal([Constants.numHidden, 1])) # Weighted matrix
b = tf.Variable(tf.random_normal([1])) # Bias
cell = tf.contrib.rnn.BasicLSTMCell(Constants.numHidden, forget_bias=Constants.forgetBias)
outputs, finalState = tf.nn.static_rnn(cell, xTensors, dtype=tf.float32)
# predictions = [tf.add(tf.matmul(output, W), b) for output in outputs] # List of predictions after each time step
prediction = tf.add(tf.matmul(outputs[-1], W), b) # Prediction after final time step
prediction = tf.tanh(prediction) # Activation
mse = tf.losses.mean_squared_error(predictions=prediction, labels=y) # Mean loss over entire batch
accuracy = tf.reduce_mean(1 - (tf.abs(y - prediction) / DataWorker.labelRange)) # Accuracy over entire batch
optimiser = tf.train.AdamOptimizer(Constants.learningRate).minimize(mse) # Backpropagation
with tf.Session() as session:
session.run(tf.global_variables_initializer())
# #############################################
# TRAINING
# #############################################
for epoch in range(Constants.numEpochs):
print("***** EPOCH:", epoch + 1, "*****\n")
IDPointer, TSPointer = 0, 0 # Pointers to current ID and timestamp
epochComplete = False
batchNum = 0
while not epochComplete:
batchNum += 1
batchX, batchY, IDPointer, TSPointer, epochComplete = DataWorker.generateBatch(IDPointer, TSPointer, isTraining=True)
dict = {x: batchX, y: batchY}
session.run(optimiser, dict)
if batchNum % 1000 == 0 or epochComplete:
batchLoss = session.run(mse, dict)
batchAccuracy = session.run(accuracy, dict)
print("Iteration:", batchNum)
print(batchLoss)
print(str("%.2f" % (batchAccuracy * 100) + "%\n"))
# #############################################
# TESTING
# #############################################
testX, testY, _, _, _ = DataWorker.generateBatch(0, 0, isTraining=False)
testAccuracy = session.run(accuracy, {x: testX, y: testY})
print("Testing Accuracy:", str("%.2f" % (testAccuracy * 100) + "%"))
But now, for practicality and readability, I want to implement my model as a class, but have encountered many problems with initializing my variables, etc.
This is the closest I have got to implementing the above example using my own LSTM class
Model.py
import tensorflow as tf
import Constants
import DataWorker # Remove this dependency
class LSTM():
"""docstring."""
def __init__(self,
inputDimensionList,
outputDimensionList,
numLayers=Constants.numLayers,
numHidden=Constants.numHidden,
learningRate=Constants.learningRate,
forgetBias=Constants.forgetBias
):
"""docstring."""
self.batchInputs = tf.placeholder(tf.float32, [None] + inputDimensionList)
self.batchLabels = tf.placeholder(tf.float32, [None] + outputDimensionList)
self.weightedMatrix = tf.Variable(tf.random_normal([numHidden] + outputDimensionList))
self.biasMatrix = tf.Variable(tf.random_normal(outputDimensionList))
self.cell = tf.contrib.rnn.BasicLSTMCell(numHidden, forget_bias=forgetBias)
self.numLayers = numLayers
self.numHidden = numHidden
self.learningRate = learningRate
self.forgetBias = forgetBias
self.batchDict = {}
self.batchInputTensors = None
self.batchOutputs = None # All needed as instance variables?
self.batchFinalStates = None
self.batchPredictions = None
self.batchLoss = None
self.batchAccuracy = None
self.initialised = False
self.session = tf.Session()
# Take in activation, loss and optimiser FUNCTIONS as args
def execute(self, command):
"""docstring."""
return self.session.run(command, self.batchDict)
def setBatchDict(self, inputs, labels):
"""docstring."""
self.batchDict = {self.batchInputs: inputs, self.batchLabels: labels}
self.batchInputTensors = tf.unstack(self.batchInputs, axis=1)
def processBatch(self):
"""docstring."""
self.batchOutputs, self.batchFinalState = tf.nn.static_rnn(self.cell, self.batchInputTensors, dtype=tf.float32)
pred = tf.tanh(tf.add(tf.matmul(self.batchOutputs[-1], self.weightedMatrix), self.biasMatrix))
mse = tf.losses.mean_squared_error(predictions=pred, labels=self.batchLabels)
optimiser = tf.train.AdamOptimizer(self.learningRate).minimize(mse)
if not self.initialised:
self.session.run(tf.global_variables_initializer())
self.initialised = True
with tf.variable_scope("model") as scope:
if self.initialised:
scope.reuse_variables()
self.execute(optimiser)
self.batchPredictions = self.execute(pred)
self.batchLoss = self.execute(tf.losses.mean_squared_error(predictions=self.batchPredictions, labels=self.batchLabels))
self.batchAccuracy = self.execute(tf.reduce_mean(1 - (tf.abs(self.batchLabels - self.batchPredictions) / DataWorker.labelRange)))
return self.batchPredictions, self.batchLabels, self.batchLoss, self.batchAccuracy
def kill(self):
"""docstring."""
self.session.close()
This class is quite messy, especially processBatch() as I have just been trying to get it to work before refining it.
I then run my model here:
Main.py
import DataWorker
import Constants
from Model import LSTM
inputDim = [Constants.sequenceLength, DataWorker.numFeatures]
outputDim = [1]
lstm = LSTM(inputDimensionList=inputDim, outputDimensionList=outputDim)
# #############################################
# TRAINING
# #############################################
for epoch in range(Constants.numEpochs):
print("***** EPOCH:", epoch + 1, "*****\n")
IDPointer, TSPointer = 0, 0 # Pointers to current ID and timestamp
epochComplete = False
batchNum = 0
while not epochComplete:
batchNum += 1
batchX, batchY, IDPointer, TSPointer, epochComplete = DataWorker.generateBatch(IDPointer, TSPointer, isTraining=True)
lstm.setBatchDict(batchX, batchY)
batchPredictions, batchLabels, batchLoss, batchAccuracy = lstm.runBatch()
if batchNum % 1000 == 0 or epochComplete:
print("Iteration:", batchNum)
print("Pred:", batchPredictions[-1], "\tLabel:", batchLabels[-1])
print("Loss:", batchLoss)
print("Accuracy:", str("%.2f" % (batchAccuracy * 100) + "%\n"))
# #############################################
# TESTING
# #############################################
testX, testY, _, _, _ = DataWorker.generateBatch(0, 0, isTraining=False)
lstm.setBatchDict(testX, testY)
_, _, _, testAccuracy = lstm.runBatch()
print("Testing Accuracy:", str("%.2f" % (testAccuracy * 100) + "%"))
lstm.kill()
A single passthrough of the graph is executed fine, when all the variables are initialized, but it is on the second iteration where I get the error
ValueError: Variable rnn/basic_lstm_cell/kernel/Adam/ already exists, disallowed. Did you mean to set reuse=True in VarScope? Originally defined at:
optimiser = tf.train.AdamOptimizer(self.learningRate).minimize(mse)
I Googled this problem and learned that using scope.reuse_variables() should stop it trying to initialize the AdamOptimizer a second time, but cleary this isn't working how I have implemented it. How can I fix this issue?
As a side note, is my method of creating the TensorFlow session as an instance variable within my LSTM class acceptable, or should I create the session in Main and then pass it into the LSTM instance?
In general I wrap anything that creates variables under the hood with tf.make_template when doing object oriented model building.
However, you should avoid adding ops to the graph in a training loop, which looks like it's happening here. They will build up and cause problems, and likely give you incorrect results. Instead, define the graph (with inputs from tf.data, placeholders, or queues) and only loop over a session.run call. Even better, structure your code as an Estimator and this will be enforced.
I'm trying out Tensorflow's rnn example.
With some problems at the start I could run the example in order to train the ptb and now I have a model trained.
How do I exactly use the model now to create sentences without having to train every time again?
I'm running it with a command like python ptb_word_lm.py --data_path=/home/data/ --model medium --save_path=/home/medium
Is there a example somewhere on how to use the trained model to make sentences?
1.Add the following code at the last line of PTBModel:__init__() function:
self._output_probs = tf.nn.softmax(logits)
2.Add the following function in PTBModel:
#property
def output_probs(self):
return self._output_probs
3.Try to run the following code:
raw_data = reader.ptb_raw_data(FLAGS.data_path)
train_data, valid_data, test_data, vocabulary, word_to_id, id_to_word = raw_data
eval_config = get_config()
eval_config.batch_size = 1
eval_config.num_steps = 1
sess = tf.Session()
initializer = tf.random_uniform_initializer(-eval_config.init_scale,
eval_config.init_scale)
with tf.variable_scope("model", reuse=None, initializer=initializer):
mtest = PTBModel(is_training=False, config=eval_config)
sess.run(tf.initialize_all_variables())
saver = tf.train.Saver()
ckpt = tf.train.get_checkpoint_state('/home/medium') # __YOUR__MODEL__SAVE__PATH__
if ckpt and gfile.Exists(ckpt.model_checkpoint_path):
msg = 'Reading model parameters from %s' % ckpt.model_checkpoint_path
print(msg)
saver.restore(sess, ckpt.model_checkpoint_path)
def pick_from_weight(weight, pows=1.0):
weight = weight**pows
t = np.cumsum(weight)
s = np.sum(weight)
return int(np.searchsorted(t, np.random.rand(1) * s))
while True:
number_of_sentences = 10 # generate 10 sentences one time
sentence_cnt = 0
text = '\n'
end_of_sentence_char = word_to_id['<eos>']
input_char = np.array([[end_of_sentence_char]])
state = sess.run(mtest.initial_state)
while sentence_cnt < number_of_sentences:
feed_dict = {mtest.input_data: input_char,
mtest.initial_state: state}
probs, state = sess.run([mtest.output_probs, mtest.final_state],
feed_dict=feed_dict)
sampled_char = pick_from_weight(probs[0])
if sampled_char == end_of_sentence_char:
text += '.\n'
sentence_cnt += 1
else:
text += ' ' + id_to_word[sampled_char]
input_char = np.array([[sampled_char]])
print(text)
raw_input('press any key to continue ...')
This website has an answer with an alteration on the PTB word script, working currently with Tensorflow version 1.0
http://deeplearningathome.com/2016/10/Text-generation-using-deep-recurrent-neural-networks.html