I've been trying to feed my own images into some TensorFlow code to see how it reacts to them instead of the MNIST set. I've been able to import the images (I think) into TensorFlow, but I have two placeholders that should receive my image data and label data. I tried to use feed_dict (which still seems right to me) to pass my data to the rest of my code, but it won't accept any data I feed it. I know I can't feed it a Tensor, and apparently not a batch either, so the only way I can think of to make this work is to feed it a list. I saw that feed_dict can accept NumPy arrays, but I'm not sure how I should approach converting my data to a NumPy array.
I'm new to TensorFlow and python so please forgive any mistakes I made, I'm still learning how everything works.
with tf.name_scope('Image_Data_Input'):
def read_labeled_image_list(image_list_file):
    """Read an image-list file into parallel filename and label lists.

    Every non-blank line is expected to hold a filename followed by a label,
    separated by whitespace, e.g. ``images/cat_01.jpg 3``.

    Args:
        image_list_file: Path of the text file to read.

    Returns:
        A ``(filenames, labels)`` tuple of two equally long lists of strings.
        Labels are returned as text; no numeric conversion is done here.

    Raises:
        ValueError: If a non-blank line does not contain both fields.
    """
    print('read_labeled_image_list function opened')
    filenames = []
    labels = []
    # `with` closes the handle even if a malformed line raises below.
    with open(image_list_file, 'r') as f:
        print('image_list_file opened')
        print('Arrays formed')
        for line in f:
            # strip() removes the line terminator only when one is present,
            # so a final line without a trailing newline keeps its full label.
            entry = line.strip()
            if not entry:
                continue
            # Split on the last whitespace run so the label is always the
            # final field.
            filename, label = entry.rsplit(None, 1)
            filenames.append(filename)
            labels.append(label)
    print('Lines deconstructed')
    return filenames, labels
def read_image(input_queue):
    """Turn one (filename, label) queue element into a resized image tensor.

    Args:
        input_queue: Indexable pair whose element 0 is the image path tensor
            and element 1 is the label tensor.

    Returns:
        A ``(image, label)`` tuple; the image has been resized to 128x128 and
        the label is passed through untouched.
    """
    path_tensor, label = input_queue[0], input_queue[1]
    raw_bytes = tf.read_file(path_tensor)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    # Runs once, while the graph is being built, not once per decoded image.
    print('Image decoded to JPEG')
    # Declare the static shape of the decoded image before resizing it.
    pixels.set_shape([2560, 1440, 3])
    resized = tf.image.resize_images(pixels, [128, 128])
    return resized, label
# Wire the file list into a queue-based input pipeline.
# Both the paths and the labels read from the list file are Python strings.
image_list, label_list = read_labeled_image_list(image_list_file)
images = tf.convert_to_tensor(image_list, dtype=tf.string)
# NOTE(review): labels stay tf.string here, while the `y_` placeholder further
# down is float32 with shape [None, 10] - a conversion step is still needed.
labels = tf.convert_to_tensor(label_list, dtype=tf.string)
# num_epochs=None places no limit on how often the list is cycled;
# shuffle=True randomizes the order of the (path, label) slices.
input_queue = tf.train.slice_input_producer([images, labels], num_epochs=None, shuffle=True)
# `image` and `label` are graph tensors for a single example, not NumPy data.
image, label = read_image(input_queue)
The indentation behaved a little weird when i pasted my code so I'm not sure everything is properly placed.
Well now I have these placeholder:
with tf.name_scope('input'):
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])
And I've seen the code routing data to those placeholder this way:
batch_x, batch_y = tf.train.batch([image, label], batchsize)
#_, summary = sess.run([train_writer, summary_op], feed_dict={x: batch_x, y_: batch_y})
But I can't seem to make this work.
Does anyone have any idea how i could make this work?
Again sorry for any mistakes and thanks in advance.
As the error says, you can't feed tensors into a placeholder. batch_x and batch_y are tensors. The new tf.Dataset API is the preferred way to input data into a model (guide here). I think Dataset.from_tensor_slices would require minimal rewriting. Short of that, build the graph so that batch_x and batch_y flow into the model you're using directly. Then you don't need to use placeholders.
I don't recommend this, but for completeness I want to mention another method. You could:
numpy_batch_x, numpy_batch_y = sess.run([batch_x, batch_y])
_, summary = sess.run([train_writer, summary_op],
feed_dict={x: numpy_batch_x, y_: numpy_batch_y})
PS: If train_writer is a tf.summary.FileWriter, I think you want to:
# Fetch the op itself rather than a one-element list: sess.run() mirrors the
# structure of its fetches, so `[summary_op]` would hand add_summary() a
# Python list instead of the serialized summary it expects.
summary = sess.run(summary_op, ...)
train_writer.add_summary(summary)
EDIT: In response to confusion on the dataset API, I am going to show how to handle this with a Dataset. I am going to use TFRecords. It may not be the simplest solution, but it's one way.
import numpy as np
from scipy.misc import imread # There are others that would work here.
from cv2 import resize # Again, others to choose from.
def read_labeled_image_list(...)
# See question
return filenames, labels
def make_tfr(tfr_dir="/YOUR/PREFERRED/TFR/DIR",
             image_list_file="/YOUR/IMAGE/LIST/FILE"):
    """Serialize every labeled image into one TFRecord file.

    Each image is loaded from disk, resized to 128x128 and stored as raw
    bytes under ``'image_raw'`` together with its integer ``'label'``.

    Args:
        tfr_dir: Path handed to the TFRecord writer (the output file).
        image_list_file: Path of the ``<filename> <label>`` list file passed
            to ``read_labeled_image_list``.

    Raises:
        ValueError: If a label read from the list file is not an integer.
    """
    def _int64_list_feature(a_list):
        # Wrap a list of ints as an int64 Feature.
        return tf.train.Feature(int64_list=tf.train.Int64List(value=a_list))

    def _bytes_feature(value):
        # Wrap a single bytes value as a bytes Feature.
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    all_image_paths, all_labels = read_labeled_image_list(image_list_file)

    # Using the writer as a context manager closes (and flushes) the record
    # file even if an image fails to load part-way through.
    with tf.python_io.TFRecordWriter(tfr_dir) as writer:
        for path, label in zip(all_image_paths, all_labels):
            # NOTE(review): the reader reshapes to (128, 128, 3) uint8, so
            # this assumes imread yields 3-channel uint8 data - confirm.
            disk_im = imread(path)
            # `resize` is the name imported from cv2 at the top of the script.
            resized_im = resize(disk_im, (128, 128))
            raw_im = resized_im.tobytes()
            # Construct an example proto-obj,
            example = tf.train.Example(
                # which wants a Features proto-obj,
                features=tf.train.Features(
                    # which wants a dict.
                    feature={
                        'image_raw': _bytes_feature(raw_im),
                        # Labels arrive as text; Int64List needs a list of
                        # ints, so convert and wrap the scalar.
                        'label': _int64_list_feature([int(label)]),
                    }))  # close your example object
            writer.write(example.SerializeToString())
make_tfr() # After you've done it successfully once, comment out.
def input_pipeline(batch_size, epochs, tfr_dir="/YOUR/PREFERRED/TFR/DIR"):
    """Build a shuffled, repeated, batched Dataset from a TFRecord file.

    Args:
        batch_size: Number of examples grouped into each emitted batch.
        epochs: Number of passes made over the records.
        tfr_dir: Path of the TFRecord file to read.

    Returns:
        A ``tf.data.Dataset`` whose elements are ``(images, labels)`` batches;
        images are float32, reshaped to (128, 128, 3) per example and scaled
        to [-1, 1].
    """
    # with tf.name_scope("Input"): maybe you like to scope as much as I do?
    dataset = tf.data.TFRecordDataset(tfr_dir)

    def parse_protocol_buffer(example_proto):
        # Pull the two scalar features out of one serialized Example.
        features = {'image_raw': tf.FixedLenFeature((), tf.string),
                    'label': tf.FixedLenFeature((), tf.int64)}
        parsed_features = tf.parse_single_example(example_proto, features)
        return parsed_features['image_raw'], parsed_features['label']

    def convert_parsed_proto_to_input(image_string, label):
        # Rebuild the image from its raw bytes; the label passes through.
        image_decoded = tf.decode_raw(image_string, tf.uint8)
        image_resized = tf.reshape(image_decoded, (128, 128, 3))
        image = tf.cast(image_resized, tf.float32)
        # I usually put my image elements in [-1, 1]
        return image * (2. / 255) - 1, label

    dataset = dataset.map(parse_protocol_buffer)
    # Must name the function exactly as defined above.
    dataset = dataset.map(convert_parsed_proto_to_input)
    dataset = dataset.shuffle(buffer_size=1000)
    # repeat() takes a number of passes over the data, not a number of
    # examples; batching is what actually consumes `batch_size`.
    dataset = dataset.repeat(epochs)
    dataset = dataset.batch(batch_size)
    return dataset
def model(image_tensor):
...
# However you want to do this.
return predictions
def loss(predictions, labels):
...
return some_loss
def train(some_loss):
...
return train_op
# Build the graph once: input pipeline -> model -> loss -> train op.
batch_size = 50
iterations = 10000
train_dataset = input_pipeline(batch_size, iterations)
train_iterator = train_dataset.make_initializable_iterator()
image, label = train_iterator.get_next()
predictions = model(image)
# loss() is declared as loss(predictions, labels), so it must receive the
# model output and the ground-truth label, not the input image.
loss_op = loss(predictions, label)
train_op = train(loss_op)
summary_op = tf.summary.merge_all()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train_writer = tf.summary.FileWriter("/YOUR/LOGDIR", sess.graph)
    # An initializable iterator has to be initialized before its first use.
    sess.run(train_iterator.initializer)
    for epoch in range(iterations + 1):
        _, summary = sess.run([train_op, summary_op])
        train_writer.add_summary(summary, epoch)
    # Flush any pending events to disk before the session goes away.
    train_writer.close()
You say you're new to TensorFlow. I hope this doesn't intimidate you. I was new to TensorFlow not long ago, and it was a pain to figure out how to make a good input pipeline. Learning TFRecords seemed impossible. You also say you're new to Python, so I'll warn you that cv2 has a reputation of difficult installs. You may want to look into other ways to resize an image (though I'd advise against PIL, which is probably even more confusing and difficult at first).
Basically, I'm posting this code because the documentation on writing TFRecords is confusing (Exhibit A vs a blog post that helped me figure it out), but TFRecords is the way I know best how to make a Dataset. Even if you don't go the TFRecords route, this could help you with the map function for datasets, e.g. notice how I pass label through convert... even though it's not used. Making a dataset (specifically from TFRecords) is a lot of lines of code, but Dataset is the preferred way to construct an input pipeline and it's designed to replace the old queue method you're using.
As a side note, the purpose of the queue strategy was to read data from memory directly into the graph without placeholders. Placeholders are slow and memory-intensive compared to the queue strategy, but Datasets are even better when implemented correctly.
I see in your comment that you want to see the placeholder namescope get connected to your graph. The dataset way, you'll see some dataset nodes on the graph. If you scope them with what I commented out, it should be apparent that everything's hooked up right. Your way, you're actually adding this queue-and-preprocess structure onto the graph. Since you'll have to de-tensor-ify the images to pass them into a placeholder, it won't be apparent that your data is flowing correctly.
Now, as I mentioned in the original post, you can just pass batch_x and batch_y into your model and forget the placeholder and dataset altogether. You'll see everything hooked up right from the preprocessing stage, if the queue is implemented right. Still, your images are large before reshaping them. Reading them will be an intensive task. I'd recommend going the hard route of learning to use Datasets and TFRecords.
I hope this helps you implement a Dataset in your code. I hope this helps you get TensorBoard running. And I hope this helps you figure out TFRecords if you decide to go that route.
PS: On the topic of TensorBoard validating that the model is working, you could attach a tf.summary.image(img) as the first line of model(...). Then check out the image dash and see if it's what you expect.
EDIT 2: example = tf.train.Example(features=tf.train.Features(feature={}))
Related
I'm trying to create a Dataset object in tensorflow 1.14 (I have some legacy code that i can't change for this specific project) starting from numpy arrays, but everytime i try i get everything copied on my graph and for this reason when i create an event log file it is huge (719 MB in this case).
Originally i tried using this function "tf.data.Dataset.from_tensor_slices()", but it didn't work, then i read it is a common problem and someone suggested me to try with generators, thus i tried with the following code, but again i got a huge event file (719 MB again)
def fetch_batch(x, y, batch):
    """Yield up to ``batch`` ``(example, label)`` pairs from ``x`` and ``y``.

    Args:
        x: Indexable collection of examples; the first axis is the example
            index. Any rank is accepted.
        y: Indexable collection of labels aligned with ``x``.
        batch: Maximum number of pairs to yield.

    Yields:
        ``(x[i], y[i])`` for ``i`` from 0 upward, stopping at ``batch`` or at
        the end of the shorter input, whichever comes first.
    """
    # Indexing with x[i] works for any rank, whereas x[i, :, :, :] only
    # accepts rank-4 input. Clamping the count makes the generator end
    # cleanly instead of running off the end of the data.
    for i in range(min(batch, len(x), len(y))):
        yield (x[i], y[i])
train, test = tf.keras.datasets.fashion_mnist.load_data()
images, labels = train
images = images/255
training_dataset = tf.data.Dataset.from_generator(fetch_batch,
args=[images, np.int32(labels), batch_size], output_types=(tf.float32, tf.int32),
output_shapes=(tf.TensorShape(features_shape), tf.TensorShape(labels_shape)))
file_writer = tf.summary.FileWriter("/content", graph=tf.get_default_graph())
I know in this case I could use tensorflow_datasets API and it would be easier, but this is a more general question, and it involves how to create datasets in general, not only using the mnist one.
Could you explain to me what am i doing wrong? Thank you
I guess it's because you are using args in from_generator. This will surely put the provided args in the graph.
What you could do is define a function that will return a generator that will iterate through your set, something like (haven't tested):
def data_generator(images, labels):
    """Return a zero-argument generator function over (image, label) pairs.

    The returned callable closes over ``images`` and ``labels`` and yields
    ``(images[i], labels[i])`` endlessly, wrapping back to index 0 after the
    last label.
    """
    def fetch_examples():
        position = 0
        while True:
            yield images[position], labels[position]
            # Wrap around once every label has been served.
            position = (position + 1) % len(labels)

    return fetch_examples
This would give in your example:
train, test = tf.keras.datasets.fashion_mnist.load_data()
images, labels = train
images = images/255
training_dataset = tf.data.Dataset.from_generator(data_generator(images, labels), output_types=(tf.float32, tf.int32),
output_shapes=(tf.TensorShape(features_shape), tf.TensorShape(labels_shape))).batch(batch_size)
file_writer = tf.summary.FileWriter("/content", graph=tf.get_default_graph())
Note that I changed fetch_batch to fetch_examples since you probably want to batch using the dataset utilities (.batch).
I am new to Tensorflow and deep learning, and I am struggling with the Dataset class. I tried a lot of things and I can’t find a good solution.
What I am trying
I have a large amount of images (500k+) to train my DNN with. This is a denoising autoencoder so I have a pair of each image. I am using the dataset class of TF to manage the data, but I think I use it really badly.
Here is how I load the filenames in a dataset:
# Holds the train/test filename datasets for paired input/output images.
class Data:
# Builds the two datasets from the input and output image directories.
def __init__(self, in_path, out_path):
# Requested image count; replaced below by the value the loader returns.
self.nb_images = 512
self.test_ratio = 0.2
self.batch_size = 8
# load filenames in input and outputs
inputs, outputs, self.nb_images = self._load_data_pair_paths(in_path, out_path, self.nb_images)
# Test size is the truncated share of nb_images; training gets the rest.
self.size_training = self.nb_images - int(self.nb_images * self.test_ratio)
self.size_test = int(self.nb_images * self.test_ratio)
# split arrays in training / validation
# The helper's result is unpacked as (test, training), in that order.
test_data_in, training_data_in = self._split_test_data(inputs, self.test_ratio)
test_data_out, training_data_out = self._split_test_data(outputs, self.test_ratio)
# transform array to tf.data.Dataset
# Each dataset element is an (input, output) pair taken from the two arrays.
self.train_dataset = tf.data.Dataset.from_tensor_slices((training_data_in, training_data_out))
self.test_dataset = tf.data.Dataset.from_tensor_slices((test_data_in, test_data_out))
I have a function to call at each epoch that will prepare the dataset. It shuffles the filenames, and transforms filenames to images and batch data.
def get_batched_data(self, seed, batch_size=None):
    """Return the training dataset shuffled, decoded and batched.

    Args:
        seed: Seed forwarded to ``Dataset.shuffle``.
        batch_size: Number of image pairs per batch. Defaults to
            ``self.batch_size`` when omitted.

    Returns:
        A ``tf.data.Dataset`` of ``(input_images, output_images)`` batches of
        single-channel decoded JPEGs.
    """
    if batch_size is None:
        batch_size = self.batch_size

    def img_to_tensor(path_in, path_out):
        # Read and decode both files of one input/output pair.
        img_string_in = tf.read_file(path_in)
        img_string_out = tf.read_file(path_out)
        im_in = tf.image.decode_jpeg(img_string_in, channels=1)
        im_out = tf.image.decode_jpeg(img_string_out, channels=1)
        return im_in, im_out

    # Shuffle the filename pairs first, with a buffer covering the whole
    # training set, so decoding only happens for elements actually consumed.
    t_datas = self.train_dataset.shuffle(self.size_training, seed=seed)
    t_datas = t_datas.map(img_to_tensor)
    t_datas = t_datas.batch(batch_size)
    return t_datas
Now during the training, at each epoch we call the get_batched_data function, make an iterator, and run it for each batch, then feed the array to the optimizer operation.
for epoch in range(nb_epoch):
    # The epoch number doubles as the shuffle seed, so each epoch gets its
    # own ordering. get_batched_data needs the batch size as well.
    batched_train = data.get_batched_data(epoch, batch_size)
    iterator_train = batched_train.make_one_shot_iterator()
    in_data, out_data = iterator_train.get_next()
    total_batch = int(data.size_training / batch_size)
    # A single session, closed when the epoch ends, replaces the two sessions
    # that were opened per epoch and never released.
    with tf.Session() as sess_iter:
        for batch in range(total_batch):
            print(f"{batch + 1} / {total_batch}")
            # Fetch both halves of the pair in one run() call so they come
            # from the same dataset element and each image is decoded once.
            in_batch, out_batch = sess_iter.run([in_data, out_data])
            in_images = in_batch.reshape((-1, 64, 64, 1))
            out_images = out_batch.reshape((-1, 64, 64, 1))
            sess.run(optimizer, feed_dict={inputs: in_images,
                                           outputs: out_images})
What do I need ?
I need to have a pipeline that loads only the images of the current batch (otherwise it will not fit in memory) and I want to shuffle the dataset in a different way for each epoch.
Questions and problems
First question, am I using the Dataset class in a good way? I saw very different things on the internet, for example in this blog post the dataset is used with a placeholder and fed during the learning with the datas. It seems strange because the data are all in an array, so loaded in memory. I don't see the point of using tf.data.dataset in this case.
I found solution by using repeat(epoch) on the dataset, like this, but the shuffle will not be different for each epoch in this case.
The second problem with my implementation is that I get an OutOfRangeError in some cases. With a small amount of data (512, as in the example) it works fine, but with a larger amount of data the error occurs. I thought it was because of a miscalculation of the number of batches due to bad rounding, or because the last batch has a smaller amount of data, but it happens in batch 32 out of 115... Is there any way to know the number of batches created after a batch(n) call on a dataset?
Sorry for this loooonng question, but I've been struggling with this for a few days.
As far as I know, Official Performance Guideline is the best teaching material to make input pipelines.
I want to shuffle the dataset in a different way for each epoch.
Using shuffle() and repeat(), you can get different shuffle pattern for each epochs. You can confirm it with the following code
# Demo: a 4-element dataset, shuffled with a buffer of 4 and repeated 3 times
# (12 elements in total); the loop below prints the first 10 of them.
dataset = tf.data.Dataset.from_tensor_slices([1,2,3,4])
# A buffer as large as the dataset lets the shuffle draw from every element.
dataset = dataset.shuffle(4)
# repeat() comes after shuffle(), so shuffling is applied within each pass.
dataset = dataset.repeat(3)
iterator = dataset.make_one_shot_iterator()
x = iterator.get_next()
with tf.Session() as sess:
for i in range(10):
# Each run() of `x` advances the iterator by one element.
print(sess.run(x))
You can also use tf.contrib.data.shuffle_and_repeat, as mentioned on the official page above.
There are some problems in your code beyond the data pipeline itself. You are confusing graph construction with graph execution: because the input pipeline is created inside the loop, you end up with as many redundant input pipelines as there are epochs. You can see the redundant pipelines in TensorBoard.
You should place your graph construction code outside of loop as the following code (pseudo code)
# Graph construction happens once, outside the epoch loop. No fixed seed is
# given, so every re-initialization draws a fresh shuffle order.
batched_train = data.get_batched_data(None, batch_size)
iterator = batched_train.make_initializable_iterator()
in_data, out_data = iterator.get_next()

for epoch in range(nb_epoch):
    # reset iterator's state
    sess.run(iterator.initializer)
    try:
        while True:
            # Fetch both tensors in ONE run() call: every run() that touches
            # get_next() advances the iterator, so two separate calls would
            # pair an input batch with the target of the following batch.
            in_batch, out_batch = sess.run([in_data, out_data])
            in_images = in_batch.reshape((-1, 64, 64, 1))
            out_images = out_batch.reshape((-1, 64, 64, 1))
            sess.run(optimizer, feed_dict={inputs: in_images,
                                           outputs: out_images})
    except tf.errors.OutOfRangeError:
        # Expected: signals that the dataset is exhausted, i.e. end of epoch.
        pass
Moreover, there are some minor inefficiencies. You loaded the list of file paths with from_tensor_slices(), so the list was embedded in your graph. (See https://www.tensorflow.org/guide/datasets#consuming_numpy_arrays for details.)
You would be better off using prefetch, and decreasing sess.run call by combining your graph.
I am trying to fine-tune inceptionv3 model using slim tensorflow library.
I am unable to understand certain things while writing the code for it. I tried to read source code (no proper documentation) and figured out few things and I am able to fine-tune it and save the check point. Here are the steps I followed
1. I created a tf.record for my training data which is fine, now I am reading the data using the below code.
import tensorflow as tf
import tensorflow.contrib.slim.nets as nets
import tensorflow.contrib.slim as slim
import matplotlib.pyplot as plt
import numpy as np
# get the data and labels here
data_path = '/home/sfarkya/nvidia_challenge/datasets/detrac/train1.tfrecords'
# Training setting
num_epochs = 100
initial_learning_rate = 0.0002
learning_rate_decay_factor = 0.7
num_epochs_before_decay = 5
num_classes = 5980
# load the checkpoint
model_path = '/home/sfarkya/nvidia_challenge/datasets/detrac/inception_v3.ckpt'
# log directory
log_dir = '/home/sfarkya/nvidia_challenge/datasets/detrac/fine_tuned_model'
# Queue-based reader: TFRecord file -> parsed example -> shuffled batches.
with tf.Session() as sess:
# Schema of one serialized example: raw image bytes plus an integer label.
feature = {'train/image': tf.FixedLenFeature([], tf.string),
'train/label': tf.FixedLenFeature([], tf.int64)}
# Create a list of filenames and pass it to a queue
# num_epochs=1: the file name is produced for a single pass only.
filename_queue = tf.train.string_input_producer([data_path], num_epochs=1)
# Define a reader and read the next record
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
# Decode the record read by the reader
features = tf.parse_single_example(serialized_example, features=feature)
# Convert the image data from string back to the numbers
# NOTE(review): assumes the images were serialized as float32 - confirm
# against the script that wrote the TFRecord file.
image = tf.decode_raw(features['train/image'], tf.float32)
# Cast label data into int32
label = tf.cast(features['train/label'], tf.int32)
# Reshape image data into the original shape
image = tf.reshape(image, [128, 128, 3])
# Creates batches by randomly shuffling tensors
# `images` / `labels` are graph tensors holding 64 examples per batch.
images, labels = tf.train.shuffle_batch([image, label], batch_size=64, capacity=128, num_threads=2,
min_after_dequeue=64)
Now I am finetuning the model using slim and this is the code.
# Build the network directly on the queue-backed `images` / `labels` tensors.
# Evaluating one batch with sess.run() and handing the resulting arrays to
# the model would bake that single batch into the graph as constants, so
# every training step would see the same 64 images. No manual initializer or
# queue-runner start is needed here: slim.learning.train runs its own managed
# session, which initializes variables and starts the queue runners.
one_hot_labels = slim.one_hot_encoding(labels, num_classes)

# load the inception model from the slim library - we are using inception v3
with slim.arg_scope(nets.inception.inception_v3_arg_scope()):
    logits, inceptionv3 = nets.inception.inception_v3(
        inputs=images, num_classes=num_classes, is_training=True,
        dropout_keep_prob=.6)

# Restore convolutional layers:
# The classifier heads are excluded because their shape depends on
# num_classes and therefore cannot come from the pre-trained checkpoint.
variables_to_restore = slim.get_variables_to_restore(
    exclude=['InceptionV3/Logits', 'InceptionV3/AuxLogits'])
init_fn = slim.assign_from_checkpoint_fn(model_path, variables_to_restore)

# loss function
# softmax_cross_entropy registers its result in the LOSSES collection, so
# get_total_loss() already contains it; adding `loss` on top again would
# count the cross-entropy term twice.
loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                       logits=logits)
total_loss = tf.losses.get_total_loss()

# train operation
train_op = slim.learning.create_train_op(
    total_loss, optimizer=tf.train.AdamOptimizer(learning_rate=1e-4))
print('Im here')

# Start training.
slim.learning.train(train_op, log_dir, init_fn=init_fn, save_interval_secs=20,
                    number_of_steps=10)
Now I have few questions about the code, which I am quite unable to figure out. Once, the code reaches slim.learning.train I don't see anything printing however, it's training, I can see in the log. Now,
1. How do I give the number of epochs to the code? Right now it's running step by step with each step has batch_size = 64.
2. How do I make sure that in the code tf.train.shuffle_batch I am not repeating my images and I am training over the whole dataset?
3. How can I print the loss values while it's training?
Here are answers to your questions.
You cannot give epochs directly to slim.learning.train. Instead, you give the number of batches as the argument. It is called number_of_steps. It is used to set an operation called should_stop_op on line 709. I assume you know how to convert number of epochs to batches.
I don't think the shuffle_batch function will repeat images because internally it uses the RandomShuffleQueue. According to this answer, the RandomShuffleQueue enqueues elements using a background thread as:
While size(queue) < capacity:
Add an element to the queue
It dequeues elements as:
While the number of elements dequeued < batch_size:
Wait until the size(queue) >= min_after_dequeue + 1 elements.
Select an element from the queue uniformly at random, remove it from the queue, and add it to the output batch.
So in my opinion, there is very little chance that the elements would be repeated, because in the dequeuing operation, the chosen element is removed from the queue. So it is sampling without replacement.
Will a new queue be created for every epoch?
The tensors being inputted to tf.train.shuffle_batch are image and label which ultimately come from the filename_queue. If that queue is producing TFRecord filenames indefinitely, then I don't think a new queue will be created by shuffle_batch. You can also create a toy code like this to understand how shuffle_batch works.
Coming to the next point, how to train over the whole dataset? In your code, the following line gets the list of TFRecord filenames.
filename_queue = tf.train.string_input_producer([data_path], num_epochs=1)
If filename_queue covers all TFRecords that you have, then you are surely training over the entire dataset. Now, how to shuffle the entire dataset is another question. As mentioned here by #mrry, there is no support (yet, AFAIK) to shuffle out-of-memory datasets. So the best way is to prepare many shards of your dataset such that each shard contains about 1024 examples. Shuffle the list of TFRecord filenames as:
filename_queue = tf.train.string_input_producer([data_path], shuffle=True, capacity=1000)
Note that I removed the num_epochs = 1 argument and set shuffle=True. This way it will produce the shuffled list of TFRecord filenames indefinitely. Now on each file, if you use tf.train.shuffle_batch, you will get a near-to-uniform shuffling. Basically, as the number of examples in each shard tend to 1, your shuffling will get more and more uniform. I like to not set num_epochs and instead terminate the training using the number_of_steps argument mentioned earlier.
To print the loss values, you could probably just edit the training.py and introduce logging.info('total loss = %f', total_loss). I don't know if there is any simpler way. Another way without changing the code is to view summaries in Tensorboard.
There are very helpful articles on how to view summaries in Tensorboard, including the link at the end of this answer. Generally, you need to do the following things.
Create summary object.
Write variables of interest into summary.
Merge all individual summaries.
Create a summary op.
Create a summary file writer.
Write the summaries throughout the training at a desired frequency.
Now steps 5 and 6 are already done automatically for you if you use slim.learning.train.
For first 4 steps, you could check the file train_image_classifier.py. Line 472 shows you how to create a summaries object. Lines 490, 512 and 536 write the relevant variables into summaries. Line 549 merges all summaries and the line 553 creates an op. You can pass this op to slim.learning.train and you can also specify how frequently you want to write summaries. In my opinion, do not write anything apart from loss, total_loss, accuracy and learning rate into the summaries, unless you want to do specific debugging. If you write histograms, then the tensorboard file could take tens of hours to load for networks like ResNet-50 (my tensorboard file once was 28 GB, which took 12 hours to load the progress of 6 days!). By the way, you could actually use train_image_classifier.py file to finetune and you will skip most of the steps above. However, I prefer this as you get to learn a lot of things.
See the launching tensorboard section on how to view the progress in a browser.
Additional remarks:
Instead of minimizing total_loss + loss, you could do the following:
# tf.losses.softmax_cross_entropy already adds its result to the
# tf.GraphKeys.LOSSES collection (its default `loss_collection`), so
# get_total_loss() picks it up without an extra tf.losses.add_loss() call;
# registering it a second time would count the cross-entropy term twice.
loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels, logits=logits)
total_loss = tf.losses.get_total_loss()
train_op = slim.learning.create_train_op(total_loss, optimizer=tf.train.AdamOptimizer(learning_rate=1e-4))
I found this post to be very useful when I was learning Tensorflow.
I'm attempting to make predictions using a trained convolutional neural network, slightly modified from the example in the example expert tensorflow tutorial. I have followed the instructions at https://www.tensorflow.org/versions/master/how_tos/reading_data/index.html to read data from a CSV file.
I have trained the model and evaluated its accuracy. I then saved the model and loaded it into a new python script for making predictions. Can I still use the batching method detailed in the link above or should I use feed_dict instead? Most tutorials I've seen online use the latter.
My code is shown below, I have essentially duplicated the code for reading from my training data, which was stored as lines within a single .csv file. Conv_nn is simply a class that contains the convolutional neural network detailed in the expert MNIST tutorial. Most of the content is probably not very useful except for the part where I run the graph.
I suspect I have badly mixed up training and prediction - I'm not sure if the test images are being fed to the prediction operation correctly or if it is valid to use the same batch operations for both datasets.
filename_queue = tf.train.string_input_producer(["data/test.csv"], num_epochs=None)
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)
# Defaults force key value and label to int, all others to float.
record_defaults = [[1]] + [[46]] + [[1.0] for i in range(436)]
# Reads in a single row from the CSV and outputs a list of scalars.
csv_list = tf.decode_csv(value, record_defaults=record_defaults)
# Packs the different columns into separate feature tensors.
location = tf.pack(csv_list[2:4])
bbox = tf.pack(csv_list[5:8])
pix_feats = tf.pack(csv_list[9:])
onehot = tf.one_hot(csv_list[1], depth=98)
# Dropout must be switched off when predicting; keeping 0.5 would randomly
# zero activations and make the predictions noisy.
keep_prob = 1.0
# Creates batches of images and labels.
# A plain, single-threaded batch keeps the rows in file order, so each
# prediction can be matched back to its CSV line; shuffle_batch would not.
image_batch, label_batch = tf.train.batch(
    [pix_feats, onehot],
    batch_size=50, num_threads=1, capacity=50000)
# Creates a graph of variables and operation nodes.
nn = Conv_nn(x=image_batch, keep_prob=keep_prob, pixels=33*13, outputs=98)
# Finish building the graph before the session and queue threads start.
prediction = tf.argmax(nn.y_conv, 1)
# The Saver has to be created after the model so it covers its variables.
saver = tf.train.Saver()
# Launch the default graph.
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    saver.restore(sess, 'model1.ckpt')
    print("Model restored.")
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        pred = sess.run([prediction])
    finally:
        # Always stop the reader threads, even if the run above fails.
        coord.request_stop()
        coord.join(threads)
This question is old, but I am going to answer anyway, as it has been viewed nearly 1000 times.
So if your model had Y labels and X inputs then
prediction=tf.argmax(Y,1)
result = prediction.eval(feed_dict={X: [data]}, session=sess)
This evaluates a single input, for example a single mnist image, but it can be a batch.
I think it would be immensely helpful to the Tensorflow community if there was a well-documented solution to the crucial task of testing a single new image against the model created by the convnet in the CIFAR-10 tutorial.
I may be wrong, but this critical step that makes the trained model usable in practice seems to be lacking. There is a "missing link" in that tutorial—a script that would directly load a single image (as array or binary), compare it against the trained model, and return a classification.
Prior answers give partial solutions that explain the overall approach, but none of which I've been able to implement successfully. Other bits and pieces can be found here and there, but unfortunately haven't added up to a working solution. Kindly consider the research I've done, before tagging this as duplicate or already answered.
Tensorflow: how to save/restore a model?
Restoring TensorFlow model
Unable to restore models in tensorflow v0.8
https://gist.github.com/nikitakit/6ef3b72be67b86cb7868
The most popular answer is the first, in which #RyanSepassi and #YaroslavBulatov describe the problem and an approach: one needs to "manually construct a graph with identical node names, and use Saver to load the weights into it". Although both answers are helpful, it is not apparent how one would go about plugging this into the CIFAR-10 project.
A fully functional solution would be highly desirable so we could port it to other single image classification problems. There are several questions on SO in this regard that ask for this, but still no full answer (for example Load checkpoint and evaluate single image with tensorflow DNN).
I hope we can converge on a working script that everyone could use.
The below script is not yet functional, and I'd be happy to hear from you on how this can be improved to provide a solution for single-image classification using the CIFAR-10 TF tutorial trained model.
Assume all variables, file names etc. are untouched from the original tutorial.
New file: cifar10_eval_single.py
import cv2
import tensorflow as tf
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('eval_dir', './input/eval',
"""Directory where to write event logs.""")
tf.app.flags.DEFINE_string('checkpoint_dir', './input/train',
"""Directory where to read model checkpoints.""")
def get_single_img():
    """Load the single test image as a grayscale pixel array.

    Returns:
        The array returned by ``cv2.imread`` for the fixed test-image path,
        read with flag 0 (grayscale).

    Raises:
        IOError: If the image file is missing or cannot be decoded.
    """
    file_path = './input/data/single/test_image.tif'
    pixels = cv2.imread(file_path, 0)
    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface later as an obscure feed error.
    if pixels is None:
        raise IOError('Could not read image: ' + file_path)
    return pixels
# Restores the latest checkpoint (if any) and runs a placeholder graph on the
# single test image. The author marks this as not yet functional: the graph
# below is a stand-in, not the CIFAR-10 model.
def eval_single_img():
# below code adapted from #RyanSepassi, however not functional
# among other errors, saver throws an error that there are no
# variables to save
with tf.Graph().as_default():
# Get image.
image = get_single_img()
# Build a Graph.
# TODO
# Create dummy variables.
x = tf.placeholder(tf.float32)
w = tf.Variable(tf.zeros([1, 1], dtype=tf.float32))
b = tf.Variable(tf.ones([1, 1], dtype=tf.float32))
y_hat = tf.add(b, tf.matmul(x, w))
# The Saver is created after the variables above, inside this graph.
saver = tf.train.Saver()
with tf.Session() as sess:
sess.run(tf.initialize_all_variables())
# Look up the most recent checkpoint recorded in the checkpoint directory.
ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
if ckpt and ckpt.model_checkpoint_path:
saver.restore(sess, ckpt.model_checkpoint_path)
print('Checkpoint found')
else:
print('No checkpoint found')
# Run the model to get predictions
predictions = sess.run(y_hat, feed_dict={x: image})
print(predictions)
def main(argv=None):
    """Recreate an empty event-log directory, then classify the image."""
    log_dir = FLAGS.eval_dir
    # Start from a clean directory: drop any previous contents first.
    if tf.gfile.Exists(log_dir):
        tf.gfile.DeleteRecursively(log_dir)
    tf.gfile.MakeDirs(log_dir)
    eval_single_img()
if __name__ == '__main__':
tf.app.run()
There are two methods to feed a single new image to the cifar10 model. The first method is a cleaner approach but requires modification in the main file, hence will require retraining. The second method is applicable when a user does not want to modify the model files and instead wants to use the existing check-point/meta-graph files.
The code for the first approach is as follows:
import tensorflow as tf
import numpy as np
import cv2
sess = tf.Session('', tf.Graph())
with sess.graph.as_default():
    # Read meta graph and checkpoint to restore tf session
    saver = tf.train.import_meta_graph("/tmp/cifar10_train/model.ckpt-200.meta")
    saver.restore(sess, "/tmp/cifar10_train/model.ckpt-200")

    # Read a single image from a file.
    img = cv2.imread('tmp.png')
    # cv2.imread returns None (it does not raise) for an unreadable file.
    if img is None:
        raise IOError("Could not read image: tmp.png")
    # OpenCV decodes to BGR, whereas the CIFAR-10 data is stored as RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # The "imgs" placeholder is declared as float32 with shape (1, 32, 32, 3).
    img = cv2.resize(img, (32, 32))
    img = np.expand_dims(img, axis=0).astype(np.float32)

    # Start the queue runners. If they are not started the program will hang
    # see e.g. https://www.tensorflow.org/programmers_guide/reading_data
    coord = tf.train.Coordinator()
    threads = []
    try:
        for qr in sess.graph.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
            threads.extend(qr.create_threads(sess, coord=coord, daemon=True,
                                             start=True))

        # In the graph created above, feed "is_training" and "imgs"
        # placeholders. Feeding them will disconnect the path from queue
        # runners to the graph and enable a path from the placeholder
        # instead. The "imgs" placeholder will be fed with the image that
        # was read above.
        logits = sess.run('softmax_linear/softmax_linear:0',
                          feed_dict={'is_training:0': False, 'imgs:0': img})

        # Print classification results.
        print(logits)
    finally:
        # Stop the prefetch threads so the script can exit cleanly.
        coord.request_stop()
        coord.join(threads, stop_grace_period_secs=10)
sess.close()
The script requires that a user creates two placeholders and a conditional execution statement for it to work.
The placeholders and conditional execution statement are added in cifar10_train.py as shown below:
def train():
    """Train CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.contrib.framework.get_or_create_global_step()
        # Training batches produced by the tutorial's input pipeline.
        with tf.device('/cpu:0'):
            images, labels = cifar10.distorted_inputs()
        # Scalar boolean switch that selects which input feeds the model.
        is_training = tf.placeholder(dtype=bool,shape=(),name='is_training')
        # Input for one user-supplied image (batch of 1, 32x32, 3 channels).
        imgs = tf.placeholder(tf.float32, (1, 32, 32, 3), name='imgs')
        # True -> pipeline batch, False -> the fed "imgs" placeholder.
        images = tf.cond(is_training, lambda:images, lambda:imgs)
        logits = cifar10.inference(images)
The inputs of the cifar10 model are connected to a queue runner object, which is a multistage queue that can prefetch data from files in parallel. See a nice animation of a queue runner here
While queue runners are efficient at prefetching a large dataset for training, they are overkill for inference/testing, where only a single file needs to be classified; they are also a bit more involved to modify and maintain.
For that reason, I have added a placeholder "is_training", which is set to True while training as shown below:
import numpy as np
# Dummy value for the "imgs" placeholder.  Every run must feed it, but the
# training branch of the tf.cond ignores it.  Zeros in the placeholder's own
# dtype replace the uninitialised float64 buffer np.ndarray() would return.
tmp_img = np.zeros(shape=(1, 32, 32, 3), dtype=np.float32)
with tf.train.MonitoredTrainingSession(
        checkpoint_dir=FLAGS.train_dir,
        hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
               tf.train.NanTensorHook(loss),
               _LoggerHook()],
        config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)) as mon_sess:
    while not mon_sess.should_stop():
        # is_training=True routes the queue-fed batch into the model.
        mon_sess.run(train_op, feed_dict={is_training: True, imgs: tmp_img})
Another placeholder "imgs" holds a tensor of shape (1,32,32,3) for the image that will be fed during inference -- the first dimension is the batch size which is one in this case. I have modified cifar model to accept 32x32 images instead of 24x24 as the original cifar10 images are 32x32.
Finally, the conditional statement feeds either the placeholder or the queue runner output to the graph. The "is_training" placeholder is set to False during inference and the "imgs" placeholder is fed a numpy array -- the numpy array is expanded from a 3-dimensional to a 4-dimensional array to conform to the input tensor of the model's inference function.
That is all there is to it. Any model can be inferred with a single/user defined test data like shown in the script above. Essentially read the graph, feed data to the graph nodes and run the graph to get the final output.
Now the second method. The other approach is to hack cifar10.py and cifar10_eval.py to change batch size to one and replace the data coming from the queue runner with the one read from a file.
Set batch size to 1:
# Default batch size set to 1 so that one image makes up the whole batch.
tf.app.flags.DEFINE_integer('batch_size', 1,
                            """Number of images to process in a batch.""")
Call inference with an image read from a file:
def evaluate():
    """Build the eval graph with logits computed from one image on disk."""
    with tf.Graph().as_default() as g:
        # Get images and labels for CIFAR-10.
        eval_data = FLAGS.eval_data == 'test'
        images, labels = cifar10.inputs(eval_data=eval_data)

        import cv2
        img = cv2.imread('tmp.png')
        # cv2.imread returns None (it does not raise) for an unreadable file.
        if img is None:
            raise IOError("Could not read image: tmp.png")
        # Add the batch axis and convert to the float tensor fed to the model.
        img = np.expand_dims(img, axis=0)
        img = tf.cast(img, tf.float32)

        # The file image replaces the queue-fed `images` as model input.
        logits = cifar10.inference(img)
Then pass logits to eval_once and modify eval_once to evaluate logits:
def eval_once(saver, summary_writer, top_k_op, logits, summary_op):
    """Run the evaluation loop and print the logits on every step.

    `logits` is the extra argument added for single-image inference.
    """
    # Elided code; presumably it sets up `sess`, `coord`, `step` and
    # `num_iter` as in the unmodified function -- verify against the file.
    ...
    while step < num_iter and not coord.should_stop():
        predictions = sess.run([top_k_op])
        # Added line: evaluate and print the logits for the single image.
        print(sess.run(logits))
There is no separate script to run this method of inference, just run cifar10_eval.py which will now read a file from the user defined location with a batch size of one.
Here's how I ran a single image at a time. I'll admit it seems a bit hacky with the reuse of getting the scope.
This is a helper function
def restore_vars(saver, sess, chkpt_dir):
    """Initialise every variable, then load the newest checkpoint if any.

    When `chkpt_dir` does not exist yet it is created so that later saves
    have somewhere to go.

    Returns:
        True when a checkpoint was found and restored, False otherwise.
    """
    sess.run(tf.initialize_all_variables())

    directory_missing = not os.path.exists(chkpt_dir)
    if directory_missing:
        try:
            os.makedirs(chkpt_dir)
        except OSError:
            # Best effort only: a failed mkdir is deliberately ignored.
            pass

    ckpt_state = tf.train.get_checkpoint_state(chkpt_dir)
    print(chkpt_dir,"path = ",ckpt_state)
    if ckpt_state is not None:
        saver.restore(sess, ckpt_state.model_checkpoint_path)
        return True
    return False
Here is the main part of the code that runs a single image at a time within the for loop.
# True until the model variables have been created and restored once.
to_restore = True
with tf.Session() as sess:
    for i in test_img_idx_set:
        # Gets the image
        images = get_image(i)
        images = np.asarray(images,dtype=np.float32)
        # Scale pixel values by 1/255 before building the input tensor.
        images = tf.convert_to_tensor(images/255.0)
        # resize image to whatever your model takes in
        images = tf.image.resize_images(images,256,256)
        images = tf.reshape(images,(1,256,256,3))
        images = tf.cast(images, tf.float32)
        #print("infer")
        with tf.variable_scope(tf.get_variable_scope()) as scope:
            # After the first pass the variables already exist, so share
            # them instead of creating a second set.
            if not to_restore:
                scope.reuse_variables()
            logits = inference(images)
        if to_restore:
            # Build the Saver only after inference() has created the model
            # variables; built earlier it has nothing to save or restore.
            saver = tf.train.Saver(max_to_keep=5,
                                   keep_checkpoint_every_n_hours=1)
            restored = restore_vars(saver, sess,FLAGS.train_dir)
            print("restored ",restored)
            to_restore = False
        logit_val = sess.run(logits)
        print(logit_val)
Here is an alternative implementation of the above using placeholders; it's a bit cleaner in my opinion, but I'll leave the above example for historical reasons.
# Build the graph once; only the fed image changes between runs.
imgs_place = tf.placeholder(tf.float32, shape=[my_img_shape_put_here])
# The fed image must hold exactly 256*256*3 values for this reshape.
images = tf.reshape(imgs_place,(1,256,256,3))
#print("infer")
logits = inference(images)
# Built after inference() so the Saver sees the model variables.
saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=1)
with tf.Session() as sess:
    # Restoring needs a live session, so it happens inside the block.
    restored = restore_vars(saver, sess,FLAGS.train_dir)
    print("restored ",restored)
    for i in test_img_idx_set:
        # Feed the pixel data for index i, scaled by 1/255, not the index.
        image = np.asarray(get_image(i), dtype=np.float32) / 255.0
        logit_val = sess.run(logits, feed_dict={imgs_place: image})
        print(logit_val)
got it working with this
# Build the model output for the image, then load the trained weights.
softmax = gn.inference(image)
saver = tf.train.Saver()
ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
# get_checkpoint_state returns None when no checkpoint state is found;
# fail with a clear message instead of an AttributeError below.
if not (ckpt and ckpt.model_checkpoint_path):
    raise IOError('No checkpoint found in %s' % FLAGS.checkpoint_dir)
with tf.Session() as sess:
    saver.restore(sess, ckpt.model_checkpoint_path)
    softmaxval = sess.run(softmax)
    print(softmaxval)
output
[[ 6.73550041e-03 4.44930716e-04 9.92570221e-01 1.00681427e-06
3.05406687e-08 2.38927707e-04 1.89839399e-12 9.36238484e-06
1.51646684e-09 3.38977535e-09]]
I don't have working code for you I'm afraid, but here's how we often tackle this problem in production:
Save out the GraphDef to disk, using something like write_graph.
Use freeze_graph to load the GraphDef and checkpoints, and save out a GraphDef with the Variables converted into Constants.
Load the GraphDef in something like label_image or classify_image.
For your example this is overkill, but I would at least suggest serializing the graph in the original example as a GraphDef, and then loading it in your script (so you don't have to duplicate the code generating the graph). With the same graph created, you should be able to populate it from a SaverDef, and the freeze_graph script may help as an example.