Preprocess images in a TensorFlow queue - python

I am loading multiple JPEG images into a TensorFlow queue. The image files all have different dimensions, so I am using WholeFileReader() to read them. I want to resize and crop each image through a prep_image function but can't figure out how to do that.
Moreover, how can I get an input batch through the queue runner, process that whole batch, and run my classifier on it?
filename_queue = tf.train.string_input_producer(tf.train.match_filenames_once("path_to_image_files"))
image_reader = tf.WholeFileReader()
_, image_file = image_reader.read(filename_queue)
image = tf.image.decode_jpeg(image_file)
image = prep_image(image)
with tf.Session() as sess:
    tf.initialize_all_variables().run()
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    image_ = sess.run([image])
    print(type(image_))
    print(image_)
    image1 = prep_image(image_)
    coord.request_stop()
    coord.join(threads)
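No answer is recorded for this one, but here is a minimal sketch of one way to do it (the 256/224 sizes and batch_size=32 are illustrative): do the resize and crop inside the graph with tf.image ops so every image gets a fixed static shape, then hand the result to tf.train.batch, which registers its own queue runner and is filled by the same start_queue_runners call used above.

import tensorflow as tf

def prep_image(image, target_height=224, target_width=224):
    # Resize first, then centre-crop, so every decoded JPEG ends up
    # with the same static shape regardless of its original size.
    image = tf.image.resize_images(image, [256, 256])
    image = tf.image.resize_image_with_crop_or_pad(image, target_height, target_width)
    return tf.cast(image, tf.float32) / 255.0

filename_queue = tf.train.string_input_producer(
    tf.train.match_filenames_once("path_to_image_files"))
_, image_file = tf.WholeFileReader().read(filename_queue)
image = prep_image(tf.image.decode_jpeg(image_file, channels=3))
# tf.train.batch adds a queue runner of its own, so once
# start_queue_runners is called the classifier can consume whole batches.
image_batch = tf.train.batch([image], batch_size=32)

Inside the session, sess.run(image_batch) then yields a [32, 224, 224, 3] array that can be fed straight to the classifier.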

Related

cuMemcpyHtoDAsync failed: invalid argument by using TensorRT (Python)

I am trying to copy a NumPy array to the GPU using TensorRT in Python, but I keep getting the error 'cuMemcpyHtoDAsync failed: invalid argument'. The array has the correct format (float32) and size, but the error remains. Does anyone have an idea of what I am doing wrong or how I can fix this error?
import tensorrt as trt
import pycuda.driver as cuda
import numpy as np
import cv2

def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    cuda.init()
    device = cuda.Device(0)
    ctx = device.make_context()
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(host_mem)
        else:
            outputs.append(host_mem)
    return inputs, outputs, bindings, stream

def do_inference(context, bindings, inputs, outputs, stream):
    # Transfer input data to the GPU.
    [cuda.memcpy_htod_async(inp, i, stream) for inp, i in zip(bindings[:len(inputs)], inputs)]
    # Run inference.
    context.execute_async(bindings=bindings, stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    [cuda.memcpy_dtoh_async(out, o, stream) for out, o in zip(outputs, bindings[len(inputs):])]
    # Synchronize the stream
    stream.synchronize()

def detect_objects(image, engine, context, threshold=0.5):
    # Preprocess the image
    image = cv2.resize(image, (640, 640))
    image = np.transpose(image, (2, 0, 1))
    image = np.expand_dims(image, axis=0)
    # Allocate buffers
    inputs, outputs, bindings, stream = allocate_buffers(engine)
    # inputs[0] = np.ascontiguousarray(image)
    inputs[0] = np.ascontiguousarray(image, dtype=np.float32) / 255.0
    print(inputs[0].shape)
    print(inputs[0].dtype)
    # Run inference
    do_inference(context, bindings, inputs, outputs, stream)
    # Postprocess the outputs
    outputs = outputs[0]
    outputs = outputs[outputs[:, 0] > threshold]
    # Get the bounding boxes
    boxes = outputs[:, 1:]
    return boxes

# Load the engine
engine = trt.Runtime(trt.Logger(trt.Logger.WARNING)).deserialize_cuda_engine(
    open("Modelle/best.engine", "rb").read())
context = engine.create_execution_context()
# Read the image
image = cv2.imread("Test.jpg")
# Detect objects in the image
boxes = detect_objects(image, engine, context)
print(boxes)
Or am I doing something fundamentally wrong when loading the TensorRT file? Is there another way to locate objects in an image?
Thanks
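One likely culprit (an assumption, since the engine itself isn't shown here): the line inputs[0] = np.ascontiguousarray(image, ...) replaces the page-locked host buffer that allocate_buffers created with a plain pageable NumPy array, whose shape and byte size may no longer match the device allocation, and the async copy can then fail with 'invalid argument'. A minimal sketch of the usual pattern is to copy into the existing pinned buffer instead (this assumes the engine's input binding really is 1x3x640x640 float32):

# Fill the pinned host buffer in place rather than rebinding inputs[0].
preprocessed = np.ascontiguousarray(image, dtype=np.float32) / 255.0
np.copyto(inputs[0], preprocessed.ravel())
do_inference(context, bindings, inputs, outputs, stream)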

Can't print out TFRecord features

I am writing Python code to create a TFRecord file that stores A and C as features, but I am having trouble printing the A and C values back out of the TFRecord file. Can anyone take a look at this?
# Writing a TFRecord file
import tensorflow as tf
import numpy as np

writer = tf.python_io.TFRecordWriter('output.tfrecord')
A = [1, 3, 4]
C = [1.1, 2.1, 3.1]
feature_A = tf.train.Feature(int64_list=tf.train.Int64List(value=A))
feature_C = tf.train.Feature(float_list=tf.train.FloatList(value=C))
features = {'A': feature_A, 'C': feature_C}
example = tf.train.Example(features=tf.train.Features(feature=features))
writer.write(example.SerializeToString())
writer.close()
# Reading the TFRecord file
import tensorflow as tf

reader = tf.TFRecordReader()
filename_queue = tf.train.string_input_producer(["output.tfrecord"])
_, serialized_example = reader.read(filename_queue)
feature_set = {'A': tf.FixedLenFeature([], tf.int64),
               'C': tf.FixedLenFeature([], tf.float32)}
features = tf.parse_single_example(serialized_example, features=feature_set)
A = features['A']
C = features['C']
with tf.Session() as sess:
    print(sess.run([A, C]))  # prints nothing
There are two issues:
FixedLenFeature has to have its size defined, so change to:
feature_set = {'A': tf.FixedLenFeature([3], tf.int64),
               'C': tf.FixedLenFeature([3], tf.float32)}
You also need to start the queues that read the input, so your code should look like:
with tf.Session() as sess:
    # Initialize local variables (needed for the input queues).
    init_op = tf.local_variables_initializer()
    sess.run(init_op)
    # Create a coordinator, launch the queue runner threads.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for _ in range(1):
            print(sess.run([A, C]))
    except tf.errors.OutOfRangeError:
        # When done, ask the threads to stop.
        print('')
    finally:
        coord.request_stop()
        # Wait for threads to finish.
        coord.join(threads)
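For reference, with the [3] shapes in place the loop should print something like [array([1, 3, 4]), array([1.1, 2.1, 3.1], dtype=float32)].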

Using the entire GPU during realtime inference

I'm using the following code to do realtime inference with MobileNet:
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
camera = cv2.VideoCapture(0)
camera.set(3, 1280)
camera.set(4, 1024)
# Loads label file, strips off carriage return
label_lines = [line.rstrip() for line
               in tf.gfile.GFile('tf_model/output_labels.txt')]
gpu_options = tf.GPUOptions(allow_growth=True, per_process_gpu_memory_fraction=0.9)
sess_config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False,
                             allow_soft_placement=True)
sess_config.gpu_options.per_process_gpu_memory_fraction = 0.9
sess_config.gpu_options.allow_growth = True

def grabVideoFeed():
    grabbed, frame = camera.read()
    return frame if grabbed else None

def initialSetup():
    with tf.device('/gpu:0'):
        with tf.gfile.FastGFile('tf_model/output_graph.pb', 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            tf.import_graph_def(graph_def, name='')

initialSetup()

with tf.Session(config=sess_config) as sess:
    softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')
    while True:
        frame = grabVideoFeed()
        # do the rest of the classification
Although I have tried to use the GPU, it is still not being used fully; GPU usage is only 8%. How can I sort this out?
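No answer is recorded here, but one plausible explanation (an assumption, since no profiling output is shown): with one sess.run per camera frame, the GPU idles while each frame is captured and preprocessed, so low utilisation is expected, and the memory-fraction settings above only reserve memory rather than increase compute. A rough sketch of batching several frames per run, which amortises the per-call overhead (the input tensor name 'input:0' and the 224x224 size are hypothetical and depend on the exported graph):

import numpy as np

BATCH_SIZE = 8
frames = []
while True:
    frame = grabVideoFeed()
    if frame is None:
        continue
    frames.append(cv2.resize(frame, (224, 224)))
    if len(frames) == BATCH_SIZE:
        # One sess.run over the whole batch instead of one per frame.
        # Any scaling/normalisation the model expects would go here too.
        batch = np.stack(frames).astype(np.float32)
        predictions = sess.run(softmax_tensor, {'input:0': batch})
        frames = []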

How to exit a TensorFlow session with a queue runner

Say I have defined a function that loads one label/features pair from a TFRecords file as follows:
def read_one_image(tfrecords_path):
    queue = tf.train.string_input_producer([tfrecords_path])
    reader = tf.TFRecordReader()
    key, value = reader.read(queue)
    features = tf.parse_single_example(
        value,
        features={'label': tf.FixedLenFeature([], tf.int64),
                  'image': tf.FixedLenFeature([784], tf.int64)})
    label = features['label']
    image = features['image']
    return label, image
Fetching the images in a session works fine if I keep the session open:
tf.reset_default_graph()
label, image = read_one_image("mnist_train.tfrecords")
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
tf.train.start_queue_runners(sess=sess)
for i in range(10):
    one_label, one_image = sess.run([label, image])
    print(one_label, one_image.shape)
However, if I use a context manager like so
g = tf.Graph()
with g.as_default():
    label, image = read_one_image("mnist_train.tfrecords")
    with tf.Session(graph=g) as sess:
        sess.run(tf.global_variables_initializer())
        tf.train.start_queue_runners(sess=sess)
        for i in range(10):
            one_label, one_image = sess.run([label, image])
            print(one_label, one_image.shape)
I get an error (the "7" and "(784,)" are the loop's own output interleaved with it): ERROR:tensorflow:Exception in QueueRunner: Attempted to use a closed Session.
Maybe I am misunderstanding how the queue runner works, but since I called sess.run, it should have fetched a data pair 10 times. Now, is there a way to quit/exit/close the session without exhausting the queue?
You need to use a tf.train.Coordinator:
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
tf.train.start_queue_runners(sess=sess, coord=coord)
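The answer stops short of the shutdown step; a sketch of the complete pattern, following the same example, is to ask the coordinator to stop the queue threads and join them before the with block closes the session:

with tf.Session(graph=g) as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for i in range(10):
        one_label, one_image = sess.run([label, image])
        print(one_label, one_image.shape)
    # Stop the queue runners before the session closes, so no thread
    # is left trying to enqueue into a closed session.
    coord.request_stop()
    coord.join(threads)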

TensorFlow: define placeholder/operation names in an image pipeline

I would like to save my trained TensorFlow model so that it can be deployed by restoring the model file (I'm following this example, which seems to make sense). To do this, however, I need to have named tensors, so that I can reload the variables with something like:
graph = tf.get_default_graph()
w1 = graph.get_tensor_by_name("my_tensor:0")
I am queuing images from a list of filenames using string_input_producer (code below), but how do I name the tensors so that I can reload them at a later stage?
import tensorflow as tf

flags = tf.app.flags
conf = flags.FLAGS

class ImageDataSet(object):
    def __init__(self, img_list_path, num_epoch, batch_size):
        # Build the record list queue
        input_file = open(img_list_path, 'r')
        self.record_list = []
        for line in input_file:
            line = line.strip()
            self.record_list.append(line)
        filename_queue = tf.train.string_input_producer(self.record_list, num_epochs=num_epoch)
        image_reader = tf.WholeFileReader()
        _, image_file = image_reader.read(filename_queue)
        image = tf.image.decode_jpeg(image_file, conf.img_colour_channels)
        # preprocess
        # ...
        min_after_dequeue = 1000
        capacity = min_after_dequeue + 400 * batch_size
        self.images = tf.train.shuffle_batch(image, batch_size=batch_size, capacity=capacity,
                                             min_after_dequeue=min_after_dequeue)
I assume that you want to restore the graph for testing or deployment.
For these purposes, you can edit your graph by inserting a placeholder as an entry point for the test data.
To edit the graph, you can use TensorFlow's graph editor, or build a new graph with the placeholder and save it.
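As for naming the tensors themselves, a minimal sketch (the name "input_images" is illustrative): wrap the tensor you will need later in tf.identity, which pins an explicit name on it:

self.images = tf.train.shuffle_batch(image, batch_size=batch_size, capacity=capacity,
                                     min_after_dequeue=min_after_dequeue)
# Pin a stable, explicit name on the batch tensor so it can be looked
# up by name after the graph is restored.
self.images = tf.identity(self.images, name="input_images")

# After restoring the graph elsewhere:
graph = tf.get_default_graph()
images = graph.get_tensor_by_name("input_images:0")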
