I'm using the following code to do real-time inference with MobileNet:
import os
import cv2
import tensorflow as tf

os.environ["CUDA_VISIBLE_DEVICES"] = '0'
camera = cv2.VideoCapture(0)
camera.set(3, 1280)  # 3 = cv2.CAP_PROP_FRAME_WIDTH
camera.set(4, 1024)  # 4 = cv2.CAP_PROP_FRAME_HEIGHT
# Loads label file, strips off carriage return
label_lines = [line.rstrip() for line
               in tf.gfile.GFile('tf_model/output_labels.txt')]
gpu_options = tf.GPUOptions(allow_growth=True, per_process_gpu_memory_fraction=0.9)
sess_config = tf.ConfigProto(gpu_options=gpu_options,
                             log_device_placement=False,
                             allow_soft_placement=True)
def grabVideoFeed():
    grabbed, frame = camera.read()
    return frame if grabbed else None
def initialSetup():
    with tf.device('/gpu:0'):
        with tf.gfile.FastGFile('tf_model/output_graph.pb', 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            tf.import_graph_def(graph_def, name='')
initialSetup()
with tf.Session(config=sess_config) as sess:
    softmax_tensor = sess.graph.get_tensor_by_name('final_result:0')
    while True:
        frame = grabVideoFeed()
        # do the rest of the classification
Although I have tried to use the GPU, it is not fully utilized: GPU usage stays at around 8%. How can I sort this out?
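A likely reason for the 8% figure is that each sess.run call processes a single frame, so the GPU sits idle between calls rather than computing. A minimal sketch of batching several frames per call, assuming a hypothetical preprocess helper that resizes/normalizes a frame, and an input tensor named 'input:0' (the actual name depends on how the graph was exported):
import numpy as np
batch = []
while True:
    frame = grabVideoFeed()
    if frame is None:
        continue
    batch.append(preprocess(frame))  # hypothetical helper, not in the original code
    if len(batch) == 8:  # illustrative batch size
        # One sess.run over 8 frames keeps the GPU busier than 8 single-frame calls.
        predictions = sess.run(softmax_tensor, {'input:0': np.stack(batch)})
        batch = []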
I am trying to copy a NumPy array to the GPU using TensorRT in Python, but I keep getting the error 'cuMemcpyHtoDAsync failed: invalid argument'. The array has the correct format (float32) and size, but the error remains. Does anyone have an idea of what I am doing wrong or how I can fix this error?
import tensorrt as trt
import pycuda.driver as cuda
import numpy as np
import cv2

def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    cuda.init()
    device = cuda.Device(0)
    ctx = device.make_context()
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(host_mem)
        else:
            outputs.append(host_mem)
    return inputs, outputs, bindings, stream
def do_inference(context, bindings, inputs, outputs, stream):
    # Transfer input data to the GPU.
    [cuda.memcpy_htod_async(inp, i, stream) for inp, i in zip(bindings[:len(inputs)], inputs)]
    # Run inference.
    context.execute_async(bindings=bindings, stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    [cuda.memcpy_dtoh_async(out, o, stream) for out, o in zip(outputs, bindings[len(inputs):])]
    # Synchronize the stream
    stream.synchronize()
def detect_objects(image, engine, context, threshold=0.5):
    # Preprocess the image
    image = cv2.resize(image, (640, 640))
    image = np.transpose(image, (2, 0, 1))
    image = np.expand_dims(image, axis=0)
    # Allocate buffers
    inputs, outputs, bindings, stream = allocate_buffers(engine)
    # inputs[0] = np.ascontiguousarray(image)
    inputs[0] = np.ascontiguousarray(image, dtype=np.float32) / 255.0
    print(inputs[0].shape)
    print(inputs[0].dtype)
    # Run inference
    do_inference(context, bindings, inputs, outputs, stream)
    # Postprocess the outputs
    outputs = outputs[0]
    outputs = outputs[outputs[:, 0] > threshold]
    # Get the bounding boxes
    boxes = outputs[:, 1:]
    return boxes
# Load the engine
engine = trt.Runtime(trt.Logger(trt.Logger.WARNING)).deserialize_cuda_engine(open("Modelle/best.engine", "rb").read())
context = engine.create_execution_context()
# Read the image
image = cv2.imread("Test.jpg")
# Detect objects in the image
boxes = detect_objects(image, engine, context)
print(boxes)
Or am I doing something fundamentally wrong when loading the TensorRT file? Is there another way to locate an object in an image?
Thanks
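One detail worth checking (my reading, not confirmed by the post): inputs[0] = np.ascontiguousarray(...) rebinds the name to an ordinary pageable NumPy array, discarding the page-locked buffer that allocate_buffers created, and async copies like cuMemcpyHtoDAsync reject pageable host memory with 'invalid argument'. A minimal sketch of filling the pinned buffer in place instead, assuming the engine's input binding really is 1x3x640x640 so the sizes line up:
preprocessed = np.ascontiguousarray(image, dtype=np.float32) / 255.0
# Keep the page-locked host buffer and copy into it;
# rebinding inputs[0] to a new pageable array makes the async copy fail.
np.copyto(inputs[0], preprocessed.ravel())
do_inference(context, bindings, inputs, outputs, stream)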
I am trying to run the MTCNN and FaceNet models on 2 cameras simultaneously. I am not getting any error while doing this, but the code doesn't give me any results.
It just loads both models and doesn't give me any predictions. Can anyone help me with this?
I have created 2 separate graphs and sessions using g = tf.Graph() for MTCNN and FaceNet.
I think this issue comes from multi-processing with TensorFlow, as it might try to feed the MTCNN input into the FaceNet graph (this is my assumption).
Please let me know if you have any ideas about this. Thanks.
FaceNet:
with face_rec_graph.graph.as_default():
    self.sess = tf.Session()
    with self.sess.as_default():
        self.__load_model(model_path)
        self.x = tf.get_default_graph() \
            .get_tensor_by_name("input:0")
        self.embeddings = tf.get_default_graph() \
            .get_tensor_by_name("embeddings:0")
        self.phase_train_placeholder = tf.get_default_graph() \
            .get_tensor_by_name("phase_train:0")
        print("Model loaded")
face_rec_graph was created as follows:
class FaceRecGraph(object):
    def __init__(self):
        self.graph = tf.Graph()
MTCNN:
graph = tf.Graph()
with graph.as_default():
    with open(model_path, 'rb') as f:
        graph_def = tf.GraphDef.FromString(f.read())
        tf.import_graph_def(graph_def, name='')
self.graph = graph
config = tf.ConfigProto(
    allow_soft_placement=True,
    intra_op_parallelism_threads=4,
    inter_op_parallelism_threads=4)
config.gpu_options.allow_growth = True
self.sess = tf.Session(graph=graph, config=config)
No error is raised; both cameras just stop giving any results.
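A sketch of the pairing I would double-check first (my assumption about the failure mode, not code from the post): every sess.run must be given tensors from its own graph, so keeping each (graph, session) pair together makes it harder to feed MTCNN tensors into the FaceNet session by accident:
mtcnn_graph = tf.Graph()
with mtcnn_graph.as_default():
    # import the MTCNN graph_def here
    pass
mtcnn_sess = tf.Session(graph=mtcnn_graph)

facenet_graph = tf.Graph()
with facenet_graph.as_default():
    # import the FaceNet graph_def here
    pass
facenet_sess = tf.Session(graph=facenet_graph)

# Always run each model in its own session:
# boxes = mtcnn_sess.run(mtcnn_outputs, feed_dict={mtcnn_input: frame})
# embeddings = facenet_sess.run(facenet_emb, feed_dict={facenet_input: faces})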
Say I have defined a function that loads one label/features pair from a TFRecords file as follows:
def read_one_image(tfrecords_path):
    queue = tf.train.string_input_producer([tfrecords_path])
    reader = tf.TFRecordReader()
    key, value = reader.read(queue)
    features = tf.parse_single_example(
        value,
        features={'label': tf.FixedLenFeature([], tf.int64),
                  'image': tf.FixedLenFeature([784], tf.int64)})
    label = features['label']
    image = features['image']
    return label, image
Fetching the images in a session works fine if I keep the session open:
tf.reset_default_graph()
label, image = read_one_image("mnist_train.tfrecords")
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
tf.train.start_queue_runners(sess=sess)
for i in range(10):
    one_label, one_image = sess.run([label, image])
    print(one_label, one_image.shape)
However, if I use a context manager like so
g = tf.Graph()
with g.as_default():
    label, image = read_one_image("mnist_train.tfrecords")

with tf.Session(graph=g) as sess:
    sess.run(tf.global_variables_initializer())
    tf.train.start_queue_runners(sess=sess)
    for i in range(10):
        one_label, one_image = sess.run([label, image])
        print(one_label, one_image.shape)
I get an error (the stray 7 and (784,) fragments are the loop's own output interleaved with it):
ERROR:tensorflow:Exception in QueueRunner: Attempted to use a closed Session.
Maybe I am misunderstanding how the queue runner works, but since I called the sess.run method, it should have fetched a data pair 10 times. Now, is there a way to quit/exit/close the session without exhausting the queue?
You need to use a tf.train.Coordinator:
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
tf.train.start_queue_runners(sess=sess, coord=coord)
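The missing piece is the shutdown: the coordinator lets you stop the queue-runner threads before the with block closes the session. The full pattern (standard TF1 queue-runner usage) looks like:
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
for i in range(10):
    one_label, one_image = sess.run([label, image])
    print(one_label, one_image.shape)
# Stop the threads and wait for them, so the session can close
# without "Attempted to use a closed Session" exceptions.
coord.request_stop()
coord.join(threads)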
I would like to save my trained TensorFlow model so it can be deployed by restoring the model file (I'm following this example, which seems to make sense). To do this, however, I need to have named tensors, so that I can reload the variables with something like:
graph = tf.get_default_graph()
w1 = graph.get_tensor_by_name("my_tensor:0")
I am queuing images from a list of filenames using string_input_producer (code below), but how do I name the tensors so that I can reload them at a later stage?
import tensorflow as tf

flags = tf.app.flags
conf = flags.FLAGS

class ImageDataSet(object):
    def __init__(self, img_list_path, num_epoch, batch_size):
        # Build the record list queue
        input_file = open(img_list_path, 'r')
        self.record_list = []
        for line in input_file:
            line = line.strip()
            self.record_list.append(line)
        filename_queue = tf.train.string_input_producer(self.record_list, num_epochs=num_epoch)
        image_reader = tf.WholeFileReader()
        _, image_file = image_reader.read(filename_queue)
        image = tf.image.decode_jpeg(image_file, conf.img_colour_channels)
        # preprocess
        # ...
        min_after_dequeue = 1000
        capacity = min_after_dequeue + 400 * batch_size
        self.images = tf.train.shuffle_batch(image, batch_size=batch_size, capacity=capacity,
                                             min_after_dequeue=min_after_dequeue)
I assume that you want to restore the graph for testing or deployment.
For these purposes, you can edit your graph by inserting a placeholder as an entry point for the test data.
To edit the graph, you can use TensorFlow's graph editor, or build a new graph with the placeholder and save it.
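A concrete way to get a stable name, not spelled out in the answer above, is to wrap the tensor in a named op; a minimal sketch with an illustrative name:
# Give the batch op a fixed, known name so it can be fetched after restore.
self.images = tf.identity(self.images, name="input_images")

# Later, after restoring the graph:
graph = tf.get_default_graph()
images = graph.get_tensor_by_name("input_images:0")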
I am loading multiple JPEG images into a TensorFlow queue. The image files all have different dimensions, so I am using tf.WholeFileReader() to read them. I want to resize and crop each image through a prep_image function but can't figure out how to do that.
Moreover, how can I get an input batch through the queue runner, process that whole batch, and run my classifier on it?
filename_queue = tf.train.string_input_producer(tf.train.match_filenames_once("path_to_image_files"))
image_reader = tf.WholeFileReader()
_, image_file = image_reader.read(filename_queue)
image = tf.image.decode_jpeg(image_file)
image = prep_image(image)
with tf.Session() as sess:
    tf.initialize_all_variables().run()
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    image_ = sess.run([image])
    print(type(image_))
    print(image_)
    image1 = prep_image(image_)
    coord.request_stop()
    coord.join(threads)
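Not part of the question's code, but one way to fill in prep_image and the batching is to do both inside the graph, so the queue-runner threads feed the classifier directly; a sketch with an illustrative 224x224 target size:
def prep_image(image, target_h=224, target_w=224):
    # Resize to a fixed size first so every image has a static shape,
    # then center-crop/pad to the target; batching requires static shapes.
    image = tf.image.resize_images(image, [256, 256])
    image = tf.image.resize_image_with_crop_or_pad(image, target_h, target_w)
    return image / 255.0

image = tf.image.decode_jpeg(image_file, channels=3)
image = prep_image(image)
# Batch in the graph; sess.run(image_batch) then yields one whole batch.
image_batch = tf.train.batch([image], batch_size=32)
# predictions = my_classifier(image_batch)  # hypothetical classifier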