I am working on an object_detection API implemented in tensorflow 1.14 and opencv 4.1, where my task is to recognize personal protection equipment weard by workers at various construction site gates. We are using RTSP streams, where I am already using threading to minimize latency, but still some times the stream crashes.
So I decided to restart the whole python script every 'n' times of detection to prevent the whole thing to crash because of corrupted frames and so on, but tensorflow is very slow with the loading of inference graph and such for the first time (for me its ~20 seconds), which is unacceptable to wait for the workers to get inside the site at the gate.
So now I am considering to just stop and then restart JUST the RTSP stream with opencv, which constantly feeds with frames the inference machinery for executing object detection on them.
Now I have not found any helpful threads on this topic, so that is why I am writing it here.
The relevant part of my code is here:
from threading import Thread
import cv2, time
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
from threading import Thread
import tensorflow as tf
import tensorflow.contrib.tensorrt as trt
import zipfile
from distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
from object_detection.utils import ops as utils_ops
import streamlit as st
######Initialize some important stuff###############
...
###### End of initialization###############
class VideoStreamWidget(object): # create threading
detected_counter1 = 0
detected_counter2 = 0
PASS_score1 = 0
NOPASS_score1 = 0
PASS_score2 = 0
NOPASS_score2 = 0
scoresumCounter1=0
scoresumCounter2=0
PASS_score_tmp =0
NOPASS_score_tmp=0
detection_index = 0
detboxesAreaTMP1 = [0,0,0,0]
detboxesAreaTMP2 = [0,0,0,0]
def __init__(self, src=rtsp_Url):
# Create a VideoCapture object
self.capture = cv2.VideoCapture(src)
# Start the thread to read frames from the video stream
self.thread = Thread(target=self.update, args=())
self.thread.daemon = True
self.thread.start()
self.counter = 0
def update(self): # here the update of "raw" frames happen
# Read the next frame from the stream in a different thread
while True:
if self.capture.isOpened():
(self.status, self.frame) = self.capture.read()
def show_frame(self): # here inference and post processing of images happen
if __name__ == '__main__':
video_stream_widget = VideoStreamWidget(rtsp_Url)
with detection_graph.as_default():
with tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.35))) as sess:
# Get handles to input and output tensors
ops = tf.get_default_graph().get_operations()
all_tensor_names = {output.name for op in ops for output in op.outputs}
tensor_dict = {}
for key in ['num_detections', 'detection_boxes', 'detection_scores','detection_classes', 'detection_masks']:
tensor_name = key + ':0'
if tensor_name in all_tensor_names:
tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(tensor_name)
resfresherCounter = 0
while True:
# I think here it should be implemented somehow to close the stream and reopen it
try:
video_stream_widget.show_frame() # here the objec tdetection happens
except cv2.error as e:
print(e)
continue
My whole code is here:
https://pastebin.com/7qBFwwfy
Thank you in advance!
Related
I'm trying to write a program in Python, where the main thread will read depth frames from a RealSense camera and put them in a queue,
and another thread that will run inference on them with a YoloV5 TensorRT model. The program runs on a Jetson Nano.
For some reason, after reading about 15 frames the program crashes with the following error:
Traceback (most recent call last):
File "test2.py", line 59, in <module>
img = np.asanyarray(c.colorize(DEPTH).get_data())
RuntimeError: Error occured during execution of the processing block! See the log for more info
Here is the full code:
from queue import Queue
import numpy as np
from ObjectDetection.objectDetectionV2 import ODModel, letterbox
import torch
import time
from threading import Thread
import cv2
from Camera.Realsense import RealSense # custom class for reading from Realsense camera
def detect(queue):
while True:
if not queue.empty():
img0 = queue.get()
if img0 is None: break
img = letterbox(img0, 416, stride=32, auto=False)[0] # YoloV5 preprocessing
img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
img = np.ascontiguousarray(img)
print("loading image...")
img = torch.tensor(img)
print("loaded image")
img = img.float() # uint8 to fp16/32
img /= 255 # 0 - 255 to 0.0 - 1.0
result = model(img)
print(result)
if __name__ == '__main__':
queue = Queue()
print("loading model")
model = ODModel()
print("model loaded")
rs = RealSense()
p = Thread(target=detect, args=(queue,))
c = rs.colorizer
p.start()
for i in range(100):
RGB, DEPTH = rs.getData()
img = np.asanyarray(c.colorize(DEPTH).get_data())
queue.put(img)
queue.put(None)
p.join()
model.destroy()
print("Exiting Main Thread")
I tried commenting everything out and checking line by line, and I think the error is because of the c.colorizer taking too much time? When I deleted it the error went away (but of course the inference failed).
If I don't remove it then the error appears after the line img = np.ascontiguousarray(img). But then why is the error not on this line?
If I limit the size of the queue to at most 14, the problem stops, but then the queue is blocking so everything slows down. Also the error mentions a log, but I have no idea where it is.
Can anyone help me understand what I did wrong? Thank you in advance.
this line reserves memory, and limiting the queue size also limits memory usage so you most likely ran out of memory.
a possible solution is to just limit the queue size to 1 sample, you always get the most recent result that is within the timeframe of your processing time.
another solution is to use a deque of say, 5 elements, your producer will append, and your consumer will pop to get the most recent item, and if the deque length is greater than 3 elements then your producer will popleft to keep the deque bounded, and the "counting" should be in the worker thread instead of the main thread, while the main thread will have an infinite loop to guarantee 100 images were processed before breaking out of the infinite loop. (simply switching role with the worker thread.)
Why would the subprocess take over the device camera? The problem here is I have a more complicated setup than the example, where I need to keep the subprocess running while being able to change the camera on the fly. It ends up throwing a resource busy error.
import time
import cv2 # '4.6.0'
from multiprocessing import Process
cap = cv2.VideoCapture('/dev/video0')
time.sleep(10)
# check `fuser /dev/video0` --> shows 1 PID is using the camera
# ex. /dev/video0: 11615m
def run():
print('running capture...')
# check `fuser /dev/video0` --> another PID is accessing the camera, why?
# ex. /dev/video0: 11615m 11783m
for _ in range(100):
cap.read()
time.sleep(0.05)
proc = Process(target=run)
proc.start()
proc.join()
These error can be reproduce with any adafruit device.These example is for GPS.
I have tested several adafruit products, they are all great quality. However they all seems to present the same problem when use with the multiprocessing module. The script dose not run and throws a Segmentation fault (core dumped). The script runs with threading but not multiprocessing.
These does not works:
import time
import board
import adafruit_bno055
import threading
import multiprocessing
fpsFilt = 0
timeStamp = 0
i2c = board.I2C()
sensor = adafruit_bno055.BNO055_I2C(i2c)
def test():
while True:
print("Quaternion: {}".format(sensor.quaternion))
Gps = multiprocessing.Process(target=test)
Gps.start()
But these works:
import time
import board
import adafruit_bno055
import threading
import multiprocessing
fpsFilt = 0
timeStamp = 0
i2c = board.I2C()
sensor = adafruit_bno055.BNO055_I2C(i2c)
def test():
while True:
print("Quaternion: {}".format(sensor.quaternion))
Gps = threading.Thread(target=test)
Gps.start()
Is there any way to use an adafruit product with multiprocessing?Thanks.
Try this program. I have eliminated all the global variables, initialized the device entirely in the secondary Process, and protected the program's entry point with a test for __main__. These are all standard practices when writing this type of program.
Otherwise it is the same code as your program.
import time
import board
import adafruit_bno055
import threading
import multiprocessing
def test():
i2c = board.I2C()
sensor = adafruit_bno055.BNO055_I2C(i2c)
while True:
print("Quaternion: {}".format(sensor.quaternion))
def main():
Gps = multiprocessing.Process(target=test)
Gps.start()
if __name__ == "__main__":
main()
while True:
time.sleep(1.0)
I have written a Python script that runs on a Raspberry Pi and utilizes the PiCamera library to capture video, the Python Image Library (PIL) to extract individual frames, and then does some image processing on it using DIPLib and OpenCV. The goal of the program is to continuously capture frames of 3D printer filament and return the diameter value. The actual image processing portion works just fine- it's the frame capture that is causing me issues.
I am following the PiCamera Rapid Capture and processing tutorial from PiCamera and using the Python Threading library as they have done to ideally utilize more of th Pi's processor for processing and not getting bottlenecked and falling behind.
The implementation of this code is built to "Drop Frames" when there are not any threads available for processing. As I understand it, this should prevent the Pi from storing any extra frames in the buffer for processing, thus preventing a memory overflow (not sure if that's the correct terminology) from happening. Unfortunately this is exactly what is happening.
I am running the PiCamera at about 3 frames-per-second which gives ~10 threads the ability to process all the incoming images, that is until the memory starts to overflow. However, if I leave the script running for 5-10 minutes, the memory (as shown using htop) slowly compounds until it reaches maximum capacity- at which point the script basically drops all incoming frames.
UPDATE: here is the error it shows:
Exception has occurred: MemoryError
exception: no description
File "/home/pi/Desktop/FilamentPuller_01/pi_camera_threading_03.py", line 45, in run
img = np.array(Image.open(self.stream)
My theory is that the video recording functionality of PiCamera is holding a buffer of some sort, but I am not sure how to see it or how to stop it from doing that. I've been using VSCode on the Pi to debug, and each thread doesn't seem to holding any more data at a time than they should- essentially there should be no reason for them to compound more data from one cycle to the next as all the variables are reused.
I have included my code below, please let me know what other information I can provide to help with solving this issue. Thank you for any insight you might have
import io
import sys
import time
import threading
import cv2
import numpy as np
import os
import picamera
from PIL import Image
import csv
from diameter_detection import diameter_detection
from moving_average import MovingAverageFilter
from serial_write import serial_write
##### CAMERA SETTINGS ######
focalValue = 40 # focus
cameraResolution = (1080, 1000)
cameraZoom = (0.3,0,0.3,0.8)
cameraFrameRate = 1
# create an array for storing filtered diameter values
filtered_dia_meas = []
# create moving average filter
ma5_filter = MovingAverageFilter(2)
class ImageProcessor(threading.Thread):
def __init__(self, owner):
super(ImageProcessor, self).__init__()
self.stream = io.BytesIO()
self.event = threading.Event()
self.terminated = False
self.owner = owner
self.start()
def run(self):
# This method runs in a separate thread
while not self.terminated:
# Wait for an image to be written to the stream
if self.event.wait(1):
try:
self.stream.seek(0)
# Read the image and do some processing on it
img = np.array(Image.open(self.stream))
try:
diameter = diameter_detection(img)
except:
serial_write(0)
print('Could not read diameter, pausing and retrying...')
time.sleep(0.1)
# add the diameter to the filter
ma5_filter.step(diameter)
#filtered_dia_meas.append(ma5_filter.current_state())
# display the current filtered diameter to the Terminal
print(ma5_filter.current_state())
try:
# attempt to send the diameter to the connected serial device
serial_write(ma5_filter.current_state())
except:
print('Serial write failed!')
# Set done to True if you want the script to terminate
# at some point
#self.owner.done=True
finally:
# Reset the stream and event
self.stream.seek(0)
self.stream.truncate()
self.event.clear()
# Return ourselves to the available pool
with self.owner.lock:
self.owner.pool.append(self)
class ProcessOutput(object):
def __init__(self):
self.done = False
# Construct a pool of 10 image processors along with a lock
# to control access between threads
self.lock = threading.Lock()
self.pool = [ImageProcessor(self) for i in range(10)]
print('Threaded processes created')
self.processor = None
def write(self, buf):
if buf.startswith(b'\xff\xd8'):
# New frame; set the current processor going and grab
# a spare one
if self.processor:
self.processor.event.set()
with self.lock:
if self.pool:
self.processor = self.pool.pop()
else:
# No processor's available, we'll have to skip this frame
print('Frame Skipped!')
self.processor = None
if self.processor:
self.processor.stream.write(buf)
def flush(self):
# When told to flush (end of recording), shut
# down in an orderly fashion. First, add the current processor
# back to the pool
if self.processor:
with self.lock:
self.pool.append(self.processor)
self.processor = None
# Now, empty the pool, joining each thread as we go
while True:
with self.lock:
try:
proc = self.pool.pop()
except IndexError:
pass # pool is empty
proc.terminated = True
proc.join()
with picamera.PiCamera(resolution=cameraResolution) as camera:
print('Succesfully created camera object')
camera.framerate = cameraFrameRate
camera.zoom = (0.3,0,0.3,0.8)
# set focus motor
os.system("i2cset -y 0 0x0c %d %d" % (focalValue,0))
print('Camera focus set')
time.sleep(2)
print('Starting recording...')
output = ProcessOutput()
camera.start_recording(output, format='mjpeg')
while not output.done:
camera.wait_recording(1)
camera.stop_recording()
I am trying to get inputs when I am playing the game (Warcraft III to be exact). Due to it being an RTS game it requires a high number of inputs in short periods. I am trying to use pyWinhook to get the inputs, mss to get screenshots and cv for image processing.
I have tried disabling image processing just after getting inputs to decrease the amount of work for the script. Nonetheless, it doesn't make much difference. I have also tried to decrease graphic details in-game - no difference also.
I am suspecting that thread is throttling due to high amount of inputs, and not very good hardware on which it's used (I am working at Lenovo X240 with 4GB of RAM).
import os
import cv2
import mss
import numpy as np
import pyWinhook as pyHook
import pythoncom
def main():
# file names for training data arrays
file_name = 'training_data.npy'
copy_file_name = 'training_data_copy.npy'
# deciding if previous file with data is saved. If yes, it is opened. If not it's created
if os.path.isfile(file_name):
print('File exists, loading previous data!')
print(os.path.realpath(file_name))
training_data = list(np.load(file_name, allow_pickle=True))
np.save(copy_file_name, training_data)
else:
print('File does not exist, starting fresh!')
training_data = []
# getting screenshot and processing it for optimal machine learning processing
def get_screen():
with mss.mss() as sct:
screen = np.array(sct.grab((0, 0, 1366, 768)))
screen = cv2.cvtColor(screen, cv2.COLOR_BGR2GRAY)
screen = cv2.resize(screen, (136, 76))
return screen
# saving data after acquiring 2500 sets of inputs and screenshots
def save_data(screen, output):
training_data.append([screen, output])
if len(training_data) % 2500 == 0:
print(len(training_data))
np.save(file_name, training_data)
print("Frames taken: " + str(len(training_data)))
# getting inputs and screen on mouse event
def OnMouseEvent(event):
screen = get_screen()
action = [event.Position, event.Wheel]
output = action
save_data(screen, output)
return True
# getting inputs and screen on keyboard event
def OnKeyboardEvent(event):
screen = get_screen()
output = event.Key
save_data(screen, output)
return True
# create a hook manager
hm = pyHook.HookManager()
# watch for all mouse events
hm.MouseAll = OnMouseEvent
hm.KeyUp = OnKeyboardEvent
# set the hook
hm.HookMouse()
hm.HookKeyboard()
# wait forever
try:
pythoncom.PumpMessages()
except KeyboardInterrupt:
pass
# looping getting data
while True:
pass
if __name__ == '__main__':
main()
I want to optimize getting inputs as much as possible because right now inputs in-game are lagging and hanging (which is not proper, because optimal inputs are the basics for machine learning modules).
I am fully aware that it can be an unsolvable problem due to slow hardware and too high amount of inputs per minute, but I want to try anyway.