Recently I was commissioned to work with a robot that is already built in Gazebo, using ROS. The robot has a camera, and I have been able to view its image through a .py script. Now I am asked to filter colors (green, red and blue); I already have the high and low values, and I can do the filtering on a static image, but I don't know how to combine both programs so that the color filter is applied to what the robot's camera transmits instead of to a saved image.
All of this while using OpenCV.
Both codes are attached.
////////////////////////////////////////////////////////////////////////////////////////
Code to view the Gazebo virtual camera
////////////////////////////////////////////////////////////////////////////////////////
#!/usr/bin/env python
import rospy
from sensor_msgs.msg import Image
from cv_bridge import CvBridge, CvBridgeError
import cv2
bridge_object = CvBridge() # create the cv_bridge object
image_received = 0 #Flag to indicate that we have already received an image
cv_image = 0 #This is just to create the global variable cv_image
def show_image():
image_sub = rospy.Subscriber("/two_wheels_robot/camera1/image_raw",Image,camera_callback)
r = rospy.Rate(10) #10Hz
while not rospy.is_shutdown():
if image_received:
cv2.waitKey(1)
r.sleep()
cv2.destroyAllWindows()
def camera_callback(data):
global bridge_object
global cv_image
global image_received
image_received=1
try:
print("received ROS image, I will convert it to opencv")
# We select bgr8 because its the OpenCV encoding by default
cv_image = bridge_object.imgmsg_to_cv2(data, desired_encoding="bgr8")
#Add your code to save the image here:
#Save the image "img" in the current path
cv2.imwrite('robot_image.jpg', cv_image)
#Display the image in a window
cv2.imshow('Image from robot camera', cv_image)
except CvBridgeError as e:
print(e)
if __name__ == '__main__':
rospy.init_node('load_image', anonymous=True)
show_image()
////////////////////////////////////////////////////////////////////////////////////////
Code to view an image segmented by color (RGB), one window per color
////////////////////////////////////////////////////////////////////////////////////////
#!/usr/bin/env python
import rospy
from sensor_msgs.msg import Image
#Import the numpy library which will help with some matrix operations
import numpy as np
from cv_bridge import CvBridge, CvBridgeError
import cv2
bridge_object = CvBridge() # create the cv_bridge object
image_received = 0 #Flag to indicate that we have already received an image
cv_image = 0 #This is just to create the global variable cv_image
def show_image():
image_sub = rospy.Subscriber("/two_wheels_robot/camera1/image_raw",Image,camera_callback)
r = rospy.Rate(10) #10Hz
image = cv2.imread('/home/user/catkin_ws/src/opencv_for_robotics_images/Unit_2/Course_images/Filtering.png')
while not rospy.is_shutdown():
#I resized the image so it can be easier to work with
image = cv2.resize(image,(300,300))
#Once we read the image we need to change the color space to HSV
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
#Hsv limits are defined
#here is where you define the range of the color you're looking for
#each value of the vector corresponds to the H,S & V values respectively
min_green = np.array([50,220,220])
max_green = np.array([60,255,255])
min_red = np.array([170,220,220])
max_red = np.array([180,255,255])
min_blue = np.array([110,220,220])
max_blue = np.array([120,255,255])
#This is the actual color detection
#Here we will create a mask that contains only the colors defined in your limits
#This mask has only one channel, so it's black and white
mask_g = cv2.inRange(hsv, min_green, max_green)
mask_r = cv2.inRange(hsv, min_red, max_red)
mask_b = cv2.inRange(hsv, min_blue, max_blue)
#We use the mask with the original image to get the colored post-processed image
res_b = cv2.bitwise_and(image, image, mask= mask_b)
res_g = cv2.bitwise_and(image,image, mask= mask_g)
res_r = cv2.bitwise_and(image,image, mask= mask_r)
cv2.imshow('Green',res_g)
cv2.imshow('Red',res_r)
cv2.imshow('Blue',res_b)
cv2.imshow('Original',image)
cv2.waitKey(1)
r.sleep()
cv2.destroyAllWindows()
def camera_callback(data):
global bridge_object
global cv_image
global image_received
image_received=1
try:
print("received ROS image, I will convert it to opencv")
# We select bgr8 because its the OpenCV encoding by default
cv_image = bridge_object.imgmsg_to_cv2(data, desired_encoding="bgr8")
except CvBridgeError as e:
print(e)
if __name__ == '__main__':
rospy.init_node('opencv_example1', anonymous=True)
show_image()
First of all, there are a lot of structural problems in that code. You should consider using classes and following the ROS philosophy of implementing separate nodes that communicate with each other. Please also take a look at the rospy tutorials.
To "mix" both programs you can either create a function that processes the color in your first program that receives the image
def process_image(image):
image = cv2.resize(image,(300,300))
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
min_green = np.array([50,220,220])
max_green = np.array([60,255,255])
min_red = np.array([170,220,220])
max_red = np.array([180,255,255])
min_blue = np.array([110,220,220])
max_blue = np.array([120,255,255])
mask_g = cv2.inRange(hsv, min_green, max_green)
mask_r = cv2.inRange(hsv, min_red, max_red)
mask_b = cv2.inRange(hsv, min_blue, max_blue)
res_b = cv2.bitwise_and(image, image, mask= mask_b)
res_g = cv2.bitwise_and(image,image, mask= mask_g)
res_r = cv2.bitwise_and(image,image, mask= mask_r)
cv2.imshow('Green',res_g)
cv2.imshow('Red',res_r)
cv2.imshow('Blue',res_b)
cv2.imshow('Original',image)
cv2.waitKey(1)
Then call it every time you receive an image (inside the callback). Usually it's preferable to split all this processing across multiple threads, but as a starting point you could use something like this:
#!/usr/bin/env python
import rospy
from sensor_msgs.msg import Image
from cv_bridge import CvBridge, CvBridgeError
import cv2
import numpy as np # needed for the HSV limit arrays in process_image()
bridge_object = CvBridge() # create the cv_bridge object
image_received = 0 #Flag to indicate that we have already received an image
cv_image = 0 #This is just to create the global variable cv_image
def show_image():
image_sub = rospy.Subscriber("/two_wheels_robot/camera1/image_raw",Image,camera_callback)
r = rospy.Rate(10) #10Hz
while not rospy.is_shutdown():
if image_received:
cv2.waitKey(1)
r.sleep()
cv2.destroyAllWindows()
def process_image(image):
image = cv2.resize(image,(300,300))
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
min_green = np.array([50,220,220])
max_green = np.array([60,255,255])
min_red = np.array([170,220,220])
max_red = np.array([180,255,255])
min_blue = np.array([110,220,220])
max_blue = np.array([120,255,255])
mask_g = cv2.inRange(hsv, min_green, max_green)
mask_r = cv2.inRange(hsv, min_red, max_red)
mask_b = cv2.inRange(hsv, min_blue, max_blue)
res_b = cv2.bitwise_and(image, image, mask= mask_b)
res_g = cv2.bitwise_and(image,image, mask= mask_g)
res_r = cv2.bitwise_and(image,image, mask= mask_r)
cv2.imshow('Green',res_g)
cv2.imshow('Red',res_r)
cv2.imshow('Blue',res_b)
cv2.imshow('Original',image)
cv2.waitKey(1)
def camera_callback(data):
global bridge_object
global cv_image
global image_received
image_received=1
try:
print("received ROS image, I will convert it to opencv")
# We select bgr8 because its the OpenCV encoding by default
cv_image = bridge_object.imgmsg_to_cv2(data, desired_encoding="bgr8")
#Add your code to save the image here:
#Save the image "img" in the current path
cv2.imwrite('robot_image.jpg', cv_image)
## Calling the processing function
process_image(cv_image)
cv2.imshow('Image from robot camera', cv_image)
except CvBridgeError as e:
print(e)
if __name__ == '__main__':
rospy.init_node('load_image', anonymous=True)
show_image()
You should also avoid declaring global variables inside a function.
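To tie both suggestions together, here is a minimal class-based sketch (same camera topic as above; the class and node names are made up) that keeps the bridge and the processing inside one object instead of relying on globals. Only the green filter is shown to keep it short; the red and blue masks work exactly the same way.
#!/usr/bin/env python
import rospy
import cv2
import numpy as np
from sensor_msgs.msg import Image
from cv_bridge import CvBridge, CvBridgeError

class ColorFilterNode(object):
    def __init__(self):
        # Everything that used to be a global now lives on the instance
        self.bridge = CvBridge()
        self.image_sub = rospy.Subscriber("/two_wheels_robot/camera1/image_raw",
                                          Image, self.camera_callback)

    def camera_callback(self, data):
        try:
            cv_image = self.bridge.imgmsg_to_cv2(data, desired_encoding="bgr8")
        except CvBridgeError as e:
            rospy.logerr(e)
            return
        self.process_image(cv_image)

    def process_image(self, image):
        image = cv2.resize(image, (300, 300))
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        # Green mask only; add the red and blue ranges the same way
        mask_g = cv2.inRange(hsv, np.array([50, 220, 220]), np.array([60, 255, 255]))
        res_g = cv2.bitwise_and(image, image, mask=mask_g)
        cv2.imshow('Original', image)
        cv2.imshow('Green', res_g)
        cv2.waitKey(1)

if __name__ == '__main__':
    rospy.init_node('color_filter_node', anonymous=True)
    ColorFilterNode()
    rospy.spin()
Because all the work happens in the callback, rospy.spin() replaces the manual 10 Hz loop.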
I have the code below for detecting certain blobs in an image. Before combining it with the tkinter code, the camera code worked fine.
Now that I have combined them, it throws an incorrect-buffer error.
I tried calling rawCapture.truncate(0) at the end, but then no GUI is displayed at all.
Does anyone have any clue why that is?
The code is below; I am a beginner in Python, so no doubt it is messy.
import cv2
import numpy as np
from tkinter import*
from PIL import Image, ImageTk
from picamera.array import PiRGBArray
from picamera import PiCamera
import time
# initialize the camera and grab a reference to the raw camera capture
camera = PiCamera()
camera.resolution = (320,240)
camera.brightness = 16
camera.framerate = 10
rawCapture = PiRGBArray(camera, size=(320,240))
camera.zoom=(0.495,0.435,0.073,0.073)
win = Tk()
win.geometry("670x600+200+30")
win.resizable(False, False)
w = 320
h = 240
color = "#581845"
frame_1 = Frame(win, width=670, height=700, bg=color).place(x=0, y=0)
var7 = IntVar()
var8 = IntVar()
W = 150
thresh = Scale(frame_1, label="thresh1", from_=0, to=255, orient=HORIZONTAL, variable=var7, activebackground='#339999')
thresh.set(0)
thresh.place(x=500, y=10, width=W)
thresh2 = Scale(frame_1, label="thresh2", from_=255, to=0, orient=HORIZONTAL, variable=var8, activebackground='#339999')
thresh2.set(255)
thresh2.place(x=500, y=80, width=W)
cap = cv2.VideoCapture(0)
label1 = Label(frame_1, width=w, height=h)
label1.place(x=10, y=160)
label2 = Label(frame_1, width=w, height=h)
label2.place(x=350, y=160)
label3 = Label(frame_1, width=w, height=h)
label3.place(x=10, y=370)
label4 = Label(frame_1, width=w, height=h)
label4.place(x=350, y=370)
#def select_img():
# _, img = cap.read()
for frame in camera.capture_continuous(rawCapture, format="bgr", use_video_port=False):
# grab the raw NumPy array representing the image, then initialize the timestamp
# and occupied/unoccupied text
image = frame.array
grayFrame = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(grayFrame, 50, 255, cv2.THRESH_BINARY)
# Read image
im = image
# Set up the detector with default parameters.
im=cv2.bitwise_not(im)
params = cv2.SimpleBlobDetector_Params()
params.minThreshold = 40
params.maxThreshold = 255
params.filterByArea = True
params.minArea = 80
params.filterByCircularity = False
params.filterByConvexity = False
params.filterByInertia = False
detector = cv2.SimpleBlobDetector_create(params)
# Detect blobs.
keypoints = detector.detect(im)
im=cv2.bitwise_not(im)
# Draw detected blobs as red circles.
# cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS ensures the size of the circle corresponds to the size of blob
im_with_keypoints_or = cv2.drawKeypoints(im, keypoints, np.array([]), (255,255,255), cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
im_with_keypoints_bi = cv2.drawKeypoints(thresh, keypoints, np.array([]), (0,0,255), cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
im_with_keypoints_gr = cv2.drawKeypoints(grayFrame, keypoints, np.array([]), (0,0,255), cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
file = open('color.txt', 'w')
file.write("l_b = " + str(50) + '\n' + "u_b = " + str(50))
file.close()
#res = cv2.bitwise_and(im, im, mask=mask)
#rgb2 = cv2.cvtColor(res, cv2.COLOR_BGR2RGB)
image = Image.fromarray(im_with_keypoints_or)
iago = ImageTk.PhotoImage(image)
label1.configure(image=iago)
label1.image = iago
image_2 = Image.fromarray(im_with_keypoints_bi)
iago_2 = ImageTk.PhotoImage(image_2)
label2.configure(image=iago_2)
label2.image = iago_2
image4 = Image.fromarray(im_with_keypoints_gr)
iago4 = ImageTk.PhotoImage(image4)
label4.configure(image=iago4)
label4.image = iago4
#win.after(10, select_img)
#rawCapture.truncate(0)
select_img()
win.mainloop()
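For what it's worth, a rough (untested) sketch of the win.after() approach that the commented-out select_img hints at: grab one frame per call, clear the buffer with truncate(0), update the labels, and let Tk reschedule the function, so win.mainloop() stays in charge instead of the blocking capture_continuous loop.
def select_img():
    # Capture a single frame into the reusable buffer
    camera.capture(rawCapture, format="bgr", use_video_port=True)
    image = rawCapture.array
    grayFrame = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Renamed to 'binary' so it does not overwrite the Scale widget called 'thresh'
    ret, binary = cv2.threshold(grayFrame, 50, 255, cv2.THRESH_BINARY)
    # ... same blob detection and drawing as in the loop above, using 'binary' ...
    iago = ImageTk.PhotoImage(Image.fromarray(image))
    label1.configure(image=iago)
    label1.image = iago  # keep a reference so it is not garbage collected
    # The buffer must be cleared before the next capture, otherwise picamera
    # complains about an incorrect buffer length
    rawCapture.truncate(0)
    # Reschedule ourselves; Tk's mainloop keeps the GUI responsive in between
    win.after(10, select_img)

select_img()
win.mainloop()
Note that in the code above select_img() is called on the last lines while its definition is still commented out, which fails with a NameError before any camera problem can show up.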
I am trying to use the LK optical flow from this tutorial to get some motion estimation into a robot simulation made with ROS + Gazebo.
I managed to set up the bridge between ROS and OpenCV via cv_bridge, as per the code below, and could implement some sample features that work "frame by frame" without major issues.
However, the optical flow tutorial only seems to accept video inputs, such as a webcam and/or saved video files, and this is where I got stuck.
How could I apply the LK optical flow in a "frame-by-frame" approach, or configure my cv_bridge to act as a "custom" camera device?
This is my cv_bridge code so far:
#!/usr/bin/env python
import roslib
import rospy
import sys
import cv2
import numpy as np
from sensor_msgs.msg import Image, CameraInfo
from cv_bridge import CvBridge, CvBridgeError
class OpticalFlow(object):
def __init__(self):
#Setting up the bridge and the subscriber
self.bridge = CvBridge()
self.image_sub = rospy.Subscriber("/realsense/color/image_raw", Image,self.image_callback)
# Parameters for Good Features Detection
self.feature_params = dict( maxCorners = 100,
qualityLevel = 0.3,
minDistance = 7,
blockSize = 7 )
# Parameters for Lucas-Kanade optical flow
self.lk_params = dict( winSize = (150,150),
maxLevel = 2,
criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
# Create some random colors for the tracking points
self.color = np.random.randint(0,255,(100,3))
def image_callback(self,ros_image):
# Using cv_bridge() to convert the ROS image to OpenCV format
try:
cv_image = self.bridge.imgmsg_to_cv2(ros_image, "bgr8")#bgr8
hsv_image = cv2.cvtColor(cv_image, cv2.COLOR_BGR2HSV)
gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)
except CvBridgeError as e:
print(e)
cv2.imshow("Robot Image", cv_image) # The actual (non-processed image from the simulation/robot)
#cv2.imshow('Image HSV', hsv_image)
#cv2.imshow('Image Gray', gray)
frame = np.array(cv_image, dtype=np.uint8)
# Calling the Optical Flow Function
display = self.optical_flow(frame,self.feature_params,self.lk_params,self.color)
def optical_flow(self,frame,feature_params,lk_params,color):
old_frame = frame
old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
p0 = cv2.goodFeaturesToTrack(old_gray, mask = None, **feature_params)
mask = np.zeros_like(old_frame)
while(1):
newframe = frame
frame_gray = cv2.cvtColor(newframe, cv2.COLOR_BGR2GRAY)
# Calculating the optical flow
p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params)
# Select good points
good_new = p1[st==1]
good_old = p0[st==1]
# Draw the tracks
for i,(new,old) in enumerate(zip(good_new, good_old)):
a,b = new.ravel()
c,d = old.ravel()
mask = cv2.line(mask, (a,b),(c,d), color[i].tolist(), 2)
frame = cv2.circle(frame,(a,b),5,color[i].tolist(),-1)
img = cv2.add(frame,mask)
cv2.imshow('LK_Flow',img)
k = cv2.waitKey(30) & 0xff
if k == 27:
break
# Now update the previous frame and previous points
old_gray = frame_gray.copy()
p0 = good_new.reshape(-1,1,2)
def main():
optical_flow_object = OpticalFlow()
rospy.init_node('KLT_Node', anonymous=True)
rospy.loginfo("\nWaiting for image topics...\n...")
try:
rospy.spin()
except KeyboardInterrupt:
rospy.loginfo("\nShutting Down...\n...")
cv2.destroyAllWindows()
if __name__ == '__main__':
main()
I would also like to know, if possible, where I can find more information about how to tune these parameters (from the tutorial):
# params for ShiTomasi corner detection
feature_params = dict( maxCorners = 100,
qualityLevel = 0.3,
minDistance = 7,
blockSize = 7 )
# Parameters for lucas kanade optical flow
lk_params = dict( winSize = (15,15),
maxLevel = 2,
criteria = (cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 0.03))
I would recommend using cv2.calcOpticalFlowPyrLK(old_frame, cur_frame, ...) or cv2.calcOpticalFlowFarneback(old_frame, cur_frame, ...) (dense optical flow). There is plenty of information about these methods in the OpenCV documentation. From personal experience, they work great!
Let me know if you have any questions or problems!
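To use calcOpticalFlowPyrLK frame by frame with the ROS callback, one option (a rough sketch built on the class above; prev_gray, prev_pts and mask are new attributes, not part of the original code) is to keep the previous frame and feature points as state and compute the flow once per callback, instead of looping inside optical_flow():
def image_callback(self, ros_image):
    try:
        cv_image = self.bridge.imgmsg_to_cv2(ros_image, "bgr8")
    except CvBridgeError as e:
        print(e)
        return
    gray = cv2.cvtColor(cv_image, cv2.COLOR_BGR2GRAY)

    # First frame (or all points lost): detect features and remember the frame
    if getattr(self, 'prev_pts', None) is None or len(self.prev_pts) == 0:
        self.prev_gray = gray
        self.prev_pts = cv2.goodFeaturesToTrack(gray, mask=None, **self.feature_params)
        self.mask = np.zeros_like(cv_image)
        return

    # Flow between the stored previous frame and the current one
    p1, st, err = cv2.calcOpticalFlowPyrLK(self.prev_gray, gray,
                                           self.prev_pts, None, **self.lk_params)
    if p1 is not None:
        good_new = p1[st == 1]
        good_old = self.prev_pts[st == 1]
        for i, (new, old) in enumerate(zip(good_new, good_old)):
            a, b = new.ravel()
            c, d = old.ravel()
            self.mask = cv2.line(self.mask, (int(a), int(b)), (int(c), int(d)),
                                 self.color[i].tolist(), 2)
            cv_image = cv2.circle(cv_image, (int(a), int(b)), 5,
                                  self.color[i].tolist(), -1)
        self.prev_pts = good_new.reshape(-1, 1, 2)
    self.prev_gray = gray

    cv2.imshow('LK_Flow', cv2.add(cv_image, self.mask))
    cv2.waitKey(1)
The while(1) loop and the k == 27 check are no longer needed, because rospy.spin() already delivers a new frame on every callback.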
I'm trying to use TensorFlow Lite on a Raspberry Pi to detect a specific category (motorcycles only) using the pre-trained model. Since the motorcycle category already exists in the pre-trained model, I assume I don't need to retrain it. Is there any way to ignore the other objects in the model? I am using the code from Edje Electronics provided at this link: https://github.com/EdjeElectronics/TensorFlow-Lite-Object-Detection-on-Android-and-Raspberry-Pi
# Import packages
import os
import argparse
import cv2
import numpy as np
import sys
import time
from threading import Thread
import importlib.util
from utils import label_map_util
from utils import visualization_utils as vis_util
# Define VideoStream class to handle streaming of video from webcam in separate processing thread
class VideoStream:
"""Camera object that controls video streaming from the Picamera"""
def __init__(self,resolution=(640,480),framerate=30):
# Initialize the PiCamera and the camera image stream
self.stream = cv2.VideoCapture(0)
ret = self.stream.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
ret = self.stream.set(3,resolution[0])
ret = self.stream.set(4,resolution[1])
# Read first frame from the stream
(self.grabbed, self.frame) = self.stream.read()
# Variable to control when the camera is stopped
self.stopped = False
def start(self):
# Start the thread that reads frames from the video stream
Thread(target=self.update,args=()).start()
return self
def update(self):
# Keep looping indefinitely until the thread is stopped
while True:
# If the camera is stopped, stop the thread
if self.stopped:
# Close camera resources
self.stream.release()
return
# Otherwise, grab the next frame from the stream
(self.grabbed, self.frame) = self.stream.read()
def read(self):
# Return the most recent frame
return self.frame
def stop(self):
# Indicate that the camera and thread should be stopped
self.stopped = True
# Define and parse input arguments
parser = argparse.ArgumentParser()
parser.add_argument('--modeldir', help='Folder the .tflite file is located in',
required=True)
parser.add_argument('--graph', help='Name of the .tflite file, if different than detect.tflite',
default='detect.tflite')
parser.add_argument('--labels', help='Name of the labelmap file, if different than labelmap.txt',
default='labelmap.txt')
parser.add_argument('--threshold', help='Minimum confidence threshold for displaying detected objects',
default=0.5)
parser.add_argument('--resolution', help='Desired webcam resolution in WxH. If the webcam does not support the resolution entered, errors may occur.',
default='1280x720')
parser.add_argument('--edgetpu', help='Use Coral Edge TPU Accelerator to speed up detection',
action='store_true')
args = parser.parse_args()
MODEL_NAME = args.modeldir
GRAPH_NAME = args.graph
LABELMAP_NAME = args.labels
min_conf_threshold = float(args.threshold)
resW, resH = args.resolution.split('x')
imW, imH = int(resW), int(resH)
use_TPU = args.edgetpu
# Import TensorFlow libraries
# If tflite_runtime is installed, import interpreter from tflite_runtime, else import from regular tensorflow
# If using Coral Edge TPU, import the load_delegate library
pkg = importlib.util.find_spec('tflite_runtime')
if pkg:
from tflite_runtime.interpreter import Interpreter
if use_TPU:
from tflite_runtime.interpreter import load_delegate
else:
from tensorflow.lite.python.interpreter import Interpreter
if use_TPU:
from tensorflow.lite.python.interpreter import load_delegate
# If using Edge TPU, assign filename for Edge TPU model
if use_TPU:
# If user has specified the name of the .tflite file, use that name, otherwise use default 'edgetpu.tflite'
if (GRAPH_NAME == 'detect.tflite'):
GRAPH_NAME = 'edgetpu.tflite'
# Get path to current working directory
CWD_PATH = os.getcwd()
# Path to .tflite file, which contains the model that is used for object detection
PATH_TO_CKPT = os.path.join(CWD_PATH,MODEL_NAME,GRAPH_NAME)
# Path to label map file
PATH_TO_LABELS = os.path.join(CWD_PATH,MODEL_NAME,LABELMAP_NAME)
# Load the label map
with open(PATH_TO_LABELS, 'r') as f:
labels = [line.strip() for line in f.readlines()]
# Have to do a weird fix for label map if using the COCO "starter model" from
# https://www.tensorflow.org/lite/models/object_detection/overview
# First label is '???', which has to be removed.
if labels[0] == '???':
del(labels[0])
# Load the Tensorflow Lite model.
# If using Edge TPU, use special load_delegate argument
if use_TPU:
interpreter = Interpreter(model_path=PATH_TO_CKPT,
experimental_delegates=[load_delegate('libedgetpu.so.1.0')])
print(PATH_TO_CKPT)
else:
interpreter = Interpreter(model_path=PATH_TO_CKPT)
interpreter.allocate_tensors()
# Get model details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]
floating_model = (input_details[0]['dtype'] == np.float32)
input_mean = 127.5
input_std = 127.5
# Initialize frame rate calculation
frame_rate_calc = 1
freq = cv2.getTickFrequency()
# Initialize video stream
videostream = VideoStream(resolution=(imW,imH),framerate=30).start()
time.sleep(1)
#for frame1 in camera.capture_continuous(rawCapture, format="bgr",use_video_port=True):
while True:
# Start timer (for calculating frame rate)
t1 = cv2.getTickCount()
# Grab frame from video stream
frame1 = videostream.read()
# Acquire frame and resize to expected shape [1xHxWx3]
frame = frame1.copy()
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame_resized = cv2.resize(frame_rgb, (width, height))
input_data = np.expand_dims(frame_resized, axis=0)
# Normalize pixel values if using a floating model (i.e. if model is non-quantized)
if floating_model:
input_data = (np.float32(input_data) - input_mean) / input_std
# Perform the actual detection by running the model with the image as input
interpreter.set_tensor(input_details[0]['index'],input_data)
interpreter.invoke()
# Retrieve detection results
boxes = interpreter.get_tensor(output_details[0]['index'])[0] # Bounding box coordinates of detected objects
classes = interpreter.get_tensor(output_details[1]['index'])[0] # Class index of detected objects
scores = interpreter.get_tensor(output_details[2]['index'])[0] # Confidence of detected objects
#num = interpreter.get_tensor(output_details[3]['index'])[0] # Total number of detected objects (inaccurate and not needed)
# Loop over all detections and draw detection box if confidence is above minimum threshold
for i in range(len(scores)):
if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):
# Get bounding box coordinates and draw box
# Interpreter can return coordinates that are outside of image dimensions, need to force them to be within image using max() and min()
ymin = int(max(1,(boxes[i][0] * imH)))
xmin = int(max(1,(boxes[i][1] * imW)))
ymax = int(min(imH,(boxes[i][2] * imH)))
xmax = int(min(imW,(boxes[i][3] * imW)))
cv2.rectangle(frame, (xmin,ymin), (xmax,ymax), (10, 255, 0), 2)
# Draw label
object_name = labels[int(classes[i])] # Look up object name from "labels" array using class index
label = '%s: %d%%' % (object_name, int(scores[i]*100)) # Example: 'person: 72%'
labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2) # Get font size
label_ymin = max(ymin, labelSize[1] + 10) # Make sure not to draw label too close to top of window
cv2.rectangle(frame, (xmin, label_ymin-labelSize[1]-10), (xmin+labelSize[0], label_ymin+baseLine-10), (255, 255, 255), cv2.FILLED) # Draw white box to put label text in
cv2.putText(frame, label, (xmin, label_ymin-7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2) # Draw label text
# Draw framerate in corner of frame
cv2.putText(frame,'FPS: {0:.2f}'.format(frame_rate_calc),(30,50),cv2.FONT_HERSHEY_SIMPLEX,1,(255,255,0),2,cv2.LINE_AA)
# All the results have been drawn on the frame, so it's time to display it.
cv2.imshow('Object detector', frame)
# Calculate framerate
t2 = cv2.getTickCount()
time1 = (t2-t1)/freq
frame_rate_calc= 1/time1
# Press 'q' to quit
if cv2.waitKey(1) == ord('q'):
break
# Clean up
cv2.destroyAllWindows()
videostream.stop()
I've solved my own problem by adding these lines of code inside the for loop:
for i in range(len(scores)):
if classes[i] != 3:
scores[i]=0
if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):
NOTE: the number 3 is the index of the motorcycle label in the label map.
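If you prefer not to hard-code the index, an equivalent variation (assuming the entry in labelmap.txt is spelled 'motorcycle') is to compare the label name instead:
for i in range(len(scores)):
    # Zero out every detection that is not a motorcycle so it fails the
    # confidence check below and is never drawn
    if labels[int(classes[i])] != 'motorcycle':
        scores[i] = 0
    if ((scores[i] > min_conf_threshold) and (scores[i] <= 1.0)):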
In my situation, the depth map returned by OpenCV's StereoBM does not make sense, regardless of parameter tuning.
I'm doing research for a design project that involves OpenCV and using stereo vision to generate a depth map. I'm currently able to load both of my webcams and generate a depth map using StereoBM. However, the resulting data isn't useful at the moment, as my screenshot demonstrates below. So I created a small Python app that helps me tune the StereoBM parameters, which hasn't helped.
My question: do the cameras have to be calibrated in order to be used with the StereoBM function?
If not, what are some alternatives to help me improve my results (e.g. increasing the resolution, using StereoSGBM, etc.)?
Code
import cv2
import time
import numpy as np
from Tkinter import *
oldVal = 15
def oddVals(n):
global oldVal
n = int(n)
if not n % 2:
window_size.set(n+1 if n > oldVal else n-1)
oldVal = window_size.get()
minDispValues = [16,32,48,64]
def minDispCallback(n):
n = int(n)
newvalue = min(minDispValues, key=lambda x:abs(x-float(n)))
min_disp.set(newvalue)
# Display the sliders to control the stereo vision
master = Tk()
master.title("StereoBM Settings");
min_disp = Scale(master, from_=16, to=64, command=minDispCallback, length=600, orient=HORIZONTAL, label="Minimum Disparities")
min_disp.pack()
min_disp.set(16)
window_size = Scale(master, from_=5, to=255, command=oddVals, length=600, orient=HORIZONTAL, label="Window Size")
window_size.pack()
window_size.set(15)
Disp12MaxDiff = Scale(master, from_=5, to=30, length=600, orient=HORIZONTAL, label="Max Difference")
Disp12MaxDiff.pack()
Disp12MaxDiff.set(0)
UniquenessRatio = Scale(master, from_=0, to=30, length=600, orient=HORIZONTAL, label="Uniqueness Ratio")
UniquenessRatio.pack()
UniquenessRatio.set(15)
SpeckleRange = Scale(master, from_=0, to=60, length=600, orient=HORIZONTAL, label="Speckle Range")
SpeckleRange.pack()
SpeckleRange.set(34)
SpeckleWindowSize = Scale(master, from_=60, to=150, length=600, orient=HORIZONTAL, label="Speckle Window Size")
SpeckleWindowSize.pack()
SpeckleWindowSize.set(100)
master.update()
vcLeft = cv2.VideoCapture(0) # Load video campture for the left camera
#vcLeft.set(cv2.cv.CV_CAP_PROP_FRAME_WIDTH,420);
#vcLeft.set(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT,340);
vcLeft.set(3,640) # Set camera width
vcLeft.set(4,480) # Set camera height
vcRight = cv2.VideoCapture(1) # Load video capture for the right camera
#vcRight.set(cv2.cv.CV_CAP_PROP_FRAME_WIDTH,420);
#vcRight.set(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT,340);
firstTime = time.time() # First time log
totalFramesPassed = 0 # Number of frames passed
if vcLeft.isOpened() and vcRight.isOpened():
rvalLeft, frameLeft = vcLeft.read()
rvalRight, frameRight = vcRight.read()
else:
rvalLeft = False
rvalRight = False
while rvalLeft and rvalRight: # If the cameras are opened
rvalLeft, frameLeft = vcLeft.read()
rvalRight, frameRight = vcRight.read()
cv2.putText(frameLeft, "FPS : " + str(totalFramesPassed / (time.time() - firstTime)),(40, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.8, 150, 2, 10)
cv2.imshow("Left Camera", frameLeft)
cv2.putText(frameRight, "FPS : " + str(totalFramesPassed / (time.time() - firstTime)),(40, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.8, 150, 2, 10)
cv2.imshow("Right Camera", frameRight)
frameLeftNew = cv2.cvtColor(frameLeft, cv2.COLOR_BGR2GRAY)
frameRightNew = cv2.cvtColor(frameRight, cv2.COLOR_BGR2GRAY)
num_disp = 112 - min_disp.get()
stereo = cv2.StereoBM_create(numDisparities = num_disp, blockSize = window_size.get())
stereo.setMinDisparity(min_disp.get())
stereo.setNumDisparities(num_disp)
stereo.setBlockSize(window_size.get())
stereo.setDisp12MaxDiff(Disp12MaxDiff.get())
stereo.setUniquenessRatio(UniquenessRatio.get())
stereo.setSpeckleRange(SpeckleRange.get())
stereo.setSpeckleWindowSize(SpeckleWindowSize.get())
disparity = stereo.compute(frameLeftNew, frameRightNew).astype(np.float32) / 16.0
disp_map = (disparity - min_disp.get())/num_disp
cv2.imshow("Disparity", disp_map)
master.update() # Update the slider options
key = cv2.waitKey(20)
totalFramesPassed = totalFramesPassed + 1 # One frame passed, increment
if key == 27:
break
vcLeft.release()
vcRight.release()
As stated in the OpenCV documentation of StereoBM (opencv stereoBM doc), the two images need to be a "rectified stereo pair".
This means that before you compute the disparity you will need to rectify the two cameras.
Have a look at stereo_match, where you can see how to rectify the two cameras before computing the disparity.
When you compute the disparity using StereoBM, you are looking for correspondences along parallel epipolar lines in both images.
This means the images are expected to be aligned in such a way that the same row in both images corresponds to the same line in space. The rectification process takes care of that.
For more information, look at Rectification with OpenCV.
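In code, the missing rectification step looks roughly like this (a sketch that assumes you already have the camera matrices, distortion coefficients and the R/T transform from a stereo calibration such as cv2.stereoCalibrate):
# Computed once from the calibration results
R1, R2, P1, P2, Q, roi1, roi2 = cv2.stereoRectify(
    leftMatrix, leftDistortion, rightMatrix, rightDistortion,
    (width, height), R, T)
leftMapX, leftMapY = cv2.initUndistortRectifyMap(
    leftMatrix, leftDistortion, R1, P1, (width, height), cv2.CV_32FC1)
rightMapX, rightMapY = cv2.initUndistortRectifyMap(
    rightMatrix, rightDistortion, R2, P2, (width, height), cv2.CV_32FC1)

# Applied to every incoming frame pair before computing the disparity
leftRectified = cv2.remap(grayLeft, leftMapX, leftMapY, cv2.INTER_LINEAR)
rightRectified = cv2.remap(grayRight, rightMapX, rightMapY, cv2.INTER_LINEAR)
stereo = cv2.StereoBM_create(numDisparities=112, blockSize=15)
disparity = stereo.compute(leftRectified, rightRectified)
The answer below shows the full calibration procedure that produces those matrices.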
I found out that we need to rectify the pair in order to use the StereoBM function. Furthermore, I found that although it is more resource-intensive, the StereoSGBM function gave me better results.
In case anyone needs to calibrate their cameras in the future, you can use this code to help you do so:
# Imports
import cv2
import numpy as np
# Constants
leftCameraNumber = 2 # Number for left camera
rightCameraNumber = 1 # Number for right camera
numberOfChessRows = 6
numberOfChessColumns = 8
chessSquareSize = 30 # Length of square in millimeters
numberOfChessColumns = numberOfChessColumns - 1 # Update to reflect how many corners are inside the chess board
numberOfChessRows = numberOfChessRows - 1
objp = np.zeros((numberOfChessColumns*numberOfChessRows,3), np.float32)
objp[:,:2] = np.mgrid[0:numberOfChessRows,0:numberOfChessColumns].T.reshape(-1,2)*chessSquareSize
objectPoints = []
leftImagePoints = []
rightImagePoints = []
parameterCriteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
# Code
print("Press \"n\" when you're done caputing checkerboards.")
vcLeft = cv2.VideoCapture(leftCameraNumber) # Load video campture for the left camera
vcLeft.set(cv2.CAP_PROP_FRAME_WIDTH,640*3/2);
vcLeft.set(cv2.CAP_PROP_FRAME_HEIGHT,480*3/2);
vcRight = cv2.VideoCapture(rightCameraNumber) # Load video capture for the right camera
vcRight.set(cv2.CAP_PROP_FRAME_WIDTH,640*3/2);
vcRight.set(cv2.CAP_PROP_FRAME_HEIGHT,480*3/2);
if vcLeft.isOpened() and vcRight.isOpened():
rvalLeft, frameLeft = vcLeft.read()
rvalRight, frameRight = vcRight.read()
else:
rvalLeft = False
rvalRight = False
# Number of succesful recognitions
checkerboardRecognitions = 0
while rvalLeft and rvalRight: # If the cameras are opened
vcLeft.grab();
vcRight.grab();
rvalLeft, frameLeft = vcLeft.retrieve()
rvalRight, frameRight = vcRight.retrieve()
frameLeftNew = cv2.cvtColor(frameLeft, cv2.COLOR_BGR2GRAY)
frameRightNew = cv2.cvtColor(frameRight, cv2.COLOR_BGR2GRAY)
foundPatternLeft, cornersLeft = cv2.findChessboardCorners(frameLeftNew, (numberOfChessRows, numberOfChessColumns), None, cv2.CALIB_CB_ADAPTIVE_THRESH + cv2.CALIB_CB_NORMALIZE_IMAGE + cv2.CALIB_CB_FAST_CHECK)
foundPatternRight, cornersRight = cv2.findChessboardCorners(frameRightNew, (numberOfChessRows, numberOfChessColumns), None, cv2.CALIB_CB_ADAPTIVE_THRESH + cv2.CALIB_CB_NORMALIZE_IMAGE + cv2.CALIB_CB_FAST_CHECK)
if foundPatternLeft and foundPatternRight: # If found corners in this frame
# Process the images and display the count of checkboards in our array
checkerboardRecognitions = checkerboardRecognitions + 1
print("Checker board recognitions: " + str(checkerboardRecognitions))
objectPoints.append(objp)
exactCornersLeft = cv2.cornerSubPix(frameLeftNew, cornersLeft, (11, 11), (-1, -1), parameterCriteria);
leftImagePoints.append(exactCornersLeft)
exactCornersRight = cv2.cornerSubPix(frameRightNew, cornersRight, (11, 11), (-1, -1), parameterCriteria);
rightImagePoints.append(exactCornersRight)
frameLeft = cv2.drawChessboardCorners(frameLeft, (numberOfChessRows, numberOfChessColumns), (exactCornersLeft), True);
frameRight = cv2.drawChessboardCorners(frameRight, (numberOfChessRows, numberOfChessColumns), (exactCornersRight), True);
# Display current webcams regardless if board was found or not
cv2.imshow("Left Camera", frameLeft)
cv2.imshow("Right Camera", frameRight)
key = cv2.waitKey(250) # Give the frame some time
if key == ord('n'):
break
cameraMatrixLeft = np.zeros( (3,3) )
cameraMatrixRight = np.zeros( (3,3) )
distortionLeft = np.zeros( (8,1) )
distortionRight = np.zeros( (8,1) )
height, width = frameLeft.shape[:2]
rms, leftMatrix, leftDistortion, rightMatrix, rightDistortion, R, T, E, F = cv2.stereoCalibrate(objectPoints, leftImagePoints, rightImagePoints, cameraMatrixLeft, distortionLeft, cameraMatrixRight, distortionRight, (width, height),parameterCriteria, flags=0)
arr1 = np.arange(8).reshape(2, 4)
arr2 = np.arange(10).reshape(2, 5)
np.savez('camera_calibration.npz', leftMatrix=leftMatrix, leftDistortion=leftDistortion, rightMatrix=rightMatrix, rightDistortion=rightDistortion, R=R, T=T, E=E, F=F)
print("Calibration Settings Saved to File!")
print("RMS:")
print(rms)
print("Left Matrix:")
print(leftMatrix)
print("Left Distortion:")
print(leftDistortion)
print("Right Matrix:")
print(rightMatrix)
print("Right Distortion:")
print(rightDistortion)
print("R:")
print(R)
print("T:")
print(T)
print("E:")
print(E)
print("F:")
print(F)
leftRectTransform, rightRectTransform, leftProjMatrix, rightProjMatrix, _, _, _ = cv2.stereoRectify(leftMatrix, leftDistortion, rightMatrix, rightDistortion, (width, height), R, T, alpha=-1);
leftMapX, leftMapY = cv2.initUndistortRectifyMap(leftMatrix, leftDistortion, leftRectTransform, leftProjMatrix, (width, height), cv2.CV_32FC1);
rightMapX, rightMapY = cv2.initUndistortRectifyMap(rightMatrix, rightDistortion, rightRectTransform, rightProjMatrix, (width, height), cv2.CV_32FC1);
minimumDisparities = 0
maximumDisparities = 128
stereo = cv2.StereoSGBM_create(minimumDisparities, maximumDisparities, 18)
while True: # If the cameras are opened
vcLeft.grab();
vcRight.grab();
rvalLeft, frameLeft = vcLeft.retrieve()
rvalRight, frameRight = vcRight.retrieve()
frameLeftNew = cv2.cvtColor(frameLeft, cv2.COLOR_BGR2GRAY)
frameRightNew = cv2.cvtColor(frameRight, cv2.COLOR_BGR2GRAY)
leftRectified = cv2.remap(frameLeftNew, leftMapX, leftMapY, cv2.INTER_LINEAR);
rightRectified = cv2.remap(frameRightNew, rightMapX, rightMapY, cv2.INTER_LINEAR);
disparity = stereo.compute(leftRectified, rightRectified)
cv2.filterSpeckles(disparity, 0, 6000, maximumDisparities);
cv2.imshow("Normalized Disparity", (disparity/16.0 - minimumDisparities)/maximumDisparities);
cv2.imshow("Left Camera", leftRectified)
cv2.imshow("Right Camera", rightRectified)
key = cv2.waitKey(10) # Give the frame some time
if key == 27:
break
print("Finished!")