I am following this tutorial. After cloning her repository and getting the "track" command to work, I wanted to try to integrate a scanning function.
I went into her manager.py script and added my scanning procedure in the set_servos function as shown below (in bold). This runs in the servos_process:
import logging
from multiprocessing import Value, Process, Manager, Queue
import pantilthat as pth
import signal
import sys
import time
import RPi.GPIO as GPIO
from rpi_deep_pantilt.detect.util.visualization import visualize_boxes_and_labels_on_image_array
from rpi_deep_pantilt.detect.camera import run_pantilt_detect
from rpi_deep_pantilt.control.pid import PIDController
GPIO.setmode(GPIO.BCM)
GPIO.setwarnings(False)
GPIO.setup(8,GPIO.OUT)
logging.basicConfig()
LOGLEVEL = logging.getLogger().getEffectiveLevel()
RESOLUTION = (320, 320)
SERVO_MIN = -90
SERVO_MAX = 90
CENTER = (
RESOLUTION[0] // 2,
RESOLUTION[1] // 2
)
# function to handle keyboard interrupt
def signal_handler(sig, frame):
# print a status message
print("[INFO] You pressed `ctrl + c`! Exiting...")
# disable the servos
pth.servo_enable(1, False)
pth.servo_enable(2, False)
GPIO.output(8,GPIO.LOW)
# exit
sys.exit()
def in_range(val, start, end):
# determine the input value is in the supplied range
return (val >= start and val <= end)
def set_servos(pan, tilt, scan):
# signal trap to handle keyboard interrupt
signal.signal(signal.SIGINT, signal_handler)
**
#visualize_boxes_and_labels_on_image_array()
print(scan.value) # output: 't'
while scan.value == 't':**
print('Scanning')
pth.servo_one(90)
pth.servo_two(25)
time.sleep(10)
pth.servo_one(30)
pth.servo_two(25)
time.sleep(10)
pth.servo_one(-30)
pth.servo_two(25)
time.sleep(10)
pth.servo_one(-90)
pth.servo_two(25)
time.sleep(10)
pth.servo_one(-30)
pth.servo_two(25)
time.sleep(10)
pth.servo_one(30)
pth.servo_two(25)
pth.time.sleep(10)
pth.servo_one(90)
pth.servo_two(25)
time.sleep(10)
continue
while True:
pan_angle = -1 * pan.value
tilt_angle = tilt.value
# if the pan angle is within the range, pan
if in_range(pan_angle, SERVO_MIN, SERVO_MAX):
pth.pan(pan_angle)
else:
logging.info(f'pan_angle not in range {pan_angle}')
if in_range(tilt_angle, SERVO_MIN, SERVO_MAX):
pth.tilt(tilt_angle)
else:
logging.info(f'tilt_angle not in range {tilt_angle}')
def pid_process(output, p, i, d, box_coord, origin_coord, action):
# signal trap to handle keyboard interrupt
signal.signal(signal.SIGINT, signal_handler)
# create a PID and initialize it
p = PIDController(p.value, i.value, d.value)
p.reset()
# loop indefinitely
while True:
error = origin_coord - box_coord.value
output.value = p.update(error)
# logging.info(f'{action} error {error} angle: {output.value}')
def pantilt_process_manager(
model_cls,
labels=('Raspi',),
rotation=0
):
pth.servo_enable(1, True)
pth.servo_enable(2, True)
with Manager() as manager:
**scan = manager.Value('c', 't')**
# set initial bounding box (x, y)-coordinates to center of frame
center_x = manager.Value('i', 0)
center_y = manager.Value('i', 0)
center_x.value = RESOLUTION[0] // 2
center_y.value = RESOLUTION[1] // 2
# pan and tilt angles updated by independent PID processes
pan = manager.Value('i', 0)
tilt = manager.Value('i', 0)
# PID gains for panning
pan_p = manager.Value('f', 0.05)
# 0 time integral gain until inferencing is faster than ~50ms
pan_i = manager.Value('f', 0.1)
pan_d = manager.Value('f', 0)
# PID gains for tilting
tilt_p = manager.Value('f', 0.15)
# 0 time integral gain until inferencing is faster than ~50ms
tilt_i = manager.Value('f', 0.2)
tilt_d = manager.Value('f', 0)
**detect_processr = Process(target=run_pantilt_detect,
args=(center_x, center_y, labels, model_cls, rotation, scan))**
pan_process = Process(target=pid_process,
args=(pan, pan_p, pan_i, pan_d, center_x, CENTER[0], 'pan'))
tilt_process = Process(target=pid_process,
args=(tilt, tilt_p, tilt_i, tilt_d, center_y, CENTER[1], 'tilt'))
**servo_process = Process(target=set_servos, args=(pan, tilt, scan))**
detect_processr.start()
pan_process.start()
tilt_process.start()
servo_process.start()
detect_processr.join()
pan_process.join()
tilt_process.join()
servo_process.join()
if __name__ == '__main__':
pantilt_process_manager()
In a separate script called visualization.py, there is a statement in the visualize_boxes_and_labels_on_image_array function which is responsible for overlaying bounding boxes on the camera feed once an object is detected as follows (In bold towards the end). This runs in the detect_processr:
# python
import collections
import logging
# lib
import numpy as np
import PIL.Image as Image
import PIL.ImageColor as ImageColor
import PIL.ImageDraw as ImageDraw
import PIL.ImageFont as ImageFont
import six
import RPi.GPIO as GPIO
import time
from time import sleep
import pantilthat as pth
GPIO.setmode(GPIO.BCM)
GPIO.setwarnings(False)
GPIO.setup(8,GPIO.OUT)
STANDARD_COLORS = [
'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',
'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',
'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
'WhiteSmoke', 'Yellow', 'YellowGreen'
]
def _get_multiplier_for_color_randomness():
num_colors = len(STANDARD_COLORS)
prime_candidates = [5, 7, 11, 13, 17]
# Remove all prime candidates that divide the number of colors.
prime_candidates = [p for p in prime_candidates if num_colors % p]
if not prime_candidates:
return 1
# Return the closest prime number to num_colors / 10.
abs_distance = [np.abs(num_colors / 10. - p) for p in prime_candidates]
num_candidates = len(abs_distance)
inds = [i for _, i in sorted(zip(abs_distance, range(num_candidates)))]
return prime_candidates[inds[0]]
def draw_mask_on_image_array(image, mask, color='red', alpha=0.4):
if image.dtype != np.uint8:
raise ValueError('`image` not of type np.uint8')
if mask.dtype != np.uint8:
raise ValueError('`mask` not of type np.uint8')
if np.any(np.logical_and(mask != 1, mask != 0)):
raise ValueError('`mask` elements should be in [0, 1]')
if image.shape[:2] != mask.shape:
raise ValueError('The image has spatial dimensions %s but the mask has '
'dimensions %s' % (image.shape[:2], mask.shape))
rgb = ImageColor.getrgb(color)
pil_image = Image.fromarray(image)
solid_color = np.expand_dims(
np.ones_like(mask), axis=2) * np.reshape(list(rgb), [1, 1, 3])
pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA')
pil_mask = Image.fromarray(np.uint8(255.0*alpha*mask)).convert('L')
pil_image = Image.composite(pil_solid_color, pil_image, pil_mask)
np.copyto(image, np.array(pil_image.convert('RGB')))
def draw_bounding_box_on_image(image,
ymin,
xmin,
ymax,
xmax,
color='red',
thickness=4,
display_str_list=(),
use_normalized_coordinates=True):
GPIO.output(8,GPIO.HIGH)
print('Object Detected')
draw = ImageDraw.Draw(image)
im_width, im_height = image.size
if use_normalized_coordinates:
(left, right, top, bottom) = (xmin * im_width, xmax * im_width,
ymin * im_height, ymax * im_height)
else:
(left, right, top, bottom) = (xmin, xmax, ymin, ymax)
draw.line([(left, top), (left, bottom), (right, bottom),
(right, top), (left, top)], width=thickness, fill=color)
try:
font = ImageFont.truetype('arial.ttf', 24)
except IOError:
font = ImageFont.load_default()
# If the total height of the display strings added to the top of the bounding
# box exceeds the top of the image, stack the strings below the bounding box
# instead of above.
display_str_heights = [font.getsize(ds)[1] for ds in display_str_list]
# Each display_str has a top and bottom margin of 0.05x.
total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)
if top > total_display_str_height:
text_bottom = top
else:
text_bottom = bottom + total_display_str_height
# Reverse list and print from bottom to top.
for display_str in display_str_list[::-1]:
text_width, text_height = font.getsize(display_str)
margin = np.ceil(0.05 * text_height)
draw.rectangle(
[(left, text_bottom - text_height - 2 * margin), (left + text_width,
text_bottom)],
fill=color)
draw.text(
(left + margin, text_bottom - text_height - margin),
display_str,
fill='black',
font=font)
text_bottom -= text_height - 2 * margin
def draw_bounding_box_on_image_array(image,
ymin,
xmin,
ymax,
xmax,
color='red',
thickness=4,
display_str_list=(),
use_normalized_coordinates=True):
image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color,
thickness, display_str_list,
use_normalized_coordinates)
np.copyto(image, np.array(image_pil))
def draw_keypoints_on_image(image,
keypoints,
color='red',
radius=2,
use_normalized_coordinates=True):
draw = ImageDraw.Draw(image)
im_width, im_height = image.size
keypoints_x = [k[1] for k in keypoints]
keypoints_y = [k[0] for k in keypoints]
if use_normalized_coordinates:
keypoints_x = tuple([im_width * x for x in keypoints_x])
keypoints_y = tuple([im_height * y for y in keypoints_y])
for keypoint_x, keypoint_y in zip(keypoints_x, keypoints_y):
draw.ellipse([(keypoint_x - radius, keypoint_y - radius),
(keypoint_x + radius, keypoint_y + radius)],
outline=color, fill=color)
def draw_keypoints_on_image_array(image,
keypoints,
color='red',
radius=2,
use_normalized_coordinates=True):
image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
draw_keypoints_on_image(image_pil, keypoints, color, radius,
use_normalized_coordinates)
np.copyto(image, np.array(image_pil))
def visualize_boxes_and_labels_on_image_array(
image,
boxes,
classes,
scores,
category_index,
** scan, **
instance_masks=None,
instance_boundaries=None,
keypoints=None,
track_ids=None,
use_normalized_coordinates=False,
max_boxes_to_draw=20,
min_score_thresh=.5,
agnostic_mode=False,
line_thickness=4,
groundtruth_box_visualization_color='black',
skip_scores=False,
skip_labels=False,
skip_track_ids=False):
GPIO.output(8,GPIO.LOW)
# Create a display string (and color) for every box location, group any boxes
# that correspond to the same location.
box_to_display_str_map = collections.defaultdict(list)
box_to_color_map = collections.defaultdict(str)
box_to_instance_masks_map = {}
box_to_instance_boundaries_map = {}
box_to_keypoints_map = collections.defaultdict(list)
box_to_track_ids_map = {}
if not max_boxes_to_draw:
max_boxes_to_draw = boxes.shape[0]
for i in range(min(max_boxes_to_draw, boxes.shape[0])):
if scores is None or scores[i] > min_score_thresh:
box = tuple(boxes[i].tolist())
if instance_masks is not None:
box_to_instance_masks_map[box] = instance_masks[i]
if instance_boundaries is not None:
box_to_instance_boundaries_map[box] = instance_boundaries[i]
if keypoints is not None:
box_to_keypoints_map[box].extend(keypoints[i])
if track_ids is not None:
box_to_track_ids_map[box] = track_ids[i]
if scores is None:
box_to_color_map[box] = groundtruth_box_visualization_color
else:
display_str = ''
if not skip_labels:
if not agnostic_mode:
if classes[i] in six.viewkeys(category_index):
class_name = category_index[classes[i]]['name']
else:
class_name = 'N/A'
display_str = str(class_name)
if not skip_scores:
if not display_str:
display_str = '{}%'.format(int(100*scores[i]))
else:
display_str = '{}: {}%'.format(
display_str, int(100*scores[i]))
if not skip_track_ids and track_ids is not None:
if not display_str:
display_str = 'ID {}'.format(track_ids[i])
else:
display_str = '{}: ID {}'.format(
display_str, track_ids[i])
box_to_display_str_map[box].append(display_str)
if agnostic_mode:
box_to_color_map[box] = 'DarkOrange'
elif track_ids is not None:
prime_multipler = _get_multiplier_for_color_randomness()
box_to_color_map[box] = STANDARD_COLORS[
(prime_multipler * track_ids[i]) % len(STANDARD_COLORS)]
else:
box_to_color_map[box] = STANDARD_COLORS[
classes[i] % len(STANDARD_COLORS)]
# Draw all boxes onto image.
for box, color in box_to_color_map.items():
ymin, xmin, ymax, xmax = box
if instance_masks is not None:
draw_mask_on_image_array(
image,
box_to_instance_masks_map[box],
color=color
)
if instance_boundaries is not None:
draw_mask_on_image_array(
image,
box_to_instance_boundaries_map[box],
color='red',
alpha=1.0
)
draw_bounding_box_on_image_array(
image,
ymin,
xmin,
ymax,
xmax,
color=color,
thickness=line_thickness,
display_str_list=box_to_display_str_map[box],
use_normalized_coordinates=use_normalized_coordinates)
**scan.value = 'f'
print(scan.value) # output: 'f'**
if keypoints is not None:
draw_keypoints_on_image_array(
image,
box_to_keypoints_map[box],
color=color,
radius=line_thickness / 2,
use_normalized_coordinates=use_normalized_coordinates)
return image
The hope was that once an object is detected, the scan function would break. Visualization.py is executed in the detect_process, but only when an object is detected.
The statements are correctly relayed from my print statements as follows, but the loop still doesn't break:
$ rpi-deep-pantilt track Raspi
t
Scanning
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
^C
The issue you're having here is that memory is (generally) not shared between portions of a multiprocessing python program. In this case, scan_on is a local variable in your manager.py script, which is then re-instantiated in the visualization.py script.
To share data between Processes, we can use Values. These are objectified values that allow sharing state between Processes by simply passing it as an argument.
Why? Simple values like integers and booleans are copied when you pass them into a function, rather than maintaining state across their multiple instances. The Value object, however, simply passes a reference to the value rather than a direct copy.
Though I don't see where you're calling visualize_boxes_and_labels_on_image_array(), here's a start:
scan_on = manager.Value('c', 't') # the 'c' references the unsigned c-char,
# other types found here: https://docs.python.org/3/library/array.html#module-array
# we'll edit your set_servo process to include this variable
servo_process = Process(target=set_servos, args=(pan, tilt, scan_on))
Whenever you call visualize_boxes_and_labels_on_image_array(), you'll also want to pass this same instance of scan_on. You should not have to redeclare it. To access or edit the value of scan_on, use the .value field.
print(scan_on.value) # output: 't'
scan_on.value = 'f'
print(scan_on.value) # output: 'f'
Related
My image,
I've tried to get quite accurate palm from the hand image and used mediapipe(mp) library to do it. But the result is:
mp calculates and draws green strokes and red dots, but also give the coordinates of red dots. So I can get the coordinates of blue circles using given coordinates of red ones. But it can't be enough to get "accurate" palm. I need to get the black dots' coordinates.
following is my code:
import cv2
import mediapipe as mp
import utils
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
def makeCircle(img,circle_y,circle_x,radius):
image_height, image_width, _ = img.shape
y = int(circle_y * image_height)
x = int(circle_x * image_width)
circle_coordinates = (x,y)
color = (255, 0, 0)
thickness = 2
return cv2.circle(img,circle_coordinates,radius,color,thickness)
# For static images:
IMAGE_FILES = [
"C:/Users/USER/workspace/palm/images/sample1.png",
"C:/Users/USER/workspace/palm/images/sample2.png",
"C:/Users/USER/workspace/palm/images/sample3.png",
"C:/Users/USER/workspace/palm/images/sample4.png"
]
with mp_hands.Hands(
static_image_mode=True,
max_num_hands=2,
min_detection_confidence=0.5) as hands:
for idx, file in enumerate(IMAGE_FILES):
# Read an image, flip it around y-axis for correct handedness output (see
# above).
image = cv2.flip(cv2.imread(file), 1)
image = utils.remove_bground(image)
# resize image's height 600 fixing the ratio
image = utils.ResizeWithAspectRatio(image,height=600)
# Convert the BGR image to RGB before processing.
results = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
print("results:",results)
# Print handedness and draw hand landmarks on the image.
print('Handedness:', results.multi_handedness)
if not results.multi_hand_landmarks:
continue
image_height, image_width, _ = image.shape
annotated_image = image.copy()
for hand_landmarks in results.multi_hand_landmarks:
print('hand_landmarks:', hand_landmarks)
print(
f'Index finger tip coordinates: (',
f'{hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * image_width}, '
f'{hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * image_height})'
)
circle_coordinates = [
(
(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_MCP].y + hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_PIP].y)/2,
(hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_MCP].x + hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_PIP].x)/2
),
(
(hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_MCP].y + hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].y)/2,
(hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_MCP].x + hand_landmarks.landmark[mp_hands.HandLandmark.MIDDLE_FINGER_PIP].x)/2
),
(
(hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_MCP].y + hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_PIP].y)/2,
(hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_MCP].x + hand_landmarks.landmark[mp_hands.HandLandmark.RING_FINGER_PIP].x)/2
),
(
(hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_MCP].y + hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_PIP].y)/2,
(hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_MCP].x + hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_PIP].x)/2
),
(
hand_landmarks.landmark[mp_hands.HandLandmark.WRIST].y,
hand_landmarks.landmark[mp_hands.HandLandmark.WRIST].x
),
(
hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_CMC].y,
hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_CMC].x
),
(
hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_MCP].y,
hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_MCP].x
),
]
for (circle_y,circle_x) in circle_coordinates:
annotated_image = makeCircle(annotated_image,circle_y,circle_x,10)
palm_center_y = (hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_CMC].y +
hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_MCP].y)/2
palm_center_x = (hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_CMC].x +
hand_landmarks.landmark[mp_hands.HandLandmark.PINKY_MCP].x)/2
#
palm_range = 100
# annotated_image = makeCircle(annotated_image, palm_center_y,palm_center_x,palm_range)
mp_drawing.draw_landmarks(
annotated_image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
hand_landmark_x = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x * image_width
hand_landmark_y = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y * image_height
cv2.imwrite(
'/tmp/annotated_image' + str(idx) + '.png', cv2.flip(annotated_image, 1))
cv2.imshow("img" + str(idx),cv2.flip(annotated_image,1))
cv2.waitKey(0)
How to get the coordinates of the dots in first hand image?
maybe you could use cv2.convexHull and cv2.convexityDefects to find that dots. I leave you a video tutorial that I think it could be usefull: 🖐️ CONTANDO DEDOS ✌️ (Usando defectos de convexidad) | Python - OpenCV
The problem:
The goal is to create a disparity map for two parallel cameras. Currently the calculation itself is working, and I have a live disparitymap. It just shows contouring instead of information for every pixel, which is not what a disparity map should be doing.
.
What I have tried:
I tried the tsuka example, the lines are commented out, but they work. So this proves that the used functions work.
The result of my code is here: https://imgur.com/a/bIDmdkk (I probably don't have the reputation needed to upload images)
As can be seen in that image just the outline, the contour, of my face is visible. This contour reacts to my actual distance - with getting brighter or darker - but the rest of the image is dark.
With all parameters commented out (as is the example) it does now work either but has lots and lots of speckles laying over.
I also tried almost any combination of numDisparities and blocksize.
Changing the position of the cameras to one another alters the result but does not change it massively. I made sure to have them in a line with each other, looking in parallel.
Edit: I tinkered a bit and got this result: https://imgur.com/a/m2o9FOE compared to the previous result there are more features, but also more noise. (This one has fewer disparities and another color convertion)
SOLVED: [I tried running the stereo.compute within the while-loop with BGR-Images, but that does not work. The tsuka-example images are colored though, so there might be some case of wrong datatype that I do not see.
Everything is uint8 currently.] => I forgot that imread("",0) reads an image as grayscale. So everything behaves as it should in this regard.
.
So what is the difference between my left/right images and the ones resulting in https://docs.opencv.org/master/disparity_map.jpg ?
.
The code:
import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt
cap1 = cv.VideoCapture(1)
cap3 = cv.VideoCapture(3)
#imgR = cv.imread('tsuL.png',0)
#imgL = cv.imread('tsuR.png',0)
#stereoTest = cv.StereoBM_create(numDisparities=16, blockSize=15)
#disparityTest = stereoTest.compute(imgL,imgR)
while True:
# save current camera image
ret1, frame1 = cap1.read()
ret3, frame3 = cap3.read()
# switch from BGR to gray
grayFrame1 = cv.cvtColor(frame1, cv.COLOR_BGR2GRAY)
grayFrame3 = cv.cvtColor(frame3, cv.COLOR_BGR2GRAY)
# disparity params
stereo = cv.StereoBM_create(numDisparities=128, blockSize=5)
stereo.setTextureThreshold(600)
#stereo.setSpeckleRange(4)
#stereo.setSpeckleWindowSize(9)
stereo.setMinDisparity(0)
# calculate both variants (Camera 1 Left, Camera 2 Right and Camera 1 right, Camera 2 left)
disparity = stereo.compute(grayFrame1,grayFrame3)
disparity2 = stereo.compute(grayFrame3,grayFrame1)
#res = cv.cvtColor(disparity,cv.COLOR_GRAY2BGR)
# Should have been 65535 from int16 to int8, but 4095 works..
div = 65535.0/16
res = cv.convertScaleAbs(disparity, alpha=(255.0/div))
res2= cv.convertScaleAbs(disparity2, alpha=(255.0/div))
# Show disparity map
cv.namedWindow("Disparity")
cv.moveWindow("Disparity", 450, 20)
cv.imshow('Disparity', np.hstack([res,res2]))
keyboard = cv.waitKey(30)
if keyboard == 'q' or keyboard == 27:
break
cap.release()
cv.destroyAllWindows()
New Code
I got the camera calibration data from boofcv and copied some lines from https://stackoverflow.com/a/29151300/13150965 to my code.
Schwarz S/W
Xc 311,0 323,3
Yc 257,1 261,9
fx 603,0 593,6
fy 604,3 596,5
skew
radial 1,43e-01 1,1e-01
-3,03e-01 -2,43e-01
tangential 1,37e-02 1,25e-02
-9,77e-03 -9,79e-04
These are the values I received for each Camera (Schwarz and S/W are just names for each camera, they have different cables, that's how I recognize them)
import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt
cap1 = cv.VideoCapture(0)
cap3 = cv.VideoCapture(1)
cameraMatrix1 = np.array(
[[603.0, 0, 311.0],
[0, 604.3, 257.1],
[0, 0, 1]]
)
cameraMatrix2 = np.array(
[[593.6, 0, 323.3],
[0, 596.5, 261.9],
[0, 0, 1]]
)
distCoeffs1 = np.array([[0.143, -0.303, 0.0137, -0.00977, 0.0]])
distCoeffs2 = np.array([[0.11, -0.243, 0.0125, -0.000979, 0.0]])
R = np.array(
[[1.0, 0.0, 0.0],
[0.0, 1.0, 0.0],
[0.0, 0.0, 1.0]]
)
T = np.array(
[[98.0],
[0.0],
[0.0]]
)
# Params from camera calibration
camMats = [cameraMatrix1, cameraMatrix2]
distCoeffs = [distCoeffs1, distCoeffs2]
camSources = [0,1]
for src in camSources:
distCoeffs[src][0][4] = 0.0 # use only the first 2 values in distCoeffs
xOff = 450
div = 64.0
i = 0
while True:
# save current camera image
ret1, frame1 = cap1.read()
ret3, frame3 = cap3.read()
w, h = frame1.shape[:2]
# The rectification process
newCams = [0,0]
roi = [0,0]
frames = [frame1, frame3]
i = i + 1
if i > 10:
for src in camSources:
newCams[src], roi[src] = cv.getOptimalNewCameraMatrix(cameraMatrix = camMats[src],
distCoeffs = distCoeffs[src],
imageSize = (w,h),
alpha = 0)
rectFrames = [0,0]
for src in camSources:
rectFrames[src] = cv.undistort(frames[src], camMats[src], distCoeffs[src])
R1,R2,P1,P2,Q,roi1,roi2 = cv.stereoRectify(
cameraMatrix1 =camMats[0],
cameraMatrix2 =camMats[1],
distCoeffs1 =distCoeffs1,
distCoeffs2 =distCoeffs2,
imageSize = (w,h),
R=R,
T=T,
alpha=1
)
# show camera images
cv.namedWindow("RectFrames")
cv.moveWindow("RectFrames", xOff, 532)
cv.imshow('RectFrames', np.hstack([rectFrames[0],rectFrames[1]]))
# switch from BGR to gray
grayFrame1 = cv.cvtColor(rectFrames[0], cv.COLOR_BGR2GRAY)
grayFrame3 = cv.cvtColor(rectFrames[1], cv.COLOR_BGR2GRAY)
# disparity params
stereo = cv.StereoBM_create(numDisparities=16, blockSize=15)
# calculate both variants (Camera 1 Left, Camera 2 Right and Camera 1 right, Camera 2 left)
disparity = stereo.compute(grayFrame1,grayFrame3)
disparity2 = stereo.compute(grayFrame3,grayFrame1)
# Should have been 65535 from int16 to int8, but 4095 works..
res = cv.convertScaleAbs(disparity, alpha=(255.0/(div-1)))
res2= cv.convertScaleAbs(disparity2, alpha=(255.0/(div-1)))
# Show disparity map
cv.namedWindow("Disparity")
cv.moveWindow("Disparity", xOff, 20)
cv.imshow('Disparity', np.hstack([res,res2]))
keyboard = cv.waitKey(30)
if keyboard == 'q' or keyboard == 27:
break
cap.release()
cv.destroyAllWindows()
I can see, that the images are being undistorted. https://imgur.com/a/SBmv7IY
But I am still doing something wrong.
The R and T are made up, as they look parallel (No Rotation) and are 9.8cm apart from another.
The Values for R and T calculated via the script from StereoCalibration in OpenCV on Python resulted in the unity-matrix for R and an empty vector for T. The latter cannot be right.
I now got the R and T values for a given calibration of the cameras. But it does in fact not solve my problem. So either there is still an error in that calculation or this problem has to be solved differently.
I rewrote the entire script, to see at which step it misbehaves - and do tidy things up. At is stands, the calibration works up to the cv2.initUndistortRectifyMap , if I use this map with cv2.remap onto my camera image, I just get a black image.
import numpy as np
import cv2
from VideoCapture import Device
from PIL import Image
import glob
print("Importing Images")
image_listR = []
image_listL = []
w = 640
h = 480
for filename in glob.glob('StereoCalibrate\imageR*'): #assuming gif
im=Image.open(filename).convert('RGB')
cvim= np.array(im)
cvim = cvim[:, :, ::-1].copy()
image_listR.append(cvim)
for filename in glob.glob('StereoCalibrate\imageL*'): #assuming gif
im=Image.open(filename).convert('RGB')
cvim= np.array(im)
cvim = cvim[:, :, ::-1].copy()
image_listL.append(cvim)
imagesR = len(image_listR)
imagesL = len(image_listL)
print("Found {%d} images for Left camera" % imagesL)
print("Found {%d} images for Right camera" % imagesR)
if imagesR == imagesL:
print("Number of Images match")
else:
print("Number of Images do not match")
print("Using loaded images")
board_w = 8
board_h = 5
board_sz = (8,5)
board_n = board_w*board_h
# termination criteria
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
# Arrays to store object points and image points from all the images.
object_points = [] # 3d point in real world space
imagePoints1 = [] # 2d points in image plane.
imagePoints2 = [] # 2d points in image plane.
corners1 = []
corners2 = []
obj = np.zeros((5*8,3), np.float32)
obj[:,:2] = np.mgrid[0:8,0:5].T.reshape(-1,2)
vidStreamL = cv2.VideoCapture(1) # index of your camera
vidStreamR = cv2.VideoCapture(0) # index of your camera
success = 0
found1 = False
found2 = False
i=0
while (success < imagesR*0.9):
#Loop through the image list
if i >= imagesL:
i = 0
img1 = image_listL[i]
img2 = image_listR[i]
#Convert images to grayscale
gray1 = cv2.cvtColor(img1,cv2.COLOR_BGR2GRAY)
gray2 = cv2.cvtColor(img2,cv2.COLOR_BGR2GRAY)
#Check for Chessboard Pattern
found1, corners1 = cv2.findChessboardCorners(img1, board_sz)
found2, corners2 = cv2.findChessboardCorners(img2, board_sz)
#Draw Chessboard in image
if (found1):
cv2.cornerSubPix(gray1, corners1, (11, 11), (-1, -1),criteria)
cv2.drawChessboardCorners(gray1, board_sz, corners1, found1)
if (found2):
cv2.cornerSubPix(gray2, corners2, (11, 11), (-1, -1), criteria)
cv2.drawChessboardCorners(gray2, board_sz, corners2, found2)
#Show grayscale image with chessboard marker
cv2.imshow('image1', gray1)
cv2.imshow('image2', gray2)
if (found1 != 0 and found2 != 0):
#Remove successful detected images from list
image_listL.pop(i)
image_listR.pop(i)
imagesL-=1
imagePoints1.append(corners1);
imagePoints2.append(corners2);
object_points.append(obj);
success+=1
print("{", success, "} / {",imagesR*0.9,"} calibration images detected")
if (success >= imagesR*0.9):
break
i = i + 1
cv2.waitKey(1)
cv2.destroyAllWindows()
print("Calibrating")
cx1 = 327.0
cy1 = 247.9
fx1 = 608.3
fy1 = 607.7
rx1 = 0.129
ry1 = -0.269
tx1 = 0.00382
ty1 = -0.00151
camMat1 = np.array(
[[fx1, 0, cx1],
[0, fy1, cy1],
[0, 0, 1]])
cx2 = 329.8
cy2 = 249.0
fx2 = 601.7
fy2 = 601.1
rx2 = 0.149
ry2 = -0.322
tx2 = 0.0039
ty2 = -0.000837
camMat2 = np.array(
[[fx2, 0, cx2],
[0, fy2, cy2],
[0, 0, 1]])
disCoe1 = np.array([[0.0,0.0,0.0,0.0,0.0]])
disCoe2 = np.array([[0.0,0.0,0.0,0.0,0.0]])
R = np.zeros(shape=(3,3))
T = np.zeros(shape=(3,3))
E = np.zeros(shape=(3,3))
F = np.zeros(shape=(3,3))
retval, camMat1, disCoe1, camMat2, disCoe2, R, T, E, F = cv2.stereoCalibrate(object_points, imagePoints1, imagePoints2, camMat1, disCoe1, camMat2, disCoe2, (w, h), flags = cv2.CALIB_USE_INTRINSIC_GUESS)
print("Done Calibration\n")
R1 = np.zeros(shape=(3,3))
R2 = np.zeros(shape=(3,3))
P1 = np.zeros(shape=(3,4))
P2 = np.zeros(shape=(3,4))
print("T:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in T]))
print("E:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in E]))
print("F:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in F]))
print("R:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in R]))
print("CAM1:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in camMat1]))
print("CAM2:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in camMat2]))
print("DIS1:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in disCoe1]))
print("DIS2:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in disCoe2]))
print("Rectifying cameras")
cv2.stereoRectify(camMat1, disCoe1, camMat2, disCoe2,(w, h), R, T)
#print("Undistort image")
#map1x, map1y = cv2.initUndistortRectifyMap(camMat1, disCoe1, R1, camMat1, (w, h), cv2.CV_32FC1)
#map2x, map2y = cv2.initUndistortRectifyMap(camMat2, disCoe2, R2, camMat2, (w, h), cv2.CV_32FC1)
print("Settings complete\n")
i = 1
j = 1
while(True):
retL, img1 = vidStreamL.read()
retR, img2 = vidStreamR.read()
img1 = cv2.undistort(img1, camMat1, disCoe1)
img2 = cv2.undistort(img2, camMat2, disCoe2)
cv2.imshow("ImgCam", np.hstack([img1,img2]));
#imgU1 = np.zeros((h,w,3), np.uint8)
#imgU2 = np.zeros((h,w,3), np.uint8)
#imgU1 = cv2.remap(img1, map1x, map1y, cv2.INTER_LINEAR, imgU1, cv2.BORDER_CONSTANT, 0)
#imgU2 = cv2.remap(img2, map2x, map2y, cv2.INTER_LINEAR, imgU2, cv2.BORDER_CONSTANT, 0)
#cv2.imshow("ImageCam", np.hstack([imgU1,imgU2]));
#imgU1 = cv2.cvtColor(imgU1, cv2.COLOR_BGR2GRAY)
#imgU2 = cv2.cvtColor(imgU2, cv2.COLOR_BGR2GRAY)
img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
stereo = cv2.StereoBM_create(numDisparities=16, blockSize=15)
disparity = stereo.compute(img1,img2)
disparit2 = stereo.compute(img2,img1)
res = cv2.convertScaleAbs(disparity, alpha=(255.0/512.0))
re2 = cv2.convertScaleAbs(disparit2, alpha=(255.0/512.0))
cv2.namedWindow("Disparity")
cv2.imshow('Disparity', np.hstack([res,re2]))
cv2.waitKey(1)
Output:
Importing Images
Found {90} images for Left camera
Found {90} images for Right camera
Number of Images match
Using loaded images
{ 1 } / { 81.0 } calibration images detected
{ 2 } / { 81.0 } calibration images detected
...
{ 81 } / { 81.0 } calibration images detected
Calibrating
Done Calibration
T:
-3.4549164747952514
-0.15507627811210184
-0.058176064658149625
E:
0.0009397723130476023 0.05762864132890782 -0.15527769659160615
-0.01780225919479015 0.01349075458635349 3.455334047732434
-0.008356129824974412 -3.458367965240172 0.010848591597549652
F:
3.59441069386539e-08 2.1966757991956236e-06 -0.0032581679670958268
-6.799554333159719e-07 5.135279707045414e-07 0.060534502577423176
6.856712419870922e-06 -0.061575681061419536 1.0
R:
0.9988149170858261 -0.0472903202575948 -0.01150595570860947
0.047251107481307925 0.998876350140538 -0.0036564971909233096
0.011665943966274269 0.0031084947887139625 0.9999271188499311
CAM1:
457.8949692862012 0.0 333.02411929079784
0.0 459.45537763505865 239.7961684844508
0.0 0.0 1.0
CAM2:
460.4374113961873 0.0 342.68117331116434
0.0 461.07367491328057 244.62051778708334
0.0 0.0 1.0
DIS1:
0.06391854958023913 -0.2191286122082927 -0.000947168228999159 0.004660285089171575 0.08044318478168837
DIS2:
0.011643796283126952 0.14239490114798584 0.001548517080560543 0.011862118627062223 -0.5191998209097282
Rectifying cameras
Settings complete
You missed the Calibration and Rectification process, which is the first step of a disparity algorithm.
Below steps help you get your disparity map:
Calibrate your camera and find the intrinsic and extrinsic of the camera.
With the available camera and distortion matrix from the calibration, rectify your images.
Pass the images to your algorithm.
Get the disparity map.
Note: raw disparity map will be bad in a textureless region.
I am coding a program that can monitor the red dot of a lazer and track the movement. I would like to collect the x and y movement of the laser (deviating from a stable point). Some searching found me this code on git hub which I plan to modify and use to track the red dot using openCV. I am however struggling to pull any data from this.
How would I go about storing the x and y coordinates of the laser in excel (or other useful means)?
import sys
import argparse
import cv2
import numpy
class LaserTracker(object):
def __init__(self, cam_width=640, cam_height=480, hue_min=20, hue_max=160,
sat_min=100, sat_max=255, val_min=200, val_max=256,
display_thresholds=False):
"""
* ``cam_width`` x ``cam_height`` -- This should be the size of the
image coming from the camera. Default is 640x480.
HSV color space Threshold values for a RED laser pointer are determined
by:
* ``hue_min``, ``hue_max`` -- Min/Max allowed Hue values
* ``sat_min``, ``sat_max`` -- Min/Max allowed Saturation values
* ``val_min``, ``val_max`` -- Min/Max allowed pixel values
If the dot from the laser pointer doesn't fall within these values, it
will be ignored.
* ``display_thresholds`` -- if True, additional windows will display
values for threshold image channels.
"""
self.cam_width = cam_width
self.cam_height = cam_height
self.hue_min = hue_min
self.hue_max = hue_max
self.sat_min = sat_min
self.sat_max = sat_max
self.val_min = val_min
self.val_max = val_max
self.display_thresholds = display_thresholds
self.capture = None # camera capture device
self.channels = {
'hue': None,
'saturation': None,
'value': None,
'laser': None,
}
self.previous_position = None
self.trail = numpy.zeros((self.cam_height, self.cam_width, 3),
numpy.uint8)
def create_and_position_window(self, name, xpos, ypos):
"""Creates a named widow placing it on the screen at (xpos, ypos)."""
# Create a window
cv2.namedWindow(name)
# Resize it to the size of the camera image
cv2.resizeWindow(name, self.cam_width, self.cam_height)
# Move to (xpos,ypos) on the screen
cv2.moveWindow(name, xpos, ypos)
def setup_camera_capture(self, device_num=0):
"""Perform camera setup for the device number (default device = 0).
Returns a reference to the camera Capture object.
"""
try:
device = int(device_num)
sys.stdout.write("Using Camera Device: {0}\n".format(device))
except (IndexError, ValueError):
# assume we want the 1st device
device = 0
sys.stderr.write("Invalid Device. Using default device 0\n")
# Try to start capturing frames
self.capture = cv2.VideoCapture(device)
if not self.capture.isOpened():
sys.stderr.write("Faled to Open Capture device. Quitting.\n")
sys.exit(1)
# set the wanted image size from the camera
self.capture.set(
cv2.cv.CV_CAP_PROP_FRAME_WIDTH if cv2.__version__.startswith('2') else cv2.CAP_PROP_FRAME_WIDTH,
self.cam_width
)
self.capture.set(
cv2.cv.CV_CAP_PROP_FRAME_HEIGHT if cv2.__version__.startswith('2') else cv2.CAP_PROP_FRAME_HEIGHT,
self.cam_height
)
return self.capture
def handle_quit(self, delay=10):
"""Quit the program if the user presses "Esc" or "q"."""
key = cv2.waitKey(delay)
c = chr(key & 255)
if c in ['c', 'C']:
self.trail = numpy.zeros((self.cam_height, self.cam_width, 3),
numpy.uint8)
if c in ['q', 'Q', chr(27)]:
sys.exit(0)
def threshold_image(self, channel):
if channel == "hue":
minimum = self.hue_min
maximum = self.hue_max
elif channel == "saturation":
minimum = self.sat_min
maximum = self.sat_max
elif channel == "value":
minimum = self.val_min
maximum = self.val_max
(t, tmp) = cv2.threshold(
self.channels[channel], # src
maximum, # threshold value
0, # we dont care because of the selected type
cv2.THRESH_TOZERO_INV # t type
)
(t, self.channels[channel]) = cv2.threshold(
tmp, # src
minimum, # threshold value
255, # maxvalue
cv2.THRESH_BINARY # type
)
if channel == 'hue':
# only works for filtering red color because the range for the hue
# is split
self.channels['hue'] = cv2.bitwise_not(self.channels['hue'])
def track(self, frame, mask):
"""
Track the position of the laser pointer.
Code taken from
http://www.pyimagesearch.com/2015/09/14/ball-tracking-with-opencv/
"""
center = None
countours = cv2.findContours(mask, cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)[-2]
# only proceed if at least one contour was found
if len(countours) > 0:
# find the largest contour in the mask, then use
# it to compute the minimum enclosing circle and
# centroid
c = max(countours, key=cv2.contourArea)
((x, y), radius) = cv2.minEnclosingCircle(c)
moments = cv2.moments(c)
if moments["m00"] > 0:
center = int(moments["m10"] / moments["m00"]), \
int(moments["m01"] / moments["m00"])
else:
center = int(x), int(y)
# only proceed if the radius meets a minimum size
if radius > 10:
# draw the circle and centroid on the frame,
cv2.circle(frame, (int(x), int(y)), int(radius),
(0, 255, 255), 2)
cv2.circle(frame, center, 5, (0, 0, 255), -1)
# then update the ponter trail
if self.previous_position:
cv2.line(self.trail, self.previous_position, center,
(255, 255, 255), 2)
cv2.add(self.trail, frame, frame)
self.previous_position = center
def detect(self, frame):
hsv_img = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
# split the video frame into color channels
h, s, v = cv2.split(hsv_img)
self.channels['hue'] = h
self.channels['saturation'] = s
self.channels['value'] = v
# Threshold ranges of HSV components; storing the results in place
self.threshold_image("hue")
self.threshold_image("saturation")
self.threshold_image("value")
# Perform an AND on HSV components to identify the laser!
self.channels['laser'] = cv2.bitwise_and(
self.channels['hue'],
self.channels['value']
)
self.channels['laser'] = cv2.bitwise_and(
self.channels['saturation'],
self.channels['laser']
)
# Merge the HSV components back together.
hsv_image = cv2.merge([
self.channels['hue'],
self.channels['saturation'],
self.channels['value'],
])
self.track(frame, self.channels['laser'])
return hsv_image
def display(self, img, frame):
"""Display the combined image and (optionally) all other image channels
NOTE: default color space in OpenCV is BGR.
"""
cv2.imshow('RGB_VideoFrame', frame)
cv2.imshow('LaserPointer', self.channels['laser'])
if self.display_thresholds:
cv2.imshow('Thresholded_HSV_Image', img)
cv2.imshow('Hue', self.channels['hue'])
cv2.imshow('Saturation', self.channels['saturation'])
cv2.imshow('Value', self.channels['value'])
def setup_windows(self):
sys.stdout.write("Using OpenCV version: {0}\n".format(cv2.__version__))
# create output windows
self.create_and_position_window('LaserPointer', 0, 0)
self.create_and_position_window('RGB_VideoFrame',
10 + self.cam_width, 0)
if self.display_thresholds:
self.create_and_position_window('Thresholded_HSV_Image', 10, 10)
self.create_and_position_window('Hue', 20, 20)
self.create_and_position_window('Saturation', 30, 30)
self.create_and_position_window('Value', 40, 40)
def run(self):
# Set up window positions
self.setup_windows()
# Set up the camera capture
self.setup_camera_capture()
while True:
# 1. capture the current image
success, frame = self.capture.read()
if not success: # no image captured... end the processing
sys.stderr.write("Could not read camera frame. Quitting\n")
sys.exit(1)
hsv_image = self.detect(frame)
self.display(hsv_image, frame)
self.handle_quit()
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Run the Laser Tracker')
parser.add_argument('-W', '--width',
default=640,
type=int,
help='Camera Width')
parser.add_argument('-H', '--height',
default=480,
type=int,
help='Camera Height')
parser.add_argument('-u', '--huemin',
default=20,
type=int,
help='Hue Minimum Threshold')
parser.add_argument('-U', '--huemax',
default=160,
type=int,
help='Hue Maximum Threshold')
parser.add_argument('-s', '--satmin',
default=100,
type=int,
help='Saturation Minimum Threshold')
parser.add_argument('-S', '--satmax',
default=255,
type=int,
help='Saturation Maximum Threshold')
parser.add_argument('-v', '--valmin',
default=200,
type=int,
help='Value Minimum Threshold')
parser.add_argument('-V', '--valmax',
default=255,
type=int,
help='Value Maximum Threshold')
parser.add_argument('-d', '--display',
action='store_true',
help='Display Threshold Windows')
params = parser.parse_args()
tracker = LaserTracker(
cam_width=params.width,
cam_height=params.height,
hue_min=params.huemin,
hue_max=params.huemax,
sat_min=params.satmin,
sat_max=params.satmax,
val_min=params.valmin,
val_max=params.valmax,
display_thresholds=params.display
)
tracker.run()
I'm by no means an expert on this, but taking a quick look at the code it seems that self.previous_position = (x,y).
Not sure why you would want to save to excel but to save to file just add f = open(filename, 'w') at the start of the run function and then f.write(self.previous_position) at the end of each loop.
Your file will contain all the recorded x and y coordinates recorded, if your set on saving this to excel check out python's out-the-box csv library.
i would like to do some program by capture image from webcam, then cropped it. after crop, i do some image processing and from the process it will run my robots. Here the full program:
import cv2
from cv2 import *
import numpy as np
import pylab
import pymorph
import mahotas
from matplotlib import pyplot
from PIL import Image
# initialize the camera
cam = VideoCapture(0) # 0 -> index of camera
s, img = cam.read()
# frame captured without any errors
if s:
imwrite("img.jpg",img) #save image
#Crop Image
imageFile = "img.jpg"
im1 = Image.open(imageFile)
def imgCrop(im):
box = (0, 199, 640, 200)
region = im.crop(box)
region.save('crop.jpg')
cImg = imgCrop(im1)
#thresholding
def greyImg(im):
gray = im.convert('L')
bw = gray.point(lambda x: 0 if x<128 else 255, '1')
bw.save("bw.jpg")
tImg = greyImg(cImg )
#direction
def find_centroid(im, rez):
width, height = im.size
XX, YY, count = 0, 0, 0
for x in xrange(0, width, rez):
for y in xrange(0, height, rez):
if im.getpixel((x, y)) == 255:
XX += x
YY += y
count += 1
return XX/count, YY/count
print find_centroid(tImg, 1)
def robo_direct():
cen = find_centroid(im, 1)
diff = cen[0] - 320
if diff > 10:
print 'right'
if diff < -10:
print 'left'
else:
print 'straight'
print robo_direct()
The error was come out like this:
File "compile.py", line 32, in greyImg
gray = im.convert('L')
AttributeError: 'NoneType' object has no attribute 'convert'
That is because im is a None object.
Try again the code with:
print im is None
And you'll see. I don't know about threshold, but obviously you are creating the im object the wrong way.
Your function imgCrop(im1) has no return statement and as such returns None. And then your greyImg(im) function also has no return statement and also will return None.
To fix that add return statements to both functions that for the first return region and the second return bw.
Also your robo_direct() function should return and not print the direction so that the call to it in the statement print robo_direct() would print the direction.
I am using matplotlib.pyplot.annotate to draw an arrow on my plot, like so:
import matplotlib.pyplot as plt
plt.annotate("",(x,ybottom),(x,ytop),arrowprops=dict(arrowstyle="->"))
I want to use an arrow style that has a flat line at one end and an arrow at the other, so combining the styles "|-|" and "->" to make something we might call "|->", but I can't figure out how to define my own style.
I thought I might try something like
import matplotlib.patches as patches
myarrow = patches.ArrowStyle("Fancy", head_length=0.4,head_width=0.2)
(which should just be the same as "->" for now; I can tweak the style later) but then how do I tell plt.annotate to use myarrow as the style? There is no arrowstyle property for plt.annotate, and arrowprops=dict(arrowstyle=myarrow) doesn't work either.
I've also tried defining it in the arrowprops dictionary, such as
plt.annotate("",(x,ybottom),(x,ytop),arrowprops=dict(head_length=0.4,head_width=0.2))
but that gives me errors about no attribute 'set_head_width'.
So, how can I define my own style for pyplot.annotate to use?
In the last example in your code you could have used headwidth, frac and width to customize the arrow, the result is arrow0 shown below. For highly customized arrows you can used arbitrary polygons. Below you can see the code I used to produce the figures.
To add more polygons you have to edit polygons dictionary, and the new polygons must have the first and the last point at the origin (0,0), the rescaling and repositioning are done automatically. The figure below illustrates how the polygons are defined.
There is still an issue with shrinking that disconnects the line with the polygons. The '|-|>' arrow that you requested can be easily created using this customization.
The code follows:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib import transforms
import numpy as np
from numpy import cos, sin
plt.close()
plt.plot([1,2],[0,4], 'w')
ax = plt.gcf().axes[0]
def patchesAB(styleA, styleB, orig, target,
widthA, lengthA, widthB, lengthB,
kwargsA, kwargsB, shrinkA=0., shrinkB=0.):
'''
Select 'styleA' and 'styleB' from the dictionary 'polygons'
widthA, lengthA, widthB, lenghtB, shrinkA, shrinkB are defined in points
kwargsA and kwargsB are dictionaries
'''
polygons = {\
'|':np.array([[0,0],[0,1],[0.1,1],[0.1,-1],[0,-1],[0,0]], dtype=float),
'arrow1':np.array([[0,0],[0,1],[-1,2],[3,0],[-1,-2],[0,-1],[0,0]], dtype=float),
'arrow2':np.array([[0,0],[-1,1],[0,2],[3,0],[0,-2],[-1,-1],[0,0]], dtype=float),
}
xyA = polygons.get( styleA )
xyB = polygons.get( styleB )
#
fig = plt.gcf()
ax = fig.axes[0]
trans = ax.transData
pixPunit = trans.transform([(1,0),(0,1)])-ax.transData.transform((0,0))
unitPpix = pixPunit
unitPpix[0,0] = 1/unitPpix[0,0]
unitPpix[1,1] = 1/unitPpix[1,1]
#
orig = np.array(orig)
target = np.array(target)
vec = target-orig
angle = np.arctan2( vec[1], vec[0] )
#
lengthA *= unitPpix[0,0]
lengthB *= unitPpix[0,0]
widthA *= unitPpix[1,1]
widthB *= unitPpix[1,1]
orig += (unitPpix[1,1]*sin(angle)+unitPpix[0,0]*cos(angle))*vec*shrinkA
target -= (unitPpix[1,1]*sin(angle)+unitPpix[0,0]*cos(angle))*vec*shrinkB
#TODO improve shrinking... another attempt:
#orig += unitPpix.dot(vec) * shrinkA
#target -= unitPpix.dot(vec) * shrinkB
# polA
if xyA != None:
a = transforms.Affine2D()
tA = a.rotate_around( orig[0], orig[1], angle+np.pi ) + trans
xyA = np.float_(xyA)
xyA[:,0] *= lengthA/(xyA[:,0].max()-xyA[:,0].min())
xyA[:,1] *= widthA/(xyA[:,1].max()-xyA[:,1].min())
xyA += orig
polA = patches.Polygon( xyA, **kwargsA )
polA.set_transform( tA )
else:
polA = None
# polB
if xyB != None:
a = transforms.Affine2D()
tB = a.rotate_around( target[0], target[1], angle ) + trans
xyB = np.float_(xyB)
xyB[:,0] *= lengthB/(xyB[:,0].max()-xyB[:,0].min())
xyB[:,1] *= widthB/(xyB[:,1].max()-xyB[:,1].min())
xyB += target
polB = patches.Polygon( xyB, **kwargsB )
polB.set_transform( tB )
else:
polB = None
return polA, polB
# ARROW 0
plt.annotate('arrow0',xy=(2,1.5),xycoords='data',
xytext=(1.1,1), textcoords='data',
arrowprops=dict(frac=0.1,headwidth=10., width=2.))
#
kwargsA = dict( lw=1., ec='k', fc='gray' )
kwargsB = dict( lw=1., ec='k', fc='b' )
# ARROW 1
orig = (1.1,2.)
target = (2.,2.5)
shrinkA = 0.
shrinkB = 0.
polA, polB = patchesAB( '|', 'arrow1', orig, target, 20.,1.,60.,60.,
kwargsA, kwargsB, shrinkA, shrinkB )
ax.add_patch(polA)
ax.add_patch(polB)
ax.annotate('arrow1', xy=target, xycoords='data',
xytext=orig, textcoords='data',
arrowprops=dict(arrowstyle='-', patchA=polA, patchB=polB,
lw=1., shrinkA=shrinkA, shrinkB=shrinkB, relpos=(0.,0.),
mutation_scale=1.))
# ARROW 2
orig = (1.1,3.)
target = (2.,3.5)
polA, polB = patchesAB( '|', 'arrow2', orig, target, 20.,1.,60.,60.,
kwargsA, kwargsB, shrinkA, shrinkB )
ax.add_patch(polA)
ax.add_patch(polB)
ax.annotate('arrow2', xy=target, xycoords='data',
xytext=orig, textcoords='data',
arrowprops=dict(arrowstyle='-', patchA=polA, patchB=polB,
lw=1., shrinkA=shrinkA, shrinkB=shrinkB, relpos=(0.,0.),
mutation_scale=1.))
plt.autoscale()
plt.xlim(1.,2.2)
plt.ylim(0.5,4)
plt.show()