Red Dot Tracker - python

I am writing a program that monitors the red dot of a laser and tracks its movement. I would like to collect the x and y movement of the laser (its deviation from a stable point). Some searching found me this code on GitHub, which I plan to modify and use to track the red dot with OpenCV. However, I am struggling to pull any data out of it.
How would I go about storing the x and y coordinates of the laser in Excel (or some other useful format)?
import sys
import argparse
import cv2
import numpy
class LaserTracker(object):
def __init__(self, cam_width=640, cam_height=480, hue_min=20, hue_max=160,
sat_min=100, sat_max=255, val_min=200, val_max=256,
display_thresholds=False):
"""
* ``cam_width`` x ``cam_height`` -- This should be the size of the
image coming from the camera. Default is 640x480.
HSV color space Threshold values for a RED laser pointer are determined
by:
* ``hue_min``, ``hue_max`` -- Min/Max allowed Hue values
* ``sat_min``, ``sat_max`` -- Min/Max allowed Saturation values
* ``val_min``, ``val_max`` -- Min/Max allowed pixel values
If the dot from the laser pointer doesn't fall within these values, it
will be ignored.
* ``display_thresholds`` -- if True, additional windows will display
values for threshold image channels.
"""
self.cam_width = cam_width
self.cam_height = cam_height
self.hue_min = hue_min
self.hue_max = hue_max
self.sat_min = sat_min
self.sat_max = sat_max
self.val_min = val_min
self.val_max = val_max
self.display_thresholds = display_thresholds
self.capture = None # camera capture device
self.channels = {
'hue': None,
'saturation': None,
'value': None,
'laser': None,
}
self.previous_position = None
self.trail = numpy.zeros((self.cam_height, self.cam_width, 3),
numpy.uint8)
def create_and_position_window(self, name, xpos, ypos):
"""Creates a named widow placing it on the screen at (xpos, ypos)."""
# Create a window
cv2.namedWindow(name)
# Resize it to the size of the camera image
cv2.resizeWindow(name, self.cam_width, self.cam_height)
# Move to (xpos,ypos) on the screen
cv2.moveWindow(name, xpos, ypos)
def setup_camera_capture(self, device_num=0):
"""Perform camera setup for the device number (default device = 0).
Returns a reference to the camera Capture object.
"""
try:
device = int(device_num)
sys.stdout.write("Using Camera Device: {0}\n".format(device))
except (IndexError, ValueError):
# assume we want the 1st device
device = 0
sys.stderr.write("Invalid Device. Using default device 0\n")
# Try to start capturing frames
self.capture = cv2.VideoCapture(device)
if not self.capture.isOpened():
sys.stderr.write("Faled to Open Capture device. Quitting.\n")
sys.exit(1)
# set the wanted image size from the camera
self.capture.set(
cv2.cv.CV_CAP_PROP_FRAME_WIDTH if cv2.__version__.startswith('2') else cv2.CAP_PROP_FRAME_WIDTH,
self.cam_width
)
self.capture.set(
cv2.cv.CV_CAP_PROP_FRAME_HEIGHT if cv2.__version__.startswith('2') else cv2.CAP_PROP_FRAME_HEIGHT,
self.cam_height
)
return self.capture
def handle_quit(self, delay=10):
"""Quit the program if the user presses "Esc" or "q"."""
key = cv2.waitKey(delay)
c = chr(key & 255)
if c in ['c', 'C']:
self.trail = numpy.zeros((self.cam_height, self.cam_width, 3),
numpy.uint8)
if c in ['q', 'Q', chr(27)]:
sys.exit(0)
def threshold_image(self, channel):
if channel == "hue":
minimum = self.hue_min
maximum = self.hue_max
elif channel == "saturation":
minimum = self.sat_min
maximum = self.sat_max
elif channel == "value":
minimum = self.val_min
maximum = self.val_max
(t, tmp) = cv2.threshold(
self.channels[channel], # src
maximum, # threshold value
0, # we don't care because of the selected type
cv2.THRESH_TOZERO_INV # t type
)
(t, self.channels[channel]) = cv2.threshold(
tmp, # src
minimum, # threshold value
255, # maxvalue
cv2.THRESH_BINARY # type
)
if channel == 'hue':
# only works for filtering red color because the range for the hue
# is split
self.channels['hue'] = cv2.bitwise_not(self.channels['hue'])
def track(self, frame, mask):
"""
Track the position of the laser pointer.
Code taken from
http://www.pyimagesearch.com/2015/09/14/ball-tracking-with-opencv/
"""
center = None
countours = cv2.findContours(mask, cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)[-2]
# only proceed if at least one contour was found
if len(countours) > 0:
# find the largest contour in the mask, then use
# it to compute the minimum enclosing circle and
# centroid
c = max(countours, key=cv2.contourArea)
((x, y), radius) = cv2.minEnclosingCircle(c)
moments = cv2.moments(c)
if moments["m00"] > 0:
center = int(moments["m10"] / moments["m00"]), \
int(moments["m01"] / moments["m00"])
else:
center = int(x), int(y)
# only proceed if the radius meets a minimum size
if radius > 10:
# draw the circle and centroid on the frame,
cv2.circle(frame, (int(x), int(y)), int(radius),
(0, 255, 255), 2)
cv2.circle(frame, center, 5, (0, 0, 255), -1)
# then update the pointer trail
if self.previous_position:
cv2.line(self.trail, self.previous_position, center,
(255, 255, 255), 2)
cv2.add(self.trail, frame, frame)
self.previous_position = center
def detect(self, frame):
hsv_img = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
# split the video frame into color channels
h, s, v = cv2.split(hsv_img)
self.channels['hue'] = h
self.channels['saturation'] = s
self.channels['value'] = v
# Threshold ranges of HSV components; storing the results in place
self.threshold_image("hue")
self.threshold_image("saturation")
self.threshold_image("value")
# Perform an AND on HSV components to identify the laser!
self.channels['laser'] = cv2.bitwise_and(
self.channels['hue'],
self.channels['value']
)
self.channels['laser'] = cv2.bitwise_and(
self.channels['saturation'],
self.channels['laser']
)
# Merge the HSV components back together.
hsv_image = cv2.merge([
self.channels['hue'],
self.channels['saturation'],
self.channels['value'],
])
self.track(frame, self.channels['laser'])
return hsv_image
def display(self, img, frame):
"""Display the combined image and (optionally) all other image channels
NOTE: default color space in OpenCV is BGR.
"""
cv2.imshow('RGB_VideoFrame', frame)
cv2.imshow('LaserPointer', self.channels['laser'])
if self.display_thresholds:
cv2.imshow('Thresholded_HSV_Image', img)
cv2.imshow('Hue', self.channels['hue'])
cv2.imshow('Saturation', self.channels['saturation'])
cv2.imshow('Value', self.channels['value'])
def setup_windows(self):
sys.stdout.write("Using OpenCV version: {0}\n".format(cv2.__version__))
# create output windows
self.create_and_position_window('LaserPointer', 0, 0)
self.create_and_position_window('RGB_VideoFrame',
10 + self.cam_width, 0)
if self.display_thresholds:
self.create_and_position_window('Thresholded_HSV_Image', 10, 10)
self.create_and_position_window('Hue', 20, 20)
self.create_and_position_window('Saturation', 30, 30)
self.create_and_position_window('Value', 40, 40)
def run(self):
# Set up window positions
self.setup_windows()
# Set up the camera capture
self.setup_camera_capture()
while True:
# 1. capture the current image
success, frame = self.capture.read()
if not success: # no image captured... end the processing
sys.stderr.write("Could not read camera frame. Quitting\n")
sys.exit(1)
hsv_image = self.detect(frame)
self.display(hsv_image, frame)
self.handle_quit()
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Run the Laser Tracker')
parser.add_argument('-W', '--width',
default=640,
type=int,
help='Camera Width')
parser.add_argument('-H', '--height',
default=480,
type=int,
help='Camera Height')
parser.add_argument('-u', '--huemin',
default=20,
type=int,
help='Hue Minimum Threshold')
parser.add_argument('-U', '--huemax',
default=160,
type=int,
help='Hue Maximum Threshold')
parser.add_argument('-s', '--satmin',
default=100,
type=int,
help='Saturation Minimum Threshold')
parser.add_argument('-S', '--satmax',
default=255,
type=int,
help='Saturation Maximum Threshold')
parser.add_argument('-v', '--valmin',
default=200,
type=int,
help='Value Minimum Threshold')
parser.add_argument('-V', '--valmax',
default=255,
type=int,
help='Value Maximum Threshold')
parser.add_argument('-d', '--display',
action='store_true',
help='Display Threshold Windows')
params = parser.parse_args()
tracker = LaserTracker(
cam_width=params.width,
cam_height=params.height,
hue_min=params.huemin,
hue_max=params.huemax,
sat_min=params.satmin,
sat_max=params.satmax,
val_min=params.valmin,
val_max=params.valmax,
display_thresholds=params.display
)
tracker.run()

I'm by no means an expert on this, but taking a quick look at the code it seems that self.previous_position holds the (x, y) coordinates you're after.
Not sure why you would want to save to Excel, but to save to a file just add f = open(filename, 'w') at the start of the run function and then write self.previous_position (formatted as a string) to it at the end of each loop.
Your file will then contain all the recorded x and y coordinates. If you're set on getting this into Excel, check out Python's out-of-the-box csv library.
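A minimal sketch of that idea, assuming only run() is changed and csv is imported at the top of the script; the CSV filename and column names are placeholders, not part of the original code:
import csv

def run(self):
    # Set up window positions and the camera capture as before
    self.setup_windows()
    self.setup_camera_capture()
    with open('laser_positions.csv', 'w', newline='') as log_file:  # placeholder filename
        log = csv.writer(log_file)
        log.writerow(['frame', 'x', 'y'])
        frame_number = 0
        while True:
            success, frame = self.capture.read()
            if not success:
                sys.stderr.write("Could not read camera frame. Quitting\n")
                sys.exit(1)
            hsv_image = self.detect(frame)
            frame_number += 1
            if self.previous_position is not None:
                # one row per frame in which a dot was found
                log.writerow([frame_number, self.previous_position[0], self.previous_position[1]])
            self.display(hsv_image, frame)
            self.handle_quit()
A .csv written this way opens directly in Excel; something like openpyxl or pandas.DataFrame.to_excel could be used later if a real .xlsx file is needed.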

Related

Scope issue between scripts due to multiprocessing

I am following this tutorial. After cloning her repository and getting the "track" command to work, I wanted to try to integrate a scanning function.
I went into her manager.py script and added my scanning procedure to the set_servos function as shown below (the added lines are wrapped in ** markers). This runs in servo_process:
import logging
from multiprocessing import Value, Process, Manager, Queue
import pantilthat as pth
import signal
import sys
import time
import RPi.GPIO as GPIO
from rpi_deep_pantilt.detect.util.visualization import visualize_boxes_and_labels_on_image_array
from rpi_deep_pantilt.detect.camera import run_pantilt_detect
from rpi_deep_pantilt.control.pid import PIDController
GPIO.setmode(GPIO.BCM)
GPIO.setwarnings(False)
GPIO.setup(8,GPIO.OUT)
logging.basicConfig()
LOGLEVEL = logging.getLogger().getEffectiveLevel()
RESOLUTION = (320, 320)
SERVO_MIN = -90
SERVO_MAX = 90
CENTER = (
RESOLUTION[0] // 2,
RESOLUTION[1] // 2
)
# function to handle keyboard interrupt
def signal_handler(sig, frame):
# print a status message
print("[INFO] You pressed `ctrl + c`! Exiting...")
# disable the servos
pth.servo_enable(1, False)
pth.servo_enable(2, False)
GPIO.output(8,GPIO.LOW)
# exit
sys.exit()
def in_range(val, start, end):
# determine the input value is in the supplied range
return (val >= start and val <= end)
def set_servos(pan, tilt, scan):
# signal trap to handle keyboard interrupt
signal.signal(signal.SIGINT, signal_handler)
**
#visualize_boxes_and_labels_on_image_array()
print(scan.value) # output: 't'
while scan.value == 't':**
print('Scanning')
pth.servo_one(90)
pth.servo_two(25)
time.sleep(10)
pth.servo_one(30)
pth.servo_two(25)
time.sleep(10)
pth.servo_one(-30)
pth.servo_two(25)
time.sleep(10)
pth.servo_one(-90)
pth.servo_two(25)
time.sleep(10)
pth.servo_one(-30)
pth.servo_two(25)
time.sleep(10)
pth.servo_one(30)
pth.servo_two(25)
time.sleep(10)
pth.servo_one(90)
pth.servo_two(25)
time.sleep(10)
continue
while True:
pan_angle = -1 * pan.value
tilt_angle = tilt.value
# if the pan angle is within the range, pan
if in_range(pan_angle, SERVO_MIN, SERVO_MAX):
pth.pan(pan_angle)
else:
logging.info(f'pan_angle not in range {pan_angle}')
if in_range(tilt_angle, SERVO_MIN, SERVO_MAX):
pth.tilt(tilt_angle)
else:
logging.info(f'tilt_angle not in range {tilt_angle}')
def pid_process(output, p, i, d, box_coord, origin_coord, action):
# signal trap to handle keyboard interrupt
signal.signal(signal.SIGINT, signal_handler)
# create a PID and initialize it
p = PIDController(p.value, i.value, d.value)
p.reset()
# loop indefinitely
while True:
error = origin_coord - box_coord.value
output.value = p.update(error)
# logging.info(f'{action} error {error} angle: {output.value}')
def pantilt_process_manager(
model_cls,
labels=('Raspi',),
rotation=0
):
pth.servo_enable(1, True)
pth.servo_enable(2, True)
with Manager() as manager:
**scan = manager.Value('c', 't')**
# set initial bounding box (x, y)-coordinates to center of frame
center_x = manager.Value('i', 0)
center_y = manager.Value('i', 0)
center_x.value = RESOLUTION[0] // 2
center_y.value = RESOLUTION[1] // 2
# pan and tilt angles updated by independent PID processes
pan = manager.Value('i', 0)
tilt = manager.Value('i', 0)
# PID gains for panning
pan_p = manager.Value('f', 0.05)
# 0 time integral gain until inferencing is faster than ~50ms
pan_i = manager.Value('f', 0.1)
pan_d = manager.Value('f', 0)
# PID gains for tilting
tilt_p = manager.Value('f', 0.15)
# 0 time integral gain until inferencing is faster than ~50ms
tilt_i = manager.Value('f', 0.2)
tilt_d = manager.Value('f', 0)
**detect_processr = Process(target=run_pantilt_detect,
args=(center_x, center_y, labels, model_cls, rotation, scan))**
pan_process = Process(target=pid_process,
args=(pan, pan_p, pan_i, pan_d, center_x, CENTER[0], 'pan'))
tilt_process = Process(target=pid_process,
args=(tilt, tilt_p, tilt_i, tilt_d, center_y, CENTER[1], 'tilt'))
**servo_process = Process(target=set_servos, args=(pan, tilt, scan))**
detect_processr.start()
pan_process.start()
tilt_process.start()
servo_process.start()
detect_processr.join()
pan_process.join()
tilt_process.join()
servo_process.join()
if __name__ == '__main__':
pantilt_process_manager()
In a separate script called visualization.py, there is a statement in the visualize_boxes_and_labels_on_image_array function, which is responsible for overlaying bounding boxes on the camera feed once an object is detected, as follows (the added lines are wrapped in ** markers towards the end). This runs in detect_processr:
# python
import collections
import logging
# lib
import numpy as np
import PIL.Image as Image
import PIL.ImageColor as ImageColor
import PIL.ImageDraw as ImageDraw
import PIL.ImageFont as ImageFont
import six
import RPi.GPIO as GPIO
import time
from time import sleep
import pantilthat as pth
GPIO.setmode(GPIO.BCM)
GPIO.setwarnings(False)
GPIO.setup(8,GPIO.OUT)
STANDARD_COLORS = [
'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque',
'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan',
'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod',
'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue',
'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue',
'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid',
'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin',
'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow',
'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White',
'WhiteSmoke', 'Yellow', 'YellowGreen'
]
def _get_multiplier_for_color_randomness():
num_colors = len(STANDARD_COLORS)
prime_candidates = [5, 7, 11, 13, 17]
# Remove all prime candidates that divide the number of colors.
prime_candidates = [p for p in prime_candidates if num_colors % p]
if not prime_candidates:
return 1
# Return the closest prime number to num_colors / 10.
abs_distance = [np.abs(num_colors / 10. - p) for p in prime_candidates]
num_candidates = len(abs_distance)
inds = [i for _, i in sorted(zip(abs_distance, range(num_candidates)))]
return prime_candidates[inds[0]]
def draw_mask_on_image_array(image, mask, color='red', alpha=0.4):
if image.dtype != np.uint8:
raise ValueError('`image` not of type np.uint8')
if mask.dtype != np.uint8:
raise ValueError('`mask` not of type np.uint8')
if np.any(np.logical_and(mask != 1, mask != 0)):
raise ValueError('`mask` elements should be in [0, 1]')
if image.shape[:2] != mask.shape:
raise ValueError('The image has spatial dimensions %s but the mask has '
'dimensions %s' % (image.shape[:2], mask.shape))
rgb = ImageColor.getrgb(color)
pil_image = Image.fromarray(image)
solid_color = np.expand_dims(
np.ones_like(mask), axis=2) * np.reshape(list(rgb), [1, 1, 3])
pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA')
pil_mask = Image.fromarray(np.uint8(255.0*alpha*mask)).convert('L')
pil_image = Image.composite(pil_solid_color, pil_image, pil_mask)
np.copyto(image, np.array(pil_image.convert('RGB')))
def draw_bounding_box_on_image(image,
ymin,
xmin,
ymax,
xmax,
color='red',
thickness=4,
display_str_list=(),
use_normalized_coordinates=True):
GPIO.output(8,GPIO.HIGH)
print('Object Detected')
draw = ImageDraw.Draw(image)
im_width, im_height = image.size
if use_normalized_coordinates:
(left, right, top, bottom) = (xmin * im_width, xmax * im_width,
ymin * im_height, ymax * im_height)
else:
(left, right, top, bottom) = (xmin, xmax, ymin, ymax)
draw.line([(left, top), (left, bottom), (right, bottom),
(right, top), (left, top)], width=thickness, fill=color)
try:
font = ImageFont.truetype('arial.ttf', 24)
except IOError:
font = ImageFont.load_default()
# If the total height of the display strings added to the top of the bounding
# box exceeds the top of the image, stack the strings below the bounding box
# instead of above.
display_str_heights = [font.getsize(ds)[1] for ds in display_str_list]
# Each display_str has a top and bottom margin of 0.05x.
total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)
if top > total_display_str_height:
text_bottom = top
else:
text_bottom = bottom + total_display_str_height
# Reverse list and print from bottom to top.
for display_str in display_str_list[::-1]:
text_width, text_height = font.getsize(display_str)
margin = np.ceil(0.05 * text_height)
draw.rectangle(
[(left, text_bottom - text_height - 2 * margin), (left + text_width,
text_bottom)],
fill=color)
draw.text(
(left + margin, text_bottom - text_height - margin),
display_str,
fill='black',
font=font)
text_bottom -= text_height - 2 * margin
def draw_bounding_box_on_image_array(image,
ymin,
xmin,
ymax,
xmax,
color='red',
thickness=4,
display_str_list=(),
use_normalized_coordinates=True):
image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color,
thickness, display_str_list,
use_normalized_coordinates)
np.copyto(image, np.array(image_pil))
def draw_keypoints_on_image(image,
keypoints,
color='red',
radius=2,
use_normalized_coordinates=True):
draw = ImageDraw.Draw(image)
im_width, im_height = image.size
keypoints_x = [k[1] for k in keypoints]
keypoints_y = [k[0] for k in keypoints]
if use_normalized_coordinates:
keypoints_x = tuple([im_width * x for x in keypoints_x])
keypoints_y = tuple([im_height * y for y in keypoints_y])
for keypoint_x, keypoint_y in zip(keypoints_x, keypoints_y):
draw.ellipse([(keypoint_x - radius, keypoint_y - radius),
(keypoint_x + radius, keypoint_y + radius)],
outline=color, fill=color)
def draw_keypoints_on_image_array(image,
keypoints,
color='red',
radius=2,
use_normalized_coordinates=True):
image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
draw_keypoints_on_image(image_pil, keypoints, color, radius,
use_normalized_coordinates)
np.copyto(image, np.array(image_pil))
def visualize_boxes_and_labels_on_image_array(
image,
boxes,
classes,
scores,
category_index,
** scan, **
instance_masks=None,
instance_boundaries=None,
keypoints=None,
track_ids=None,
use_normalized_coordinates=False,
max_boxes_to_draw=20,
min_score_thresh=.5,
agnostic_mode=False,
line_thickness=4,
groundtruth_box_visualization_color='black',
skip_scores=False,
skip_labels=False,
skip_track_ids=False):
GPIO.output(8,GPIO.LOW)
# Create a display string (and color) for every box location, group any boxes
# that correspond to the same location.
box_to_display_str_map = collections.defaultdict(list)
box_to_color_map = collections.defaultdict(str)
box_to_instance_masks_map = {}
box_to_instance_boundaries_map = {}
box_to_keypoints_map = collections.defaultdict(list)
box_to_track_ids_map = {}
if not max_boxes_to_draw:
max_boxes_to_draw = boxes.shape[0]
for i in range(min(max_boxes_to_draw, boxes.shape[0])):
if scores is None or scores[i] > min_score_thresh:
box = tuple(boxes[i].tolist())
if instance_masks is not None:
box_to_instance_masks_map[box] = instance_masks[i]
if instance_boundaries is not None:
box_to_instance_boundaries_map[box] = instance_boundaries[i]
if keypoints is not None:
box_to_keypoints_map[box].extend(keypoints[i])
if track_ids is not None:
box_to_track_ids_map[box] = track_ids[i]
if scores is None:
box_to_color_map[box] = groundtruth_box_visualization_color
else:
display_str = ''
if not skip_labels:
if not agnostic_mode:
if classes[i] in six.viewkeys(category_index):
class_name = category_index[classes[i]]['name']
else:
class_name = 'N/A'
display_str = str(class_name)
if not skip_scores:
if not display_str:
display_str = '{}%'.format(int(100*scores[i]))
else:
display_str = '{}: {}%'.format(
display_str, int(100*scores[i]))
if not skip_track_ids and track_ids is not None:
if not display_str:
display_str = 'ID {}'.format(track_ids[i])
else:
display_str = '{}: ID {}'.format(
display_str, track_ids[i])
box_to_display_str_map[box].append(display_str)
if agnostic_mode:
box_to_color_map[box] = 'DarkOrange'
elif track_ids is not None:
prime_multipler = _get_multiplier_for_color_randomness()
box_to_color_map[box] = STANDARD_COLORS[
(prime_multipler * track_ids[i]) % len(STANDARD_COLORS)]
else:
box_to_color_map[box] = STANDARD_COLORS[
classes[i] % len(STANDARD_COLORS)]
# Draw all boxes onto image.
for box, color in box_to_color_map.items():
ymin, xmin, ymax, xmax = box
if instance_masks is not None:
draw_mask_on_image_array(
image,
box_to_instance_masks_map[box],
color=color
)
if instance_boundaries is not None:
draw_mask_on_image_array(
image,
box_to_instance_boundaries_map[box],
color='red',
alpha=1.0
)
draw_bounding_box_on_image_array(
image,
ymin,
xmin,
ymax,
xmax,
color=color,
thickness=line_thickness,
display_str_list=box_to_display_str_map[box],
use_normalized_coordinates=use_normalized_coordinates)
**scan.value = 'f'
print(scan.value) # output: 'f'**
if keypoints is not None:
draw_keypoints_on_image_array(
image,
box_to_keypoints_map[box],
color=color,
radius=line_thickness / 2,
use_normalized_coordinates=use_normalized_coordinates)
return image
The hope was that once an object is detected, the scanning loop would break. visualization.py is executed in the detect process, but only when an object is detected.
The values are correctly relayed, as my print statements show below, but the loop still doesn't break:
$ rpi-deep-pantilt track Raspi
t
Scanning
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
Object Detected
f
^C
The issue you're having here is that memory is (generally) not shared between the parts of a multiprocessing Python program. In this case, scan_on is a local variable in your manager.py script, which is then re-instantiated in the visualization.py script.
To share data between Processes, we can use Value objects. These wrap a value in an object that allows state to be shared between Processes simply by passing it as an argument.
Why? Plain values like integers and booleans are copied when you pass them into a function, so each process ends up with its own independent copy. A Value object, however, passes a reference to shared state rather than a copy.
Though I don't see where you're calling visualize_boxes_and_labels_on_image_array(), here's a start:
scan_on = manager.Value('c', 't') # the 'c' typecode is the C char type,
# other types found here: https://docs.python.org/3/library/array.html#module-array
# we'll edit your set_servo process to include this variable
servo_process = Process(target=set_servos, args=(pan, tilt, scan_on))
Whenever you call visualize_boxes_and_labels_on_image_array(), you'll also want to pass this same instance of scan_on. You should not have to redeclare it. To access or edit the value of scan_on, use the .value field.
print(scan_on.value) # output: 't'
scan_on.value = 'f'
print(scan_on.value) # output: 'f'
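As a self-contained illustration of the mechanism (this is a minimal sketch, not the pan-tilt code): one process loops while the shared flag is 't', and a second process flips it, which ends the loop.
from multiprocessing import Manager, Process
import time

def scanner(scan):
    # keep "scanning" until another process flips the shared flag
    while scan.value == 't':
        print('Scanning')
        time.sleep(0.5)
    print('Scan stopped, flag is now', scan.value)

def detector(scan):
    # pretend an object shows up after two seconds, then flip the flag
    time.sleep(2)
    scan.value = 'f'

if __name__ == '__main__':
    with Manager() as manager:
        scan = manager.Value('c', 't')  # shared flag, same idea as scan_on
        p1 = Process(target=scanner, args=(scan,))
        p2 = Process(target=detector, args=(scan,))
        p1.start()
        p2.start()
        p1.join()
        p2.join()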

Disparity Map just shows contouring

The problem:
The goal is to create a disparity map for two parallel cameras. Currently the calculation itself is working, and I have a live disparity map. It just shows contouring instead of information for every pixel, which is not what a disparity map should do.
What I have tried:
I tried the Tsukuba example; the lines are commented out now, but they work. So this proves that the functions used do work.
The result of my code is here: https://imgur.com/a/bIDmdkk (I probably don't have the reputation needed to upload images)
As can be seen in that image, just the outline, the contour, of my face is visible. This contour reacts to my actual distance - getting brighter or darker - but the rest of the image is dark.
With all parameters commented out (as in the example) it does not work either, but has lots and lots of speckles laid over it.
I also tried almost every combination of numDisparities and blockSize.
Changing the position of the cameras relative to one another alters the result but does not change it massively. I made sure to have them in line with each other, looking in parallel.
Edit: I tinkered a bit and got this result: https://imgur.com/a/m2o9FOE. Compared to the previous result there are more features, but also more noise. (This one has fewer disparities and a different color conversion.)
SOLVED: [I tried running stereo.compute within the while loop with BGR images, but that does not work. The Tsukuba example images are colored though, so there might be some wrong datatype that I do not see.
Everything is uint8 currently.] => I forgot that imread("", 0) reads an image as grayscale. So everything behaves as it should in this regard.
So what is the difference between my left/right images and the ones resulting in https://docs.opencv.org/master/disparity_map.jpg ?
The code:
import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt
cap1 = cv.VideoCapture(1)
cap3 = cv.VideoCapture(3)
#imgR = cv.imread('tsuL.png',0)
#imgL = cv.imread('tsuR.png',0)
#stereoTest = cv.StereoBM_create(numDisparities=16, blockSize=15)
#disparityTest = stereoTest.compute(imgL,imgR)
while True:
# save current camera image
ret1, frame1 = cap1.read()
ret3, frame3 = cap3.read()
# switch from BGR to gray
grayFrame1 = cv.cvtColor(frame1, cv.COLOR_BGR2GRAY)
grayFrame3 = cv.cvtColor(frame3, cv.COLOR_BGR2GRAY)
# disparity params
stereo = cv.StereoBM_create(numDisparities=128, blockSize=5)
stereo.setTextureThreshold(600)
#stereo.setSpeckleRange(4)
#stereo.setSpeckleWindowSize(9)
stereo.setMinDisparity(0)
# calculate both variants (Camera 1 Left, Camera 2 Right and Camera 1 right, Camera 2 left)
disparity = stereo.compute(grayFrame1,grayFrame3)
disparity2 = stereo.compute(grayFrame3,grayFrame1)
#res = cv.cvtColor(disparity,cv.COLOR_GRAY2BGR)
# Should have been 65535 from int16 to int8, but 4095 works..
div = 65535.0/16
res = cv.convertScaleAbs(disparity, alpha=(255.0/div))
res2= cv.convertScaleAbs(disparity2, alpha=(255.0/div))
# Show disparity map
cv.namedWindow("Disparity")
cv.moveWindow("Disparity", 450, 20)
cv.imshow('Disparity', np.hstack([res,res2]))
keyboard = cv.waitKey(30)
if keyboard == ord('q') or keyboard == 27:
break
cap1.release()
cap3.release()
cv.destroyAllWindows()
New Code
I got the camera calibration data from boofcv and copied some lines from https://stackoverflow.com/a/29151300/13150965 to my code.
            Schwarz      S/W
Xc          311.0        323.3
Yc          257.1        261.9
fx          603.0        593.6
fy          604.3        596.5
skew
radial      1.43e-01     1.1e-01
            -3.03e-01    -2.43e-01
tangential  1.37e-02     1.25e-02
            -9.77e-03    -9.79e-04
These are the values I received for each Camera (Schwarz and S/W are just names for each camera, they have different cables, that's how I recognize them)
import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt
cap1 = cv.VideoCapture(0)
cap3 = cv.VideoCapture(1)
cameraMatrix1 = np.array(
[[603.0, 0, 311.0],
[0, 604.3, 257.1],
[0, 0, 1]]
)
cameraMatrix2 = np.array(
[[593.6, 0, 323.3],
[0, 596.5, 261.9],
[0, 0, 1]]
)
distCoeffs1 = np.array([[0.143, -0.303, 0.0137, -0.00977, 0.0]])
distCoeffs2 = np.array([[0.11, -0.243, 0.0125, -0.000979, 0.0]])
R = np.array(
[[1.0, 0.0, 0.0],
[0.0, 1.0, 0.0],
[0.0, 0.0, 1.0]]
)
T = np.array(
[[98.0],
[0.0],
[0.0]]
)
# Params from camera calibration
camMats = [cameraMatrix1, cameraMatrix2]
distCoeffs = [distCoeffs1, distCoeffs2]
camSources = [0,1]
for src in camSources:
distCoeffs[src][0][4] = 0.0 # use only the first 2 values in distCoeffs
xOff = 450
div = 64.0
i = 0
while True:
# save current camera image
ret1, frame1 = cap1.read()
ret3, frame3 = cap3.read()
w, h = frame1.shape[:2]
# The rectification process
newCams = [0,0]
roi = [0,0]
frames = [frame1, frame3]
i = i + 1
if i > 10:
for src in camSources:
newCams[src], roi[src] = cv.getOptimalNewCameraMatrix(cameraMatrix = camMats[src],
distCoeffs = distCoeffs[src],
imageSize = (w,h),
alpha = 0)
rectFrames = [0,0]
for src in camSources:
rectFrames[src] = cv.undistort(frames[src], camMats[src], distCoeffs[src])
R1,R2,P1,P2,Q,roi1,roi2 = cv.stereoRectify(
cameraMatrix1 =camMats[0],
cameraMatrix2 =camMats[1],
distCoeffs1 =distCoeffs1,
distCoeffs2 =distCoeffs2,
imageSize = (w,h),
R=R,
T=T,
alpha=1
)
# show camera images
cv.namedWindow("RectFrames")
cv.moveWindow("RectFrames", xOff, 532)
cv.imshow('RectFrames', np.hstack([rectFrames[0],rectFrames[1]]))
# switch from BGR to gray
grayFrame1 = cv.cvtColor(rectFrames[0], cv.COLOR_BGR2GRAY)
grayFrame3 = cv.cvtColor(rectFrames[1], cv.COLOR_BGR2GRAY)
# disparity params
stereo = cv.StereoBM_create(numDisparities=16, blockSize=15)
# calculate both variants (Camera 1 Left, Camera 2 Right and Camera 1 right, Camera 2 left)
disparity = stereo.compute(grayFrame1,grayFrame3)
disparity2 = stereo.compute(grayFrame3,grayFrame1)
# Should have been 65535 from int16 to int8, but 4095 works..
res = cv.convertScaleAbs(disparity, alpha=(255.0/(div-1)))
res2= cv.convertScaleAbs(disparity2, alpha=(255.0/(div-1)))
# Show disparity map
cv.namedWindow("Disparity")
cv.moveWindow("Disparity", xOff, 20)
cv.imshow('Disparity', np.hstack([res,res2]))
keyboard = cv.waitKey(30)
if keyboard == ord('q') or keyboard == 27:
break
cap1.release()
cap3.release()
cv.destroyAllWindows()
I can see that the images are being undistorted: https://imgur.com/a/SBmv7IY
But I am still doing something wrong.
The R and T are made up, as the cameras look parallel (no rotation) and are 9.8 cm apart from one another.
The values for R and T calculated via the script from StereoCalibration in OpenCV on Python resulted in the identity matrix for R and an empty vector for T. The latter cannot be right.
I now got the R and T values from a proper calibration of the cameras, but that does in fact not solve my problem. So either there is still an error in that calculation, or this problem has to be solved differently.
I rewrote the entire script to see at which step it misbehaves - and to tidy things up. As it stands, the calibration works up to cv2.initUndistortRectifyMap; if I use this map with cv2.remap on my camera image, I just get a black image.
import numpy as np
import cv2
from VideoCapture import Device
from PIL import Image
import glob
print("Importing Images")
image_listR = []
image_listL = []
w = 640
h = 480
for filename in glob.glob('StereoCalibrate\imageR*'): #assuming gif
im=Image.open(filename).convert('RGB')
cvim= np.array(im)
cvim = cvim[:, :, ::-1].copy()
image_listR.append(cvim)
for filename in glob.glob('StereoCalibrate\imageL*'): #assuming gif
im=Image.open(filename).convert('RGB')
cvim= np.array(im)
cvim = cvim[:, :, ::-1].copy()
image_listL.append(cvim)
imagesR = len(image_listR)
imagesL = len(image_listL)
print("Found {%d} images for Left camera" % imagesL)
print("Found {%d} images for Right camera" % imagesR)
if imagesR == imagesL:
print("Number of Images match")
else:
print("Number of Images do not match")
print("Using loaded images")
board_w = 8
board_h = 5
board_sz = (8,5)
board_n = board_w*board_h
# termination criteria
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
# Arrays to store object points and image points from all the images.
object_points = [] # 3d point in real world space
imagePoints1 = [] # 2d points in image plane.
imagePoints2 = [] # 2d points in image plane.
corners1 = []
corners2 = []
obj = np.zeros((5*8,3), np.float32)
obj[:,:2] = np.mgrid[0:8,0:5].T.reshape(-1,2)
vidStreamL = cv2.VideoCapture(1) # index of your camera
vidStreamR = cv2.VideoCapture(0) # index of your camera
success = 0
found1 = False
found2 = False
i=0
while (success < imagesR*0.9):
#Loop through the image list
if i >= imagesL:
i = 0
img1 = image_listL[i]
img2 = image_listR[i]
#Convert images to grayscale
gray1 = cv2.cvtColor(img1,cv2.COLOR_BGR2GRAY)
gray2 = cv2.cvtColor(img2,cv2.COLOR_BGR2GRAY)
#Check for Chessboard Pattern
found1, corners1 = cv2.findChessboardCorners(img1, board_sz)
found2, corners2 = cv2.findChessboardCorners(img2, board_sz)
#Draw Chessboard in image
if (found1):
cv2.cornerSubPix(gray1, corners1, (11, 11), (-1, -1),criteria)
cv2.drawChessboardCorners(gray1, board_sz, corners1, found1)
if (found2):
cv2.cornerSubPix(gray2, corners2, (11, 11), (-1, -1), criteria)
cv2.drawChessboardCorners(gray2, board_sz, corners2, found2)
#Show grayscale image with chessboard marker
cv2.imshow('image1', gray1)
cv2.imshow('image2', gray2)
if (found1 != 0 and found2 != 0):
#Remove successful detected images from list
image_listL.pop(i)
image_listR.pop(i)
imagesL-=1
imagePoints1.append(corners1);
imagePoints2.append(corners2);
object_points.append(obj);
success+=1
print("{", success, "} / {",imagesR*0.9,"} calibration images detected")
if (success >= imagesR*0.9):
break
i = i + 1
cv2.waitKey(1)
cv2.destroyAllWindows()
print("Calibrating")
cx1 = 327.0
cy1 = 247.9
fx1 = 608.3
fy1 = 607.7
rx1 = 0.129
ry1 = -0.269
tx1 = 0.00382
ty1 = -0.00151
camMat1 = np.array(
[[fx1, 0, cx1],
[0, fy1, cy1],
[0, 0, 1]])
cx2 = 329.8
cy2 = 249.0
fx2 = 601.7
fy2 = 601.1
rx2 = 0.149
ry2 = -0.322
tx2 = 0.0039
ty2 = -0.000837
camMat2 = np.array(
[[fx2, 0, cx2],
[0, fy2, cy2],
[0, 0, 1]])
disCoe1 = np.array([[0.0,0.0,0.0,0.0,0.0]])
disCoe2 = np.array([[0.0,0.0,0.0,0.0,0.0]])
R = np.zeros(shape=(3,3))
T = np.zeros(shape=(3,3))
E = np.zeros(shape=(3,3))
F = np.zeros(shape=(3,3))
retval, camMat1, disCoe1, camMat2, disCoe2, R, T, E, F = cv2.stereoCalibrate(object_points, imagePoints1, imagePoints2, camMat1, disCoe1, camMat2, disCoe2, (w, h), flags = cv2.CALIB_USE_INTRINSIC_GUESS)
print("Done Calibration\n")
R1 = np.zeros(shape=(3,3))
R2 = np.zeros(shape=(3,3))
P1 = np.zeros(shape=(3,4))
P2 = np.zeros(shape=(3,4))
print("T:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in T]))
print("E:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in E]))
print("F:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in F]))
print("R:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in R]))
print("CAM1:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in camMat1]))
print("CAM2:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in camMat2]))
print("DIS1:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in disCoe1]))
print("DIS2:")
print('\n'.join([' '.join(['{:4}'.format(item) for item in row])
for row in disCoe2]))
print("Rectifying cameras")
cv2.stereoRectify(camMat1, disCoe1, camMat2, disCoe2,(w, h), R, T)
#print("Undistort image")
#map1x, map1y = cv2.initUndistortRectifyMap(camMat1, disCoe1, R1, camMat1, (w, h), cv2.CV_32FC1)
#map2x, map2y = cv2.initUndistortRectifyMap(camMat2, disCoe2, R2, camMat2, (w, h), cv2.CV_32FC1)
print("Settings complete\n")
i = 1
j = 1
while(True):
retL, img1 = vidStreamL.read()
retR, img2 = vidStreamR.read()
img1 = cv2.undistort(img1, camMat1, disCoe1)
img2 = cv2.undistort(img2, camMat2, disCoe2)
cv2.imshow("ImgCam", np.hstack([img1,img2]));
#imgU1 = np.zeros((h,w,3), np.uint8)
#imgU2 = np.zeros((h,w,3), np.uint8)
#imgU1 = cv2.remap(img1, map1x, map1y, cv2.INTER_LINEAR, imgU1, cv2.BORDER_CONSTANT, 0)
#imgU2 = cv2.remap(img2, map2x, map2y, cv2.INTER_LINEAR, imgU2, cv2.BORDER_CONSTANT, 0)
#cv2.imshow("ImageCam", np.hstack([imgU1,imgU2]));
#imgU1 = cv2.cvtColor(imgU1, cv2.COLOR_BGR2GRAY)
#imgU2 = cv2.cvtColor(imgU2, cv2.COLOR_BGR2GRAY)
img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
stereo = cv2.StereoBM_create(numDisparities=16, blockSize=15)
disparity = stereo.compute(img1,img2)
disparit2 = stereo.compute(img2,img1)
res = cv2.convertScaleAbs(disparity, alpha=(255.0/512.0))
re2 = cv2.convertScaleAbs(disparit2, alpha=(255.0/512.0))
cv2.namedWindow("Disparity")
cv2.imshow('Disparity', np.hstack([res,re2]))
cv2.waitKey(1)
Output:
Importing Images
Found {90} images for Left camera
Found {90} images for Right camera
Number of Images match
Using loaded images
{ 1 } / { 81.0 } calibration images detected
{ 2 } / { 81.0 } calibration images detected
...
{ 81 } / { 81.0 } calibration images detected
Calibrating
Done Calibration
T:
-3.4549164747952514
-0.15507627811210184
-0.058176064658149625
E:
0.0009397723130476023 0.05762864132890782 -0.15527769659160615
-0.01780225919479015 0.01349075458635349 3.455334047732434
-0.008356129824974412 -3.458367965240172 0.010848591597549652
F:
3.59441069386539e-08 2.1966757991956236e-06 -0.0032581679670958268
-6.799554333159719e-07 5.135279707045414e-07 0.060534502577423176
6.856712419870922e-06 -0.061575681061419536 1.0
R:
0.9988149170858261 -0.0472903202575948 -0.01150595570860947
0.047251107481307925 0.998876350140538 -0.0036564971909233096
0.011665943966274269 0.0031084947887139625 0.9999271188499311
CAM1:
457.8949692862012 0.0 333.02411929079784
0.0 459.45537763505865 239.7961684844508
0.0 0.0 1.0
CAM2:
460.4374113961873 0.0 342.68117331116434
0.0 461.07367491328057 244.62051778708334
0.0 0.0 1.0
DIS1:
0.06391854958023913 -0.2191286122082927 -0.000947168228999159 0.004660285089171575 0.08044318478168837
DIS2:
0.011643796283126952 0.14239490114798584 0.001548517080560543 0.011862118627062223 -0.5191998209097282
Rectifying cameras
Settings complete
You missed the calibration and rectification process, which is the first step of a disparity algorithm.
The steps below get you to your disparity map (a rough OpenCV sketch follows after the note):
Calibrate your cameras and find their intrinsic and extrinsic parameters.
With the camera and distortion matrices from the calibration, rectify your images.
Pass the rectified images to your stereo matching algorithm.
Get the disparity map.
Note: a raw disparity map will be poor in textureless regions.
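Here is a minimal sketch of those steps, assuming objpoints, imgpoints_l and imgpoints_r were collected from chessboard views and (w, h) is the image size; the function and variable names are placeholders, not the asker's code:
import cv2 as cv

def build_rectification_maps(objpoints, imgpoints_l, imgpoints_r, w, h):
    # 1. calibrate each camera (intrinsics + distortion)
    _, K1, D1, _, _ = cv.calibrateCamera(objpoints, imgpoints_l, (w, h), None, None)
    _, K2, D2, _, _ = cv.calibrateCamera(objpoints, imgpoints_r, (w, h), None, None)
    # 2. stereo calibration gives the rotation R and translation T between the cameras
    _, K1, D1, K2, D2, R, T, _, _ = cv.stereoCalibrate(
        objpoints, imgpoints_l, imgpoints_r, K1, D1, K2, D2, (w, h),
        flags=cv.CALIB_FIX_INTRINSIC)
    # 3. rectification transforms, then per-pixel remap tables
    R1, R2, P1, P2, Q, roi1, roi2 = cv.stereoRectify(K1, D1, K2, D2, (w, h), R, T)
    map_l = cv.initUndistortRectifyMap(K1, D1, R1, P1, (w, h), cv.CV_32FC1)
    map_r = cv.initUndistortRectifyMap(K2, D2, R2, P2, (w, h), cv.CV_32FC1)
    return map_l, map_r

def disparity_from_pair(gray_l, gray_r, map_l, map_r):
    # 4. rectify both frames, then run the block matcher on the rectified pair
    rect_l = cv.remap(gray_l, map_l[0], map_l[1], cv.INTER_LINEAR)
    rect_r = cv.remap(gray_r, map_r[0], map_r[1], cv.INTER_LINEAR)
    stereo = cv.StereoBM_create(numDisparities=64, blockSize=15)
    return stereo.compute(rect_l, rect_r)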

Improve HED algorithm for edge detection

I am working on an image processing task in Python which depends mainly on detecting the grains in images of soil samples, so the first step of the processing is edge detection. I use the HED algorithm (holistically nested edge detection) for this step rather than other edge detection functions such as Canny or Sobel.
However, I face a problem detecting the grains of fine soil particles, as in the sand sample images shown below. I am asking if there is a modification that can be made to the image or to the algorithm to improve edge detection, so that it captures the borders of as many of the grains as possible.
This is the algorithm used, and the results of using it for edge detection.
# USAGE
# python detect_edges_image.py --edge-detector hed_model --image images/guitar.jpg
# import the necessary packages
import argparse
import cv2
import os
import easygui
import pandas as pd
path = easygui.fileopenbox()
print(path)
hdir = os.path.dirname(path)
print(hdir)
hfilename = os.path.basename(path)
print(hfilename)
hname = os.path.splitext(hfilename)[0]
print(hname)
houtname = hname+"_out.jpg"
print(houtname)
hout = os.path.sep.join([hdir,houtname])
print(hout)
# # construct the argument parser and parse the arguments
# ap = argparse.ArgumentParser()
# ap.add_argument("-d", "--edge-detector", type=str, required=True,
# help="path to OpenCV's deep learning edge detector")
# ap.add_argument("-i", "--image", type=str, required=True,
# help="path to input image")
# args = vars(ap.parse_args())
class CropLayer(object):
def __init__(self, params, blobs):
# initialize our starting and ending (x, y)-coordinates of
# the crop
self.startX = 0
self.startY = 0
self.endX = 0
self.endY = 0
def getMemoryShapes(self, inputs):
# the crop layer will receive two inputs -- we need to crop
# the first input blob to match the shape of the second one,
# keeping the batch size and number of channels
(inputShape, targetShape) = (inputs[0], inputs[1])
(batchSize, numChannels) = (inputShape[0], inputShape[1])
(H, W) = (targetShape[2], targetShape[3])
# compute the starting and ending crop coordinates
self.startX = int((inputShape[3] - targetShape[3]) / 2)
self.startY = int((inputShape[2] - targetShape[2]) / 2)
self.endX = self.startX + W
self.endY = self.startY + H
# return the shape of the volume (we'll perform the actual
# crop during the forward pass
return [[batchSize, numChannels, H, W]]
def forward(self, inputs):
# use the derived (x, y)-coordinates to perform the crop
return [inputs[0][:, :, self.startY:self.endY,
self.startX:self.endX]]
# load our serialized edge detector from disk
print("[INFO] loading edge detector...")
fpath = os.path.abspath(__file__)
fdir = os.path.dirname(fpath)
print(fdir)
protoPath = os.path.sep.join([fdir,"hed_model", "deploy.prototxt"])
print(protoPath)
modelPath = os.path.sep.join([fdir,"hed_model","hed_pretrained_bsds.caffemodel"])
print(modelPath)
net = cv2.dnn.readNetFromCaffe(protoPath, modelPath)
# register our new layer with the model
cv2.dnn_registerLayer("Crop", CropLayer)
# load the input image and grab its dimensions
image = cv2.imread('D:\My work\MASTERS WORK\SAND - UNIFORM\sand_180pxfor1cm(130,120,75).jpg')
# image =cv2.equalizeHist(img)
# image = cv2.pyrMeanShiftFiltering(image1,10,20)
(H, W) = image.shape[:2]
# print(image.shape[:2])
# image.shape[:2] = (H*3, W*3)
# image = cv2.resize(image,0.5)
# convert the image to grayscale, blur it, and perform Canny
# edge detection
print("[INFO] performing Canny edge detection...")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
# blurred = cv2.addWeighted(gray,1.5,blurred,-0.5,0)
canny = cv2.Canny(blurred,30, 150)
# construct a blob out of the input image for the Holistically-Nested
# Edge Detector
# cc = cv2.cvtColor(canny, cv2.COLOR_GRAY2BGR)
# image = image+cc
# mean = (104.00698793, 116.66876762, 122.67891434),
blob = cv2.dnn.blobFromImage(image, scalefactor=1.0, size=(W, H),
# mean=(110,95,95),
# mean=(104.00698793, 116.66876762, 122.67891434),
# mean=(104, 116, 122),
mean=(130, 120, 75),
# mean=(145, 147, 180),
swapRB= False, crop=False)
print( blob)
cv2.waitKey(0)
# set the blob as the input to the network and perform a forward pass
# to compute the edges
print("[INFO] performing holistically-nested edge detection...")
net.setInput(blob)
hed = net.forward()
hed = cv2.resize(hed[0, 0], (W, H))
hed = (255 * hed).astype("uint8")
# show the output edge detection results for Canny and
# Holistically-Nested Edge Detection
cv2.imshow("Input", image)
cv2.imshow("Canny", canny)
cv2.imshow("HED", hed)
cv2.imwrite(hout, hed)
cv2.waitKey(0)

Type Error: The return type must be a string, dict, tuple, Response instance, or WSGI callable, but it was a list

I'm making a Flask app that takes an image as input, processes it, and saves the results in a JSON file, but after processing the image it gives me the TypeError mentioned in the title. To add more, it prints only one line and then stops.
Below is the Flask API that I'm using:
@app.route('/upload',methods=['GET','POST'])
def upload_analyze():
if request.method == 'POST':
# check if a file was passed into the POST request
if 'file' not in request.files:
flash('No file was uploaded.')
return redirect(request.url)
f = request.files['file']
filename = secure_filename(f.filename)
f.save(filename)
image = cv2.imread(filename)
#f.save(secure_filename(f.filename))
#return 'file uploaded successfully'
# image_file = request.files['image']
clt = KMeans(n_clusters = 3)
dataset = pd.read_csv('bb22.csv')
X = dataset.iloc[:, 1: 8].values
sc = StandardScaler()
global orig , r
# load the image, convert it to grayscale, and blur it slightly
#images = np.array(Image.open(image_file))
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (7, 7), 0)
# perform edge detection, then perform a dilation + erosion to
# close gaps in between object edges
edged = cv2.Canny(gray, 50, 100)
edged = cv2.dilate(edged, None, iterations=1)
edged = cv2.erode(edged, None, iterations=1)
# find contours in the edge map
cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if imutils.is_cv2() else cnts[1]
# sort the contours from left-to-right and initialize the
# 'pixels per metric' calibration variable
(cnts, _) = contours.sort_contours(cnts)
pixelsPerMetric = None
object_num = 0
r=object_num
objects = []
idx=0
orig = image.copy()
counter = 0
leng = [0] * 400
width = [0] *400
# loop over the contours individually
for c in cnts:
# if the contour is not sufficiently large, ignore it
if cv2.contourArea(c) < 50:
continue
# compute the rotated bounding box of the contour
box = cv2.minAreaRect(c)
box = cv2.cv.BoxPoints(box) if imutils.is_cv2() else cv2.boxPoints(box)
box = np.array(box, dtype="int")
# order the points in the contour such that they appear
# in top-left, top-right, bottom-right, and bottom-left
# order, then draw the outline of the rotated bounding box
box = perspective.order_points(box)
cv2.drawContours(orig, [box.astype("int")], -1, (0, 255, 0), 2)
box.astype
# unpack the ordered bounding box, then compute the midpoint
# between the top-left and top-right coordinates, followed by
# the midpoint between bottom-left and bottom-right coordinates
(tl, tr, br, bl) = box
(tltrX, tltrY) = midpoint(tl, tr)
(blbrX, blbrY) = midpoint(bl, br)
# compute the midpoint between the top-left and top-right points,
# followed by the midpoint between the top-righ and bottom-right
(tlblX, tlblY) = midpoint(tl, bl)
(trbrX, trbrY) = midpoint(tr, br)
# compute the Euclidean distance between the midpoints
dA = dist.euclidean((tltrX, tltrY), (blbrX, blbrY))
dB = dist.euclidean((tlblX, tlblY), (trbrX, trbrY))
# if the pixels per metric has not been initialized, then
# compute it as the ratio of pixels to supplied metric (in this case, inches)
if pixelsPerMetric is None:
pixelsPerMetric = dB / 22.599 #previously its /22.50
# compute the size of the object
area = round(cv2.contourArea(c) / (pixelsPerMetric**2), 3)
perimeter = round(cv2.arcLength(c, True)/ pixelsPerMetric, 3)
hull = cv2.convexHull(c)
hull_area = round(cv2.contourArea(hull) / (pixelsPerMetric**2), 3)
(x,y),(ma,MA),angle = cv2.fitEllipse(c)
eccentricity = round(np.sqrt(1-(ma/MA)**2),3)
C = round(4*np.pi*area/perimeter**2, 3)
dimA = round(dA / pixelsPerMetric, 3)
dimB = round(dB / pixelsPerMetric, 3)
if (dimA >= dimB):
temp=dimA
dimA=dimB
dimB=temp
leng[counter] = str(dimB)
width[counter] = str(dimA)
counter = counter +1
x,y,w,h = cv2.boundingRect(c)
idx+=1
mask = np.zeros(image.shape[:2],np.uint8)
cv2.drawContours(mask, [c],-1, 255, -1)
dst = cv2.bitwise_and(image, image, mask=mask)
new_img=dst[y-20:y+h+20,x-20:x+w+20]
# pre-process the image for classification
if len(new_img) == 0:
WB = 0
continue
object_num = object_num+1
image1 = cv2.cvtColor(new_img, cv2.COLOR_BGR2RGB)
image1 = new_img.reshape((image1.shape[0] * new_img.shape[1], 3))
#classify color
clt.fit(image1)
count = 0
global dominant_color
dominant_color = [0,0,0]
for (color) in (clt.cluster_centers_):
a = [color.astype("uint8").tolist()[0], color.astype("uint8").tolist()[1],
color.astype("uint8").tolist()[2]]
count = count+1
if(count == 2) and (a != [0, 0, 0]):
dominant_color = a
#prepare image for broken classification
new_img = cv2.resize(new_img, (64, 64))
new_img = new_img.astype("float") / 255.0
new_img = img_to_array(new_img)
new_img = np.expand_dims(new_img, axis=0)
# classify the input image
with graph.as_default():
(yes, no) = model.predict(new_img)[0]
# build the label
if (yes > no):
WB = 0
y_new = "Broken"
else:
if object_num == 1:
print("true")
continue
WB = 1
X_new = array([[dimA, dimB, area, perimeter, hull_area, eccentricity, C]])
X=sc.fit_transform(X)
X_new = sc.transform(X_new)
y_new = type_model.predict(X_new)
print("X=%s, Predicted=%s" % (X_new[0], y_new))
obj_num=object_num-1 # because one item on the left most side we have for the pixel constant value
content = {
"Object_number": obj_num,
"Width": dimA,
"Length": dimB,
#"Area": area,
#"Perimeter": perimeter,
#"hull_area": hull_area,
#"eccentricity": eccentricity,
#"compactness": C,
"WB": WB # Whole or Broken
#"Type": str(y_new[0]),
#"color_rgb": dominant_color,
#"color_hex": rgb2hex(dominant_color[2], dominant_color[1], dominant_color[0])
}
objects.append(content)
return(objects)
objects=analyze()
with open('test6.json', 'w') as fout:
json.dump(objects , fout)
print(objects)
print(type(objects))
return 'ok'
Also, in the console only this one line gets printed:
X=[ 0.38739663 -0.25583995 0.22674784 -0.2933872 0.19980647 -0.03758974
0.4759277 ], Predicted=[4]
I'm returning this message to make sure that the JSON file is created, but it doesn't get created. I can't figure out what is wrong with the return type; kindly help.
A Flask view cannot return a plain list; the return value must be a string, dict, tuple, Response instance, or WSGI callable. You can always convert your return value to one of those types, e.g. a string or a dict, and transform it back from the result.
return { "data": [ { "name": "my name", "age": "27" } ] }
User oz19 commented
You need to serialize objects before returning. import json and then json.dumps(objects)
and also
You have a return(objects) at the end of the for c in cnts loop. That could be the problem.
So the solution, if not using jsonify, is to call json.dumps on the list before returning it.
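A minimal sketch of that json.dumps approach (the route name and the sample data here are placeholders, not the original view):
import json
from flask import Flask

app = Flask(__name__)

@app.route('/analyze', methods=['POST'])
def analyze_view():
    objects = [{"Object_number": 1, "Width": 2.5, "Length": 4.1, "WB": 1}]  # placeholder result
    # a JSON string is a valid Flask return type, unlike a bare list
    return json.dumps(objects)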
If you use the method below, you can easily get the required data in JSON format:
# don't forget to import jsonify
from flask import Flask, request, redirect, jsonify
#app.route('/sample', methods = ['GET', 'POST'])
def sample():
if(request.method == 'GET'): # I am using GET; you can change it to whatever you want
data = [{"A": "a",
"B": "b",
"C": "c",
}]
return jsonify({'data': data})
# now you can start your JSON dumping process from here
Hope it helps!

How to perform image convolution on an image using opencv python

I am trying to perform edge detection on my images of soil grains using the holistically nested edge detection (HED) method, as shown. However, when the image combines fine and coarse soil grains, the region of fine particles is not clear, so my idea is to process the image in tiles: cut the image into smaller rectangular areas in both directions, run HED on every portion of the image, and store the results in a black copy of the image, so that the edged portions are added back together (a small sketch of this tiling idea follows below).
I faced an error after repeating the HED algorithm in a for loop, dividing the width of the image into 5 portions and the height into 4 portions, but I can't fix that error.
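A minimal sketch of that tiling idea, independent of HED (edge_fn stands in for whatever per-tile edge detector is used; all names here are placeholders):
import cv2
import numpy as np

def edge_fn(tile):
    # placeholder for the per-tile edge detector (HED, Canny, ...)
    return cv2.Canny(cv2.cvtColor(tile, cv2.COLOR_BGR2GRAY), 30, 150)

def tile_and_merge(image, rows=4, cols=5):
    H, W = image.shape[:2]
    out = np.zeros((H, W), np.uint8)           # black copy to collect the edges
    tile_h, tile_w = H // rows, W // cols
    for r in range(rows):
        for c in range(cols):
            y0, x0 = r * tile_h, c * tile_w
            # let the last row/column absorb any remainder pixels
            y1 = H if r == rows - 1 else y0 + tile_h
            x1 = W if c == cols - 1 else x0 + tile_w
            tile = image[y0:y1, x0:x1]
            out[y0:y1, x0:x1] = edge_fn(tile)  # paste the edged tile back in place
    return out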
Here is the algorithm used
# import the necessary packages
import argparse
import cv2
import os
import easygui
path = easygui.fileopenbox()
print(path)
hdir = os.path.dirname(path)
print(hdir)
hfilename = os.path.basename(path)
print(hfilename)
hname = os.path.splitext(hfilename)[0]
print(hname)
houtname = hname+"_out.jpg"
print(houtname)
hout = os.path.sep.join([hdir,houtname])
print(hout)
# # construct the argument parser and parse the arguments
# ap = argparse.ArgumentParser()
# ap.add_argument("-d", "--edge-detector", type=str, required=True,
# help="path to OpenCV's deep learning edge detector")
# ap.add_argument("-i", "--image", type=str, required=True,
# help="path to input image")
# args = vars(ap.parse_args())
class CropLayer(object):
def __init__(self, params, blobs):
# initialize our starting and ending (x, y)-coordinates of
# the crop
self.startX = 0
self.startY = 0
self.endX = 0
self.endY = 0
def getMemoryShapes(self, inputs):
# the crop layer will receive two inputs -- we need to crop
# the first input blob to match the shape of the second one,
# keeping the batch size and number of channels
(inputShape, targetShape) = (inputs[0], inputs[1])
(batchSize, numChannels) = (inputShape[0], inputShape[1])
(H, W) = (targetShape[2], targetShape[3])
# compute the starting and ending crop coordinates
self.startX = int((inputShape[3] - targetShape[3]) / 2)
self.startY = int((inputShape[2] - targetShape[2]) / 2)
self.endX = self.startX + W
self.endY = self.startY + H
# return the shape of the volume (we'll perform the actual
# crop during the forward pass
return [[batchSize, numChannels, H, W]]
def forward(self, inputs):
# use the derived (x, y)-coordinates to perform the crop
return [inputs[0][:, :, self.startY:self.endY,
self.startX:self.endX]]
# load our serialized edge detector from disk
print("[INFO] loading edge detector...")
fpath = os.path.abspath(__file__)
fdir = os.path.dirname(fpath)
print(fdir)
protoPath = os.path.sep.join([fdir,"hed_model", "deploy.prototxt"])
print(protoPath)
modelPath = os.path.sep.join([fdir,"hed_model","hed_pretrained_bsds.caffemodel"])
print(modelPath)
net = cv2.dnn.readNetFromCaffe(protoPath, modelPath)
# register our new layer with the model
cv2.dnn_registerLayer("Crop", CropLayer)
# load the input image and grab its dimensions
image = cv2.imread('D:\My work\MASTERS WORK\GSD files\Sample E photos\SampleE_#1_26pxfor1mm.jpg')
im_copy = image.copy()*0
(H, W) = image.shape[:2]
# print(image.shape[:2])
# image.shape[:2] =(H*3, W*3)
# image = cv2.resize(image,0.5)
h=0
w=0
for m in range(0,H ,int(H/5)):
for n in range(0,W,int(W/3)):
gray = image[h:m,w:n]
# convert the image to grayscale, blur it, and perform Canny
# edge detection
print("[INFO] performing Canny edge detection...")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
canny = cv2.Canny(blurred, 30, 150)
# construct a blob out of the input image for the Holistically-Nested
# Edge Detector
# cc = cv2.cvtColor(canny, cv2.COLOR_GRAY2BGR)
# image = image+cc
# mean = (104.00698793, 116.66876762, 122.67891434),
blob = cv2.dnn.blobFromImage(image, scalefactor=1.0, size=((m-h), (n-w)),
# mean=(230, 120, 50),
mean=(104.00698793, 116.66876762, 122.67891434),
swapRB=False, crop=False)
print( blob)
cv2.waitKey(0)
# set the blob as the input to the network and perform a forward pass
# to compute the edges
print("[INFO] performing holistically-nested edge detection...")
net.setInput(blob)
hed = net.forward()
hed = cv2.resize(hed[0, 0], ((m-h), (n-w)))
hed = (255 * hed).astype("uint8")
# Adding the edge detection for each portion to the copy image as follows
im_copy = im_copy + hed
h+=int(H/5)
w+=int(W/4)
# show the output edge detection results for Canny and
# Holistically-Nested Edge Detection
cv2.imshow("Input", image)
cv2.imshow("Canny", canny)
cv2.imshow("HED", hed)
cv2.waitKey(0)
cv2.imshow('Frame ',im_copy)
cv2.imwrite(hout, im_copy)
cv2.waitKey(0)
I then use this edged image in further analysis of the image.
The error I got when running the algorithm:
net = cv2.dnn.readNetFromCaffe(protoPath, modelPath)
cv2.error: OpenCV(4.1.1) C:\projects\opencv-python\opencv\modules\dnn\src\caffe\caffe_io.cpp:1121: error: (-2:Unspecified error) FAILED: fs.is_open(). Can't open "D:\My work\MASTERS WORK\hed_model\deploy.prototxt" in function 'cv::dnn::ReadProtoFromTextFile'
