How to join nearby bounding boxes in OpenCV Python

I am doing a college class project on image processing. This is my original image:
I want to join nearby/overlapping bounding boxes on individual text line images, but I don't know how. My code looks like this so far (thanks to @HansHirse for the help):
import os
import cv2
import numpy as np
from scipy import stats
image = cv2.imread('example.png')
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
ret,thresh = cv2.threshold(gray,127,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
#dilation
kernel = np.ones((5,5), np.uint8)
img_dilation = cv2.dilate(thresh, kernel, iterations=1)
#find contours
ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# https://www.pyimagesearch.com/2015/04/20/sorting-contours-using-python-and-opencv/
def sort_contours(cnts, method="left-to-right"):
    # initialize the reverse flag and sort index
    reverse = False
    i = 0
    # handle if we need to sort in reverse
    if method == "right-to-left" or method == "bottom-to-top":
        reverse = True
    # handle if we are sorting against the y-coordinate rather than
    # the x-coordinate of the bounding box
    if method == "top-to-bottom" or method == "bottom-to-top":
        i = 1
    # construct the list of bounding boxes and sort them from top to
    # bottom
    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    (cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes),
                                        key=lambda b: b[1][i], reverse=reverse))
    # return the list of sorted contours and bounding boxes
    return (cnts, boundingBoxes)
sortedctrs, sortedbbs = sort_contours(ctrs)
xyminmax = []
for cnt in sortedctrs:
    x, y, w, h = cv2.boundingRect(cnt)
    xyminmax.append([x, y, x + w, y + h])
distances = []
for i in range(len(xyminmax)):
    try:
        first_xmax = xyminmax[i][2]
        second_xmin = xyminmax[i + 1][0]
        distance = abs(second_xmin - first_xmax)
        distances.append(distance)
    except IndexError:
        pass
THRESHOLD=stats.mode(distances, axis=None)[0][0]
new_rects=[]
for i in range(len(xyminmax)):
    try:
        # [xmin, ymin, xmax, ymax]
        first_ymin = xyminmax[i][1]
        first_ymax = xyminmax[i][3]
        second_ymin = xyminmax[i + 1][1]
        second_ymax = xyminmax[i + 1][3]
        first_xmax = xyminmax[i][2]
        second_xmin = xyminmax[i + 1][0]
        firstheight = abs(first_ymax - first_ymin)
        secondheight = abs(second_ymax - second_ymin)
        distance = abs(second_xmin - first_xmax)
        if distance < THRESHOLD:
            new_xmin = xyminmax[i][0]
            new_xmax = xyminmax[i + 1][2]
            if first_ymin > second_ymin:
                new_ymin = second_ymin
            else:
                new_ymin = first_ymin
            if firstheight > secondheight:
                new_ymax = first_ymax
            else:
                new_ymax = second_ymax
            new_rects.append([new_xmin, new_ymin, new_xmax, new_ymax])
        else:
            new_rects.append(xyminmax[i])
    except IndexError:
        pass
for rect in new_rects:
    cv2.rectangle(image, (rect[0], rect[1]), (rect[2], rect[3]), (121, 11, 189), 2)
cv2.imwrite("result.png",image)
which produces this image as a result:
I want to join very close or overlapping bounding boxes such as these
into a single bounding box so the formula doesn't get separated into single characters. I have tried using cv2.groupRectangles but the print results were just NULL.
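(As far as I can tell, cv2.groupRectangles is meant for clusters of near-duplicate detections: it drops any rectangle that doesn't have at least groupThreshold similar partners, so a list of distinct character boxes comes back empty. A minimal sketch with made-up boxes:)
import cv2

# Three [x, y, w, h] boxes; the first two are near-duplicates.
boxes = [[10, 10, 20, 20], [12, 11, 20, 20], [100, 10, 20, 20]]

# With groupThreshold=1, only clusters of at least two similar boxes
# survive, so the isolated box at x=100 is dropped.
grouped, weights = cv2.groupRectangles(boxes, 1, 0.2)
print(grouped)

# A common workaround is to duplicate the list so every box has a partner.
grouped, weights = cv2.groupRectangles(boxes * 2, 1, 0.2)
print(grouped)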

So, here comes my solution. I partially modified your (initial) code to my preferred naming, etc., and commented all the stuff I added.
import cv2
import numpy as np
image = cv2.imread('images/example.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
kernel = np.ones((5, 5), np.uint8)
img_dilated = cv2.dilate(thresh, kernel, iterations = 1)
cnts, _ = cv2.findContours(img_dilated.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Array of initial bounding rects
rects = []
# Bool array indicating which initial bounding rect has
# already been used
rectsUsed = []
# Just initialize bounding rects and set all bools to false
for cnt in cnts:
    rects.append(cv2.boundingRect(cnt))
    rectsUsed.append(False)

# Sort bounding rects by x coordinate
def getXFromRect(item):
    return item[0]

rects.sort(key = getXFromRect)
# Array of accepted rects
acceptedRects = []
# Merge threshold for x coordinate distance
xThr = 5
# Iterate all initial bounding rects
for supIdx, supVal in enumerate(rects):
    if (rectsUsed[supIdx] == False):

        # Initialize current rect
        currxMin = supVal[0]
        currxMax = supVal[0] + supVal[2]
        curryMin = supVal[1]
        curryMax = supVal[1] + supVal[3]

        # This bounding rect is used
        rectsUsed[supIdx] = True

        # Iterate all initial bounding rects
        # starting from the next
        for subIdx, subVal in enumerate(rects[(supIdx+1):], start = (supIdx+1)):

            # Initialize merge candidate
            candxMin = subVal[0]
            candxMax = subVal[0] + subVal[2]
            candyMin = subVal[1]
            candyMax = subVal[1] + subVal[3]

            # Check if x distance between current rect
            # and merge candidate is small enough
            if (candxMin <= currxMax + xThr):

                # Reset coordinates of current rect
                currxMax = candxMax
                curryMin = min(curryMin, candyMin)
                curryMax = max(curryMax, candyMax)

                # Merge candidate (bounding rect) is used
                rectsUsed[subIdx] = True
            else:
                break

        # No more merge candidates possible, accept current rect
        acceptedRects.append([currxMin, curryMin, currxMax - currxMin, curryMax - curryMin])
for rect in acceptedRects:
    img = cv2.rectangle(image, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (121, 11, 189), 2)
cv2.imwrite("images/result.png", image)
For your example, I get the following output:
Now you have to find a proper threshold to meet your expectations. Maybe there is even some more work to do, especially to get the whole formula, since the distances don't vary that much.
Disclaimer: I'm new to Python in general, and especially to the Python API of OpenCV (C++ for the win). Comments, improvements, and highlighting of Python no-gos are highly welcome!

Here is a slightly different approach, using the OpenCV Wrapper library.
import cv2
import opencv_wrapper as cvw
image = cv2.imread("example.png")
gray = cvw.bgr2gray(image)
thresh = cvw.threshold_otsu(gray, inverse=True)
# dilation
img_dilation = cvw.dilate(thresh, 5)
# Find contours
contours = cvw.find_external_contours(img_dilation)
# Map contours to bounding rectangles, using bounding_rect property
rects = map(lambda c: c.bounding_rect, contours)
# Sort rects by top-left x (rect.x == rect.tl.x)
sorted_rects = sorted(rects, key=lambda r: r.x)
# Distance threshold
dt = 5
# List of final, joined rectangles
final_rects = [sorted_rects[0]]
for rect in sorted_rects[1:]:
    prev_rect = final_rects[-1]

    # Shift rectangle `dt` back, to find out if they overlap
    shifted_rect = cvw.Rect(rect.tl.x - dt, rect.tl.y, rect.width, rect.height)
    intersection = cvw.rect_intersection(prev_rect, shifted_rect)
    if intersection is not None:
        # Join the two rectangles
        min_y = min((prev_rect.tl.y, rect.tl.y))
        max_y = max((prev_rect.bl.y, rect.bl.y))
        max_x = max((prev_rect.br.x, rect.br.x))
        width = max_x - prev_rect.tl.x
        height = max_y - min_y
        new_rect = cvw.Rect(prev_rect.tl.x, min_y, width, height)
        # Add new rectangle to final list, making it the new prev_rect
        # in the next iteration
        final_rects[-1] = new_rect
    else:
        # If no intersection, add the box
        final_rects.append(rect)

for rect in sorted_rects:
    cvw.rectangle(image, rect, cvw.Color.MAGENTA, line_style=cvw.LineStyle.DASHED)
for rect in final_rects:
    cvw.rectangle(image, rect, cvw.Color.GREEN, thickness=2)
cv2.imwrite("result.png", image)
And the result
The green boxes are the final result, while the magenta boxes are the original ones.
I used the same threshold as @HansHirse.
The equals sign still needs some work. Either a higher dilation kernel size or use the same technique vertically.
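For instance, a sketch of that vertical pass, reusing the cvw.Rect objects and dt threshold from above (untested; the same merge logic, just transposed):
# Same merging idea, transposed: sort by top y and merge boxes whose
# vertical gap is within dt.
rows = sorted(final_rects, key=lambda r: r.tl.y)
merged = [rows[0]]
for rect in rows[1:]:
    prev_rect = merged[-1]
    # Shift the rectangle `dt` up, to find out if they overlap vertically
    shifted_rect = cvw.Rect(rect.tl.x, rect.tl.y - dt, rect.width, rect.height)
    if cvw.rect_intersection(prev_rect, shifted_rect) is not None:
        min_x = min(prev_rect.tl.x, rect.tl.x)
        min_y = min(prev_rect.tl.y, rect.tl.y)
        max_x = max(prev_rect.br.x, rect.br.x)
        max_y = max(prev_rect.br.y, rect.br.y)
        merged[-1] = cvw.Rect(min_x, min_y, max_x - min_x, max_y - min_y)
    else:
        merged.append(rect)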
Disclosure: I am the author of OpenCV Wrapper.

Easy-to-read solution:
import cv2
import imutils

def get_contours(frame):  # Returns a list of contours
    contours = cv2.findContours(frame, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = imutils.grab_contours(contours)
    return contours

def merge_boxes(boxes, x_val, y_val):
    size = len(boxes)
    if size < 2:
        return boxes
    if size == 2:
        if boxes_mergeable(boxes[0], boxes[1], x_val, y_val):
            boxes[0] = union(boxes[0], boxes[1])
            del boxes[1]
        return boxes
    boxes = sorted(boxes, key=lambda r: r[0])
    i = size - 2
    while i >= 0:
        if boxes_mergeable(boxes[i], boxes[i + 1], x_val, y_val):
            boxes[i] = union(boxes[i], boxes[i + 1])
            del boxes[i + 1]
        i -= 1
    return boxes

def boxes_mergeable(box1, box2, x_val, y_val):
    (x1, y1, w1, h1) = box1
    (x2, y2, w2, h2) = box2
    return max(x1, x2) - min(x1, x2) - minx_w(x1, w1, x2, w2) < x_val \
        and max(y1, y2) - min(y1, y2) - miny_h(y1, h1, y2, h2) < y_val

def minx_w(x1, w1, x2, w2):
    return w1 if x1 <= x2 else w2

def miny_h(y1, h1, y2, h2):
    return h1 if y1 <= y2 else h2

def union(a, b):
    x = min(a[0], b[0])
    y = min(a[1], b[1])
    w = max(a[0] + a[2], b[0] + b[2]) - x
    h = max(a[1] + a[3], b[1] + b[3]) - y
    return x, y, w, h

# Usage:
contours = get_contours(frame)
boxes = [cv2.boundingRect(c) for c in contours]
boxes = merge_boxes(boxes, x_val=40, y_val=20)  # x_val and y_val are the axis thresholds

If you have bounding boxes and want to merge along both the X and Y directions, use this snippet. Adjust x_pixel_value and y_pixel_value to your preferences, but note that you need the bounding boxes first.
import cv2

img = cv2.imread(your_image_path)  # replace with the path to your image
x_pixel_value = 5
y_pixel_value = 6
bboxes_list = []  # your bounding boxes list, each box as [xmin, ymin, xmax, ymax]
rects_used = []
for i in bboxes_list:
    rects_used.append(False)
end_bboxes_list = []
for enum, i in enumerate(bboxes_list):
    if rects_used[enum] == True:
        continue
    xmin = i[0]
    xmax = i[2]
    ymin = i[1]
    ymax = i[3]
    for enum1, j in enumerate(bboxes_list[(enum + 1):], start=(enum + 1)):
        i_xmin = j[0]
        i_xmax = j[2]
        i_ymin = j[1]
        i_ymax = j[3]
        if rects_used[enum1] == False:
            # Boxes on (roughly) the same row ...
            if abs(ymin - i_ymin) < y_pixel_value:
                # ... whose x ranges come close enough get merged
                if abs(xmin - i_xmax) < x_pixel_value or abs(xmax - i_xmin) < x_pixel_value:
                    rects_used[enum1] = True
                    xmin = min(xmin, i_xmin)
                    xmax = max(xmax, i_xmax)
                    ymin = min(ymin, i_ymin)
                    ymax = max(ymax, i_ymax)
    final_box = [xmin, ymin, xmax, ymax]
    end_bboxes_list.append(final_box)
for i in end_bboxes_list:
    cv2.rectangle(img, (i[0], i[1]), (i[2], i[3]), color=[0, 255, 0], thickness=2)
cv2.imshow("Image", img)
cv2.waitKey(10000)
cv2.destroyAllWindows()


Find bounding box contour with largest surface area excluding intersection areas

I have an array of bounding boxes from the object detection system.
They are in the format:
[[x,y], [x,y], [x,y], [x,y]]
I want to find the largest bounding box that is not intersecting with any other provided boxes nor is inside an excluded box.
I am using python, but response in any programming language is welcomed :)
Visual example
How I tried and failed to solve this problem.
Approach I.
Iterate over every point and find the min and max of x and y.
Then crop to a polygon using these coordinates.
The problem is that, on the example image, this algorithm would remove the top part of the image even though there is no need to, because we 'missed' the top left and right boxes.
Approach II.
Try to choose to crop only one side at a time, because usually in my dataset things to exclude are on one side. e.g. remove top 100px
So I calculated the min and max of x and y like before.
Then I calculated the area of every possible cut - left, right, top, bottom - and chose the one with the smallest area.
This approach fails pretty quickly when there are boxes on two sides of the picture, like left and right.
Consider a full rectangle (initially the whole picture) and take away one excluded box. You will get 2x2x2x2 = 16 possible rectangular subdivisions, for example this one:
┌────────────────────────┐
│                        │
│                        │
├───────┬───────┬────────┤
│       │ exc   │        │
│       │ lude  │        │
│       ├───────┴────────┤
│       │                │
│       │                │
└───────┴────────────────┘
For each box in the subdivision, take away the next excluded box.
Do this N times, and take the biggest box of the final step.
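A minimal sketch of that idea (my own illustration, using (x1, y1, x2, y2) boxes; for simplicity it takes only one subdivision per cut, the four strips around the excluded box, rather than all 16; candidates may overlap, which is fine since we only keep the biggest):
def subtract(rect, cut):
    """Split rect into up to four rectangles not covered by cut."""
    rx1, ry1, rx2, ry2 = rect
    cx1, cy1, cx2, cy2 = cut
    # No overlap: the rectangle survives unchanged
    if cx1 >= rx2 or cx2 <= rx1 or cy1 >= ry2 or cy2 <= ry1:
        return [rect]
    parts = []
    if cy1 > ry1:
        parts.append((rx1, ry1, rx2, cy1))  # strip above the cut
    if cy2 < ry2:
        parts.append((rx1, cy2, rx2, ry2))  # strip below the cut
    if cx1 > rx1:
        parts.append((rx1, ry1, cx1, ry2))  # strip left of the cut
    if cx2 < rx2:
        parts.append((cx2, ry1, rx2, ry2))  # strip right of the cut
    return parts

def largest_free_rect(picture, excluded):
    """Take away each excluded box in turn, keep the biggest survivor."""
    candidates = [picture]
    for cut in excluded:
        candidates = [p for rect in candidates for p in subtract(rect, cut)]
    area = lambda r: (r[2] - r[0]) * (r[3] - r[1])
    return max(candidates, key=area) if candidates else None

print(largest_free_rect((0, 0, 800, 600), [(100, 100, 300, 200)]))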
Here's a potential solution to find the bounding box contour with the largest surface area. We have two requirements:
Largest bounding box is not intersecting with any other box
Largest bounding box is not inside another box
Essentially we can reword the two requirements to this:
Given C1 and C2, determine if C1 and C2 intersect
Given C1 and C2, check if there is a point from C1 in C2
To solve #1, we can create a contour_intersect function that uses a bitwise AND operation with np.logical_and() to detect intersection. The idea is to create two separate masks for each contour and then use the logical AND operation on them. Any points that have a positive value (1 or True) will be points of intersection. Essentially, if the entire array is False then there was no intersection between the contours. But if there is a single True, then the contours touched at some point and thus intersect.
For #2, we can create a function contour_inside and use cv2.pointPolygonTest() to determine if a point is inside, outside, or on the edge of a contour. The function returns +1, -1, or 0 to indicate if a point is inside, outside, or on the contour, respectively. We find the centroid of C1 and then check if that point is inside C2.
Here's an example to visualize the scenarios:
Input image with three contours. Nothing special here, the expected answer would be the contour with the largest area.
Answer:
Contour #0 is the largest
Next we add two additional contours. Contour #3 will represent the intersection scenario and contour #4 will represent the inside contour scenario.
Answer:
Contour #0 has failed test
Contour #1 has failed test
Contour #2 is the largest
To solve this problem, we find contours then sort using contour area from largest to smallest. Next, we compare this contour with all other contours and check the two cases. If either case fails, we dump the current contour and move onto the next largest contour. The first contour that passes both tests for all other contours is our largest bounding box contour. Normally, contour #0 would be our largest but it fails the intersection test. We then move onto contour #1 but this fails the inside test. Thus the last remaining contour that passes both tests is contour #2.
import cv2
import numpy as np
# Check if C1 and C2 intersect
def contour_intersect(original_image, contour1, contour2):
    # Two separate contours trying to check intersection on
    contours = [contour1, contour2]

    # Create image filled with zeros the same size of original image
    blank = np.zeros(original_image.shape[0:2])

    # Copy each contour into its own image and fill it with '1'
    image1 = cv2.drawContours(blank.copy(), contours, 0, 1)
    image2 = cv2.drawContours(blank.copy(), contours, 1, 1)

    # Use the logical AND operation on the two images
    # Since the two images had the AND applied to them,
    # there should be a '1' or 'True' where there was intersection
    # and a '0' or 'False' where it didn't intersect
    intersection = np.logical_and(image1, image2)

    # Check if there was a '1' in the intersection
    return intersection.any()

# Check if C1 is in C2
def contour_inside(contour1, contour2):
    # Find centroid of C1
    M = cv2.moments(contour1)
    cx = int(M['m10'] / M['m00'])
    cy = int(M['m01'] / M['m00'])
    inside = cv2.pointPolygonTest(contour2, (cx, cy), False)
    if inside == 0 or inside == -1:
        return False
    elif inside == 1:
        return True
# Load image, convert to grayscale, Otsu's threshold
image = cv2.imread('1.png')
original = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Find contours, sort by contour area from largest to smallest
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
sorted_cnts = sorted(cnts, key=lambda x: cv2.contourArea(x), reverse=True)
# "Intersection" and "inside" contours
# Add both contours to test
# --------------------------------
intersect_contour = np.array([[[230, 93]], [[230, 187]], [[326, 187]], [[326, 93]]])
sorted_cnts.append(intersect_contour)
cv2.drawContours(original, [intersect_contour], -1, (36,255,12), 3)
inside_contour = np.array([[[380, 32]], [[380, 229]], [[740, 229]], [[740, 32]]])
sorted_cnts.append(inside_contour)
cv2.drawContours(original, [inside_contour], -1, (36,255,12), 3)
# --------------------------------
# Find centroid for each contour and label contour number
for count, c in enumerate(sorted_cnts):
    M = cv2.moments(c)
    cx = int(M['m10'] / M['m00'])
    cy = int(M['m01'] / M['m00'])
    cv2.putText(original, str(count), (cx - 5, cy + 5), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (246, 255, 12), 3)
# Find largest bounding box contour
largest_contour_name = ""
largest_contour = ""
contours_length = len(sorted_cnts)
for i1 in range(contours_length):
    found = True
    for i2 in range(i1 + 1, contours_length):
        c1 = sorted_cnts[i1]
        c2 = sorted_cnts[i2]
        # Test intersection and "inside" contour
        if contour_intersect(original, c1, c2) or contour_inside(c1, c2):
            print('Contour #{} has failed test'.format(i1))
            found = False
            continue
    if found:
        largest_contour_name = i1
        largest_contour = sorted_cnts[i1]
        break
print('Contour #{} is the largest'.format(largest_contour_name))
print(largest_contour)
# Display
cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.imshow('original', original)
cv2.waitKey()
Note: The assumption is that you have an array of contours from cv2.findContours() with the format like this example:
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
sorted_cnts = sorted(cnts, key=lambda x: cv2.contourArea(x), reverse=True)
for c in sorted_cnts:
    print(c)
    print(type(c))
    x, y, w, h = cv2.boundingRect(c)
    print((x, y, w, h))
Output
[[[230  93]]
 [[230 187]]
 [[326 187]]
 [[326  93]]]
<class 'numpy.ndarray'>
(230, 93, 97, 95)
Performance note: The intersection check function suffers on the performance side since it creates three copies of the input image to draw the contours and may be slower when it comes to execution time with a greater number of contours or a larger input image size. I'll leave this optimization step to you!
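One possible optimization (my addition, not part of the approach above): bounding rectangles that don't overlap can't intersect, so a cheap cv2.boundingRect pre-check lets us skip drawing the masks in the common disjoint case:
def contour_intersect_fast(original_image, contour1, contour2):
    # Cheap pre-check: disjoint bounding rects mean no intersection.
    x1, y1, w1, h1 = cv2.boundingRect(contour1)
    x2, y2, w2, h2 = cv2.boundingRect(contour2)
    if x1 + w1 < x2 or x2 + w2 < x1 or y1 + h1 < y2 or y2 + h2 < y1:
        return False
    # Only rasterize the contour masks when the rects actually overlap.
    return contour_intersect(original_image, contour1, contour2)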
You can use the cv2.boundingRect() method to get the x, y, w, h of each bounding box, and with those values you can use the condition x2 + w2 > x1 > x2 - w1 and y2 + h2 > y1 > y2 - h1 to check whether any two bounding boxes intersect or lie within each other:
import cv2
import numpy as np
def intersect(b1, b2):
    x1, y1, w1, h1 = b1
    x2, y2, w2, h2 = b2
    return x2 + w2 > x1 > x2 - w1 and y2 + h2 > y1 > y2 - h1
# Here I am generating a random array of 10 boxes in the format [[x,y], [x,y], [x,y], [x,y]]
np.random.seed(55)
boxes = np.random.randint(10, 150, (10, 4, 2)) + np.random.randint(0, 300, (10, 1, 2))
bounds = [cv2.boundingRect(box) for box in boxes]
valids = [b1 for b1 in bounds if not any(intersect(b1, b2) for b2 in bounds if b1 != b2)]
if valids:
    x, y, w, h = max(valids, key=lambda b: b[2] * b[3])
    print(f"x: {x} y: {y} w: {w} h: {h}")
else:
    print("All boxes intersect.")
Output:
x: 75 y: 251 w: 62 h: 115
For visualization:
import cv2
import numpy as np
def intersect(b1, b2):
    x1, y1, w1, h1 = b1
    x2, y2, w2, h2 = b2
    return x2 + w2 > x1 > x2 - w1 and y2 + h2 > y1 > y2 - h1

np.random.seed(55)
boxes = np.random.randint(10, 150, (10, 4, 2)) + np.random.randint(0, 300, (10, 1, 2))
bounds = [cv2.boundingRect(box) for box in boxes]
valids = [b1 for b1 in bounds if not any(intersect(b1, b2) for b2 in bounds if b1 != b2)]
img = np.zeros((500, 500), "uint8")
for x, y, w, h in bounds:
    cv2.rectangle(img, (x, y), (x + w, y + h), 255, 1)
if valids:
    x, y, w, h = max(valids, key=lambda b: b[2] * b[3])
    cv2.rectangle(img, (x, y), (x + w, y + h), 128, -1)
cv2.imshow("IMAGE", img)
cv2.waitKey(0)
Output:
Assumption: you want the largest box from your array that complies with your rules, not the largest NEW bounding box that complies.
This is pseudo code; you still have to fill in the blanks:
int largestBoxIndex = -1;
int largestBoxArea = -1;

for (i = 0; i < allBoxes.length; i++)
{
    box CurrentBox = allBoxes[i];
    bool isComply = false;
    for (j = 0; j < allBoxes.length; j++)
    {
        isComply = false;
        if (i == j) continue;
        ComparedBox = allBoxes[j];
        if (isIntersected(CurrentBox, ComparedBox)) break;
        if (isInside(CurrentBox, ComparedBox)) break;
        isComply = true;
    }
    if (isComply)
        if (Area(allBoxes[i]) > largestBoxArea)
        {
            largestBoxArea = Area(allBoxes[i]);
            largestBoxIndex = i;
        }
}
if (largestBoxIndex != -1)
    largestBoxIndex; // this is the largest box
A simple mathematical solution to the problem
Suppose you are given 5 rectangles as shown below:
rects = [[100, 100, 200, 200],
         [200, 200, 200, 200],
         [200, 500, 200, 200],
         [350, 50, 150, 200],
         [500, 400, 200, 300]]
Note that the format of these rectangles is: [x, y, width, height]
Where (x, y) is the coordinate of the top left corner of the rectangle, and width & height are the width and height of the rectangle, respectively. You will have to convert your coordinates to this format first.
3 out of these 5 are intersecting.
Now what we will do is iterate over these rectangles one by one, and for each rectangle, find the intersection of this rectangle with the other rectangles one by one. If any rectangle is found to be intersecting with any of the other rectangles, then we'll set the flag value for the two rectangles as 0. If a rectangle is found not to be intersecting with any other rectangle, then its flag value will be set to 1. (Default flag value is -1). Finally, we'll find the rectangle of the greatest area among the rectangles with flag value 1.
Let's see the code for finding the intersection area of the two rectangles:
# Rect : [x, y, w, h]
def Intersection(Rect1, Rect2):
    x = max(Rect1[0], Rect2[0])
    y = max(Rect1[1], Rect2[1])
    w = min(Rect1[0] + Rect1[2], Rect2[0] + Rect2[2]) - x
    h = min(Rect1[1] + Rect1[3], Rect2[1] + Rect2[3]) - y
    if w < 0 or h < 0:
        return None
    return [x, y, w, h]
This function will return None if there is no intersecting area between these rectangles, or it will return the coordinates of the intersection rectangle. (Ignore this value for the current problem; it might be helpful in other problems.)
Now, let's have a look at the algorithm.
n = len(rects)
# -1 : Not determined
#  0 : Intersects with some
#  1 : No intersection
flag = [-1] * n
for i in range(n):
    if flag[i] == 0:
        continue
    isIntersecting = False
    for j in range(n):
        if i == j or flag[j] == 1:
            continue
        Int_Rect = Intersection(rects[i], rects[j])
        if Int_Rect is not None:
            isIntersecting = True
            flag[j] = 0
            flag[i] = 0
            break
    if isIntersecting == False:
        flag[i] = 1

# Finding the maximum area rectangle without any intersection.
maxRect = None
maxArea = -1
for i in range(n):
    if flag[i] == 1:
        if rects[i][2] * rects[i][3] > maxArea:
            maxRect = rects[i]
            maxArea = rects[i][2] * rects[i][3]
print(maxRect)
Note: Add the "excluded areas" rectangle coordinates to the rects list and assign them a flag value of 0 to prevent them from being selected as the maximum area rectangle.
This solution does not involve any images, so it should be the fastest of these approaches, even before any optimization.
Find the biggest square in numpy array
Maybe this would help? If you know the size of the whole area, you can calculate the biggest box within a numpy array. If you set all your given boxes to 1 and your whole area to 0, you need to find the largest region that contains no 1s.
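A minimal sketch of that grid idea (my own illustration, with made-up sizes): rasterize the boxes into a numpy mask, then find the largest all-zero rectangle with the classic largest-rectangle-in-a-histogram technique:
import numpy as np

def largest_empty_rect(mask):
    """Return (x, y, w, h) of the largest all-zero rectangle in mask."""
    h, w = mask.shape
    heights = np.zeros(w, dtype=int)
    best, best_area = (0, 0, 0, 0), 0
    for row in range(h):
        # Heights of consecutive zero-columns ending at this row
        heights = np.where(mask[row] == 0, heights + 1, 0)
        # Largest rectangle in this histogram, via a monotonic stack
        stack = []
        for col in range(w + 1):
            cur = heights[col] if col < w else 0  # sentinel 0 past the end
            while stack and heights[stack[-1]] >= cur:
                top = stack.pop()
                left = stack[-1] + 1 if stack else 0
                area = int(heights[top]) * (col - left)
                if area > best_area:
                    best_area = area
                    best = (left, row - int(heights[top]) + 1,
                            col - left, int(heights[top]))
            stack.append(col)
    return best

mask = np.zeros((500, 500), dtype=np.uint8)
for x, y, w, h in [(0, 0, 120, 500), (200, 200, 100, 100)]:  # example boxes
    mask[y:y + h, x:x + w] = 1
print(largest_empty_rect(mask))  # the free strip right of the boxes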
Here's an O(n^2) solution. find_maxbox takes an array of rectangles, converts them into Box objects, and then compares each pair of boxes to eliminate invalid rectangles. This solution assumes that the rectangles' sides are parallel to the X-Y axes.
class Box():
    def __init__(self, coordinates):
        self.coordinates = tuple(sorted(coordinates))
        self.original = coordinates
        self.height = abs(self.coordinates[0][1] - self.coordinates[3][1])
        self.width = abs(self.coordinates[0][0] - self.coordinates[3][0])
        self.excluded = False

    def __eq__(self, b2):
        return self.coordinates == b2.coordinates

    def get_area(self):
        return self.height * self.width

    def bounding_box(self, b2):
        maxX, maxY = map(max, zip(*self.coordinates, *b2.coordinates))
        minX, minY = map(min, zip(*self.coordinates, *b2.coordinates))
        return Box([(minX, minY), (maxX, minY), (minX, maxY), (maxX, maxY)])

    def intersects(self, b2):
        box = self.bounding_box(b2)
        if box.height < self.height + b2.height and box.width < self.width + b2.width:
            return True
        else:
            return False

    def encloses(self, b2):
        return self == self.bounding_box(b2)

    def exclude(self):
        self.excluded = True

    def is_excluded(self):
        return self.excluded

    def __str__(self):
        return str(self.original)

    def __repr__(self):
        return str(self.original)

# Pass an array of rectangles as the argument.
def find_maxbox(boxes):
    boxes = sorted(map(Box, boxes), key=Box.get_area, reverse=True)
    _boxes = []
    _boxes.append((boxes[0], boxes[0]))
    for b1 in boxes[1:]:
        b2, bb2 = _boxes[-1]
        bbox = b1.bounding_box(bb2)
        if not b1.intersects(bb2):
            _boxes.append((b1, bbox))
            continue
        for (b2, bb2) in reversed(_boxes):
            if not b1.intersects(bb2):
                break
            if b1.intersects(b2):
                if b2.encloses(b1):
                    b1.exclude()
                    break
                b1.exclude()
                b2.exclude()
        _boxes.append((b1, bbox))
    for box in boxes:
        if box.is_excluded():
            continue
        else:
            return box.original
    return None
In other words:
rectangles that share points are excluded
of the remaining rectangles, take the largest
No need for contours, centroids, bounding boxes, masking or redrawing pixels!
As stated before, in the provided case, the rectangles' coordinates contain duplicates. Here, we use a single class to store the outer limits of the rectangle. The Separating Axis Theorem from this answer by @samgak is used in the intersects() method.
from __future__ import annotations  # optional
from dataclasses import dataclass

@dataclass
class Rectangle:
    left: int
    top: int
    right: int
    bottom: int

    def __repr__(self):
        """String representation of the rectangle's coordinates."""
        return f"⟔ {self.left},{self.top} ⟓ {self.right},{self.bottom}"

    def intersects(self, other: Rectangle):
        """Whether this Rectangle shares points with another Rectangle."""
        h = self.right < other.left or self.left > other.right
        v = self.bottom < other.top or self.top > other.bottom
        return not h and not v

    def size(self):
        """An indicator of the Rectangle's size, equal to half the perimeter."""
        return self.right - self.left + self.bottom - self.top

main = Rectangle(100, 100, 325, 325)
others = {
    0: Rectangle(100, 100, 400, 400),
    1: Rectangle(200, 200, 300, 300),
    2: Rectangle(200, 300, 300, 500),
    3: Rectangle(300, 300, 500, 500),
    4: Rectangle(500, 500, 600, 600),
    5: Rectangle(350, 350, 600, 600),
}

for i, r in others.items():
    print(i, main.intersects(r), r.size())
Simply put, h is True if the other rectangle is completely to the left or to the right; v is True if it's at the top or the bottom. The intersects() method returns True if the rectangles share points (even so much as a corner).
Output:
0 True 600
1 True 200
2 True 300
3 True 400
4 False 500
5 False 200
It is then trivial to find the largest:
valid = {r.size():i for i, r in others.items() if not main.intersects(r)}
print('Largest:', valid[max(valid)], 'with size', max(valid))
Output:
Largest: 4 with size 500
This answer assumes left < right and top < bottom for all rectangles.
The following function turns the provided rectangle coordinates into the kind used by the Rectangle class above. This assumes that the order is [[l, t], [r, t], [r, b], [l, b]] (a path).
def trim(coordinates):
    """Remove redundant coordinates in a path describing a rectangle."""
    return coordinates[0][0], coordinates[1][1], coordinates[2][0], coordinates[3][1]
Finally, we want to do this for all rectangles, not just a "main" one. We can simply have each rectangle be the main one in turns. Use itertools.combinations() on an iterable such as a list:
itertools.combinations(rectangles, 2)
This will ensure that we don't compare two rectangles more than one time.
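Putting it together, a sketch (assuming the Rectangle class and trim() above; coords stands for your input boxes as 4-point paths):
from itertools import combinations

rectangles = [Rectangle(*trim(c)) for c in coords]

# Mark every rectangle that shares points with another one
excluded = set()
for a, b in combinations(range(len(rectangles)), 2):
    if rectangles[a].intersects(rectangles[b]):
        excluded.update((a, b))

survivors = [r for i, r in enumerate(rectangles) if i not in excluded]
if survivors:
    print('Largest:', max(survivors, key=Rectangle.size))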

Python OpenCV append matches' center x, y coordinates in tuples

I have this simple opencv template matching function written in Python.
image:
template:
def find(object, sensitivity):
    screen = "tool.png"
    screen_read = cv2.imread(screen)
    screen_gray = cv2.cvtColor(screen_read, cv2.COLOR_BGR2GRAY)
    obj = cv2.imread(object, cv2.IMREAD_GRAYSCALE)
    w, h = obj.shape[::-1]
    location = np.where(cv2.matchTemplate(screen_gray, obj, cv2.TM_CCORR_NORMED) >= sensitivity)
    positions = []
    for xy in zip(*location[::-1]):
        cv2.rectangle(screen_read, xy, (xy[0] + w, xy[1] + h), (0, 0, 255), 1)
        x = random(xy[0], (xy[0] + w) - 2)
        y = random(xy[1], (xy[1] + h) - 2)
        print(x, y)
        positions.append(str(x) + ", " + str(y))
    #cv2.imshow("Test", screen_read)
    #cv2.waitKey(0)

find("enemylogo.png", 0.90)
It will find all the templates correctly, as shown here:
However, my goal here is to pass the center coordinate to be used in loop, outside the function. For this, I need to store the x, y coordinates in an array (positions), as tuples.
However, I'm not getting the desired results; it's adding too many tuples instead of only 2.
What I'm trying to do is:
for x in find("enemylogo.png", 0.90):
    click(x)  # x would be the coordinate of every template matched.
Could someone help me, please?
The line location = np.where(...) will give you a lot of matches, and many of them will be right next to each other. Another technique is to recursively use minMaxLoc. This function will only give you the best result. But if you overwrite the best match with zeros on the first pass through, the second pass will find another match.
import cv2
import numpy as np
def find_templates(obj, sensitivity):
    image = cv2.imread('tool.png', cv2.IMREAD_COLOR)
    template = cv2.imread(obj, cv2.IMREAD_COLOR)
    h, w = template.shape[:2]
    print('h', h, 'w', w)

    method = cv2.TM_CCORR_NORMED
    threshold = 0.90
    res = cv2.matchTemplate(image, template, method)
    res_h, res_w = res.shape[:2]

    # fake out max_val for first run through loop
    max_val = 1
    centers = []
    while max_val > sensitivity:
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
        if max_val > sensitivity:
            centers.append((max_loc[0] + w // 2, max_loc[1] + h // 2))
            x1 = max(max_loc[0] - w // 2, 0)
            y1 = max(max_loc[1] - h // 2, 0)
            x2 = min(max_loc[0] + w // 2, res_w)
            y2 = min(max_loc[1] + h // 2, res_h)
            res[y1:y2, x1:x2] = 0
            image = cv2.rectangle(image, (max_loc[0], max_loc[1]), (max_loc[0] + w + 1, max_loc[1] + h + 1), (0, 255, 0))
    print(centers)
    cv2.imwrite('output.png', image)

find_templates("enemy_logo.png", 0.90)
which gives
[(52, 52), (169, 52)]
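To use the centers in the loop from the question, one small tweak (my suggestion, not part of the code above) is to return centers at the end of find_templates instead of only printing it; then:
for center in find_templates("enemy_logo.png", 0.90):
    click(center)  # click() is the asker's own function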

Extracting data from tables without any grid lines and border from scanned image of document

Extracting table data from digital PDFs has been simple using camelot and tabula. However, the solution doesn't work with scanned images of document pages, specifically when the table doesn't have borders and inner grids. I have been trying to generate vertical and horizontal lines using OpenCV. However, since the scanned images have slight rotation angles, it is difficult to proceed with this approach.
How can we utilize OpenCV to generate grids (horizontal and vertical lines) and borders for the scanned document page which contains table data (along with paragraphs of text)? If this is feasible, how to nullify the rotation angle of the scanned image?
I wrote some code to estimate the horizontal lines from the printed letters in the page. The same could be done for vertical ones, I guess. The code below follows some general assumptions; here are some basic steps in pseudo-code style:
prepare picture for contour detection
do contour detection
we assume most contours are letters
calc mean width of all contours
calc mean area of contours
filter all contours with two conditions:
a) contour (letter) height < mean height * 2
b) contour area > 4/5 * mean area
calc center point of all remaining contours
assume we have line regions (bins)
list all center points which are inside the region
do linear regression of region points
save slope and intercept
calc mean slope and intercept
here is the full code:
import cv2
import numpy as np
from scipy import stats

def resizeImageByPercentage(img, scalePercent=60):
    width = int(img.shape[1] * scalePercent / 100)
    height = int(img.shape[0] * scalePercent / 100)
    dim = (width, height)
    # resize image
    return cv2.resize(img, dim, interpolation=cv2.INTER_AREA)

def calcAverageContourWithAndHeigh(contourList):
    hs = list()
    ws = list()
    for cnt in contourList:
        (x, y, w, h) = cv2.boundingRect(cnt)
        ws.append(w)
        hs.append(h)
    return np.mean(ws), np.mean(hs)

def calcAverageContourArea(contourList):
    areaList = list()
    for cnt in contourList:
        a = cv2.minAreaRect(cnt)
        areaList.append(a[2])
    return np.mean(areaList)

def calcCentroid(contour):
    houghMoments = cv2.moments(contour)
    # calculate x,y coordinate of centroid
    if houghMoments["m00"] != 0:  # case no contour could be calculated
        cX = int(houghMoments["m10"] / houghMoments["m00"])
        cY = int(houghMoments["m01"] / houghMoments["m00"])
    else:
        # set values as what you need in the situation
        cX, cY = -1, -1
    return cX, cY

def getCentroidWhenSizeInRange(contourList, letterSizeWidth, letterSizeHigh, deltaOffset, minLetterArea=10.0):
    centroidList = list()
    for cnt in contourList:
        (x, y, w, h) = cv2.boundingRect(cnt)
        area = cv2.minAreaRect(cnt)
        # calc diff
        diffW = abs(w - letterSizeWidth)
        diffH = abs(h - letterSizeHigh)
        # threshold A: almost smaller than mean letter size +- offset
        # when almost letterSize
        if diffW < deltaOffset and diffH < deltaOffset:
            # threshold B: > min area
            if area[2] > minLetterArea:
                cX, cY = calcCentroid(cnt)
                if cX != -1 and cY != -1:
                    centroidList.append((cX, cY))
    return centroidList

DEBUGMODE = True
# read image, do git clone https://github.com/WZBSocialScienceCenter/pdftabextract.git for the example
img = cv2.imread('pdftabextract/examples/catalogue_30s/data/ALA1934_RR-excerpt.pdf-2_1.png')
# get some basic infos
imgHeigh, imgWidth, imgChannelAmount = img.shape
if DEBUGMODE:
    cv2.imwrite("img00original.jpg", resizeImageByPercentage(img, 30))
    cv2.imshow("original", img)

# prepare img
imgGrey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# apply Gaussian filter
imgGaussianBlur = cv2.GaussianBlur(imgGrey, (5, 5), 0)
# make binary img, black or white
_, imgBinThres = cv2.threshold(imgGaussianBlur, 130, 255, cv2.THRESH_BINARY)

## detect contours
contours, _ = cv2.findContours(imgBinThres, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

# we get some letter parameters
averageLetterWidth, averageLetterHigh = calcAverageContourWithAndHeigh(contours)
threshold1AllowedLetterSizeOffset = averageLetterHigh * 2  # double size
averageContourAreaSizeOfMinRect = calcAverageContourArea(contours)
threshHold2MinArea = 4 * averageContourAreaSizeOfMinRect / 5  # 4/5 * mean
print("mean letter Width: ", averageLetterWidth)
print("mean letter High: ", averageLetterHigh)
print("threshold 1 tolerance: ", threshold1AllowedLetterSizeOffset)
print("mean letter area ", averageContourAreaSizeOfMinRect)
print("threshold 2 min letter area ", threshHold2MinArea)

# we get all centroids of letter-sized contours, the others we ignore
centroidList = getCentroidWhenSizeInRange(contours, averageLetterWidth, averageLetterHigh, threshold1AllowedLetterSizeOffset, threshHold2MinArea)

if DEBUGMODE:
    # debug print all centers:
    imgFilteredCenter = img.copy()
    for cX, cY in centroidList:
        # draw in red color as BGR
        cv2.circle(imgFilteredCenter, (cX, cY), 5, (0, 0, 255), -1)
    cv2.imwrite("img01letterCenters.jpg", resizeImageByPercentage(imgFilteredCenter, 30))
    cv2.imshow("letterCenters", imgFilteredCenter)

# we estimate a bin width
amountPixelFreeSpace = averageLetterHigh  # TODO get better estimate out of histogram
estimatedBinWidth = round(averageLetterHigh + amountPixelFreeSpace)  # TODO round better ?
binCollection = dict()  # range(0, imgHeigh, estimatedBinWidth)

# we separate the center points into bins by y coordinate
for i in range(0, imgHeigh, estimatedBinWidth):
    listCenterPointsInBin = list()
    yMin = i
    yMax = i + estimatedBinWidth
    for cX, cY in centroidList:
        if yMin < cY < yMax:  # if it fits in the bin
            listCenterPointsInBin.append((cX, cY))
    binCollection[i] = listCenterPointsInBin

# we assume all points are in one line ?
# model = slope (x) + intercept
# model = m (x) + n
mList = list()  # slope abs in img
nList = list()  # intercept abs in img
nListRelative = list()  # intercept relative to bin start
minAmountRegressionElements = 12  # is also alias for letter amount we expect
# we do regression for every point in the bin
for startYOfBin, values in binCollection.items():
    # we reform values
    xValues = []  # TODO use a shorter transform
    yValues = []
    for x, y in values:
        xValues.append(x)
        yValues.append(y)
    # we assume a min limit of points in bin
    if len(xValues) >= minAmountRegressionElements:
        slope, intercept, r, p, std_err = stats.linregress(xValues, yValues)
        mList.append(slope)
        nList.append(intercept)
        # we calc the relative intercept
        nRelativeToBinStart = intercept - startYOfBin
        nListRelative.append(nRelativeToBinStart)

if DEBUGMODE:
    # we debug print all lines in one picture
    imgLines = img.copy()
    colorOfLine = (0, 255, 0)  # green
    for i in range(0, len(mList)):
        slope = mList[i]
        intercept = nList[i]
        startPoint = (0, int(intercept))  # better round ?
        endPointY = int(slope * imgWidth + intercept)
        if endPointY < 0:
            endPointY = 0
        endPoint = (imgWidth, endPointY)  # line end at the right image border
        cv2.line(imgLines, startPoint, endPoint, colorOfLine, 2)
    cv2.imwrite("img02lines.jpg", resizeImageByPercentage(imgLines, 30))
    cv2.imshow("linesOfLetters ", imgLines)

# we assume in mean we got it right
meanIntercept = np.mean(nListRelative)
meanSlope = np.mean(mList)
print("meanIntercept :", meanIntercept)
print("meanSlope ", meanSlope)
# TODO calc angle with math.atan(slope) ...

if DEBUGMODE:
    cv2.waitKey(0)
original:
center point of letters:
lines:
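As a possible follow-up to the TODO in the code above, a deskewing sketch (my addition; depending on the sign conventions, the angle may need to be negated):
import math

# Convert the mean slope into an angle and rotate the page around its center
angleDeg = math.degrees(math.atan(meanSlope))
center = (imgWidth / 2, imgHeigh / 2)
rotMat = cv2.getRotationMatrix2D(center, angleDeg, 1.0)
imgDeskewed = cv2.warpAffine(img, rotMat, (imgWidth, imgHeigh),
                             flags=cv2.INTER_LINEAR,
                             borderMode=cv2.BORDER_REPLICATE)
if DEBUGMODE:
    cv2.imwrite("img03deskewed.jpg", resizeImageByPercentage(imgDeskewed, 30))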
I had the same problem some time ago and this tutorial is the solution to that. It explains using pdftabextract, a Python library by Markus Konrad that leverages OpenCV's Hough transform to detect the lines, and it works even if the scanned document is a bit tilted. The tutorial walks you through parsing a 1920s German newspaper.

Detect multiple rectangles in image

I am trying to detect the count of pipes in this picture. For this, I'm using OpenCV and Python-based detection. Based, on existing answers to similar questions, I was able to come up with the following steps
Open the image
Filter it
Apply Edge Detection
Use Contours
Check for the count
The total count of pipes is ~909 when we count manually, give or take 4.
After applying the filter
import cv2
import matplotlib.pyplot as plt
import numpy as np
img = cv2.imread('images/input-rectpipe-1.jpg')
blur_hor = cv2.filter2D(img[:, :, 0], cv2.CV_32F, kernel=np.ones((11,1,1), np.float32)/11.0, borderType=cv2.BORDER_CONSTANT)
blur_vert = cv2.filter2D(img[:, :, 0], cv2.CV_32F, kernel=np.ones((1,11,1), np.float32)/11.0, borderType=cv2.BORDER_CONSTANT)
mask = ((img[:,:,0]>blur_hor*1.2) | (img[:,:,0]>blur_vert*1.2)).astype(np.uint8)*255
I get this masked image
This looks fairly accurate in terms of the number of visible rectangles it shows. However, when I try to take the count and plot the bounding box on top of the picture, it picks a lot of unwanted regions as well. For circles, HoughCircles has a way of defining the max and min radius. Is there something similar for rectangles that can improve accuracy? Also, I'm open to suggestions for alternative approaches to this problem.
ret, thresh = cv2.threshold(mask, 127, 255, 0)
contours, hierarchy = cv2.findContours(thresh, 1, 2)
count = 0
for i in range(len(contours)):
    count = count + 1
    x, y, w, h = cv2.boundingRect(contours[i])
    rect = cv2.minAreaRect(contours[i])
    area = cv2.contourArea(contours[i])
    box = cv2.boxPoints(rect)
    ratio = w / h
    M = cv2.moments(contours[i])
    if M["m00"] == 0.0:
        cX = int(M["m10"] / 1)
        cY = int(M["m01"] / 1)
    if M["m00"] != 0.0:
        cX = int(M["m10"] / M["m00"])
        cY = int(M["m01"] / M["m00"])
    if (area > 50 and area < 220 and hierarchy[0][i][2] < 0 and (ratio > .5 and ratio < 2)):
        #cv2.rectangle(img, (x,y), (x+w,y+h), (0,255,0), 2)
        cv2.circle(img, (cX, cY), 1, (255, 255, 255), -1)
        count = count + 1
print(count)
cv2.imshow("m", mask)
cv2.imshow("f", img)
cv2.waitKey(0)
UPDATE
Based on the second answer, I have converted the C++ code to Python and got closer results, but I am still missing out on a few obvious rectangles.
Of course you could filter them by their area. I took your binary image and continued the work as below:
1- Do a loop on all the contours you found from findContours
2- In the loop check if each contour, is an internal contour or not
3- From those which are internal contours, check their area and if the area is in the acceptable range, check the width/height ratio of each contour and finally if it is good too, count that contour as a pipe.
I did the above method on your binary image, and found 794 pipes:
(Some boxes are lost though, You should change the parameters of the edge detector to get more separable boxes in the image.)
and here is the code (it's C++, but easily convertible to Python):
Mat img__1, img__2, img__ = imread("E:/R.jpg", 0);
threshold(img__, img__1, 128, 255, THRESH_BINARY);

vector<vector<Point>> contours;
vector<Vec4i> hierarchy;
findContours(img__1, contours, hierarchy, RETR_CCOMP, CHAIN_APPROX_NONE);

Mat tmp = Mat::zeros(img__1.size(), CV_8U);
int k = 0;
for (size_t i = 0; i < contours.size(); i++)
{
    double area = contourArea(contours[i]);
    Rect rec = boundingRect(contours[i]);
    float ratio = rec.width / float(rec.height);

    // hierarchy[i][2] < 0 stands for internal contours
    if (area > 50 && area < 220 && hierarchy[i][2] < 0 && (ratio > .5 && ratio < 2))
    {
        k++;
        drawContours(tmp, contours, i, Scalar(255, 255, 255), -1);
    }
}
cout << "k= " << k << "\n";
imshow("1", img__1);
imshow("2", tmp);
waitKey(0);
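For convenience, here is a rough Python equivalent of the C++ above (my conversion, assuming OpenCV 4's two-value findContours return; with RETR_CCOMP, hierarchy[0][i][2] < 0 marks contours without a child, i.e. the internal contours):
import cv2
import numpy as np

img = cv2.imread("R.jpg", 0)
_, binary = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY)
contours, hierarchy = cv2.findContours(binary, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)

tmp = np.zeros(binary.shape, np.uint8)
k = 0
for i, cnt in enumerate(contours):
    area = cv2.contourArea(cnt)
    x, y, w, h = cv2.boundingRect(cnt)
    ratio = w / float(h)
    if 50 < area < 220 and hierarchy[0][i][2] < 0 and 0.5 < ratio < 2:
        k += 1
        cv2.drawContours(tmp, contours, i, 255, -1)

print("k =", k)
cv2.imshow("1", binary)
cv2.imshow("2", tmp)
cv2.waitKey(0)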
There are many methods to solve this problem, but I doubt there will be a single method without some kind of ad-hoc measures. Here is another attempt at this problem.
Instead of using the edge information, I suggest an LBP (local binary pattern)-like filter that compares the surrounding pixels with the center value. If a certain percentage of the surrounding pixels are larger than the center pixel, the center pixel will be labeled 255. If the condition is not met, then the center pixel will be labeled 0.
This intensity-based method is run on the assumption that the pipe center is always darker than the pipe edges. Since it is comparing intensity, it should work well as long as some contrast remains.
Through this process, you will obtain an image with binary blobs for every pipe and some noise. You will have to remove them with some pre-known conditions such as size, shape, fill_ratio, color, etc. The conditions can be found in the given code.
import cv2
import matplotlib.pyplot as plt
import numpy as np

# Morphological function sets
def morph_operation(matinput):
    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    morph = cv2.erode(matinput, kernel, iterations=1)
    morph = cv2.dilate(morph, kernel, iterations=2)
    morph = cv2.erode(matinput, kernel, iterations=1)
    morph = cv2.dilate(morph, kernel, iterations=1)
    return morph

# Analyze blobs
def analyze_blob(matblobs, display_frame):
    _, blobs, _ = cv2.findContours(matblobs, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    valid_blobs = []
    for i, blob in enumerate(blobs):
        rot_rect = cv2.minAreaRect(blob)
        b_rect = cv2.boundingRect(blob)
        (cx, cy), (sw, sh), angle = rot_rect
        rx, ry, rw, rh = b_rect
        box = cv2.boxPoints(rot_rect)
        box = np.int0(box)
        # Draw the segmented Box region
        frame = cv2.drawContours(display_frame, [box], 0, (0, 0, 255), 1)
        on_count = cv2.contourArea(blob)
        total_count = sw * sh
        if total_count <= 0:
            continue
        if sh > sw:
            temp = sw
            sw = sh
            sh = temp
        # minimum area
        if sw * sh < 20:
            continue
        # maximum area
        if sw * sh > 100:
            continue
        # ratio of box
        rect_ratio = sw / sh
        if rect_ratio <= 1 or rect_ratio >= 3.5:
            continue
        # ratio of fill
        fill_ratio = on_count / total_count
        if fill_ratio < 0.4:
            continue
        # remove blob that is too bright
        if display_frame[int(cy), int(cx), 0] > 75:
            continue
        valid_blobs.append(blob)
    if valid_blobs:
        print("Number of Blobs : ", len(valid_blobs))
    cv2.imshow("display_frame_in", display_frame)
    return valid_blobs

def lbp_like_method(matinput, radius, stren, off):
    height, width = np.shape(matinput)
    roi_radius = radius
    peri = roi_radius * 8
    matdst = np.zeros_like(matinput)
    for y in range(height):
        y_ = y - roi_radius
        _y = y + roi_radius
        if y_ < 0 or _y >= height:
            continue
        for x in range(width):
            x_ = x - roi_radius
            _x = x + roi_radius
            if x_ < 0 or _x >= width:
                continue
            r1 = matinput[y_:_y, x_]
            r2 = matinput[y_:_y, _x]
            r3 = matinput[y_, x_:_x]
            r4 = matinput[_y, x_:_x]
            center = matinput[y, x]
            valid_cell_1 = len(r1[r1 > center + off])
            valid_cell_2 = len(r2[r2 > center + off])
            valid_cell_3 = len(r3[r3 > center + off])
            valid_cell_4 = len(r4[r4 > center + off])
            total = valid_cell_1 + valid_cell_2 + valid_cell_3 + valid_cell_4
            if total > stren * peri:
                matdst[y, x] = 255
    return matdst

def main_process():
    img = cv2.imread('image.jpg')
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Blurred to remove noise
    blurred = cv2.GaussianBlur(gray, (3, 3), -1)
    # Parameter tuning
    winsize = 5
    peri = 0.6
    off = 4
    matlbp = lbp_like_method(gray, winsize, peri, off)
    cv2.imshow("matlbp", matlbp)
    cv2.waitKey(1)
    matmorph = morph_operation(matlbp)
    cv2.imshow("matmorph", matmorph)
    cv2.waitKey(1)
    display_color = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
    valid_blobs = analyze_blob(matmorph, display_color)
    for b in range(len(valid_blobs)):
        cv2.drawContours(display_color, valid_blobs, b, (0, 255, 255), -1)
    cv2.imshow("display_color", display_color)
    cv2.waitKey(0)

if __name__ == '__main__':
    main_process()
Result from the LBP-like processing
After cleaning with morphological process
Final result with the red boxes showing all the blob candidates and the yellow segments showing blobs that pass all the condition we set. There are some false alarms below and on top of the pipe bundle but they can be omitted with some boundary conditions.
Total pipe found : 943

Retrieving information from a Mask_RCNN Tensor

I've successfully trained a Mask_RCNN, and for illustration purposes, let's focus on this sample image the network generates:
It's all very good, no problem. What I'd like to achieve however is to have the following variables with their values per instance:
mask: (as an image which shows the detected object only, like a binary map)
box: (as a list)
mask_border_positions (x,y) : (as a list)
mask_center_position (x,y) : (as a tuple)
I've also the function which visualizes the above image, from the official site:
def display_instances(image, boxes, masks, class_ids, class_names,
                      scores=None, title="",
                      figsize=(16, 16), ax=None,
                      show_mask=True, show_bbox=True,
                      colors=None, captions=None):
    """
    boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates.
    masks: [height, width, num_instances]
    class_ids: [num_instances]
    class_names: list of class names of the dataset
    scores: (optional) confidence scores for each box
    title: (optional) Figure title
    show_mask, show_bbox: To show masks and bounding boxes or not
    figsize: (optional) the size of the image
    colors: (optional) An array or colors to use with each object
    captions: (optional) A list of strings to use as captions for each object
    """
    # Number of instances
    N = boxes.shape[0]
    if not N:
        print("\n*** No instances to display *** \n")
    else:
        assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0]

    # If no axis is passed, create one and automatically call show()
    auto_show = False
    if not ax:
        _, ax = plt.subplots(1, figsize=figsize)
        auto_show = True

    # Generate random colors
    colors = colors or random_colors(N)

    # Show area outside image boundaries.
    height, width = image.shape[:2]
    ax.set_ylim(height + 10, -10)
    ax.set_xlim(-10, width + 10)
    ax.axis('off')
    ax.set_title(title)

    masked_image = image.astype(np.uint32).copy()
    for i in range(N):
        color = colors[i]

        # Bounding box
        if not np.any(boxes[i]):
            # Skip this instance. Has no bbox. Likely lost in image cropping.
            continue
        y1, x1, y2, x2 = boxes[i]
        if show_bbox:
            p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
                                  alpha=0.7, linestyle="dashed",
                                  edgecolor=color, facecolor='none')
            ax.add_patch(p)

        # Label
        if not captions:
            class_id = class_ids[i]
            score = scores[i] if scores is not None else None
            label = class_names[class_id]
            x = random.randint(x1, (x1 + x2) // 2)
            caption = "{} {:.3f}".format(label, score) if score else label
        else:
            caption = captions[i]
        ax.text(x1, y1 + 8, caption,
                color='w', size=11, backgroundcolor="none")

        # Mask
        mask = masks[:, :, i]
        if show_mask:
            masked_image = apply_mask(masked_image, mask, color)

        # Mask Polygon
        # Pad to ensure proper polygons for masks that touch image edges.
        padded_mask = np.zeros(
            (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
        padded_mask[1:-1, 1:-1] = mask
        contours = find_contours(padded_mask, 0.5)
        for verts in contours:
            # Subtract the padding and flip (y, x) to (x, y)
            verts = np.fliplr(verts) - 1
            p = Polygon(verts, facecolor="none", edgecolor=color)
            ax.add_patch(p)

    ax.imshow(masked_image.astype(np.uint8))
    if auto_show:
        plt.show()
These code snippets below are then called in the main as follows:
file_names = glob(os.path.join(IMAGE_DIR, "*.jpg"))
masks_prediction = np.zeros((510, 510, len(file_names)))
for i in range(len(file_names)):
    print(i)
    image = skimage.io.imread(file_names[i])
    predictions = model.detect([image], verbose=1)
    p = predictions[0]
    masks = p['masks']
    merged_mask = np.zeros((masks.shape[0], masks.shape[1]))
    for j in range(masks.shape[2]):
        merged_mask[masks[:, :, j] == True] = True
    masks_prediction[:, :, i] = merged_mask
print(masks_prediction.shape)
and:
file_names = glob(os.path.join(IMAGE_DIR, "*.jpg"))
class_names = ['BG', 'car', 'traffic_light', 'person']
test_image = skimage.io.imread(file_names[random.randint(0,len(file_names)-1)])
predictions = model.detect([test_image], verbose=1) # We are replicating the same image to fill up the batch_size
p = predictions[0]
visualize.display_instances(test_image, p['rois'], p['masks'], p['class_ids'],
class_names, p['scores'])
I know it's probably a trivial question and these values already exist in the code somewhere, but since I am a starter, I could not get the mask outlines or their centers. If there is a way to have this information per instance, it would be great.
Thanks in advance.
The following does it right:
masks = p['masks']
class_ids = p['class_ids']
rois = p['rois']
scores = p['scores']

bounding_box = rois[enumerator]  # `enumerator` is the index of the instance you want
as for the outline coordinates:
def getBoundaryPositions(im):
    class_ids = p['class_ids']  # for usage convenience
    im = im.astype(np.uint8)
    # Find contours:
    (im, contours, hierarchy) = cv2.findContours(im, cv2.RETR_EXTERNAL,
                                                 cv2.CHAIN_APPROX_NONE)
    cnts = contours[0]
    outline_posesXY = np.array([x[0] for x in cnts])

    # Calculate image moments of the detected contour
    M = cv2.moments(contours[0])

    # collect pose points (for now only position, because we don't have pose) of the center
    positionXY = []
    positionXY.append(round(M['m10'] / M['m00']))
    positionXY.append(round(M['m01'] / M['m00']))

    return (im, positionXY, outline_posesXY)
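A sketch of applying this per instance (assuming p from model.detect() as above, and the three-value findContours return of the OpenCV version used in this answer):
masks = p['masks']
rois = p['rois']
for i in range(masks.shape[-1]):
    instance_mask = (masks[:, :, i] * 255).astype(np.uint8)  # binary map of one object
    y1, x1, y2, x2 = rois[i]                                 # box for this instance
    _, center_xy, outline_xy = getBoundaryPositions(instance_mask)
    print(i, [x1, y1, x2, y2], tuple(center_xy), outline_xy.shape)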
