I've successfully trained a Mask R-CNN, and for illustration purposes, let's focus on this sample image the network generates:
It all works fine. What I'd like to achieve, however, is to have the following variables with their values per instance:
mask: (as an image which shows the detected object only, like a binary map)
box: (as a list)
mask_border_positions (x,y) : (as a list)
mask_center_position (x,y) : (as a tuple)
I also have the function which visualizes the above image, from the official repository:
def display_instances(image, boxes, masks, class_ids, class_names,
scores=None, title="",
figsize=(16, 16), ax=None,
show_mask=True, show_bbox=True,
colors=None, captions=None):
"""
boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates.
masks: [height, width, num_instances]
class_ids: [num_instances]
class_names: list of class names of the dataset
scores: (optional) confidence scores for each box
title: (optional) Figure title
show_mask, show_bbox: To show masks and bounding boxes or not
figsize: (optional) the size of the image
colors: (optional) An array of colors to use with each object
captions: (optional) A list of strings to use as captions for each object
"""
# Number of instances
N = boxes.shape[0]
if not N:
print("\n*** No instances to display *** \n")
else:
assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0]
# If no axis is passed, create one and automatically call show()
auto_show = False
if not ax:
_, ax = plt.subplots(1, figsize=figsize)
auto_show = True
# Generate random colors
colors = colors or random_colors(N)
# Show area outside image boundaries.
height, width = image.shape[:2]
ax.set_ylim(height + 10, -10)
ax.set_xlim(-10, width + 10)
ax.axis('off')
ax.set_title(title)
masked_image = image.astype(np.uint32).copy()
for i in range(N):
color = colors[i]
# Bounding box
if not np.any(boxes[i]):
# Skip this instance. Has no bbox. Likely lost in image cropping.
continue
y1, x1, y2, x2 = boxes[i]
if show_bbox:
p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
alpha=0.7, linestyle="dashed",
edgecolor=color, facecolor='none')
ax.add_patch(p)
# Label
if not captions:
class_id = class_ids[i]
score = scores[i] if scores is not None else None
label = class_names[class_id]
x = random.randint(x1, (x1 + x2) // 2)
caption = "{} {:.3f}".format(label, score) if score else label
else:
caption = captions[i]
ax.text(x1, y1 + 8, caption,
color='w', size=11, backgroundcolor="none")
# Mask
mask = masks[:, :, i]
if show_mask:
masked_image = apply_mask(masked_image, mask, color)
# Mask Polygon
# Pad to ensure proper polygons for masks that touch image edges.
padded_mask = np.zeros(
(mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
padded_mask[1:-1, 1:-1] = mask
contours = find_contours(padded_mask, 0.5)
for verts in contours:
# Subtract the padding and flip (y, x) to (x, y)
verts = np.fliplr(verts) - 1
p = Polygon(verts, facecolor="none", edgecolor=color)
ax.add_patch(p)
ax.imshow(masked_image.astype(np.uint8))
if auto_show:
plt.show()
These snippets are then called in main as follows:
file_names = glob(os.path.join(IMAGE_DIR, "*.jpg"))
masks_prediction = np.zeros((510, 510, len(file_names)))
for i in range(len(file_names)):
print(i)
image = skimage.io.imread(file_names[i])
predictions = model.detect([image], verbose=1)
p = predictions[0]
masks = p['masks']
merged_mask = np.zeros((masks.shape[0], masks.shape[1]))
for j in range(masks.shape[2]):
merged_mask[masks[:,:,j]==True] = True
masks_prediction[:,:,i] = merged_mask
print(masks_prediction.shape)
and:
file_names = glob(os.path.join(IMAGE_DIR, "*.jpg"))
class_names = ['BG', 'car', 'traffic_light', 'person']
test_image = skimage.io.imread(file_names[random.randint(0,len(file_names)-1)])
predictions = model.detect([test_image], verbose=1) # We are replicating the same image to fill up the batch_size
p = predictions[0]
visualize.display_instances(test_image, p['rois'], p['masks'], p['class_ids'],
class_names, p['scores'])
I know it's probably a trivial question and these values probably already exist somewhere in the code, but since I am a beginner, I could not extract the mask outlines or their centers. If there is a way to get this information per instance, it would be great.
Thanks in advance.
The following gets it right:
masks = p['masks']
class_ids = p['class_ids']
rois = p['rois']
scores = p['scores']
bounding_box = rois[i]  # i is the index of the instance
As for the outline coordinates:
def getBoundaryPositions(im):
    im = im.astype(np.uint8)
    # Find contours (OpenCV 3.x API; in OpenCV 4.x, findContours returns
    # only contours and hierarchy):
    (im, contours, hierarchy) = cv2.findContours(im, cv2.RETR_EXTERNAL,
                                                 cv2.CHAIN_APPROX_NONE)
    cnts = contours[0]
    outline_posesXY = np.array([x[0] for x in cnts])
    # Calculate image moments of the detected contour
    M = cv2.moments(contours[0])
    # Collect the center position; the centroid is (m10/m00, m01/m00)
    positionXY = []
    positionXY.append(round(M['m10'] / M['m00']))
    positionXY.append(round(M['m01'] / M['m00']))
    return (im, positionXY, outline_posesXY)
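To tie it together, a minimal per-instance loop might look like this (a sketch; it assumes p is the detection dict returned by model.detect and reuses getBoundaryPositions from above):
masks = p['masks']
rois = p['rois']
for i in range(masks.shape[2]):
    mask = (masks[:, :, i] * 255).astype(np.uint8)  # binary image of this instance only
    box = list(rois[i])                             # [y1, x1, y2, x2]
    _, center, border = getBoundaryPositions(masks[:, :, i])
    mask_center_position = tuple(center)            # (x, y) tuple
    mask_border_positions = border.tolist()         # list of (x, y) points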
I have an image of drops and I want to count them.
Here is the original image:
And here it is after thresholding:
I tried a lot of OpenCV functions and it's never right.
Do you have any ideas on how to do this?
Thanks
The best I got was by using:
(img_morph is my binarized image)
from skimage.measure import label, regionprops
import matplotlib.pyplot as plt

rbc_bw = label(img_morph)
rbc_props = regionprops(rbc_bw)
fig, ax = plt.subplots(figsize=(18, 8))
ax.imshow(img_morph)
rbc_count = 0
for i, prop in enumerate(filter(lambda x: x.area > 250, rbc_props)):
y1, x1, y2, x2 = (prop.bbox[0], prop.bbox[1],
prop.bbox[2], prop.bbox[3])
width = x2 - x1
height = y2 - y1
r = plt.Rectangle((x1, y1), width = width, height=height,
color='b', fill=False)
ax.add_patch(r)
rbc_count += 1
print('Red Blood Cell Count:', rbc_count)
plt.show()
All my circles are detected here, but so are the gaps in between.
A more difficult image:
Core idea: matchTemplate.
Approach:
pick a template manually from the picture
histogram equalization for badly lit inputs (or always)
matchTemplate with suitable matching mode
also using copyMakeBorder to catch instances clipping the border
thresholding and non-maximum suppression
I'll skip the boring parts and use the first example input; a sketch of the skipped setup follows after the results.
Manually picked template:
scores = cv.matchTemplate(haystack, template, cv.TM_CCOEFF_NORMED)
Thresholding and NMS:
levelmask = (scores >= 0.3)
localmax = cv.dilate(scores, None, iterations=26)
localmax = (scores == localmax)
candidates = levelmask & localmax
(nlabels, labels, stats, centroids) = cv.connectedComponentsWithStats(candidates.astype(np.uint8), connectivity=8)
print(nlabels-1, "found") # background counted too
# and then draw a circle for each centroid except label 0
And that finds 766 instances. I see a few false negatives (missed) and saw a false positive too once, but that looks like less than 1%.
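For reference, the skipped setup might look like the following sketch (the file names are assumptions and the template is still picked by hand):
import cv2 as cv
import numpy as np

haystack = cv.imread("drops.png", cv.IMREAD_GRAYSCALE)
template = cv.imread("template.png", cv.IMREAD_GRAYSCALE)

# Histogram equalization for badly lit inputs (or always)
haystack = cv.equalizeHist(haystack)
template = cv.equalizeHist(template)

# Pad the haystack so instances clipping the border can still be matched
th, tw = template.shape[:2]
haystack = cv.copyMakeBorder(haystack, th, th, tw, tw, cv.BORDER_REPLICATE)

scores = cv.matchTemplate(haystack, template, cv.TM_CCOEFF_NORMED)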
I'm building a "Circle View" (bird's-eye view) system for a long truck. Because the truck is long, one camera per side is not enough, so I decided to try mounting two cameras and stitching their images together. There is a drawback, though: the code I use doesn't stitch the images evenly, and the seam is visible. How can I change the code so that the seam is less visible and the stitching is better? A 4x6 chessboard stands in the middle, between the two video cameras. Maybe there's a way to stitch based on the checkerboard?
Here's my stitching result:
Here are two images to be stitched together:
Image 1:
Image 2:
The code I have now:
import cv2 as cv
import numpy as np
def FindHomography(Matches, BaseImage_kp, SecImage_kp):
# If less than 4 matches found, exit the code.
if len(Matches) < 4:
print("\nNot enough matches found between the images.\n")
exit(0)
# Storing coordinates of points corresponding to the matches found in both the images
BaseImage_pts = []
SecImage_pts = []
for Match in Matches:
BaseImage_pts.append(BaseImage_kp[Match[0].queryIdx].pt)
SecImage_pts.append(SecImage_kp[Match[0].trainIdx].pt)
# Changing the datatype to "float32" for finding homography
BaseImage_pts = np.float32(BaseImage_pts)
SecImage_pts = np.float32(SecImage_pts)
# Finding the homography matrix(transformation matrix).
(HomographyMatrix, Status) = cv.findHomography(SecImage_pts, BaseImage_pts, cv.RANSAC, 4.0)
return HomographyMatrix, Status
def GetNewFrameSizeAndMatrix(HomographyMatrix, Sec_ImageShape, Base_ImageShape):
# Reading the size of the image
(Height, Width) = Sec_ImageShape
# Taking the matrix of initial coordinates of the corners of the secondary image
# Stored in the following format: [[x1, x2, x3, x4], [y1, y2, y3, y4], [1, 1, 1, 1]]
# Where (xi, yi) is the coordinate of the i th corner of the image.
InitialMatrix = np.array([[0, Width - 1, Width - 1, 0],
[0, 0, Height - 1, Height - 1],
[1, 1, 1, 1]])
# Finding the final coordinates of the corners of the image after transformation.
# NOTE: Here, the coordinates of the corners of the frame may go out of the
# frame(negative values). We will correct this afterwards by updating the
# homography matrix accordingly.
FinalMatrix = np.dot(HomographyMatrix, InitialMatrix)
[x, y, c] = FinalMatrix
x = np.divide(x, c)
y = np.divide(y, c)
# Finding the dimensions of the stitched image frame and the "Correction" factor
min_x, max_x = int(round(min(x))), int(round(max(x)))
min_y, max_y = int(round(min(y))), int(round(max(y)))
New_Width = max_x
New_Height = max_y
Correction = [0, 0]
if min_x < 0:
New_Width -= min_x
Correction[0] = abs(min_x)
if min_y < 0:
New_Height -= min_y
Correction[1] = abs(min_y)
# Again correcting New_Width and New_Height
# Helpful when the secondary image is overlapped on the left-hand side of the base image.
if New_Width < Base_ImageShape[1] + Correction[0]:
New_Width = Base_ImageShape[1] + Correction[0]
if New_Height < Base_ImageShape[0] + Correction[1]:
New_Height = Base_ImageShape[0] + Correction[1]
# Finding the coordinates of the corners of the image if they all were within the frame.
x = np.add(x, Correction[0])
y = np.add(y, Correction[1])
OldInitialPoints = np.float32([[0, 0],
[Width - 1, 0],
[Width - 1, Height - 1],
[0, Height - 1]])
NewFinalPoints = np.float32(np.array([x, y]).transpose())
# Updating the homography matrix so that the secondary image completely
# lies inside the frame
HomographyMatrix = cv.getPerspectiveTransform(OldInitialPoints, NewFinalPoints)
return [New_Height, New_Width], Correction, HomographyMatrix
ratio_thresh = 0.9
image1 = cv.imread(filename='/home/msi-user/PycharmProjects/170Camera/1_camera.jpg')
image2 = cv.imread(filename='/home/msi-user/PycharmProjects/170Camera/2_camera.jpg')
# -----------------------------------------KAZE--------------------------------#
AKAZE = cv.KAZE_create() # KAZE, AKAZE, ORB, BRISK, xfeatures2d.SURF
keypoints1, descriptors1 = AKAZE.detectAndCompute(image1, None)
keypoints2, descriptors2 = AKAZE.detectAndCompute(image2, None)
FLANN_INDEX_KDTREE = 1
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
search_params = dict(checks=50)
descriptors1 = np.float32(descriptors1)
descriptors2 = np.float32(descriptors2)
FLANN = cv.FlannBasedMatcher(indexParams=index_params,
searchParams=search_params)
matches = FLANN.knnMatch(queryDescriptors=descriptors1,
trainDescriptors=descriptors2,
k=2)
good_matches = []
t = []
for m, n in matches:
if m.distance < ratio_thresh * n.distance:
good_matches.append([m])
t.append(m)
output = cv.drawMatches(img1=image1,
keypoints1=keypoints1,
img2=image2,
keypoints2=keypoints2,
matches1to2=t,
outImg=None,
flags=cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
cv.namedWindow("drawMatches.jpg", cv.WINDOW_NORMAL)
cv.imshow("drawMatches.jpg", output)
# ----------------------------------FindHomography-------------------------------------------#
HomographyMatrix, Status = FindHomography(good_matches, keypoints1, keypoints2)
BaseImage = image1
SecImage = image2
NewFrameSize, Correction, HomographyMatrix = GetNewFrameSizeAndMatrix(HomographyMatrix, SecImage.shape[:2],
BaseImage.shape[:2])
StitchedImage = cv.warpPerspective(SecImage, HomographyMatrix, (NewFrameSize[1], NewFrameSize[0]))
StitchedImage[Correction[1]:Correction[1] + BaseImage.shape[0],
Correction[0]:Correction[0] + BaseImage.shape[1]] = BaseImage
cv.namedWindow("stisched2.jpg", cv.WINDOW_NORMAL)
cv.imshow("stisched2.jpg", StitchedImage)
cv.imwrite("result.jpg", StitchedImage)
while True:
if cv.waitKey(1) == 27:
break
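One common way to make the seam less visible is to feather (alpha-blend) the two images across the overlap instead of pasting the base image on top of the warp. A minimal sketch, not a tested fix: it blends StitchedImage (the warped secondary image, before the paste) with a canvas holding only BaseImage, weighting each pixel by its distance to the empty area:
import cv2 as cv
import numpy as np

def feather_blend(imgA, imgB):
    # Both images live in the same (stitched) frame and are black where empty
    maskA = (imgA.sum(axis=2) > 0).astype(np.uint8)
    maskB = (imgB.sum(axis=2) > 0).astype(np.uint8)
    # Weight each pixel by its distance to the nearest empty pixel
    distA = cv.distanceTransform(maskA, cv.DIST_L2, 3)
    distB = cv.distanceTransform(maskB, cv.DIST_L2, 3)
    total = distA + distB
    total[total == 0] = 1.0  # avoid division by zero where both are empty
    wA = (distA / total)[..., None]
    wB = (distB / total)[..., None]
    return (imgA * wA + imgB * wB).astype(np.uint8)

# Instead of overwriting StitchedImage with BaseImage:
canvas = np.zeros_like(StitchedImage)
canvas[Correction[1]:Correction[1] + BaseImage.shape[0],
       Correction[0]:Correction[0] + BaseImage.shape[1]] = BaseImage
blended = feather_blend(StitchedImage, canvas)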
I am trying to turn an object detector for images into an object detector for videos, but I am getting multiple bounding boxes and I don't know why.
It seems the first frame of the video has the correct number of bounding boxes, namely 1, but as the loop runs, the function draw_boxes outputs images that have multiple or overlapping bounding boxes.
If you can help, I will appreciate it. Thanks.
Here is an example of one frame:
And here is the code:
for i in tqdm(range(nb_frames)):
_, frame = video_reader.read()
cv2.imwrite("framey.jpg", frame)
filename = "framey.jpg"
image, image_w, image_h = load_image_pixels(filename, (input_w, input_h))
yhat = model.predict(image)
for i in range(len(yhat)):
# decode the output of the network
boxes += decode_netout(yhat[i][0], anchors[i], class_threshold, input_h, input_w)
# correct the sizes of the bounding boxes for the shape of the image
correct_yolo_boxes(boxes, image_h, image_w, input_h, input_w)
# suppress non-maximal boxes
do_nms(boxes, 0.5)
# get the details of the detected objects
v_boxes, v_labels, v_scores = get_boxes(boxes, labels, class_threshold)
# draw what we found
imagex = draw_boxes(filename, v_boxes, v_labels, v_scores)
video_writer.write(imagex)
video_reader.release()
video_writer.release()
And here is the function that is spitting out the above image:
def draw_boxes(filename, v_boxes, v_labels, v_scores):
# load the image
data = pyplot.imread(filename)
# plot the image
pyplot.imshow(data)
# get the context for drawing boxes
ax = pyplot.gca()
# plot each box
for i in range(len(v_boxes)):
box = v_boxes[i]
# get coordinates
y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax
# calculate width and height of the box
width, height = x2 - x1, y2 - y1
# create the shape
rect = Rectangle((x1, y1), width, height, fill=False, color='white')
# draw the box
ax.add_patch(rect)
# draw text and score in top left corner
label = "%s (%.3f)" % (v_labels[i], v_scores[i])
pyplot.text(x1, y1, label, color='white')
# show the plot
pyplot.savefig('detected.jpg')
filename = "detected.jpg"
image = load_img(filename)
image_array = img_to_array(image)
image_array = (image_array*255).astype(np.uint8)
return image_array
So, the error was in the draw_boxes function: the Matplotlib-based version draws into the same pyplot figure on every iteration, so the rectangles from earlier frames accumulate.
I replaced draw_boxes with a version that draws directly on the frame with OpenCV, and it worked.
def draw_bounding_boxes(image, v_boxes, v_labels, v_scores):
for i in range(len(v_boxes)):
box = v_boxes[i]
y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax
width, height = x2 - x1, y2 - y1
label = "%s (%.3f)" % (v_labels[i], v_scores[i])
region = np.array([[x1 - 3, y1],
[x1-3, y1 - height-26],
[x1+width+13, y1-height-26],
[x1+width+13, y1]], dtype='int32')
cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 5)
cv2.fillPoly(image,[region], (255, 0, 0))
cv2.putText(image,
label,
(x1+13, y1-13),
cv2.FONT_HERSHEY_SIMPLEX,
1e-3 * image.shape[0],
(0,0,0),
2)
return image
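The loop then calls the new function on the frame directly instead of going through a saved figure. A sketch of the adjusted loop body (it reuses the helpers from the question; note that boxes is also reset for every frame, so detections cannot carry over either):
for _ in tqdm(range(nb_frames)):
    _, frame = video_reader.read()
    cv2.imwrite("framey.jpg", frame)
    image, image_w, image_h = load_image_pixels("framey.jpg", (input_w, input_h))
    yhat = model.predict(image)
    boxes = list()  # fresh list per frame
    for k in range(len(yhat)):
        boxes += decode_netout(yhat[k][0], anchors[k], class_threshold, input_h, input_w)
    correct_yolo_boxes(boxes, image_h, image_w, input_h, input_w)
    do_nms(boxes, 0.5)
    v_boxes, v_labels, v_scores = get_boxes(boxes, labels, class_threshold)
    imagex = draw_bounding_boxes(frame, v_boxes, v_labels, v_scores)
    video_writer.write(imagex)
video_reader.release()
video_writer.release()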
Extracting table data from digital PDFs has been simple using camelot and tabula. However, the solution doesn't work with scanned images of document pages, specifically when the table doesn't have borders and inner grids. I have been trying to generate vertical and horizontal lines using OpenCV, but since the scanned images have slight rotation angles, it is difficult to proceed with that approach.
How can we use OpenCV to generate grids (horizontal and vertical lines) and borders for a scanned document page which contains table data (along with paragraphs of text)? If this is feasible, how can the rotation angle of the scanned image be nullified?
I wrote some code to estimate the horizontal lines from the printed letters on the page. The same could be done for vertical ones, I guess. The code below follows some general assumptions; here are
the basic steps in pseudo-code style:
prepare picture for contour detection
do contour detection
we assume most contours are letters
calc mean width and height of all contours
calc mean area of contours
filter all contours with two conditions:
a) contour (letter) height < meanHeight * 2
b) contour area > 4/5 * meanArea
calc center point of all remaining contours
assume we have line regions (bins)
list all center points which are inside the region
do linear regression of region points
save slope and intercept
calc mean slope and intercept
Here is the full code:
import cv2
import numpy as np
from scipy import stats
def resizeImageByPercentage(img,scalePercent = 60):
width = int(img.shape[1] * scalePercent / 100)
height = int(img.shape[0] * scalePercent / 100)
dim = (width, height)
# resize image
return cv2.resize(img, dim, interpolation = cv2.INTER_AREA)
def calcAverageContourWithAndHeigh(contourList):
hs = list()
ws = list()
for cnt in contourList:
(x, y, w, h) = cv2.boundingRect(cnt)
ws.append(w)
hs.append(h)
return np.mean(ws),np.mean(hs)
def calcAverageContourArea(contourList):
    areaList = list()
    for cnt in contourList:
        # use the area of the minimum-area rectangle
        # (index 2 of minAreaRect would be its angle, not the area)
        (w, h) = cv2.minAreaRect(cnt)[1]
        areaList.append(w * h)
    return np.mean(areaList)
def calcCentroid(contour):
houghMoments = cv2.moments(contour)
# calculate x,y coordinate of centroid
if houghMoments["m00"] != 0: #case no contour could be calculated
cX = int(houghMoments["m10"] / houghMoments["m00"])
cY = int(houghMoments["m01"] / houghMoments["m00"])
else:
# set values as what you need in the situation
cX, cY = -1, -1
return cX,cY
def getCentroidWhenSizeInRange(contourList, letterSizeWidth, letterSizeHigh, deltaOffset, minLetterArea=10.0):
    centroidList = list()
    for cnt in contourList:
        (x, y, w, h) = cv2.boundingRect(cnt)
        (w_rect, h_rect) = cv2.minAreaRect(cnt)[1]
        minRectArea = w_rect * h_rect
        # calc diff
        diffW = abs(w - letterSizeWidth)
        diffH = abs(h - letterSizeHigh)
        # threshold A: almost the mean letter size +- offset
        if diffW < deltaOffset and diffH < deltaOffset:
            # threshold B: larger than min area
            if minRectArea > minLetterArea:
                cX, cY = calcCentroid(cnt)
                if cX != -1 and cY != -1:
                    centroidList.append((cX, cY))
    return centroidList
DEBUGMODE = True
#read image, do git clone https://github.com/WZBSocialScienceCenter/pdftabextract.git for the example
img = cv2.imread('pdftabextract/examples/catalogue_30s/data/ALA1934_RR-excerpt.pdf-2_1.png')
#get some basic infos
imgHeigh, imgWidth, imgChannelAmount = img.shape
if DEBUGMODE:
cv2.imwrite("img00original.jpg",resizeImageByPercentage(img,30))
cv2.imshow("original",img)
# prepare img
imgGrey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# apply Gaussian filter
imgGaussianBlur = cv2.GaussianBlur(imgGrey,(5,5),0)
#make binary img, black or white
_, imgBinThres = cv2.threshold(imgGaussianBlur, 130, 255, cv2.THRESH_BINARY)
## detect contours
contours, _ = cv2.findContours(imgBinThres, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
#we get some letter parameters
averageLetterWidth, averageLetterHigh = calcAverageContourWithAndHeigh(contours)
threshold1AllowedLetterSizeOffset = averageLetterHigh * 2 # double size
averageContourAreaSizeOfMinRect = calcAverageContourArea(contours)
threshHold2MinArea = 4 * averageContourAreaSizeOfMinRect / 5 # 4/5 * mean
print("mean letter width: ", averageLetterWidth)
print("mean letter height: ", averageLetterHigh)
print("threshold 1 tolerance: ", threshold1AllowedLetterSizeOffset)
print("mean letter area: ", averageContourAreaSizeOfMinRect)
print("threshold 2 min letter area: ", threshHold2MinArea)
#we get all centroids of letter-sized contours; the others we ignore
centroidList = getCentroidWhenSizeInRange(contours,averageLetterWidth,averageLetterHigh,threshold1AllowedLetterSizeOffset,threshHold2MinArea)
if DEBUGMODE:
#debug print all centers:
imgFilteredCenter = img.copy()
for cX,cY in centroidList:
#draw in red color as BGR
cv2.circle(imgFilteredCenter, (cX, cY), 5, (0, 0, 255), -1)
cv2.imwrite("img01letterCenters.jpg",resizeImageByPercentage(imgFilteredCenter,30))
cv2.imshow("letterCenters",imgFilteredCenter)
#we estimate a bin width
amountPixelFreeSpace = averageLetterHigh #TODO get better estimate out of histogram
estimatedBinWidth = round( averageLetterHigh + amountPixelFreeSpace) #TODO round better ?
binCollection = dict() #range(0,imgHeigh,estimatedBinWidth)
#we separate the center points into bins by y coordinate
for i in range(0,imgHeigh,estimatedBinWidth):
listCenterPointsInBin = list()
yMin = i
yMax = i + estimatedBinWidth
for cX,cY in centroidList:
if yMin < cY < yMax:#if fits in bin
listCenterPointsInBin.append((cX,cY))
binCollection[i] = listCenterPointsInBin
#we assume all points in a bin are on one line
#model = slope (x) + intercept
#model = m (x) + n
mList = list() #slopes in image coordinates
nList = list() #intercepts in image coordinates
nListRelative = list() #intercepts relative to bin start
minAmountRegressionElements = 12 #also the minimum amount of letters we expect per line
#we do a linear regression for the points in every bin
for startYOfBin, values in binCollection.items():
    #we reshape the values
    xValues = [] #TODO use a shorter transform
yValues = []
for x,y in values:
xValues.append(x)
yValues.append(y)
    #we assume a minimum number of points per bin
if len(xValues) >= minAmountRegressionElements :
slope, intercept, r, p, std_err = stats.linregress(xValues, yValues)
mList.append(slope)
nList.append(intercept)
#we calc the relative intercept
nRelativeToBinStart = intercept - startYOfBin
nListRelative.append(nRelativeToBinStart)
if DEBUGMODE:
    #we debug print all lines in one picture
imgLines = img.copy()
colorOfLine = (0, 255, 0) #green
for i in range(0,len(mList)):
slope = mList[i]
intercept = nList[i]
        startPoint = (0, int(intercept)) #better round ?
        endPointY = int(slope * imgWidth + intercept)
        if endPointY < 0:
            endPointY = 0
        endPoint = (imgWidth, endPointY) #line end at the right image edge
cv2.line(imgLines, startPoint, endPoint, colorOfLine, 2)
cv2.imwrite("img02lines.jpg",resizeImageByPercentage(imgLines,30))
cv2.imshow("linesOfLetters ",imgLines)
#we assume the mean gets it right
meanIntercept = np.mean(nListRelative)
meanSlope = np.mean(mList)
print("meanIntercept :", meanIntercept)
print("meanSlope ", meanSlope)
#TODO calc angle with math.atan(slope) ...
if DEBUGMODE:
cv2.waitKey(0)
Original image:
Center points of the letters:
Fitted lines:
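Following the TODO in the code, the mean slope can be turned into an angle and the page rotated to nullify the skew. A small sketch (assuming meanSlope, img, imgWidth and imgHeigh from the code above; the sign of the angle may need flipping depending on the slope direction):
import math

angleDegree = math.degrees(math.atan(meanSlope))
# rotate around the image center to undo the estimated skew
center = (imgWidth / 2, imgHeigh / 2)
rotationMatrix = cv2.getRotationMatrix2D(center, angleDegree, 1.0)
imgDeskewed = cv2.warpAffine(img, rotationMatrix, (imgWidth, imgHeigh))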
I had the same problem some time ago, and this tutorial is the solution to it. It explains using pdftabextract, a Python library by Markus Konrad, which leverages OpenCV's Hough transform to detect the lines and works even if the scanned document is a bit tilted. The tutorial walks you through parsing a 1920s German newspaper.
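For a rough idea of the underlying technique, detecting page lines with OpenCV's probabilistic Hough transform looks something like this (a sketch with arbitrary thresholds, not code from the tutorial):
import cv2
import numpy as np

img = cv2.imread("page.png", cv2.IMREAD_GRAYSCALE)
edges = cv2.Canny(img, 50, 150)
# the probabilistic Hough transform returns line segments as (x1, y1, x2, y2)
lines = cv2.HoughLinesP(edges, rho=1, theta=np.pi / 180, threshold=100,
                        minLineLength=100, maxLineGap=10)
vis = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
if lines is not None:
    for x1, y1, x2, y2 in lines[:, 0]:
        cv2.line(vis, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.imwrite("lines.png", vis)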
I am doing a college class project on image processing. This is my original image:
I want to join nearby/overlapping bounding boxes on individual text line images, but I don't know how. My code looks like this so far (thanks to @HansHirse for the help):
import os
import cv2
import numpy as np
from scipy import stats
image = cv2.imread('example.png')
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
ret,thresh = cv2.threshold(gray,127,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
#dilation
kernel = np.ones((5,5), np.uint8)
img_dilation = cv2.dilate(thresh, kernel, iterations=1)
#find contours
ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# https://www.pyimagesearch.com/2015/04/20/sorting-contours-using-python-and-opencv/
def sort_contours(cnts, method="left-to-right"):
# initialize the reverse flag and sort index
reverse = False
i = 0
# handle if we need to sort in reverse
if method == "right-to-left" or method == "bottom-to-top":
reverse = True
# handle if we are sorting against the y-coordinate rather than
# the x-coordinate of the bounding box
if method == "top-to-bottom" or method == "bottom-to-top":
i = 1
# construct the list of bounding boxes and sort them from top to
# bottom
boundingBoxes = [cv2.boundingRect(c) for c in cnts]
(cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes),
key=lambda b: b[1][i], reverse=reverse))
# return the list of sorted contours and bounding boxes
return (cnts, boundingBoxes)
sortedctrs,sortedbbs=sort_contours(ctrs)
xyminmax=[]
for cnt in sortedctrs:
x, y, w, h = cv2.boundingRect(cnt)
xyminmax.append([x,y,x+w,y+h])
distances=[]
for i in range(len(xyminmax)):
try:
first_xmax = xyminmax[i][2]
second_xmin = xyminmax[i + 1][0]
distance=abs(second_xmin-first_xmax)
distances.append(distance)
except IndexError:
pass
THRESHOLD=stats.mode(distances, axis=None)[0][0]
new_rects=[]
for i in range(len(xyminmax)):
try:
# [xmin,ymin,xmax,ymax]
first_ymin=xyminmax[i][1]
first_ymax=xyminmax[i][3]
second_ymin=xyminmax[i+1][1]
second_ymax=xyminmax[i+1][3]
first_xmax = xyminmax[i][2]
second_xmin = xyminmax[i+1][0]
firstheight=abs(first_ymax-first_ymin)
secondheight=abs(second_ymax-second_ymin)
distance=abs(second_xmin-first_xmax)
if distance<THRESHOLD:
new_xmin=xyminmax[i][0]
new_xmax=xyminmax[i+1][2]
if first_ymin>second_ymin:
new_ymin=second_ymin
else:
new_ymin = first_ymin
if firstheight>secondheight:
new_ymax = first_ymax
else:
new_ymax = second_ymax
new_rects.append([new_xmin,new_ymin,new_xmax,new_ymax])
else:
new_rects.append(xyminmax[i])
except IndexError:
pass
for rect in new_rects:
cv2.rectangle(image, (rect[0], rect[1]), (rect[2], rect[3]), (121, 11, 189), 2)
cv2.imwrite("result.png",image)
which produces this image as a result:
I want to join very close or overlapping bounding boxes such as these
into a single bounding box so the formula doesn't get separated into single characters. I have tried cv2.groupRectangles, but the printed results were just NULL.
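For what it's worth, cv2.groupRectangles behaves that way by design: with groupThreshold=1 it keeps only clusters containing more than one rectangle (singletons are dropped, hence the empty output), and it averages each cluster instead of taking the union, so it isn't suited to this merging task. A quick check (duplicating the list is a common workaround to keep singletons):
import cv2

rects = [[10, 10, 50, 20], [55, 12, 40, 18]]  # made-up (x, y, w, h) boxes
grouped, weights = cv2.groupRectangles(rects + rects, groupThreshold=1, eps=0.2)
print(grouped)  # averaged clusters, not unions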
So, here comes my solution. I partially modified your (initial) code to my preferred naming, etc. Also, I commented all the stuff I added.
import cv2
import numpy as np
image = cv2.imread('images/example.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
kernel = np.ones((5, 5), np.uint8)
img_dilated = cv2.dilate(thresh, kernel, iterations = 1)
cnts, _ = cv2.findContours(img_dilated.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Array of initial bounding rects
rects = []
# Bool array indicating which initial bounding rect has
# already been used
rectsUsed = []
# Just initialize bounding rects and set all bools to false
for cnt in cnts:
rects.append(cv2.boundingRect(cnt))
rectsUsed.append(False)
# Sort bounding rects by x coordinate
def getXFromRect(item):
return item[0]
rects.sort(key = getXFromRect)
# Array of accepted rects
acceptedRects = []
# Merge threshold for x coordinate distance
xThr = 5
# Iterate all initial bounding rects
for supIdx, supVal in enumerate(rects):
if (rectsUsed[supIdx] == False):
# Initialize current rect
currxMin = supVal[0]
currxMax = supVal[0] + supVal[2]
curryMin = supVal[1]
curryMax = supVal[1] + supVal[3]
# This bounding rect is used
rectsUsed[supIdx] = True
# Iterate all initial bounding rects
# starting from the next
for subIdx, subVal in enumerate(rects[(supIdx+1):], start = (supIdx+1)):
# Initialize merge candidate
candxMin = subVal[0]
candxMax = subVal[0] + subVal[2]
candyMin = subVal[1]
candyMax = subVal[1] + subVal[3]
# Check if x distance between current rect
# and merge candidate is small enough
if (candxMin <= currxMax + xThr):
# Reset coordinates of current rect
currxMax = candxMax
curryMin = min(curryMin, candyMin)
curryMax = max(curryMax, candyMax)
# Merge candidate (bounding rect) is used
rectsUsed[subIdx] = True
else:
break
# No more merge candidates possible, accept current rect
acceptedRects.append([currxMin, curryMin, currxMax - currxMin, curryMax - curryMin])
for rect in acceptedRects:
img = cv2.rectangle(image, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (121, 11, 189), 2)
cv2.imwrite("images/result.png", image)
For your example
I get the following output
Now, you have to find a proper threshold to meet your expectations. Maybe there is even some more work to do, especially to get the whole formula, since the distances don't vary that much.
Disclaimer: I'm new to Python in general, and especially to the Python API of OpenCV (C++ for the win). Comments, improvements, and highlighting of Python no-gos are highly welcome!
Here is a slightly different approach, using the OpenCV Wrapper library.
import cv2
import opencv_wrapper as cvw
image = cv2.imread("example.png")
gray = cvw.bgr2gray(image)
thresh = cvw.threshold_otsu(gray, inverse=True)
# dilation
img_dilation = cvw.dilate(thresh, 5)
# Find contours
contours = cvw.find_external_contours(img_dilation)
# Map contours to bounding rectangles, using bounding_rect property
rects = map(lambda c: c.bounding_rect, contours)
# Sort rects by top-left x (rect.x == rect.tl.x)
sorted_rects = sorted(rects, key=lambda r: r.x)
# Distance threshold
dt = 5
# List of final, joined rectangles
final_rects = [sorted_rects[0]]
for rect in sorted_rects[1:]:
prev_rect = final_rects[-1]
# Shift rectangle `dt` back, to find out if they overlap
shifted_rect = cvw.Rect(rect.tl.x - dt, rect.tl.y, rect.width, rect.height)
intersection = cvw.rect_intersection(prev_rect, shifted_rect)
if intersection is not None:
# Join the two rectangles
min_y = min((prev_rect.tl.y, rect.tl.y))
max_y = max((prev_rect.bl.y, rect.bl.y))
max_x = max((prev_rect.br.x, rect.br.x))
width = max_x - prev_rect.tl.x
height = max_y - min_y
new_rect = cvw.Rect(prev_rect.tl.x, min_y, width, height)
# Add new rectangle to final list, making it the new prev_rect
# in the next iteration
final_rects[-1] = new_rect
else:
# If no intersection, add the box
final_rects.append(rect)
for rect in sorted_rects:
cvw.rectangle(image, rect, cvw.Color.MAGENTA, line_style=cvw.LineStyle.DASHED)
for rect in final_rects:
cvw.rectangle(image, rect, cvw.Color.GREEN, thickness=2)
cv2.imwrite("result.png", image)
And the result
The green boxes are the final result, while the magenta boxes are the original ones.
I used the same threshold as @HansHirse.
The equals sign still needs some work: either use a higher dilation kernel size or apply the same technique vertically.
Disclosure: I am the author of OpenCV Wrapper.
Easy-to-read solution:
import cv2
import imutils

contours = get_contours(frame)
boxes = [cv2.boundingRect(c) for c in contours]
boxes = merge_boxes(boxes, x_val=40, y_val=20)  # x_val and y_val are the axis thresholds
def get_contours(frame): # Returns a list of contours
contours = cv2.findContours(frame, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = imutils.grab_contours(contours)
return contours
def merge_boxes(boxes, x_val, y_val):
size = len(boxes)
if size < 2:
return boxes
if size == 2:
if boxes_mergeable(boxes[0], boxes[1], x_val, y_val):
boxes[0] = union(boxes[0], boxes[1])
del boxes[1]
return boxes
boxes = sorted(boxes, key=lambda r: r[0])
i = size - 2
while i >= 0:
if boxes_mergeable(boxes[i], boxes[i + 1], x_val, y_val):
boxes[i] = union(boxes[i], boxes[i + 1])
del boxes[i + 1]
i -= 1
return boxes
def boxes_mergeable(box1, box2, x_val, y_val):
(x1, y1, w1, h1) = box1
(x2, y2, w2, h2) = box2
return max(x1, x2) - min(x1, x2) - minx_w(x1, w1, x2, w2) < x_val \
and max(y1, y2) - min(y1, y2) - miny_h(y1, h1, y2, h2) < y_val
def minx_w(x1, w1, x2, w2):
return w1 if x1 <= x2 else w2
def miny_h(y1, h1, y2, h2):
return h1 if y1 <= y2 else h2
def union(a, b):
x = min(a[0], b[0])
y = min(a[1], b[1])
w = max(a[0] + a[2], b[0] + b[2]) - x
h = max(a[1] + a[3], b[1] + b[3]) - y
return x, y, w, h
--> If you have bounding boxes and want to merge along both X and Y directions, use this snippet
--> Adjust x_pixel_value and y_pixel_value to your preferences
--> But for this, you need to have the bounding boxes
import cv2
img = cv2.imread("your_image_path")
x_pixel_value = 5
y_pixel_value = 6
bboxes_list = [] # your bounding boxes list
rects_used = []
for i in bboxes_list:
rects_used.append(False)
end_bboxes_list = []
for enum,i in enumerate(bboxes_list):
if rects_used[enum] == True:
continue
xmin = i[0]
xmax = i[2]
ymin = i[1]
ymax = i[3]
for enum1,j in enumerate(bboxes_list[(enum+1):], start = (enum+1)):
i_xmin = j[0]
i_xmax = j[2]
i_ymin = j[1]
i_ymax = j[3]
if rects_used[enum1] == False:
            if abs(ymin - i_ymin) < y_pixel_value:  # vertically close
                if abs(xmin - i_xmax) < x_pixel_value or abs(xmax - i_xmin) < x_pixel_value:  # horizontally adjacent
rects_used[enum1] = True
xmin = min(xmin,i_xmin)
xmax = max(xmax,i_xmax)
ymin = min(ymin,i_ymin)
ymax = max(ymax,i_ymax)
final_box = [xmin,ymin,xmax,ymax]
end_bboxes_list.append(final_box)
for i in end_bboxes_list:
cv2.rectangle(img,(i[0],i[1]),(i[2],i[3]), color = [0,255,0], thickness = 2)
cv2.imshow("Image",img)
cv2.waitKey(10000)
cv2.destroyAllWindows()