Python OpenCV append matches' center x, y coordinates in tuples

Python OpenCV append matches' center x, y coordinates in tuples - python

I have this simple opencv template matching function written in Python.
image:
template:
def find(object, sensitivity):
screen = "tool.png"
screen_read = cv2.imread(screen)
screen_gray = cv2.cvtColor(screen_read, cv2.COLOR_BGR2GRAY)
obj = cv2.imread(object, cv2.IMREAD_GRAYSCALE)
w, h = obj.shape[::-1]
location = np.where(cv2.matchTemplate(screen_gray, obj, cv2.TM_CCORR_NORMED) >= sensitivity)
positions = []
for xy in zip(*location[::-1]):
cv2.rectangle(screen_read, xy, (xy[0] + w, xy[1] + h), (0, 0, 255), 1)
x = random(xy[0], (xy[0] + w) - 2)
y = random(xy[1], (xy[1] + h) - 2)
print(x, y)
positions.append(str(x) + ", " + str(y))
#cv2.imshow("Test", screen_read)
#cv2.waitKey(0)
find("enemylogo.png", 0.90)
It will find all the templates correctly, as shown here:
However, my goal here is to pass the center coordinate to be used in loop, outside the function. For this, I need to store the x, y coordinates in an array (positions), as tuples.
However, I'm not getting desired results, it's adding too many tuples instead of only 2.
What I'm trying to do is:
for x in find("enemylogo.png", 0.90):
click(x) #x would be the coordinate of every template matched.
Could someone help me, please?

The line location = np.where.... will give you a lot of matches, and many of them will be right next to each other. Another technique is to recursively use minMaxLoc. This function will only give you the best result. But if you overwrite the best match with zeros on the first pass through, the second pass will find another match.
import cv2
import numpy as np
def find_templates(obj, sensitivity):
image = cv2.imread('tool.png', cv2.IMREAD_COLOR )
template = cv2.imread(obj, cv2.IMREAD_COLOR)
h, w = template.shape[:2]
print('h', h, 'w', w)
method = cv2.TM_CCORR_NORMED
threshold = 0.90
res = cv2.matchTemplate(image, template, method)
res_h, res_w = res.shape[:2]
# fake out max_val for first run through loop
max_val = 1
centers = []
while max_val > sensitivity:
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
if max_val > sensitivity:
centers.append( (max_loc[0] + w//2, max_loc[1] + h//2) )
x1 = max(max_loc[0] - w//2, 0)
y1 = max(max_loc[1] - h//2, 0)
x2 = min(max_loc[0] + w//2, res_w)
y2 = min(max_loc[1] + h//2, res_h)
res[y1:y2, x1:x2] = 0
image = cv2.rectangle(image,(max_loc[0],max_loc[1]), (max_loc[0]+w+1, max_loc[1]+h+1), (0,255,0) )
print(centers)
cv2.imwrite('output.png', image)
find_templates("enemy_logo.png", 0.90)
which gives
[(52, 52), (169, 52)]

Related

Is it possible to find the bending point using opencv Python?

Aim of my program is find the angle of bending Led.
I got the angle using convexity defects in convex hull but the midpoint is move away from center point of that bend.
original image
original
below image is the output of program
output
black dot is starting point.
red dot is end point.
blue dot is mid point.
Now I want move blue dot to the center of the curve
my code
import cv2
import numpy as np
from math import sqrt
from collections import OrderedDict
def findangle(x1,y1,x2,y2,x3,y3):
ria = np.arctan2(y2 - y1, x2 - x1) - np.arctan2(y3 - y1, x3 - x1)
if ria > 0:
if ria < 3:
webangle = int(np.abs(ria * 180 / np.pi))
elif ria > 3:
webangle = int(np.abs(ria * 90 / np.pi))
elif ria < 0:
if ria < -3:
webangle = int(np.abs(ria * 90 / np.pi))
elif ria > -3:
webangle = int(np.abs(ria * 180 / np.pi))
return webangle
image = cv2.imread("cam/2022-09-27 10:01:57image.png")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY)
contours,hie= cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
selected_contour = max(contours, key=lambda x: cv2.contourArea(x))
# Draw Contour
approx = cv2.approxPolyDP(selected_contour, 0.0035 * cv2.arcLength(selected_contour, True), True)
for point in approx:
cv2.drawContours(image, [point], 0, (0, 0, 255), 3)
convexHull = cv2.convexHull(selected_contour,returnPoints=False)
cv2.drawContours(image, cv2.convexHull(selected_contour), 0, (0, 255, 0), 3)
convexHull[::-1].sort(axis=0)
convexityDefects = cv2.convexityDefects(selected_contour, convexHull)
start2,distance=[],[]
for i in range(convexityDefects.shape[0]):
s, e, f, d = convexityDefects[i, 0]
start = tuple(selected_contour[s][0])
end = tuple(selected_contour[e][0])
far = tuple(selected_contour[f][0])
start2.append(start)
cv2.circle(image, start, 2, (255, 0, 0), 3)
cv2.line(image,start,end , (0, 255, 0), 3)
distance.append(d)
distance.sort(reverse=True)
for i in range(convexityDefects.shape[0]):
s, e, f, d = convexityDefects[i, 0]
if distance[0]==d:
defect={"s":s,"e":e,"f":f,"d":d}
cv2.circle(image, selected_contour[defect.get("f")][0], 2, (255, 0, 0), 3)
cv2.circle(image, selected_contour[defect.get("s")][0], 2, (0, 0, 0), 3)
cv2.circle(image, selected_contour[defect.get("e")][0], 2, (0, 0, 255), 3)
x1, y1 = selected_contour[defect.get("f")][0]
x2, y2 = selected_contour[defect.get("e")][0]
x3, y3 = selected_contour[defect.get("s")][0]
cv2.line(image,(x1,y1),(x2,y2),(255,200,0),2)
cv2.line(image,(x1,y1),(x3,y3),(255,200,0),2)
cv2.putText(image, "Web Angle : " + str((findangle(x1,y1,x2,y2,x3,y3))), (50, 200), cv2.FONT_HERSHEY_SCRIPT_SIMPLEX, 1, (0,0,0),2,cv2.LINE_AA)
cv2.imshow("frame",image)
cv2.waitKey(0)
cv2.destroyAllWindows()
so i want any concept to get exact center of the bend point.

Here is one way to do that in Python/OpenCV. I make no guarantees that it is universal and would work on all such images. I also leave it for others to add trapping for empty arrays/lists and other general best practices.
Read the input
Threshold to binary on white using cv2.inRange()
Apply morphology to close up the gap near the top
Skeletonize the binary image
Get the x and y coordinates of the points of the skeleton
Zip the x and y coordinates
Sort the zipped data by x
Sort another copy of the zipped data by y
Get the first line (end points) from the top for 40% of y from the y sorted data, since that region of the skeleton is nearly straight
Get the first line (end points) from the left for 40% of x from the x sorted data, since that region of the skeleton is nearly straight
Get the intersection point of these two lines
Compute the x and y derivatives of the x coordinates and the y coordinates, respectively
Loop over each point and compute the slope from the derivatives, which will be tangent to the skeleton at the point
Then still in the loop compute the inverse slope of the line from the point to the previously computed intersection point. This will be normal (perpendicular) to this line.
Compute the difference in slopes and find the point where the difference is minimum. This will be the bend point.
Draw relevant lines and points on skeleton and input
Save results
Input:
import cv2
import numpy as np
import skimage.morphology
img = cv2.imread("wire.png")
# create a binary thresholded image
lower = (255,255,255)
upper = (255,255,255)
thresh = cv2.inRange(img, lower, upper)
thresh = (thresh/255).astype(np.float64)
# apply morphology to connect at top
kernel = np.ones((11,11), np.uint8)
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# apply skeletonization
skeleton = skimage.morphology.skeletonize(thresh)
skeleton = (255*skeleton).clip(0,255).astype(np.uint8)
# get skeleton points
pts = np.where(skeleton != 0)
x = pts[1]
y = pts[0]
num_pts = len(x)
print(num_pts)
# zip x and y
xy1 = zip(x,y)
xy2 = zip(x,y)
# sort on y
xy_sorty = sorted(xy1, key = lambda x: x[1])
#print(xy_sorty[0])
# sort on x
xy_sortx = sorted(xy2, key = lambda x: x[0])
#print(xy_sortx[0])
# unzip x and y for xy_sortedy
xu1, yu1 = zip(*xy_sorty)
# get first line from top
# find miny from y sort, then get point 40% down from miny
miny = np.amin(yu1)
y1 = miny
[xy1] = [(xi, yi) for (xi, yi) in xy_sorty if abs(yi - y1) <= 0.00001]
x1 = xy1[0]
y1 = xy1[1]
#print(x1,y1)
maxy = np.amax(yu1)
dely = maxy - miny
y2 = int(y1+0.4*dely)
[xy2] = [(xi, yi) for (xi, yi) in xy_sorty if abs(yi - y2) <= 0.00001]
x2 = xy2[0]
y2 = xy2[1]
#print(x2,y2)
# unzip x and y for xy_sortedx
xu2, yu2 = zip(*xy_sortx)
# get first line from left
# find minx from x sort, then get point 40% right from minx
minx = np.amin(xu2)
x3 = minx
[xy3] = [(xi, yi) for (xi, yi) in xy_sortx if abs(xi - x3) <= 0.00001]
x3 = xy3[0]
y3 = xy3[1]
#print(x3,y3)
maxx = np.amax(xu2)
delx = maxx - minx
x4 = int(x3+0.4*delx)
[xy4] = [(xi, yi) for (xi, yi) in xy_sortx if abs(xi - x4) <= 0.00001]
x4 = xy4[0]
y4 = xy4[1]
#print(x4,y4)
# draw lines on copy of skeleton
skeleton_lines = skeleton.copy()
skeleton_lines = cv2.merge([skeleton_lines,skeleton_lines,skeleton_lines])
cv2.line(skeleton_lines, (x1,y1), (x2,y2), (0,0,255), 2)
cv2.line(skeleton_lines, (x3,y3), (x4,y4), (0,0,255), 2)
# get intersection between line1 (x1,y1 to x2,y2) and line2 (x3,y3 to x4,y4) and draw circle
# https://en.wikipedia.org/wiki/Line–line_intersection
den = (x1-x2)*(y3-y4) - (y1-y2)*(x3-x4)
px = ((x1*y2-y1*x2)*(x3-x4) - (x1-x2)*(x3*y4-y3*x4))/den
py = ((x1*y2-y1*x2)*(y3-y4) - (y1-y2)*(x3*y4-y3*x4))/den
px = int(px)
py = int(py)
cv2.circle(skeleton_lines, (px,py), 3, (0,255,0), -1)
# compute first derivatives in x and also in y
dx = np.gradient(x, axis=0)
dy = np.gradient(y, axis=0)
# loop over each point
# get the slope of the tangent to the curve
# get the inverse slop of the line from the point to the intersection point (inverse slope is normal direction)
# get difference in slopes and find the point that has the minimum difference
min_diff = 1000000
eps = 0.0000000001
for i in range(num_pts):
slope1 = abs(dy[i]/(dx[i] + eps))
slope2 = abs((px - x[i])/(py - y[i] + eps))
slope_diff = abs(slope1 - slope2)
if slope_diff < min_diff:
min_diff = slope_diff
bend_x = x[i]
bend_y = y[i]
#print(x[i], y[i], min_diff)
bend_x = int(bend_x)
bend_y = int(bend_y)
#print(bend_x, bend_y)
cv2.line(skeleton_lines, (px,py), (bend_x,bend_y), (0,0,255), 2)
cv2.circle(skeleton_lines, (bend_x,bend_y), 3, (0,255,0), -1)
# get end points and bend point and draw on copy of input
result = img.copy()
end1 = (x1,y1)
end2 = (x3,y3)
bend = (bend_x,bend_y)
print("end1:", end1)
print("end2:", end2)
print("bend:", bend)
cv2.circle(result, (end1), 3, (0,0,255), -1)
cv2.circle(result, (end2), 3, (0,0,255), -1)
cv2.circle(result, (bend), 3, (0,0,255), -1)
# save result
cv2.imwrite("wire_skeleton.png", skeleton)
cv2.imwrite("wire_skeleton_lines.png", skeleton_lines)
cv2.imwrite("wire_result.png", result)
# show results
cv2.imshow("thresh", (255*thresh).astype(np.uint8))
cv2.imshow("skeleton", skeleton)
cv2.imshow("skeleton_lines", skeleton_lines)
cv2.imshow("skeleton_result", result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Skeleton:
Skeleton with lines:
Result showing end points and bend point:

Efficiently transform and blend images in OpenCV

I'm making a program to simulate brush strokes with a given image, and I'm held up the actual putting the brush strokes down part. Right now I'm using OpenCV and Numpy to rotate and mix images together by setting a section of the original image to be the new one. Here's the current code.
def rotate(img, deg):
"""Rotates an image a certain number of degrees"""
h, w, c = img.shape
matrix = cv2.getRotationMatrix2D((w / 2, h / 2), deg, 1)
out = cv2.warpAffine(img, matrix, (w, h), flags=cv2.INTER_LINEAR)
return out
def paste(img1, img2, mask, x, y):
"""Pastes one image on top of another at a given location"""
h1, w1, _ = img1.shape # dimensions of original image
h2, w2, _ = img2.shape # dimensions of pasted image
if y > h1 or x > w1:
return img1
stamp = img2
factor = mask
subx = ((w2 + x) - w1)
suby = ((h2 + y) - h1)
if suby <= 0:
suby = 0
if subx <= 0:
subx = 0
stamp = img2[0:h2 - suby, 0:w2 - subx]
factor = mask[0:h2 - suby, 0:w2 - subx]
snippet = img1[y:y + (h2 - suby), x:x + (w2 - subx)]
stamp = cv2.add(cv2.multiply(snippet, (1-factor)), cv2.multiply(stamp, factor))
out = img1
out[y:y + h2, x:x + w2] = stamp
return out
It works, but only barely. There are issues that come up with certain scenarios, it's very limited as to what you're allowed to do with it, and is extremely slow. Is there an easier/better way to transform and blend images with OpenCV, or would a different framework (sprite/object based, like pygame) be better for this? Thanks!

I suggest you try seamless cloning. Check this excellent tutorial from Satya Mallick: https://www.learnopencv.com/seamless-cloning-using-opencv-python-cpp/
In this case you can substitute the plane in the tutorial for your brush.

How to join nearby bounding boxes in OpenCV Python

I am doing a college class project on image processing. This is my original image:
I want to join nearby/overlapping bounding boxes on individual text line images, but I don't know how. My code looks like this so far (thanks to #HansHirse for the help):
import os
import cv2
import numpy as np
from scipy import stats
image = cv2.imread('example.png')
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
ret,thresh = cv2.threshold(gray,127,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
#dilation
kernel = np.ones((5,5), np.uint8)
img_dilation = cv2.dilate(thresh, kernel, iterations=1)
#find contours
ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# https://www.pyimagesearch.com/2015/04/20/sorting-contours-using-python-and-opencv/
def sort_contours(cnts, method="left-to-right"):
# initialize the reverse flag and sort index
reverse = False
i = 0
# handle if we need to sort in reverse
if method == "right-to-left" or method == "bottom-to-top":
reverse = True
# handle if we are sorting against the y-coordinate rather than
# the x-coordinate of the bounding box
if method == "top-to-bottom" or method == "bottom-to-top":
i = 1
# construct the list of bounding boxes and sort them from top to
# bottom
boundingBoxes = [cv2.boundingRect(c) for c in cnts]
(cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes),
key=lambda b: b[1][i], reverse=reverse))
# return the list of sorted contours and bounding boxes
return (cnts, boundingBoxes)
sortedctrs,sortedbbs=sort_contours(ctrs)
xyminmax=[]
for cnt in sortedctrs:
x, y, w, h = cv2.boundingRect(cnt)
xyminmax.append([x,y,x+w,y+h])
distances=[]
for i in range(len(xyminmax)):
try:
first_xmax = xyminmax[i][2]
second_xmin = xyminmax[i + 1][0]
distance=abs(second_xmin-first_xmax)
distances.append(distance)
except IndexError:
pass
THRESHOLD=stats.mode(distances, axis=None)[0][0]
new_rects=[]
for i in range(len(xyminmax)):
try:
# [xmin,ymin,xmax,ymax]
first_ymin=xyminmax[i][1]
first_ymax=xyminmax[i][3]
second_ymin=xyminmax[i+1][1]
second_ymax=xyminmax[i+1][3]
first_xmax = xyminmax[i][2]
second_xmin = xyminmax[i+1][0]
firstheight=abs(first_ymax-first_ymin)
secondheight=abs(second_ymax-second_ymin)
distance=abs(second_xmin-first_xmax)
if distance<THRESHOLD:
new_xmin=xyminmax[i][0]
new_xmax=xyminmax[i+1][2]
if first_ymin>second_ymin:
new_ymin=second_ymin
else:
new_ymin = first_ymin
if firstheight>secondheight:
new_ymax = first_ymax
else:
new_ymax = second_ymax
new_rects.append([new_xmin,new_ymin,new_xmax,new_ymax])
else:
new_rects.append(xyminmax[i])
except IndexError:
pass
for rect in new_rects:
cv2.rectangle(image, (rect[0], rect[1]), (rect[2], rect[3]), (121, 11, 189), 2)
cv2.imwrite("result.png",image)
which produces this image as a result:
I want to join very close or overlapping bounding boxes such as these
into a single bounding box so the formula doesn't get separated into single characters. I have tried using cv2.groupRectangles but the print results were just NULL.

So, here comes my solution. I partially modified your (initial) code to my preferred naming, etc. Also, I commented all the stuff, I added.
import cv2
import numpy as np
image = cv2.imread('images/example.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
kernel = np.ones((5, 5), np.uint8)
img_dilated = cv2.dilate(thresh, kernel, iterations = 1)
cnts, _ = cv2.findContours(img_dilated.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Array of initial bounding rects
rects = []
# Bool array indicating which initial bounding rect has
# already been used
rectsUsed = []
# Just initialize bounding rects and set all bools to false
for cnt in cnts:
rects.append(cv2.boundingRect(cnt))
rectsUsed.append(False)
# Sort bounding rects by x coordinate
def getXFromRect(item):
return item[0]
rects.sort(key = getXFromRect)
# Array of accepted rects
acceptedRects = []
# Merge threshold for x coordinate distance
xThr = 5
# Iterate all initial bounding rects
for supIdx, supVal in enumerate(rects):
if (rectsUsed[supIdx] == False):
# Initialize current rect
currxMin = supVal[0]
currxMax = supVal[0] + supVal[2]
curryMin = supVal[1]
curryMax = supVal[1] + supVal[3]
# This bounding rect is used
rectsUsed[supIdx] = True
# Iterate all initial bounding rects
# starting from the next
for subIdx, subVal in enumerate(rects[(supIdx+1):], start = (supIdx+1)):
# Initialize merge candidate
candxMin = subVal[0]
candxMax = subVal[0] + subVal[2]
candyMin = subVal[1]
candyMax = subVal[1] + subVal[3]
# Check if x distance between current rect
# and merge candidate is small enough
if (candxMin <= currxMax + xThr):
# Reset coordinates of current rect
currxMax = candxMax
curryMin = min(curryMin, candyMin)
curryMax = max(curryMax, candyMax)
# Merge candidate (bounding rect) is used
rectsUsed[subIdx] = True
else:
break
# No more merge candidates possible, accept current rect
acceptedRects.append([currxMin, curryMin, currxMax - currxMin, curryMax - curryMin])
for rect in acceptedRects:
img = cv2.rectangle(image, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (121, 11, 189), 2)
cv2.imwrite("images/result.png", image)
For your example
I get the following output
Now, you have to find a proper threshold to meet your expectations. Maybe, there is even some more work to do, especially to get the whole formula, since the distances don't vary that much.
Disclaimer: I'm new to Python in general, and specially to the Python API of OpenCV (C++ for the win). Comments, improvements, highlighting Python no-gos are highly welcome!

Here is a slightly different approach, using the OpenCV Wrapper library.
import cv2
import opencv_wrapper as cvw
image = cv2.imread("example.png")
gray = cvw.bgr2gray(image)
thresh = cvw.threshold_otsu(gray, inverse=True)
# dilation
img_dilation = cvw.dilate(thresh, 5)
# Find contours
contours = cvw.find_external_contours(img_dilation)
# Map contours to bounding rectangles, using bounding_rect property
rects = map(lambda c: c.bounding_rect, contours)
# Sort rects by top-left x (rect.x == rect.tl.x)
sorted_rects = sorted(rects, key=lambda r: r.x)
# Distance threshold
dt = 5
# List of final, joined rectangles
final_rects = [sorted_rects[0]]
for rect in sorted_rects[1:]:
prev_rect = final_rects[-1]
# Shift rectangle `dt` back, to find out if they overlap
shifted_rect = cvw.Rect(rect.tl.x - dt, rect.tl.y, rect.width, rect.height)
intersection = cvw.rect_intersection(prev_rect, shifted_rect)
if intersection is not None:
# Join the two rectangles
min_y = min((prev_rect.tl.y, rect.tl.y))
max_y = max((prev_rect.bl.y, rect.bl.y))
max_x = max((prev_rect.br.x, rect.br.x))
width = max_x - prev_rect.tl.x
height = max_y - min_y
new_rect = cvw.Rect(prev_rect.tl.x, min_y, width, height)
# Add new rectangle to final list, making it the new prev_rect
# in the next iteration
final_rects[-1] = new_rect
else:
# If no intersection, add the box
final_rects.append(rect)
for rect in sorted_rects:
cvw.rectangle(image, rect, cvw.Color.MAGENTA, line_style=cvw.LineStyle.DASHED)
for rect in final_rects:
cvw.rectangle(image, rect, cvw.Color.GREEN, thickness=2)
cv2.imwrite("result.png", image)
And the result
The green boxes are the final result, while the magenta boxes are the original ones.
I used the same threshold as #HansHirse.
The equals sign still needs some work. Either a higher dilation kernel size or use the same technique vertically.
Disclosure: I am the author of OpenCV Wrapper.

Easy-to-read solution:
contours = get_contours(frame)
boxes = [cv2.boundingRect(c) for c in contours]
boxes = merge_boxes(boxes, x_val=40, y_val=20) # Where x_val and y_val are axis thresholds
def get_contours(frame): # Returns a list of contours
contours = cv2.findContours(frame, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = imutils.grab_contours(contours)
return contours
def merge_boxes(boxes, x_val, y_val):
size = len(boxes)
if size < 2:
return boxes
if size == 2:
if boxes_mergeable(boxes[0], boxes[1], x_val, y_val):
boxes[0] = union(boxes[0], boxes[1])
del boxes[1]
return boxes
boxes = sorted(boxes, key=lambda r: r[0])
i = size - 2
while i >= 0:
if boxes_mergeable(boxes[i], boxes[i + 1], x_val, y_val):
boxes[i] = union(boxes[i], boxes[i + 1])
del boxes[i + 1]
i -= 1
return boxes
def boxes_mergeable(box1, box2, x_val, y_val):
(x1, y1, w1, h1) = box1
(x2, y2, w2, h2) = box2
return max(x1, x2) - min(x1, x2) - minx_w(x1, w1, x2, w2) < x_val \
and max(y1, y2) - min(y1, y2) - miny_h(y1, h1, y2, h2) < y_val
def minx_w(x1, w1, x2, w2):
return w1 if x1 <= x2 else w2
def miny_h(y1, h1, y2, h2):
return h1 if y1 <= y2 else h2
def union(a, b):
x = min(a[0], b[0])
y = min(a[1], b[1])
w = max(a[0] + a[2], b[0] + b[2]) - x
h = max(a[1] + a[3], b[1] + b[3]) - y
return x, y, w, h

--> If you have bounding boxes and want to merge along both X and Y directions, use this snippet
--> Adjust x_pixel_value and y_pixel_value to your preferences
--> But for this, you need to have the bounding boxes
import cv2
img = cv2.imread(your image path)
x_pixel_value = 5
y_pixel_value = 6
bboxes_list = [] # your bounding boxes list
rects_used = []
for i in bboxes_list:
rects_used.append(False)
end_bboxes_list = []
for enum,i in enumerate(bboxes_list):
if rects_used[enum] == True:
continue
xmin = i[0]
xmax = i[2]
ymin = i[1]
ymax = i[3]
for enum1,j in enumerate(bboxes_list[(enum+1):], start = (enum+1)):
i_xmin = j[0]
i_xmax = j[2]
i_ymin = j[1]
i_ymax = j[3]
if rects_used[enum1] == False:
if abs(ymin - i_ymin) < x_pixel_value:
if abs(xmin-i_xmax) < y_pixel_value or abs(xmax-i_xmin) < y_pixel_value:
rects_used[enum1] = True
xmin = min(xmin,i_xmin)
xmax = max(xmax,i_xmax)
ymin = min(ymin,i_ymin)
ymax = max(ymax,i_ymax)
final_box = [xmin,ymin,xmax,ymax]
end_bboxes_list.append(final_box)
for i in end_bboxes_list:
cv2.rectangle(img,(i[0],i[1]),(i[2],i[3]), color = [0,255,0], thickness = 2)
cv2.imshow("Image",img)
cv2.waitKey(10000)
cv2.destroyAllWindows()

What's the most efficient way to select a non-rectangular ROI of an Image in OpenCV?

I want to create a binary image mask, containing only ones and zeros in python. The Region of Interest(white) is non-rectangular, defined by 4 corner points and looks for example as follows:
In my approach, I first calculate the line equation of the upper and lower ROI border and then I check for each mask element, if it's smaller or bigger than the boarders. The code is working, but far to slow. A 2000x1000 mask takes up to 4s of processing my machine.
from matplotlib import pyplot as plt
import cv2
import numpy as np
import time
def line_eq(line):
"""input:
2 points of a line
returns:
slope and intersection of the line
"""
(x1, y1), (x2, y2) = line
slope = (y2 - y1) / float((x2 - x1))
intersect = int(slope * (-x1) + y1)
return slope,intersect
def maskByROI(mask,ROI):
"""
input:
ROI: with 4 corner points e.g. ((x0,y0),(x1,y1),(x2,y2),(x3,y3))
mask:
output:
mask with roi set to 1, rest to 0
"""
line1 = line_eq((ROI[0],ROI[1]))
line2 = line_eq((ROI[2],ROI[3]))
slope1 = line1[0]
intersect1 = line1[1]
#upper line
if slope1>0:
for (x,y), value in np.ndenumerate(mask):
if y > slope1*x +intersect1:
mask[x,y] = 0
else:
for (x,y), value in np.ndenumerate(mask):
if y < slope1*x +intersect1:
mask[x,y] = 0
#lower line
slope2 = line2[0]
intersect2 = line2[1]
if slope2<0:
for (x,y), value in np.ndenumerate(mask):
if y > slope2*x +intersect2:
mask[x,y] = 0
else:
for (x,y), value in np.ndenumerate(mask):
if y < slope2*x +intersect2:
mask[x,y] = 0
return mask
mask = np.ones((2000,1000))
myROI = ((750,0),(900,1000),(1000,1000),(1500,0))
t1 = time.time()
mask = maskByROI(mask,myROI)
t2 = time.time()
print "execution time: ", t2-t1
plt.imshow(mask,cmap='Greys_r')
plt.show()
What is a more efficient way to create a mask like this?
Are there any similar solutions for non-rectangular shapes provided by
numpy, OpenCV or a similar Library?

Draw the mask with fillPoly:
mask = np.ones((1000, 2000)) # (height, width)
myROI = [(750, 0), (900, 1000), (1000, 1000), (1500, 0)] # (x, y)
cv2.fillPoly(mask, [np.array(myROI)], 0)
This should take ~1ms.

Processing an image of a table to get data from it

I have this image of a table (seen below). And I'm trying to get the data from the table, similar to this form (first row of table image):
rows[0] = [x,x, , , , ,x, ,x,x, ,x, ,x, , , , ,x, , , ,x,x,x, ,x, ,x, , , , ]
I need the number of x's as well as the number of spaces.
There will also be other table images that are similar to this one (all having x's and the same number of columns).
So far, I am able to detect all of the x's using an image of an x. And I can somewhat detect the lines. I'm using open cv2 for python. I'm also using a houghTransform to detect the horizontal and vertical lines (that works really well).
I'm trying to figure out how I can go row by row and store the information in a list.
These are the training images:
used to detect x (train1.png in the code)
used to detect lines (train2.png in the code)
used to detect lines (train3.png in the code)
This is the code I have so far:
# process images
from pytesser import *
from PIL import Image
from matplotlib import pyplot as plt
import pytesseract
import numpy as np
import cv2
import math
import os
# the table images
images = ['table1.png', 'table2.png', 'table3.png', 'table4.png', 'table5.png']
# the template images used for training
templates = ['train1.png', 'train2.png', 'train3.png']
def hough_transform(im):
img = cv2.imread('imgs/'+im)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray, 50, 150, apertureSize=3)
lines = cv2.HoughLines(edges, 1, np.pi/180, 200)
i = 1
for rho, theta in lines[0]:
a = np.cos(theta)
b = np.sin(theta)
x0 = a*rho
y0 = b*rho
x1 = int(x0 + 1000*(-b))
y1 = int(y0 + 1000*(a))
x2 = int(x0 - 1000*(-b))
y2 = int(y0 - 1000*(a))
#print '%s - 0:(%s,%s) 1:(%s,%s), 2:(%s,%s)' % (i,x0,y0,x1,y1,x2,y2)
cv2.line(img, (x1,y1), (x2,y2), (0,0,255), 2)
i += 1
fn = os.path.splitext(im)[0]+'-lines'
cv2.imwrite('imgs/'+fn+'.png', img)
def match_exes(im, te):
img_rgb = cv2.imread('imgs/'+im)
img_gry = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
template = cv2.imread('imgs/'+te, 0)
w, h = template.shape[::-1]
res = cv2.matchTemplate(img_gry, template, cv2.TM_CCOEFF_NORMED)
threshold = 0.71
loc = np.where(res >= threshold)
pts = []
exes = []
blanks = []
for pt in zip(*loc[::-1]):
pts.append(pt)
cv2.rectangle(img_rgb, pt, (pt[0]+w, pt[1]+h), (0,0,255), 1)
fn = os.path.splitext(im)[0]+'-exes'
cv2.imwrite('imgs/'+fn+'.png', img_rgb)
return pts, exes, blanks
def match_horizontal_lines(im, te, te2):
img_rgb = cv2.imread('imgs/'+im)
img_gry = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
template = cv2.imread('imgs/'+te, 0)
w1, h1 = template.shape[::-1]
template2 = cv2.imread('imgs/'+te2, 0)
w2, h2 = template2.shape[::-1]
# first line template (the downward facing line)
res1 = cv2.matchTemplate(img_gry, template, cv2.TM_CCOEFF_NORMED)
threshold1 = 0.8
loc1 = np.where(res1 >= threshold1)
# second line template (the upward facing line)
res2 = cv2.matchTemplate(img_gry, template2, cv2.TM_CCOEFF_NORMED)
threshold2 = 0.8
loc2 = np.where(res2 >= threshold2)
pts = []
exes = []
blanks = []
# find first line template (the downward facing line)
for pt in zip(*loc1[::-1]):
pts.append(pt)
cv2.rectangle(img_rgb, pt, (pt[0]+w1, pt[1]+h1), (0,0,255), 1)
# find second line template (the upward facing line)
for pt in zip(*loc2[::-1]):
pts.append(pt)
cv2.rectangle(img_rgb, pt, (pt[0]+w2, pt[0]+h2), (0,0,255), 1)
fn = os.path.splitext(im)[0]+'-horiz'
cv2.imwrite('imgs/'+fn+'.png', img_rgb)
return pts, exes, blanks
# process
text = ''
for img in images:
print 'processing %s' % img
hough_transform(img)
pts, exes, blanks = match_exes(img, templates[0])
pts1, exes1, blanks1 = match_horizontal_lines(img, templates[1], templates[2])
text += '%s: %s x\'s & %s horizontal lines\n' % (img, len(pts), len(pts1))
# statistics file
outputFile = open('counts.txt', 'w')
outputFile.write(text)
outputFile.close()
And, the output images look like this (as you can see, all x's are detected but not all lines)
x's
horizontal lines
hough transform
As I said, I'm actually just trying to get the data from the table, similar to this form (first row of table image):
row a = [x,x, , , , ,x, ,x,x, ,x, ,x, , , , ,x, , , ,x,x,x, ,x, ,x, , , , ]
I need the number of x's as well as the number of spaces.
There will also be other table images that are similar to this one (all having x's and the same number of columns and a different number of rows).
Also, I am using python 2.7

Ok, I have figured it out. I used the suggestion provided by #beaker of looking between the grid lines.
Before doing that I had to remove the duplicate lines from the hough transformation code. Then, I sorted those remaining lines into 2 lists, vertical and horizontal. From there, I could loop through the horizontal and then vertical and then create a region of interest (roi) image. Each roi image represents a 'cell' in the table master image. I checked each of those cells for contours and noticed that when there was an 'x' in the cell, len(contours) >= 2. So, any len(contours) < 2 was a blank space (I did several test programs to figure this out). Here is the code I used to get it working:
import cv2
import numpy as np
import os
# the list of images (tables)
images = ['table1.png', 'table2.png', 'table3.png', 'table4.png', 'table5.png']
# the list of templates (used for template matching)
templates = ['train1.png']
def remove_duplicates(lines):
# remove duplicate lines (lines within 10 pixels of eachother)
for x1, y1, x2, y2 in lines:
for index, (x3, y3, x4, y4) in enumerate(lines):
if y1 == y2 and y3 == y4:
diff = abs(y1-y3)
elif x1 == x2 and x3 == x4:
diff = abs(x1-x3)
else:
diff = 0
if diff < 10 and diff is not 0:
del lines[index]
return lines
def sort_line_list(lines):
# sort lines into horizontal and vertical
vertical = []
horizontal = []
for line in lines:
if line[0] == line[2]:
vertical.append(line)
elif line[1] == line[3]:
horizontal.append(line)
vertical.sort()
horizontal.sort(key=lambda x: x[1])
return horizontal, vertical
def hough_transform_p(image, template, tableCnt):
# open and process images
img = cv2.imread('imgs/'+image)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray, 50, 150, apertureSize=3)
# probabilistic hough transform
lines = cv2.HoughLinesP(edges, 1, np.pi/180, 200, minLineLength=20, maxLineGap=999)[0].tolist()
# remove duplicates
lines = remove_duplicates(lines)
# draw image
for x1, y1, x2, y2 in lines:
cv2.line(img, (x1, y1), (x2, y2), (0, 0, 255), 1)
# sort lines into vertical & horizontal lists
horizontal, vertical = sort_line_list(lines)
# go through each horizontal line (aka row)
rows = []
for i, h in enumerate(horizontal):
if i < len(horizontal)-1:
row = []
for j, v in enumerate(vertical):
if i < len(horizontal)-1 and j < len(vertical)-1:
# every cell before last cell
# get width & height
width = horizontal[i+1][1] - h[1]
height = vertical[j+1][0] - v[0]
else:
# last cell, width = cell start to end of image
# get width & height
width = tW
height = tH
tW = width
tH = height
# get roi (region of interest) to find an x
roi = img[h[1]:h[1]+width, v[0]:v[0]+height]
# save image (for testing)
dir = 'imgs/table%s' % (tableCnt+1)
if not os.path.exists(dir):
os.makedirs(dir)
fn = '%s/roi_r%s-c%s.png' % (dir, i, j)
cv2.imwrite(fn, roi)
# if roi contains an x, add x to array, else add _
roi_gry = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(roi_gry, 127, 255, 0)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
if len(contours) > 1:
# there is an x for 2 or more contours
row.append('x')
else:
# there is no x when len(contours) is <= 1
row.append('_')
row.pop()
rows.append(row)
# save image (for testing)
fn = os.path.splitext(image)[0] + '-hough_p.png'
cv2.imwrite('imgs/'+fn, img)
def process():
for i, img in enumerate(images):
# perform probabilistic hough transform on each image
hough_transform_p(img, templates[0], i)
if __name__ == '__main__':
process()
So, the sample image:
And, the output (code to generate text file was deleted for brevity):
As you can see, the text file contains the same number of x's in the same position as the image. Now that the hard part is over, I can continue on with my assignment!

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python OpenCV append matches' center x, y coordinates in tuples - python

Related

Is it possible to find the bending point using opencv Python?

Efficiently transform and blend images in OpenCV

How to join nearby bounding boxes in OpenCV Python

What's the most efficient way to select a non-rectangular ROI of an Image in OpenCV?

Processing an image of a table to get data from it

Categories

Resources