The aim of my program is to find the bending angle of an LED.
I got the angle using the convexity defects of the convex hull, but the midpoint moves away from the center point of the bend.
Original image:

Below is the output of the program:
The black dot is the start point.
The red dot is the end point.
The blue dot is the midpoint.
Now I want to move the blue dot to the center of the curve.
My code:
import cv2
import numpy as np
from math import sqrt
from collections import OrderedDict

def findangle(x1, y1, x2, y2, x3, y3):
    ria = np.arctan2(y2 - y1, x2 - x1) - np.arctan2(y3 - y1, x3 - x1)
    if ria > 0:
        if ria < 3:
            webangle = int(np.abs(ria * 180 / np.pi))
        elif ria > 3:
            webangle = int(np.abs(ria * 90 / np.pi))
    elif ria < 0:
        if ria < -3:
            webangle = int(np.abs(ria * 90 / np.pi))
        elif ria > -3:
            webangle = int(np.abs(ria * 180 / np.pi))
    return webangle
image = cv2.imread("cam/2022-09-27 10:01:57image.png")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY)
contours,hie= cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
selected_contour = max(contours, key=lambda x: cv2.contourArea(x))
# Draw Contour
approx = cv2.approxPolyDP(selected_contour, 0.0035 * cv2.arcLength(selected_contour, True), True)
for point in approx:
    cv2.drawContours(image, [point], 0, (0, 0, 255), 3)
convexHull = cv2.convexHull(selected_contour,returnPoints=False)
cv2.drawContours(image, cv2.convexHull(selected_contour), 0, (0, 255, 0), 3)
convexHull[::-1].sort(axis=0)
convexityDefects = cv2.convexityDefects(selected_contour, convexHull)
start2,distance=[],[]
for i in range(convexityDefects.shape[0]):
    s, e, f, d = convexityDefects[i, 0]
    start = tuple(selected_contour[s][0])
    end = tuple(selected_contour[e][0])
    far = tuple(selected_contour[f][0])
    start2.append(start)
    cv2.circle(image, start, 2, (255, 0, 0), 3)
    cv2.line(image, start, end, (0, 255, 0), 3)
    distance.append(d)
distance.sort(reverse=True)
for i in range(convexityDefects.shape[0]):
    s, e, f, d = convexityDefects[i, 0]
    if distance[0] == d:
        defect = {"s": s, "e": e, "f": f, "d": d}
        cv2.circle(image, selected_contour[defect.get("f")][0], 2, (255, 0, 0), 3)
        cv2.circle(image, selected_contour[defect.get("s")][0], 2, (0, 0, 0), 3)
        cv2.circle(image, selected_contour[defect.get("e")][0], 2, (0, 0, 255), 3)
        x1, y1 = selected_contour[defect.get("f")][0]
        x2, y2 = selected_contour[defect.get("e")][0]
        x3, y3 = selected_contour[defect.get("s")][0]
        cv2.line(image, (x1, y1), (x2, y2), (255, 200, 0), 2)
        cv2.line(image, (x1, y1), (x3, y3), (255, 200, 0), 2)
        cv2.putText(image, "Web Angle : " + str(findangle(x1, y1, x2, y2, x3, y3)), (50, 200), cv2.FONT_HERSHEY_SCRIPT_SIMPLEX, 1, (0, 0, 0), 2, cv2.LINE_AA)
cv2.imshow("frame",image)
cv2.waitKey(0)
cv2.destroyAllWindows()
So I am looking for any approach that gives the exact center of the bend.
Here is one way to do that in Python/OpenCV. I make no guarantees that it is universal and would work on all such images. I also leave it for others to add trapping for empty arrays/lists and other general best practices.
Read the input
Threshold to binary on white using cv2.inRange()
Apply morphology to close up the gap near the top
Skeletonize the binary image
Get the x and y coordinates of the points of the skeleton
Zip the x and y coordinates
Sort the zipped data by x
Sort another copy of the zipped data by y
Get the first line (end points) from the top for 40% of y from the y sorted data, since that region of the skeleton is nearly straight
Get the first line (end points) from the left for 40% of x from the x sorted data, since that region of the skeleton is nearly straight
Get the intersection point of these two lines
Compute the x and y derivatives of the x coordinates and the y coordinates, respectively
Loop over each point and compute the slope from the derivatives, which will be tangent to the skeleton at the point
Then still in the loop compute the inverse slope of the line from the point to the previously computed intersection point. This will be normal (perpendicular) to this line.
Compute the difference in slopes and find the point where the difference is minimum. This will be the bend point.
Draw relevant lines and points on skeleton and input
Save results
Input:
import cv2
import numpy as np
import skimage.morphology
img = cv2.imread("wire.png")
# create a binary thresholded image
lower = (255,255,255)
upper = (255,255,255)
thresh = cv2.inRange(img, lower, upper)
thresh = (thresh/255).astype(np.float64)
# apply morphology to connect at top
kernel = np.ones((11,11), np.uint8)
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# apply skeletonization
skeleton = skimage.morphology.skeletonize(thresh)
skeleton = (255*skeleton).clip(0,255).astype(np.uint8)
# get skeleton points
pts = np.where(skeleton != 0)
x = pts[1]
y = pts[0]
num_pts = len(x)
print(num_pts)
# zip x and y
xy1 = zip(x,y)
xy2 = zip(x,y)
# sort on y
xy_sorty = sorted(xy1, key = lambda x: x[1])
#print(xy_sorty[0])
# sort on x
xy_sortx = sorted(xy2, key = lambda x: x[0])
#print(xy_sortx[0])
# unzip x and y for xy_sortedy
xu1, yu1 = zip(*xy_sorty)
# get first line from top
# find miny from y sort, then get point 40% down from miny
miny = np.amin(yu1)
y1 = miny
[xy1] = [(xi, yi) for (xi, yi) in xy_sorty if abs(yi - y1) <= 0.00001]
x1 = xy1[0]
y1 = xy1[1]
#print(x1,y1)
maxy = np.amax(yu1)
dely = maxy - miny
y2 = int(y1+0.4*dely)
[xy2] = [(xi, yi) for (xi, yi) in xy_sorty if abs(yi - y2) <= 0.00001]
x2 = xy2[0]
y2 = xy2[1]
#print(x2,y2)
# unzip x and y for xy_sortedx
xu2, yu2 = zip(*xy_sortx)
# get first line from left
# find minx from x sort, then get point 40% right from minx
minx = np.amin(xu2)
x3 = minx
[xy3] = [(xi, yi) for (xi, yi) in xy_sortx if abs(xi - x3) <= 0.00001]
x3 = xy3[0]
y3 = xy3[1]
#print(x3,y3)
maxx = np.amax(xu2)
delx = maxx - minx
x4 = int(x3+0.4*delx)
[xy4] = [(xi, yi) for (xi, yi) in xy_sortx if abs(xi - x4) <= 0.00001]
x4 = xy4[0]
y4 = xy4[1]
#print(x4,y4)
# draw lines on copy of skeleton
skeleton_lines = skeleton.copy()
skeleton_lines = cv2.merge([skeleton_lines,skeleton_lines,skeleton_lines])
cv2.line(skeleton_lines, (x1,y1), (x2,y2), (0,0,255), 2)
cv2.line(skeleton_lines, (x3,y3), (x4,y4), (0,0,255), 2)
# get intersection between line1 (x1,y1 to x2,y2) and line2 (x3,y3 to x4,y4) and draw circle
# https://en.wikipedia.org/wiki/Line–line_intersection
den = (x1-x2)*(y3-y4) - (y1-y2)*(x3-x4)
px = ((x1*y2-y1*x2)*(x3-x4) - (x1-x2)*(x3*y4-y3*x4))/den
py = ((x1*y2-y1*x2)*(y3-y4) - (y1-y2)*(x3*y4-y3*x4))/den
px = int(px)
py = int(py)
cv2.circle(skeleton_lines, (px,py), 3, (0,255,0), -1)
# compute first derivatives in x and also in y
dx = np.gradient(x, axis=0)
dy = np.gradient(y, axis=0)
# loop over each point
# get the slope of the tangent to the curve
# get the inverse slop of the line from the point to the intersection point (inverse slope is normal direction)
# get difference in slopes and find the point that has the minimum difference
min_diff = 1000000
eps = 0.0000000001
for i in range(num_pts):
    slope1 = abs(dy[i] / (dx[i] + eps))
    slope2 = abs((px - x[i]) / (py - y[i] + eps))
    slope_diff = abs(slope1 - slope2)
    if slope_diff < min_diff:
        min_diff = slope_diff
        bend_x = x[i]
        bend_y = y[i]
        #print(x[i], y[i], min_diff)
bend_x = int(bend_x)
bend_y = int(bend_y)
#print(bend_x, bend_y)
cv2.line(skeleton_lines, (px,py), (bend_x,bend_y), (0,0,255), 2)
cv2.circle(skeleton_lines, (bend_x,bend_y), 3, (0,255,0), -1)
# get end points and bend point and draw on copy of input
result = img.copy()
end1 = (x1,y1)
end2 = (x3,y3)
bend = (bend_x,bend_y)
print("end1:", end1)
print("end2:", end2)
print("bend:", bend)
cv2.circle(result, (end1), 3, (0,0,255), -1)
cv2.circle(result, (end2), 3, (0,0,255), -1)
cv2.circle(result, (bend), 3, (0,0,255), -1)
# save result
cv2.imwrite("wire_skeleton.png", skeleton)
cv2.imwrite("wire_skeleton_lines.png", skeleton_lines)
cv2.imwrite("wire_result.png", result)
# show results
cv2.imshow("thresh", (255*thresh).astype(np.uint8))
cv2.imshow("skeleton", skeleton)
cv2.imshow("skeleton_lines", skeleton_lines)
cv2.imshow("skeleton_result", result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Skeleton:
Skeleton with lines:
Result showing end points and bend point:
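One note on the skeletonization step: if scikit-image is not available, a thinning function from OpenCV's contrib package could be swapped in instead. A minimal sketch, assuming opencv-contrib-python is installed and thresh is the 0/1 mask from the code above:

import cv2
import numpy as np

# thinning wants an 8-bit binary image
binary_u8 = (255 * thresh).clip(0, 255).astype(np.uint8)
skeleton = cv2.ximgproc.thinning(binary_u8)  # uint8 skeleton with values 0/255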
Given a binary image, how do I box around the majority of the white pixels? For example, consider the following image:
As canny segmentation results in a binary image, I thought I could use np.nonzero to identify the locations of the points and then draw a box around them. I have the following function to identify the location of the bounding box, but it's not working as intended (as you can see from the box in the image above):
def get_bounding_box(image, thresh=0.95):
    nonzero_indices = np.nonzero(image)
    min_row, max_row = np.min(nonzero_indices[0]), np.max(nonzero_indices[0])
    min_col, max_col = np.min(nonzero_indices[1]), np.max(nonzero_indices[1])
    box_size = max_row - min_row + 1, max_col - min_col + 1
    print(box_size)
    #box_size_thresh = (int(box_size[0] * thresh), int(box_size[1] * thresh))
    box_size_thresh = (int(box_size[0]), int(box_size[1]))
    #coordinates of the box that contains 95% of the highest pixel values
    top_left = (min_row + int((box_size[0] - box_size_thresh[0]) / 2), min_col + int((box_size[1] - box_size_thresh[1]) / 2))
    bottom_right = (top_left[0] + box_size_thresh[0], top_left[1] + box_size_thresh[1])
    print((top_left[0], top_left[1]), (bottom_right[0], bottom_right[1]))
    return (top_left[0], top_left[1]), (bottom_right[0], bottom_right[1])
and I am using the following code to get the coords and draw the box:
seg= canny_segmentation(gray)
bb_thresh = get_bounding_box(seg,0.95)
im_crop = gray[bb_thresh[0][1]:bb_thresh[1][1],bb_thresh[0][0]:bb_thresh[1][0]]
why is this code not giving me the right top left / bottom right coordinates?
I have a example colab workbook here https://colab.research.google.com/drive/15TNVPsYeZOCiOB51I-geVXgGFyIp5PjU?usp=sharing
The issue is related to the order of the coordinates returned from get_bounding_box:
return (top_left[0], top_left[1]), (bottom_right[0], bottom_right[1])
applies the following ordering:
(y0, x0), (y1, x1)
where y is the row and x is the column.
When the returned coordinates are used by im_crop = gray[bb_thresh[0][1]:bb_thresh[1][1], bb_thresh[0][0]:bb_thresh[1][0]], the rows and columns are switched.
To avoid confusion, I recommend storing the coordinates in x0, y0, x1, y1 first:
(y0, x0), (y1, x1) = bb_thresh
Then use the coordinates in the correct order:
im_crop = gray[y0:y1, x0:x1]
For testing, we may also draw a rectangle using cv2.rectangle:
cv2.rectangle(bgr_image, (x0, y0), (x1, y1), (0, 255, 0), 2)
Part of the confusion is related to the fact that NumPy array indexing is (y, x), while the OpenCV "point" coordinate convention is (x, y).
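As a tiny sketch of the two conventions:

import cv2
import numpy as np

img = np.zeros((4, 6), np.uint8)     # 4 rows (y) by 6 columns (x)
img[1, 5] = 255                      # NumPy indexing is img[y, x]
cv2.circle(img, (5, 1), 0, 255, -1)  # OpenCV points are (x, y): marks the same pixel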
Code sample (not using Google Colab):
import cv2
import numpy as np
def get_bounding_box(image, thresh=0.95):
    nonzero_indices = np.nonzero(image)
    min_row, max_row = np.min(nonzero_indices[0]), np.max(nonzero_indices[0])
    min_col, max_col = np.min(nonzero_indices[1]), np.max(nonzero_indices[1])
    box_size = max_row - min_row + 1, max_col - min_col + 1
    print(box_size)
    #box_size_thresh = (int(box_size[0] * thresh), int(box_size[1] * thresh))
    box_size_thresh = (int(box_size[0]), int(box_size[1]))
    #coordinates of the box that contains 95% of the highest pixel values
    top_left = (min_row + int((box_size[0] - box_size_thresh[0]) / 2), min_col + int((box_size[1] - box_size_thresh[1]) / 2))
    bottom_right = (top_left[0] + box_size_thresh[0], top_left[1] + box_size_thresh[1])
    print((top_left[0], top_left[1]), (bottom_right[0], bottom_right[1]))
    return (top_left[0], top_left[1]), (bottom_right[0], bottom_right[1])  # Return format is (y0, x0), (y1, x1), where y is the row and x is the column

def canny_segmentation(img, low_threshold=100, high_threshold=200):
    edges = cv2.Canny(img, low_threshold, high_threshold)
    return edges
gray = cv2.imread('small_grayscale_image.png', cv2.IMREAD_GRAYSCALE) # Read input image as Grayscale
seg = canny_segmentation(gray, 300, 320) # Use high thresholds - for testing
bb_thresh = get_bounding_box(seg, 0.95)
#im_crop = gray[bb_thresh[0][1]:bb_thresh[1][1], bb_thresh[0][0]:bb_thresh[1][0]]
(y0, x0), (y1, x1) = bb_thresh # Store coordinates in intermediate variables in order to avoid confusion.
im_crop = gray[y0:y1, x0:x1]
# Draw green rectangle for testing
bgr_image = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
cv2.rectangle(bgr_image, (x0, y0), (x1, y1), (0, 255, 0), 2)
cv2.imshow('bgr_image', bgr_image)
cv2.imshow('seg', seg)
cv2.waitKey()
cv2.destroyAllWindows()
bgr_image.png:
im_crop:
seg:
small_grayscale_image (input image):
I think that the top left and bottom right coordinates of the bounding box are not correctly calculated in the get_bounding_box function. The problem might lie in the calculation of top_left and bottom_right. The indices for the top left and bottom right coordinates of the bounding box should be calculated based on the min_row, max_row, min_col, max_col values, and not box_size_thresh.
Here's a corrected version of the code:
def get_bounding_box(image, thresh=0.95):
    nonzero_indices = np.nonzero(image)
    min_row, max_row = np.min(nonzero_indices[0]), np.max(nonzero_indices[0])
    min_col, max_col = np.min(nonzero_indices[1]), np.max(nonzero_indices[1])
    top_left = (min_row, min_col)
    bottom_right = (max_row, max_col)
    return top_left, bottom_right
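A possible usage sketch for the corrected function (hypothetical, reusing seg and gray from the question; note that max_row and max_col are inclusive indices, so the crop adds 1):

(min_row, min_col), (max_row, max_col) = get_bounding_box(seg)
im_crop = gray[min_row:max_row + 1, min_col:max_col + 1]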
Hope this helped!
It turns out I needed to transpose the image before getting the coordinates; a simple .T did the trick:
nonzero_indices = np.nonzero(image.T)
I am pretty new to Python and want to do the following: I want to divide the following image into 8 pie segments:
I want it to look something like this (I made this in PowerPoint):
The background should be black, and the edge of the figure as well as each pie segment should have a unique color.
EDIT: I have written code that divides the whole image into 8 segments:
from PIL import Image, ImageDraw
im=Image.open('C:/Users/20191881/Documents/OGO Beeldanalyse/Python/asymmetrie/rotation.png')
fill = 255
draw = ImageDraw.Draw(im)
draw.line((0,0) + im.size, fill)
draw.line((0, im.size[1], im.size[0], 0), fill)
draw.line((0.5*im.size[0],0, 0.5*im.size[0], im.size[1]), fill)
draw.line((0, 0.5*im.size[1], im.size[0], 0.5*im.size[1]), fill)
del draw
im.show()
The output gives:
The only thing left to do is to find a way to give each black segment inside the border a unique color, and also give each of the white edge segments a unique color.
Your code divides the image into eight parts, that's correct, but you don't get eight angularly equal pie segments around the image center like the ones in your sketch.
Here would be my solution, only using Pillow and the math module:
import math
from PIL import Image, ImageDraw

def segment_color(i_color, n_colors):
    r = int((192 - 64) / (n_colors - 1) * i_color + 64)
    g = int((224 - 128) / (n_colors - 1) * i_color + 128)
    b = 255
    return (r, g, b)

# Load image; generate ImageDraw
im = Image.open('path_to/vgdrD.png').convert('RGB')
draw = ImageDraw.Draw(im)

# Number of pie segments (must be an even number)
n = 8

# Replace (all-white) edge with defined edge color
edge_color = (255, 128, 0)
pixels = im.load()
for y in range(im.height):
    for x in range(im.width):
        if pixels[x, y] == (255, 255, 255):
            pixels[x, y] = edge_color

# Draw lines with defined line color
line_color = (0, 255, 0)
d = min(im.width, im.height) - 10
center = (int(im.width / 2), int(im.height / 2))
for i in range(int(n / 2)):
    angle = 360 / n * i
    x1 = math.cos(angle / 180 * math.pi) * d / 2 + center[0]
    y1 = math.sin(angle / 180 * math.pi) * d / 2 + center[1]
    x2 = math.cos((180 + angle) / 180 * math.pi) * d / 2 + center[0]
    y2 = math.sin((180 + angle) / 180 * math.pi) * d / 2 + center[1]
    draw.line([(x1, y1), (x2, y2)], line_color)

# Fill pie segments with defined segment colors
for i in range(n):
    angle = 360 / n * i + 360 / n / 2
    x = math.cos(angle / 180 * math.pi) * 20 + center[0]
    y = math.sin(angle / 180 * math.pi) * 20 + center[1]
    ImageDraw.floodfill(im, (int(x), int(y)), segment_color(i, n))

im.save(str(n) + '_pie.png')
For n = 8 pie segments, the following result is produced:
The first step is to replace all white pixels in the original image with the desired edge color. Of course, the assumption here is that there are no other (white) pixels in the image. Also, this might be better done using NumPy and vectorized code, but I wanted to keep the solution Pillow-only.
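For illustration, a minimal NumPy sketch of that vectorized replacement (assuming im and edge_color as set up in the code above):

import numpy as np
from PIL import Image

arr = np.array(im)                # PIL image -> (H, W, 3) uint8 array
white = (arr == 255).all(axis=2)  # mask of pure-white pixels
arr[white] = edge_color           # replace them in one vectorized step
im = Image.fromarray(arr)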
Next step is to draw the (green) lines. Here, I calculate the proper coordinates of the lines' start and end using sin and cos.
The last step is to flood fill the pie segments' areas, cf. ImageDraw.floodfill. For that, I calculate the seed points the same way as before, but add an angular shift to hit a point exactly within each pie segment.
As you can see, n is variable in my solution (n must be even):
Of course, there are limitations regarding the angular resolution, mostly due to the small image.
Hope that helps!
EDIT: Here's a modified version to also allow for individually colored edges.
import math
from PIL import Image, ImageDraw

def segment_color(i_color, n_colors):
    r = int((192 - 64) / (n_colors - 1) * i_color + 64)
    g = int((224 - 128) / (n_colors - 1) * i_color + 128)
    b = 255
    return (r, g, b)

def edge_color(i_color, n_colors):
    r = 255
    g = 255 - int((224 - 32) / (n_colors - 1) * i_color + 32)
    b = 255 - int((192 - 16) / (n_colors - 1) * i_color + 16)
    return (r, g, b)

# Load image; generate ImageDraw
im = Image.open('images/vgdrD.png').convert('RGB')
draw = ImageDraw.Draw(im)
center = (int(im.width / 2), int(im.height / 2))

# Number of pie segments (must be an even number)
n = 8

# Replace (all-white) edge with defined edge color
max_len = im.width + im.height
im_pix = im.load()
for i in range(n):
    mask = Image.new('L', im.size, 0)
    mask_draw = ImageDraw.Draw(mask)
    angle = 360 / n * i
    x1 = math.cos(angle / 180 * math.pi) * max_len + center[0]
    y1 = math.sin(angle / 180 * math.pi) * max_len + center[1]
    angle = 360 / n * (i + 1)
    x2 = math.cos(angle / 180 * math.pi) * max_len + center[0]
    y2 = math.sin(angle / 180 * math.pi) * max_len + center[1]
    mask_draw.polygon([center, (x1, y1), (x2, y2)], 255)
    mask_pix = mask.load()
    for y in range(im.height):
        for x in range(im.width):
            if (im_pix[x, y] == (255, 255, 255)) & (mask_pix[x, y] == 255):
                im_pix[x, y] = edge_color(i, n)

# Draw lines with defined line color
line_color = (0, 255, 0)
d = min(im.width, im.height) - 10
for i in range(int(n / 2)):
    angle = 360 / n * i
    x1 = math.cos(angle / 180 * math.pi) * d / 2 + center[0]
    y1 = math.sin(angle / 180 * math.pi) * d / 2 + center[1]
    x2 = math.cos((180 + angle) / 180 * math.pi) * d / 2 + center[0]
    y2 = math.sin((180 + angle) / 180 * math.pi) * d / 2 + center[1]
    draw.line([(x1, y1), (x2, y2)], line_color)

# Fill pie segments with defined segment colors
for i in range(n):
    angle = 360 / n * i + 360 / n / 2
    x = math.cos(angle / 180 * math.pi) * 20 + center[0]
    y = math.sin(angle / 180 * math.pi) * 20 + center[1]
    ImageDraw.floodfill(im, (int(x), int(y)), segment_color(i, n))

im.save(str(n) + '_pie.png')
A binary mask is created for each pie segment, and only white pixels within that mask are replaced with the segment's edge color.
Using NumPy still seems favorable, but I was curious to do that in Pillow only.
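For reference again, a minimal NumPy sketch of the masked replacement from inside the loop above (assuming im, mask, i, n, and the edge_color function as defined there):

import numpy as np
from PIL import Image

arr = np.array(im)                          # (H, W, 3) uint8
white = (arr == 255).all(axis=2)            # pure-white pixels
in_segment = np.array(mask) == 255          # current pie-segment mask
arr[white & in_segment] = edge_color(i, n)  # vectorized replacement
im = Image.fromarray(arr)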
I am doing a college class project on image processing. This is my original image:
I want to join nearby/overlapping bounding boxes on individual text line images, but I don't know how. My code looks like this so far (thanks to @HansHirse for the help):
import os
import cv2
import numpy as np
from scipy import stats
image = cv2.imread('example.png')
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
ret,thresh = cv2.threshold(gray,127,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
#dilation
kernel = np.ones((5,5), np.uint8)
img_dilation = cv2.dilate(thresh, kernel, iterations=1)
#find contours
ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# https://www.pyimagesearch.com/2015/04/20/sorting-contours-using-python-and-opencv/
def sort_contours(cnts, method="left-to-right"):
    # initialize the reverse flag and sort index
    reverse = False
    i = 0
    # handle if we need to sort in reverse
    if method == "right-to-left" or method == "bottom-to-top":
        reverse = True
    # handle if we are sorting against the y-coordinate rather than
    # the x-coordinate of the bounding box
    if method == "top-to-bottom" or method == "bottom-to-top":
        i = 1
    # construct the list of bounding boxes and sort them from top to
    # bottom
    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    (cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes),
                                        key=lambda b: b[1][i], reverse=reverse))
    # return the list of sorted contours and bounding boxes
    return (cnts, boundingBoxes)
sortedctrs, sortedbbs = sort_contours(ctrs)
xyminmax = []
for cnt in sortedctrs:
    x, y, w, h = cv2.boundingRect(cnt)
    xyminmax.append([x, y, x + w, y + h])
distances = []
for i in range(len(xyminmax)):
    try:
        first_xmax = xyminmax[i][2]
        second_xmin = xyminmax[i + 1][0]
        distance = abs(second_xmin - first_xmax)
        distances.append(distance)
    except IndexError:
        pass
THRESHOLD = stats.mode(distances, axis=None)[0][0]
new_rects = []
for i in range(len(xyminmax)):
    try:
        # [xmin, ymin, xmax, ymax]
        first_ymin = xyminmax[i][1]
        first_ymax = xyminmax[i][3]
        second_ymin = xyminmax[i + 1][1]
        second_ymax = xyminmax[i + 1][3]
        first_xmax = xyminmax[i][2]
        second_xmin = xyminmax[i + 1][0]
        firstheight = abs(first_ymax - first_ymin)
        secondheight = abs(second_ymax - second_ymin)
        distance = abs(second_xmin - first_xmax)
        if distance < THRESHOLD:
            new_xmin = xyminmax[i][0]
            new_xmax = xyminmax[i + 1][2]
            if first_ymin > second_ymin:
                new_ymin = second_ymin
            else:
                new_ymin = first_ymin
            if firstheight > secondheight:
                new_ymax = first_ymax
            else:
                new_ymax = second_ymax
            new_rects.append([new_xmin, new_ymin, new_xmax, new_ymax])
        else:
            new_rects.append(xyminmax[i])
    except IndexError:
        pass
for rect in new_rects:
    cv2.rectangle(image, (rect[0], rect[1]), (rect[2], rect[3]), (121, 11, 189), 2)
cv2.imwrite("result.png",image)
which produces this image as a result:
I want to join very close or overlapping bounding boxes such as these into a single bounding box so the formula doesn't get separated into single characters. I have tried using cv2.groupRectangles, but the printed results were just NULL.
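Aside: cv2.groupRectangles rejects rectangle clusters of size groupThreshold or smaller, so a list of unique boxes called with groupThreshold=1 comes back empty, which is likely why NULL was printed. A minimal sketch of the usual duplicate-the-list workaround:

import cv2

boxes = [[10, 10, 50, 20], [12, 11, 48, 19]]
# duplicate the list so each cluster exceeds groupThreshold=1
grouped, weights = cv2.groupRectangles(boxes + boxes, 1, 0.2)
print(grouped)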
So, here comes my solution. I partially modified your (initial) code to my preferred naming, etc. Also, I commented all the stuff I added.
import cv2
import numpy as np
image = cv2.imread('images/example.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
kernel = np.ones((5, 5), np.uint8)
img_dilated = cv2.dilate(thresh, kernel, iterations = 1)
cnts, _ = cv2.findContours(img_dilated.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Array of initial bounding rects
rects = []
# Bool array indicating which initial bounding rect has
# already been used
rectsUsed = []
# Just initialize bounding rects and set all bools to false
for cnt in cnts:
    rects.append(cv2.boundingRect(cnt))
    rectsUsed.append(False)
# Sort bounding rects by x coordinate
def getXFromRect(item):
    return item[0]
rects.sort(key = getXFromRect)
# Array of accepted rects
acceptedRects = []
# Merge threshold for x coordinate distance
xThr = 5
# Iterate all initial bounding rects
for supIdx, supVal in enumerate(rects):
    if (rectsUsed[supIdx] == False):

        # Initialize current rect
        currxMin = supVal[0]
        currxMax = supVal[0] + supVal[2]
        curryMin = supVal[1]
        curryMax = supVal[1] + supVal[3]

        # This bounding rect is used
        rectsUsed[supIdx] = True

        # Iterate all initial bounding rects
        # starting from the next
        for subIdx, subVal in enumerate(rects[(supIdx+1):], start = (supIdx+1)):

            # Initialize merge candidate
            candxMin = subVal[0]
            candxMax = subVal[0] + subVal[2]
            candyMin = subVal[1]
            candyMax = subVal[1] + subVal[3]

            # Check if x distance between current rect
            # and merge candidate is small enough
            if (candxMin <= currxMax + xThr):

                # Reset coordinates of current rect
                currxMax = candxMax
                curryMin = min(curryMin, candyMin)
                curryMax = max(curryMax, candyMax)

                # Merge candidate (bounding rect) is used
                rectsUsed[subIdx] = True
            else:
                break

        # No more merge candidates possible, accept current rect
        acceptedRects.append([currxMin, curryMin, currxMax - currxMin, curryMax - curryMin])
for rect in acceptedRects:
    img = cv2.rectangle(image, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (121, 11, 189), 2)
cv2.imwrite("images/result.png", image)
For your example, I get the following output:
Now, you have to find a proper threshold to meet your expectations. Maybe, there is even some more work to do, especially to get the whole formula, since the distances don't vary that much.
Disclaimer: I'm new to Python in general, and especially to the Python API of OpenCV (C++ for the win). Comments, improvements, and highlighting of Python no-gos are highly welcome!
Here is a slightly different approach, using the OpenCV Wrapper library.
import cv2
import opencv_wrapper as cvw
image = cv2.imread("example.png")
gray = cvw.bgr2gray(image)
thresh = cvw.threshold_otsu(gray, inverse=True)
# dilation
img_dilation = cvw.dilate(thresh, 5)
# Find contours
contours = cvw.find_external_contours(img_dilation)
# Map contours to bounding rectangles, using bounding_rect property
rects = map(lambda c: c.bounding_rect, contours)
# Sort rects by top-left x (rect.x == rect.tl.x)
sorted_rects = sorted(rects, key=lambda r: r.x)
# Distance threshold
dt = 5
# List of final, joined rectangles
final_rects = [sorted_rects[0]]
for rect in sorted_rects[1:]:
    prev_rect = final_rects[-1]

    # Shift rectangle `dt` back, to find out if they overlap
    shifted_rect = cvw.Rect(rect.tl.x - dt, rect.tl.y, rect.width, rect.height)
    intersection = cvw.rect_intersection(prev_rect, shifted_rect)
    if intersection is not None:
        # Join the two rectangles
        min_y = min((prev_rect.tl.y, rect.tl.y))
        max_y = max((prev_rect.bl.y, rect.bl.y))
        max_x = max((prev_rect.br.x, rect.br.x))
        width = max_x - prev_rect.tl.x
        height = max_y - min_y
        new_rect = cvw.Rect(prev_rect.tl.x, min_y, width, height)
        # Add new rectangle to final list, making it the new prev_rect
        # in the next iteration
        final_rects[-1] = new_rect
    else:
        # If no intersection, add the box
        final_rects.append(rect)

for rect in sorted_rects:
    cvw.rectangle(image, rect, cvw.Color.MAGENTA, line_style=cvw.LineStyle.DASHED)
for rect in final_rects:
    cvw.rectangle(image, rect, cvw.Color.GREEN, thickness=2)
cv2.imwrite("result.png", image)
And the result
The green boxes are the final result, while the magenta boxes are the original ones.
I used the same threshold as @HansHirse.
The equals sign still needs some work: either use a larger dilation kernel or apply the same technique vertically.
Disclosure: I am the author of OpenCV Wrapper.
Easy-to-read solution:
import cv2
import imutils

def get_contours(frame):  # Returns a list of contours
    contours = cv2.findContours(frame, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = imutils.grab_contours(contours)
    return contours

def merge_boxes(boxes, x_val, y_val):
    size = len(boxes)
    if size < 2:
        return boxes
    if size == 2:
        if boxes_mergeable(boxes[0], boxes[1], x_val, y_val):
            boxes[0] = union(boxes[0], boxes[1])
            del boxes[1]
        return boxes
    boxes = sorted(boxes, key=lambda r: r[0])
    i = size - 2
    while i >= 0:
        if boxes_mergeable(boxes[i], boxes[i + 1], x_val, y_val):
            boxes[i] = union(boxes[i], boxes[i + 1])
            del boxes[i + 1]
        i -= 1
    return boxes

def boxes_mergeable(box1, box2, x_val, y_val):
    (x1, y1, w1, h1) = box1
    (x2, y2, w2, h2) = box2
    return max(x1, x2) - min(x1, x2) - minx_w(x1, w1, x2, w2) < x_val \
        and max(y1, y2) - min(y1, y2) - miny_h(y1, h1, y2, h2) < y_val

def minx_w(x1, w1, x2, w2):
    return w1 if x1 <= x2 else w2

def miny_h(y1, h1, y2, h2):
    return h1 if y1 <= y2 else h2

def union(a, b):
    x = min(a[0], b[0])
    y = min(a[1], b[1])
    w = max(a[0] + a[2], b[0] + b[2]) - x
    h = max(a[1] + a[3], b[1] + b[3]) - y
    return x, y, w, h

# Usage (`frame` is your binary image):
contours = get_contours(frame)
boxes = [cv2.boundingRect(c) for c in contours]
boxes = merge_boxes(boxes, x_val=40, y_val=20)  # x_val and y_val are axis thresholds
If you have bounding boxes and want to merge along both the X and Y directions, use the snippet below. Adjust x_pixel_value and y_pixel_value to your preferences; you need to have the bounding boxes beforehand.
import cv2

img = cv2.imread("your_image_path")  # placeholder path
x_pixel_value = 5
y_pixel_value = 6
bboxes_list = []  # your bounding boxes list, as [xmin, ymin, xmax, ymax]
rects_used = []
for i in bboxes_list:
    rects_used.append(False)
end_bboxes_list = []
for enum, i in enumerate(bboxes_list):
    if rects_used[enum] == True:
        continue
    xmin = i[0]
    xmax = i[2]
    ymin = i[1]
    ymax = i[3]
    for enum1, j in enumerate(bboxes_list[(enum + 1):], start=(enum + 1)):
        i_xmin = j[0]
        i_xmax = j[2]
        i_ymin = j[1]
        i_ymax = j[3]
        if rects_used[enum1] == False:
            # merge when the boxes are close along y and close or overlapping along x
            # (note: each threshold is applied to its matching axis here)
            if abs(ymin - i_ymin) < y_pixel_value:
                if abs(xmin - i_xmax) < x_pixel_value or abs(xmax - i_xmin) < x_pixel_value:
                    rects_used[enum1] = True
                    xmin = min(xmin, i_xmin)
                    xmax = max(xmax, i_xmax)
                    ymin = min(ymin, i_ymin)
                    ymax = max(ymax, i_ymax)
    final_box = [xmin, ymin, xmax, ymax]
    end_bboxes_list.append(final_box)
for i in end_bboxes_list:
    cv2.rectangle(img, (i[0], i[1]), (i[2], i[3]), color=[0, 255, 0], thickness=2)
cv2.imshow("Image", img)
cv2.waitKey(10000)
cv2.destroyAllWindows()
So, I want to transform an image but can't really find a proper way to do it using OpenCV.
First, I have an image, let's say 500x600 px, inside of which there is a distorted thing I want to "straighten up"; see the image:
I'm obtaining the contour of the sudoku like this:
cropped_image, contours, _ = cv2.findContours(cropped_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
max_contour = max(contours, key=cv2.contourArea)
Then I'm getting the extreme pixels (top-left, top-right, bottom-right and bottom-left) of max_contour and of the image, computing the transform matrix, and transforming the image like this:
x, y = cropped_image.shape
image_extreme_pixels = np.array([[0, y], [x, y], [x, 0], [0, 0]], dtype=np.float32)
c_x, c_y = [], []
for i in contour:
    c_x.append(i[0][0])
    c_y.append(i[0][1])
contour_extreme_pixels = np.array([
[min(c_x), max(c_y)],
[max(c_x), max(c_y)],
[max(c_x), min(c_y)],
[min(c_x), min(c_y)]],
dtype=np.float32)
t_matrix = cv2.getPerspectiveTransform(contour_extreme_pixels, image_extreme_pixels)
transformed_image = cv2.warpPerspective(cropped_image, t_matrix, (y, x))
plt.imshow(cropped_image, interpolation='nearest', cmap=plt.cm.gray)
But when I view the image, it's transformed in a weird fashion. I wanted to stretch the top parts of the sudoku so that its contour is straight.
Could you point out what's wrong with my code?
I'm assuming it might be the way in which I'm creating the 4 extreme pixels that are then put into getPerspectiveTransform to get the transformation matrix, but I didn't manage to make it work yet.
Assuming that you have found the corner points of the sudoku accurately, you can perspective-transform the input image as:
# Hard coded the points here assuming that you already have 4 corners of sudoku image
sudoku_corner_points = np.float32([[235, 40], [1022, 55], [190, 875], [1090, 880]])
canvas = np.ones((500, 500), dtype=np.uint8)
dst_points = np.float32([[0, 0], [500, 0], [0, 500], [500, 500]])
t_matrix = cv2.getPerspectiveTransform(sudoku_corner_points, dst_points)
transformed_image = cv2.warpPerspective(img, t_matrix, (500, 500))
So it turns out the extreme points I'd found were incorrect.
The correct way (one of many) to find the 4 extreme points of a shape that we expect to be rectangular would be something like this:
import math
import numpy as np

def get_contour_extreme_points(img, contour):
    m_point = image_center(img)
    l1, l2, l3, l4 = 0, 0, 0, 0
    p1, p2, p3, p4 = 0, 0, 0, 0
    for point in contour:
        d = distance(m_point, point[0])
        if inside_bottom_right(m_point, point[0]) and l1 < d:
            l1 = d
            p1 = point[0]
            continue
        if inside_bottom_left(m_point, point[0]) and l2 < d:
            l2 = d
            p2 = point[0]
            continue
        if inside_top_right(m_point, point[0]) and l3 < d:
            l3 = d
            p3 = point[0]
            continue
        if inside_top_left(m_point, point[0]) and l4 < d:
            l4 = d
            p4 = point[0]
            continue
    return np.float32([p1, p2, p3, p4])

def inside_bottom_right(center, point):
    return center[0] < point[0] and center[1] < point[1]

def inside_bottom_left(center, point):
    return center[0] > point[0] and center[1] < point[1]

def inside_top_right(center, point):
    return center[0] < point[0] and center[1] > point[1]

def inside_top_left(center, point):
    return center[0] > point[0] and center[1] > point[1]

def distance(p1, p2):
    return math.sqrt(((p1[0] - p2[0]) ** 2) + ((p1[1] - p2[1]) ** 2))

def image_center(img):
    x, y = img.shape
    return tuple([x / 2, y / 2])
Then I have to be careful about the order of the 4 extreme points of the image, which should look like this:
x, y = img.shape
img_extreme_points = np.float32([[x, y], [0, y], [x, 0], [0, 0]])
So first comes the bottom-right extreme point, then bottom-left, top-right, and top-left. As long as the extreme points' indices correspond correctly, the matrix will be computed correctly as well.
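To tie it together, a short sketch feeding both point sets into the transform, reusing max_contour and the names above (and assuming img is the single-channel image the contour was found in):

import cv2
import numpy as np

contour_extreme_points = get_contour_extreme_points(img, max_contour)
x, y = img.shape
img_extreme_points = np.float32([[x, y], [0, y], [x, 0], [0, 0]])
t_matrix = cv2.getPerspectiveTransform(contour_extreme_points, img_extreme_points)
transformed_image = cv2.warpPerspective(img, t_matrix, (y, x))  # dsize is (width, height)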
I want to create a binary image mask containing only ones and zeros in Python. The region of interest (white) is non-rectangular, defined by 4 corner points, and looks, for example, as follows:
In my approach, I first calculate the line equations of the upper and lower ROI borders, and then I check for each mask element whether it is smaller or bigger than the borders. The code is working but far too slow; a 2000x1000 mask takes up to 4 s to process on my machine.
from matplotlib import pyplot as plt
import cv2
import numpy as np
import time
def line_eq(line):
    """input:
        2 points of a line
    returns:
        slope and intersection of the line
    """
    (x1, y1), (x2, y2) = line
    slope = (y2 - y1) / float((x2 - x1))
    intersect = int(slope * (-x1) + y1)
    return slope, intersect
def maskByROI(mask, ROI):
    """
    input:
        ROI: with 4 corner points e.g. ((x0,y0),(x1,y1),(x2,y2),(x3,y3))
        mask:
    output:
        mask with roi set to 1, rest to 0
    """
    line1 = line_eq((ROI[0], ROI[1]))
    line2 = line_eq((ROI[2], ROI[3]))
    slope1 = line1[0]
    intersect1 = line1[1]
    # upper line
    if slope1 > 0:
        for (x, y), value in np.ndenumerate(mask):
            if y > slope1 * x + intersect1:
                mask[x, y] = 0
    else:
        for (x, y), value in np.ndenumerate(mask):
            if y < slope1 * x + intersect1:
                mask[x, y] = 0
    # lower line
    slope2 = line2[0]
    intersect2 = line2[1]
    if slope2 < 0:
        for (x, y), value in np.ndenumerate(mask):
            if y > slope2 * x + intersect2:
                mask[x, y] = 0
    else:
        for (x, y), value in np.ndenumerate(mask):
            if y < slope2 * x + intersect2:
                mask[x, y] = 0
    return mask
mask = np.ones((2000,1000))
myROI = ((750,0),(900,1000),(1000,1000),(1500,0))
t1 = time.time()
mask = maskByROI(mask,myROI)
t2 = time.time()
print "execution time: ", t2-t1
plt.imshow(mask,cmap='Greys_r')
plt.show()
What is a more efficient way to create a mask like this? Are there any similar solutions for non-rectangular shapes provided by NumPy, OpenCV, or a similar library?
Draw the mask with fillPoly:
mask = np.ones((1000, 2000)) # (height, width)
myROI = [(750, 0), (900, 1000), (1000, 1000), (1500, 0)] # (x, y)
cv2.fillPoly(mask, [np.array(myROI)], 0)
This should take ~1ms.
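A small follow-up note, as a sketch: fillPoly expects integer (int32) point arrays, and for a strict 0/1 uint8 mask the polygon can just as well be filled with 1 into a zero mask:

import cv2
import numpy as np

mask = np.zeros((1000, 2000), dtype=np.uint8)             # (height, width)
myROI = [(750, 0), (900, 1000), (1000, 1000), (1500, 0)]  # (x, y)
cv2.fillPoly(mask, [np.array(myROI, dtype=np.int32)], 1)  # ROI -> 1, rest stays 0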