How to detect and align tilted images after cropping - python

I have implemented a cropping algorithm in my solution that works pretty well. The problem is when the image is tilted: the crop will work, but it will include background space, as the images below show.
Cropping flow:
First step:
Second step:
Final result:
I have searched for and tried multiple solutions but could not get a decent result, or I'm not thinking about it the right way.
The expected result is this:
EDIT [FINAL RESULT]:
import cv2
import numpy as np

def order_corner_points(corners):
    # Separate corners into individual points
    # Index 0 - top-right
    #       1 - top-left
    #       2 - bottom-left
    #       3 - bottom-right
    corners = [(corner[0][0], corner[0][1]) for corner in corners]
    top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
    return (top_l, top_r, bottom_r, bottom_l)

def perspective_transform(image, corners):
    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct new points to obtain top-down view of image in
    # top_l, top_r, bottom_r, bottom_l order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                           [0, height - 1]], dtype="float32")

    # Convert to NumPy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))

def get_image_width_height(image):
    image_width = image.shape[1]   # current image's width
    image_height = image.shape[0]  # current image's height
    return image_width, image_height

def calculate_scaled_dimension(scale, image):
    image_width, image_height = get_image_width_height(image)
    ratio_of_new_width_to_old = scale / image_width
    dimension = (scale, int(image_height * ratio_of_new_width_to_old))
    return dimension

def scale_image(image, size):
    image_resized_scaled = cv2.resize(
        image,
        calculate_scaled_dimension(size, image),
        interpolation=cv2.INTER_AREA
    )
    return image_resized_scaled

def rotate_image(image, angle):
    # Grab the dimensions of the image and then determine the center
    (h, w) = image.shape[:2]
    (cX, cY) = (w / 2, h / 2)

    # Grab the rotation matrix (applying the negative of the
    # angle to rotate clockwise), then grab the sine and cosine
    # (i.e., the rotation components of the matrix)
    M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])

    # Compute the new bounding dimensions of the image
    nW = int((h * sin) + (w * cos))
    nH = int((h * cos) + (w * sin))

    # Adjust the rotation matrix to take into account translation
    M[0, 2] += (nW / 2) - cX
    M[1, 2] += (nH / 2) - cY

    # Perform the actual rotation and return the image
    return cv2.warpAffine(image, M, (nW, nH))

image = cv2.imread('images/damina_cc_back.jpg')
original_image = image.copy()
image = scale_image(image, 500)

# Convert the image to grayscale, blur it, and find edges in the image
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 17, 17)
edged = cv2.Canny(gray, 30, 200)

cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:10]
screen_cnt = None

# Loop over our contours
for c in cnts:
    # Approximate the contour
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)

    if len(approx) == 4:
        screen_cnt = approx
        transformed = perspective_transform(image, screen_cnt)
        break

# Draw ROI
cv2.drawContours(image, [screen_cnt], -1, (0, 255, 0), 1)

# Rotate to landscape orientation if needed
(h, w) = transformed.shape[:2]
if h > w:
    rotated = rotate_image(transformed, 90)
else:
    rotated = transformed

cv2.imshow("image", original_image)
cv2.imshow("ROI", image)
cv2.imshow("transformed", transformed)
cv2.imshow("rotated", rotated)
cv2.waitKey(0)

To align your image after cropping, we can use a perspective transformation. To begin, we separate the four corners of the rectangle into individual points given to us by cv2.approxPolyDP(). We reorder the points into a clockwise orientation (top-left, top-right, bottom-right, bottom-left) using this function:
def order_corner_points(corners):
    # Separate corners into individual points
    # Index 0 - top-right
    #       1 - top-left
    #       2 - bottom-left
    #       3 - bottom-right
    corners = [(corner[0][0], corner[0][1]) for corner in corners]
    top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
    return (top_l, top_r, bottom_r, bottom_l)
This function gives us the bounding box coordinates of the ROI.
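For example, given a hypothetical set of corners in the (4, 1, 2) layout that cv2.approxPolyDP() returns (the coordinate values below are made up for illustration):

import numpy as np

# Hypothetical corners ordered top-right, top-left, bottom-left, bottom-right
corners = np.array([[[320, 15]], [[10, 40]], [[35, 210]], [[345, 185]]])
ordered = order_corner_points(corners)
# ordered is now ((10, 40), (320, 15), (345, 185), (35, 210)),
# i.e. top-left, top-right, bottom-right, bottom-left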
Now with the isolated corner points, we can obtain the transformation matrix using cv2.getPerspectiveTransform() and actually obtain the transformed image using cv2.warpPerspective().
def perspective_transform(image, corners):
    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct new points to obtain top-down view of image in
    # top_l, top_r, bottom_r, bottom_l order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                           [0, height - 1]], dtype="float32")

    # Convert to NumPy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))
Here's the result
We can rotate the image with this function
def rotate_image(image, angle):
    # Grab the dimensions of the image and then determine the center
    (h, w) = image.shape[:2]
    (cX, cY) = (w / 2, h / 2)

    # Grab the rotation matrix (applying the negative of the
    # angle to rotate clockwise), then grab the sine and cosine
    # (i.e., the rotation components of the matrix)
    M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])

    # Compute the new bounding dimensions of the image
    nW = int((h * sin) + (w * cos))
    nH = int((h * cos) + (w * sin))

    # Adjust the rotation matrix to take into account translation
    M[0, 2] += (nW / 2) - cX
    M[1, 2] += (nH / 2) - cY

    # Perform the actual rotation and return the image
    return cv2.warpAffine(image, M, (nW, nH))
The final result after rotating:
Full code
import cv2
import numpy as np

def order_corner_points(corners):
    # Separate corners into individual points
    # Index 0 - top-right
    #       1 - top-left
    #       2 - bottom-left
    #       3 - bottom-right
    corners = [(corner[0][0], corner[0][1]) for corner in corners]
    top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
    return (top_l, top_r, bottom_r, bottom_l)

def perspective_transform(image, corners):
    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct new points to obtain top-down view of image in
    # top_l, top_r, bottom_r, bottom_l order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                           [0, height - 1]], dtype="float32")

    # Convert to NumPy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))

def rotate_image(image, angle):
    # Grab the dimensions of the image and then determine the center
    (h, w) = image.shape[:2]
    (cX, cY) = (w / 2, h / 2)

    # Grab the rotation matrix (applying the negative of the
    # angle to rotate clockwise), then grab the sine and cosine
    # (i.e., the rotation components of the matrix)
    M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])

    # Compute the new bounding dimensions of the image
    nW = int((h * sin) + (w * cos))
    nH = int((h * cos) + (w * sin))

    # Adjust the rotation matrix to take into account translation
    M[0, 2] += (nW / 2) - cX
    M[1, 2] += (nH / 2) - cY

    # Perform the actual rotation and return the image
    return cv2.warpAffine(image, M, (nW, nH))

image = cv2.imread('1.PNG')
original_image = image.copy()

# Convert the image to grayscale, blur it, and find edges in the image
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 17, 17)
edged = cv2.Canny(gray, 30, 200)

cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:10]
screen_cnt = None

# Loop over our contours
for c in cnts:
    # Approximate the contour
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)

    if len(approx) == 4:
        screen_cnt = approx
        transformed = perspective_transform(original_image, screen_cnt)
        break

# Draw ROI
cv2.drawContours(image, [screen_cnt], -1, (0, 255, 0), 3)

# Rotate image
rotated = rotate_image(transformed, -90)

cv2.imshow("image", original_image)
cv2.imshow("ROI", image)
cv2.imshow("transformed", transformed)
cv2.imshow("rotated", rotated)
cv2.waitKey(0)

I assume you're looking for the minimum and maximum u and v positions where an edge was found (or maybe certain quantiles) to find the cropped rectangle. That is, go over all image pixels that were marked as an edge and update the u/v min/max values.
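As a minimal sketch of that scan, assuming edged is a binary edge image (e.g. from cv2.Canny) and image is the source, the axis-aligned version can be vectorized with NumPy:

import numpy as np

# Coordinates of all edge pixels (v = row, u = column)
ys, xs = np.nonzero(edged)
u_min, u_max = xs.min(), xs.max()   # horizontal extent
v_min, v_max = ys.min(), ys.max()   # vertical extent
cropped = image[v_min:v_max + 1, u_min:u_max + 1]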
If the computation time is not an issue for you, you can simply keep the algorithm as is and additionally loop over a number of rotations, keeping a separate set of boundary values for each. Pseudocode:

for v
    for u
        if (u, v) is edge
            for rotation_matrix
                (ur, vr) = rotation_matrix * (u, v)
                update boundary for given rotation matrix

In the end you can select the rotation matrix that yields the smallest bounding box.
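Here is a runnable sketch of that search, assuming edged is the binary edge image from above; the angle range and step size are arbitrary, illustrative choices:

import numpy as np

# Rotate all edge coordinates for each candidate angle and keep the
# angle whose axis-aligned bounding box has the smallest area
ys, xs = np.nonzero(edged)
pts = np.stack([xs, ys]).astype(np.float32)    # 2 x N matrix of edge points

best_angle, best_area = 0.0, np.inf
for angle in np.arange(-45.0, 45.0, 0.5):
    theta = np.deg2rad(angle)
    R = np.array([[np.cos(theta), -np.sin(theta)],
                  [np.sin(theta),  np.cos(theta)]], dtype=np.float32)
    rotated = R @ pts
    w = rotated[0].max() - rotated[0].min()
    h = rotated[1].max() - rotated[1].min()
    if w * h < best_area:
        best_area, best_angle = w * h, angle

Note that cv2.minAreaRect() computes the minimum-area rotated bounding rectangle of a point set directly, which amounts to the same idea without the explicit loop.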
If the above algorithm is too slow for your use case, you can also try extracting the major axes using the OpenCV HoughLinesP function. This will of course not work for all types of images, but may be good enough for the case of ID cards.
Finally, to apply the rotation correction refer to this tutorial.
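For completeness, here is a hedged sketch of the HoughLinesP variant: estimate the dominant line angle, then rotate the image by it (assuming edged and image from before; the thresholds are illustrative, not tuned):

import cv2
import numpy as np

# Estimate the skew angle from the detected line segments
# (assumes HoughLinesP finds at least one line)
lines = cv2.HoughLinesP(edged, 1, np.pi / 180, threshold=80,
                        minLineLength=100, maxLineGap=10)
angles = [np.degrees(np.arctan2(y2 - y1, x2 - x1))
          for x1, y1, x2, y2 in lines[:, 0]]
skew = np.median(angles)  # dominant tilt in degrees

# Rotate the image about its center to correct the tilt
(h, w) = image.shape[:2]
M = cv2.getRotationMatrix2D((w / 2, h / 2), skew, 1.0)
deskewed = cv2.warpAffine(image, M, (w, h))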

Related

How to approximate the convex hull of a curved letter to a quadrilateral?

I have an input image such as:
Which will turn out after being preprocessed as:
Now, this image is a little bit inclined and what I want to do is to fix it with cv2.warpPerspective().
So, in order to do that, I generate a quadrilateral approximating (with cv2.approxPolyDP()) the convex hull. This is the actual code that I use:
hull = cv2.convexHull(contour)
# Reduce to quadrilateral
epsilon = 0.1 * cv2.arcLength(hull, True)
approximated = cv2.approxPolyDP(hull, epsilon, True)
The convex hull of that preprocessed image looks like this:
But, when I approximate it, I get the following results:
What I want to achieve is something like this:
Here's an example on how it works with a letter without curves:
Which will get transformed into:
In short, the issue lies in approximating the convex hull to a quadrilateral.
How can I achieve such a quadrilateral on a letter with curves?
Here's the full code that I have written to do that:
def _perspective_fix_convex_hull(self):
    # RETR_TREE retrieves all of the contours and reconstructs a full hierarchy of nested contours,
    # used for maximum accuracy (my deduction)
    contours, _ = cv2.findContours(self.preprocessed_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    # Draw all contours to a new image
    self.image_contours = cv2.cvtColor(self.preprocessed_image, cv2.COLOR_GRAY2BGR)
    cv2.drawContours(self.image_contours, contours, -1, (0, 255, 0), 2)

    self.convex_hulls_contours = []
    self.approximated_contours = []
    for contour in contours:
        # Get contour area
        area = cv2.contourArea(contour)
        if area < 200:
            # Too small for a letter
            continue

        image = cv2.cvtColor(self.preprocessed_image, cv2.COLOR_GRAY2BGR)
        hull = cv2.convexHull(contour)
        cv2.drawContours(image, [hull], -1, (0, 255, 0), 2)
        self.convex_hulls_contours.append(image)

        # Reduce to quadrilateral
        epsilon = 0.1 * cv2.arcLength(hull, True)
        approximated = cv2.approxPolyDP(hull, epsilon, True)

        # Draw approximated contour
        image = cv2.cvtColor(self.preprocessed_image, cv2.COLOR_GRAY2BGR)
        cv2.drawContours(image, [approximated], -1, (255, 0, 0), 1)
        self.approximated_contours.append(image)

        if len(approximated) == 4:
            image = cv2.cvtColor(self.preprocessed_image, cv2.COLOR_GRAY2BGR)
            cv2.drawContours(image, [approximated], -1, (0, 0, 255), 2)
            self.final_contour = image
            letter_hull = [a[0] for a in approximated]  # Remove extra dimension [[x, y]] -> [x, y]
            break

    # Create placeholder for rectangle points
    rectangle = np.zeros((4, 2), dtype="float32")

    # Top-left point has smallest sum
    # Bottom-right point has largest sum
    # sum = x + y
    s = np.sum(letter_hull, axis=1)
    rectangle[0] = letter_hull[np.argmin(s)]
    rectangle[2] = letter_hull[np.argmax(s)]

    # Top-right point has smallest difference
    # Bottom-left point has largest difference
    # difference = x - y
    d = np.diff(letter_hull, axis=1)
    rectangle[1] = letter_hull[np.argmin(d)]
    rectangle[3] = letter_hull[np.argmax(d)]

    # Compute width and height of new image based on points
    (top_left, top_right, bottom_right, bottom_left) = rectangle

    # Width may be either top-right to top-left or bottom-right to bottom-left
    # TL;DR: Pythagorean theorem
    width_top = np.sqrt(((top_right[0] - top_left[0]) ** 2) + ((top_right[1] - top_left[1]) ** 2))
    width_bottom = np.sqrt(((bottom_right[0] - bottom_left[0]) ** 2) + ((bottom_right[1] - bottom_left[1]) ** 2))
    width = max(int(width_top), int(width_bottom))

    # Height may be either top-right to bottom-right or top-left to bottom-left
    # TL;DR: Pythagorean theorem
    height_right = np.sqrt(((top_right[0] - bottom_right[0]) ** 2) + ((top_right[1] - bottom_right[1]) ** 2))
    height_left = np.sqrt(((top_left[0] - bottom_left[0]) ** 2) + ((top_left[1] - bottom_left[1]) ** 2))
    height = max(int(height_right), int(height_left))

    # Create destination points
    destination = np.array([
        [0, 0],
        [width - 1, 0],
        [width - 1, height - 1],
        [0, height - 1]
    ], dtype="float32")

    # Compute perspective transform matrix
    matrix = cv2.getPerspectiveTransform(rectangle, destination)
    warped = cv2.warpPerspective(self.preprocessed_image, matrix, (width, height))

    # Add padding
    self.warped_image = cv2.copyMakeBorder(warped, 10, 10, 10, 10, cv2.BORDER_CONSTANT, value=(0, 0, 0))

How to segment the text part of a document image when the background is quite similar to the paper?

I am working on an OCR project and the document image needs an unwarping operation. But I find that the color of the background is very similar to the paper.
To deal with this issue, my solution is to find the text region and fill it as a connected polygon. The code is as follows.
import cv2
import imutils
import numpy as np

# Function to order points into a proper rectangle
def order_points(pts):
    # Initialize a list of coordinates that will be ordered
    # such that the first entry in the list is the top-left,
    # the second entry is the top-right, the third is the
    # bottom-right, and the fourth is the bottom-left
    rect = np.zeros((4, 2), dtype="float32")

    # The top-left point will have the smallest sum, whereas
    # the bottom-right point will have the largest sum
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    # Now, compute the difference between the points; the
    # top-right point will have the smallest difference,
    # whereas the bottom-left will have the largest difference
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]

    # Return the ordered coordinates
    return rect

# Function to transform image to four points
def four_point_transform(image, pts):
    # Obtain a consistent order of the points and unpack them individually
    rect = order_points(pts)
    # # multiply the rectangle by the original ratio
    # rect *= ratio
    (tl, tr, br, bl) = rect

    # Compute the width of the new image, which will be the
    # maximum distance between bottom-right and bottom-left
    # x-coordinates or the top-right and top-left x-coordinates
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))

    # Compute the height of the new image, which will be the
    # maximum distance between the top-right and bottom-right
    # y-coordinates or the top-left and bottom-left y-coordinates
    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(heightA), int(heightB))

    # Now that we have the dimensions of the new image, construct
    # the set of destination points to obtain a "birds eye view"
    # (i.e. top-down view) of the image, again specifying points
    # in the top-left, top-right, bottom-right, and bottom-left order
    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype="float32")

    # Compute the perspective transform matrix and then apply it
    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))

    # Return the warped image
    return warped

# Function to find the two largest contours, which may be the
# full image and our rectangle-edged object
def findLargestCountours(cntList, cntWidths):
    newCntList = []
    newCntWidths = []

    # Finding the 1st largest rectangle
    first_largest_cnt_pos = cntWidths.index(max(cntWidths))

    # Adding it to the new lists
    newCntList.append(cntList[first_largest_cnt_pos])
    newCntWidths.append(cntWidths[first_largest_cnt_pos])

    # Removing it from the old lists
    cntList.pop(first_largest_cnt_pos)
    cntWidths.pop(first_largest_cnt_pos)

    # Finding the second largest rectangle
    second_largest_cnt_pos = cntWidths.index(max(cntWidths))

    # Adding it to the new lists
    newCntList.append(cntList[second_largest_cnt_pos])
    newCntWidths.append(cntWidths[second_largest_cnt_pos])

    # Removing it from the old lists
    cntList.pop(second_largest_cnt_pos)
    cntWidths.pop(second_largest_cnt_pos)

    print('Old Screen Dimensions filtered', cntWidths)
    print('Screen Dimensions filtered', newCntWidths)
    return newCntList, newCntWidths

# Driver function which identifies 4 corners and does the four point transformation
def convert_object(image, screen_size=None, isDebug=False):
    image = imutils.resize(image, height=600)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.bilateralFilter(gray, 11, 17, 17)  # 11 //TODO 11 FRO OFFLINE MAY NEED TO TUNE TO 5 FOR ONLINE
    gray = cv2.medianBlur(gray, 5)
    kernel = np.ones((5, 5), np.uint8)
    grad = cv2.morphologyEx(gray, cv2.MORPH_GRADIENT, kernel)
    if isDebug:
        cv2.imshow('grad', grad)

    _, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    if isDebug:
        cv2.imshow('bw', bw)

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 50))
    connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)
    kernel = np.ones((5, 5), np.uint8)
    connected = cv2.dilate(connected, kernel, iterations=3)

    # Using RETR_EXTERNAL instead of RETR_CCOMP
    countours, hierarchy = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if isDebug:
        print('length of countours ', len(countours))
    imageCopy = image.copy()
    if isDebug:
        cv2.imshow('drawn countours', cv2.drawContours(imageCopy, countours, -1, (0, 255, 0), 1))

    cnts = sorted(countours, key=cv2.contourArea, reverse=True)
    hull = np.zeros(image.shape, dtype=np.uint8)
    for cnt in cnts:
        approx = cv2.convexHull(cnt)
        cv2.drawContours(hull, [approx], -1, (255, 0, 0), thickness=cv2.FILLED)
    if isDebug:
        cv2.imshow("hull", hull)

    grayhull = cv2.cvtColor(hull, cv2.COLOR_BGR2GRAY)
    countours, hierarchy = cv2.findContours(grayhull.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if isDebug:
        print('length of countours ', len(countours))
    imageCopy = image.copy()
    if isDebug:
        cv2.imshow('drawn countours', cv2.drawContours(imageCopy, countours, -1, (0, 255, 0), 3))

    mask = np.zeros(bw.shape, dtype=np.uint8)
    rgb = image.copy()
    for idx in range(len(countours)):
        x, y, w, h = cv2.boundingRect(countours[idx])
        mask[y:y + h, x:x + w] = 0
        cv2.drawContours(mask, countours, idx, (255, 255, 255), -1)
        r = float(cv2.countNonZero(mask[y:y + h, x:x + w])) / (w * h)
        if r > 0.45 and w > 8 and h > 8:
            cv2.rectangle(rgb, (x, y), (x + w - 1, y + h - 1), (0, 255, 0), 2)
    if isDebug:
        cv2.imshow("rgb", rgb)

    # Approximate the contours
    cnts = sorted(countours, key=cv2.contourArea, reverse=True)
    screenCntList = []
    scrWidths = []
    for cnt in cnts:
        peri = cv2.arcLength(cnt, True)  # cnts[1] always rectangle O.o
        approx = cv2.approxPolyDP(cnt, 0.05 * peri, True)
        screenCnt = approx
        print(len(approx))
        if len(screenCnt) == 4:
            (X, Y, W, H) = cv2.boundingRect(cnt)
            # print('X Y W H', (X, Y, W, H))
            screenCntList.append(screenCnt)
            scrWidths.append(W)

    print('Screens found :', len(screenCntList))
    print('Screen Dimensions', scrWidths)

    if len(screenCntList) > 1:
        screenCntList, scrWidths = findLargestCountours(screenCntList, scrWidths)
    print(screenCntList)

    if isDebug:
        cv2.imshow(" Screen", cv2.drawContours(image.copy(), [screenCntList[0]], -1, (0, 255, 0), 3))

    pts = screenCntList[0].reshape(4, 2)
    print('Found bill rectangle at ', pts)
    rect = order_points(pts)
    print(rect)

    # Apply the four point transform to obtain a "birds eye view" of the image
    warp = four_point_transform(image, pts)

    # Show the original and warped images
    if isDebug:
        cv2.imshow("Original", image)
        cv2.imshow("warp", warp)
        cv2.waitKey(0)
    cv2.imwrite("result.png", warp.astype(np.uint8))

convert_object(cv2.imread('abc.jpg'), isDebug=True)
But because of the paragraph indentation, approxPolyDP cannot find the boundary well.
Is there any effective solution for this case? Thanks in advance.

Straighten largest line in image

I have a large number of images of food items on a tray; the tray is the largest thing in the picture and it always contains the longest straight line, e.g.:
I had to Photoshop out the items on the tray.
I want a script that can align and straighten the tray, like so:
Is this possible using Python?
Here's an approach
Convert image to grayscale
Otsu's threshold to get a binary image
Find contours and filter using approximate contour
Perspective transform for top-down view
Rotate image to get correct orientation
After converting to grayscale, we apply Otsu's threshold to obtain a binary image
Now we find contours on this image and filter using cv2.arcLength() and cv2.approxPolyDP(). The idea is that if the contour has approximately 4 corners, then it must be our desired object. An additional filtering step could be to use cv2.contourArea() to ensure that only the largest contour is used, as sketched below. Here's the detected contour
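For instance, that optional filtering step might look like this (a sketch; it is not part of the full code below):

cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:1]  # keep only the largest contour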
From here we perform a perspective transform to get a top-down view
Finally we rotate the image depending on the desired orientation. Here's the result
import cv2
import numpy as np
import imutils

def perspective_transform(image, corners):
    def order_corner_points(corners):
        # Separate corners into individual points
        # Index 0 - top-right
        #       1 - top-left
        #       2 - bottom-left
        #       3 - bottom-right
        corners = [(corner[0][0], corner[0][1]) for corner in corners]
        top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
        return (top_l, top_r, bottom_r, bottom_l)

    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct new points to obtain top-down view of image in
    # top_l, top_r, bottom_r, bottom_l order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                           [0, height - 1]], dtype="float32")

    # Convert to NumPy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))

image = cv2.imread('1.jpg')
original = image.copy()

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)

    if len(approx) == 4:
        cv2.drawContours(image, [c], 0, (36, 255, 12), 3)
        transformed = perspective_transform(original, approx)

rotated = imutils.rotate_bound(transformed, angle=-90)

cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.imshow('transformed', transformed)
cv2.imshow('rotated', rotated)
cv2.waitKey()

How to detect Sudoku grid board in OpenCV

I'm working on a personal project using OpenCV in Python. I want to detect a Sudoku grid.
The original image is:
So far I have created this:
Then I tried to select a big blob. The result should be similar to this:
Instead, I got a black image as the result:
The code is:
import cv2
import numpy as np

def find_biggest_blob(outerBox):
    max = -1
    maxPt = (0, 0)
    h, w = outerBox.shape[:2]
    mask = np.zeros((h + 2, w + 2), np.uint8)

    for y in range(0, h):
        for x in range(0, w):
            if outerBox[y, x] >= 128:
                area = cv2.floodFill(outerBox, mask, (x, y), (0, 0, 64))

    # cv2.floodFill(outerBox, mask, maxPt, (255, 255, 255))

    image_path = 'Images/Results/sudoku-find-biggest-blob.jpg'
    cv2.imwrite(image_path, outerBox)
    cv2.imshow(image_path, outerBox)

def main():
    image = cv2.imread('Images/Test/sudoku-grid-detection.jpg', 0)
    find_biggest_blob(image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

if __name__ == '__main__':
    main()
The code in repl is: https://repl.it/#gmunumel/SudokuSolver
Any idea?
Here's an approach:
Convert image to grayscale and median blur to smooth image
Adaptive threshold to obtain binary image
Find contours and filter for largest contour
Perform perspective transform to obtain top-down view
After converting to grayscale and median blurring, we apply an adaptive threshold to obtain a binary image
Next we find contours and filter using contour area. Here's the detected board
Now to get a top-down view of the image, we perform a perspective transform. Here's the result
import cv2
import numpy as np

def perspective_transform(image, corners):
    def order_corner_points(corners):
        # Separate corners into individual points
        # Index 0 - top-right
        #       1 - top-left
        #       2 - bottom-left
        #       3 - bottom-right
        corners = [(corner[0][0], corner[0][1]) for corner in corners]
        top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
        return (top_l, top_r, bottom_r, bottom_l)

    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct new points to obtain top-down view of image in
    # top_l, top_r, bottom_r, bottom_l order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                           [0, height - 1]], dtype="float32")

    # Convert to NumPy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))

image = cv2.imread('1.jpg')
original = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.medianBlur(gray, 3)
thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 3)

cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)

for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)

    transformed = perspective_transform(original, approx)
    break

cv2.imshow('transformed', transformed)
cv2.imwrite('board.png', transformed)
cv2.waitKey()
Here is my solution that will generalize to any image whether it is warped or not.
Convert the image to grayscale
Apply adaptive thresholding to convert the image to binary
(Adaptive thresholding works better than normal thresholding because the original image can have different lighting at different areas)
Identify the Corners of the large square
Perspective transform of the image to the final square image
Depending on the amount of skew in the original image, the corners identified may be out of order, so we need to arrange them in the correct order. The method used here is to identify the centroid of the large square and determine the order of the corners relative to it.
Here is the code:
import cv2
import numpy as np

# Helper functions for getting a square image

def euclidian_distance(point1, point2):
    # Calculates the euclidian distance between point1 and point2;
    # used to calculate the length of the four sides of the square
    distance = np.sqrt((point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2)
    return distance

def order_corner_points(corners):
    # The points obtained from contours may not be in order because of the skewness of the image, or
    # because of the camera angle. This function returns a list of corners in the right order
    sort_corners = [(corner[0][0], corner[0][1]) for corner in corners]
    sort_corners = [list(ele) for ele in sort_corners]
    x, y = [], []

    for i in range(len(sort_corners[:])):
        x.append(sort_corners[i][0])
        y.append(sort_corners[i][1])

    centroid = [sum(x) / len(x), sum(y) / len(y)]

    for _, item in enumerate(sort_corners):
        if item[0] < centroid[0]:
            if item[1] < centroid[1]:
                top_left = item
            else:
                bottom_left = item
        elif item[0] > centroid[0]:
            if item[1] < centroid[1]:
                top_right = item
            else:
                bottom_right = item

    ordered_corners = [top_left, top_right, bottom_right, bottom_left]

    return np.array(ordered_corners, dtype="float32")

def image_preprocessing(image, corners):
    # This function undertakes all the preprocessing of the image and returns the warped square
    ordered_corners = order_corner_points(corners)
    print("ordered corners: ", ordered_corners)
    top_left, top_right, bottom_right, bottom_left = ordered_corners

    # Determine the widths and heights (top/bottom and left/right) of the image
    # and find the max of them for the transform
    width1 = euclidian_distance(bottom_right, bottom_left)
    width2 = euclidian_distance(top_right, top_left)

    height1 = euclidian_distance(top_right, bottom_right)
    height2 = euclidian_distance(top_left, bottom_left)

    width = max(int(width1), int(width2))
    height = max(int(height1), int(height2))

    # To find the matrix for the warp perspective function we need dimensions and matrix parameters
    dimensions = np.array([[0, 0], [width, 0], [width, width],
                           [0, width]], dtype="float32")

    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    transformed_image = cv2.warpPerspective(image, matrix, (width, width))

    # Now, chances are, you may want to return your image in a specific size. If not, you may ignore the following line
    transformed_image = cv2.resize(transformed_image, (252, 252), interpolation=cv2.INTER_AREA)

    return transformed_image

# Main function
def get_square_box_from_image(image):
    # This function returns the top-down view of the puzzle in grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.medianBlur(gray, 3)
    adaptive_threshold = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 3)
    corners = cv2.findContours(adaptive_threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    corners = corners[0] if len(corners) == 2 else corners[1]
    corners = sorted(corners, key=cv2.contourArea, reverse=True)
    for corner in corners:
        length = cv2.arcLength(corner, True)
        approx = cv2.approxPolyDP(corner, 0.015 * length, True)
        print(approx)
        puzzle_image = image_preprocessing(image, approx)
        break
    return puzzle_image

# Call the get_square_box_from_image method on any sudoku image to get the top view of the puzzle
original = cv2.imread("large_puzzle.jpg")
sudoku = get_square_box_from_image(original)
Here are the results from the given image and a custom example

How to compute coordinates of rectange and then do perspective transformation using opencv and python

Original pic
after canny pic
pic after perspective tranform
Hi,
I am doing a small OCR POC. See the original pic; I only care about the content inside the nameplate. Before recognising characters, I need to do a perspective transformation on these pictures to increase correctness. See the second picture; I have already run Canny to get the contours of the rectangle.
I want to get the coordinates of the 4 corners of the rectangle (labeled in red), so that I can derive the matrix and do the perspective transformation. The last picture is the output I want.
I am new to OpenCV... can anybody give me some idea of how to get the coordinates of the 4 corners? I have googled and learned some terms, such as the Hough transformation.
Is detecting lines and then computing the positions of the corners a good approach?
It would be great if somebody could show me some Python code to do this. Thanks in advance.
Below is my current code:
# coding:utf8
import cv2
import numpy as np
import sys

if __name__ == '__main__':
    imagePath = sys.argv[1]
    img = cv2.imread(imagePath)
    img = cv2.GaussianBlur(img, (3, 3), 0)
    canny = cv2.Canny(img, 50, 150)
    # element2 = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 4))
    # dilation = cv2.dilate(canny, element2, iterations=1)
    cv2.imwrite("canny.jpg", canny)  # writes the Canny output; dilation above is commented out
    cv2.waitKey(0)
    cv2.destroyAllWindows()
I adapted the code from pyimagesearch to work with Python 3.5 and OpenCV 3.3:
import os
import cv2
import imutils
import numpy as np
import pytesseract
from PIL import Image

def order_points(pts):
    # Initialize a list of coordinates that will be ordered
    # such that the first entry in the list is the top-left,
    # the second entry is the top-right, the third is the
    # bottom-right, and the fourth is the bottom-left
    rect = np.zeros((4, 2), dtype="float32")

    # The top-left point will have the smallest sum, whereas
    # the bottom-right point will have the largest sum
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    # Now, compute the difference between the points; the
    # top-right point will have the smallest difference,
    # whereas the bottom-left will have the largest difference
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]

    # Return the ordered coordinates
    return rect

def four_point_transform(image, pts):
    # Obtain a consistent order of the points and unpack them individually
    rect = order_points(pts)
    (tl, tr, br, bl) = rect

    # Compute the width of the new image, which will be the
    # maximum distance between bottom-right and bottom-left
    # x-coordinates or the top-right and top-left x-coordinates
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))

    # Compute the height of the new image, which will be the
    # maximum distance between the top-right and bottom-right
    # y-coordinates or the top-left and bottom-left y-coordinates
    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(heightA), int(heightB))

    # Now that we have the dimensions of the new image, construct
    # the set of destination points to obtain a "birds eye view"
    # (i.e. top-down view) of the image, again specifying points
    # in the top-left, top-right, bottom-right, and bottom-left order
    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype="float32")

    # Compute the perspective transform matrix and then apply it
    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))

    # Return the warped image
    return warped

def image_process(image_path):
    # Open image
    image = cv2.imread(image_path)
    ratio = image.shape[0] / 500.0
    orig = image.copy()
    image = imutils.resize(image, height=500)

    # Canny edge detect
    edged = cv2.Canny(image, 75, 200)

    # Find the contours
    img, cnts, hierarchy = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    # Find the contours that are the largest (not sure if this applies to this project)
    # and have four points (i.e. a rectangle)
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]
    for c in cnts:
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)

        if len(approx) == 4:
            screenCnt = approx
            break

    warped = four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)
    return warped

def main():
    image_path = None  # You're going to need to change this
    image = image_process(image_path)

    cv2.imshow('image', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

if __name__ == '__main__':
    main()
