Straighten largest line in image - python

I have a large number of images of food items on a tray; the tray is the largest thing in the picture, and it always contains the largest line, i.e.:
I had to Photoshop the items out of the tray.
I want a script that can align and straighten the tray, like so:
Is this possible using Python?

Here's an approach:
Convert image to grayscale
Otsu's threshold to get a binary image
Find contours and filter using contour approximation
Perspective transform for top-down view
Rotate image to get correct orientation
After converting to grayscale, we apply Otsu's threshold to obtain a binary image.
Now we find contours on this image and filter using cv2.arcLength() and cv2.approxPolyDP(). The idea is that if a contour has approximately 4 corners, it must be our desired object. An additional filtering step could be to use cv2.contourArea() to ensure that only the largest contour is used (see the sketch after the code below). Here's the detected contour:
From here we perform a perspective transform to get a top-down view.
Finally, we rotate the image depending on the desired orientation. Here's the result:
import cv2
import numpy as np
import imutils

def perspective_transform(image, corners):
    def order_corner_points(corners):
        # Separate corners into individual points
        # Index 0 - top-right
        #       1 - top-left
        #       2 - bottom-left
        #       3 - bottom-right
        corners = [(corner[0][0], corner[0][1]) for corner in corners]
        top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
        return (top_l, top_r, bottom_r, bottom_l)

    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct destination points to obtain a top-down view, in
    # top_l, top_r, bottom_r, bottom_l order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                           [0, height - 1]], dtype="float32")

    # Convert to Numpy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))

image = cv2.imread('1.jpg')
original = image.copy()

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)
    if len(approx) == 4:
        cv2.drawContours(image, [c], 0, (36,255,12), 3)
        transformed = perspective_transform(original, approx)
        rotated = imutils.rotate_bound(transformed, angle=-90)

cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.imshow('transformed', transformed)
cv2.imshow('rotated', rotated)
cv2.waitKey()
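As a sketch of the additional cv2.contourArea() filtering mentioned above (it is not part of the code as posted), you could select the single largest contour before approximating it:

# Keep only the largest contour by area, then approximate it as before
largest = max(cnts, key=cv2.contourArea)
peri = cv2.arcLength(largest, True)
approx = cv2.approxPolyDP(largest, 0.015 * peri, True)
if len(approx) == 4:
    transformed = perspective_transform(original, approx)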

Related

How to fix perspective of an image using OpenCV

I have an input image which contains a letter inside a box; what I want to do is fix the perspective of the image so I can easily use it with my letter recognition model.
This is the code that I use for pre-processing:
def preprocess(image):
    # Convert to grayscale
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # NOISE REMOVAL - START

    # Apply CLAHE (makes the image lightness more uniform)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    image = clahe.apply(image)
    cv2.imwrite("preprocess_stages/0_clahe.png", image)

    # Apply NlMeansDenoising (removes noise but loses some details)
    image = cv2.fastNlMeansDenoising(image, None, 10, 7, 21)
    cv2.imwrite("preprocess_stages/1_nl_means_denoising.png", image)

    # Apply GaussianBlur with a medium kernel (removes noise but loses some edges)
    image = cv2.GaussianBlur(image, (5, 5), 0)
    cv2.imwrite("preprocess_stages/2_gaussian_blur.png", image)

    # NOISE REMOVAL - END

    # Apply OTSU thresholding (we need the letter to be white)
    _, image = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    cv2.imwrite("preprocess_stages/3_otsu.png", image)

    # Morphological erosion (removes noise and small objects)
    kernel = np.ones((10, 10), np.uint8)
    image = cv2.erode(image, kernel, iterations=1)
    cv2.imwrite("preprocess_stages/4_morphological_erosion.png", image)

    return image
I have seen many examples on the internet, and most of them require the 4 corner points to be provided as input too... there are few examples of how to calculate them, and most of those don't work for my case. Nonetheless, I've tried writing something:
UPDATE: I successfully wrote working code, though I changed the morphological opening to a simple erosion since it works better for me (a sketch of the opening variant follows the function below).
def fix_perspective_convex_hull(preprocessed):
    # RETR_TREE retrieves all of the contours and reconstructs a full hierarchy of nested contours
    # used for maximum accuracy (my deduction)
    contours, _ = cv2.findContours(preprocessed, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    # Draw all contours to a new image
    image = cv2.cvtColor(preprocessed, cv2.COLOR_GRAY2BGR)
    cv2.drawContours(image, contours, -1, (0, 255, 0), 2)
    cv2.imwrite("perspective_stages/0_contours.png", image)

    for i, contour in enumerate(contours):
        hull = cv2.convexHull(contour)

        # Reduce to quadrilateral
        epsilon = 0.1 * cv2.arcLength(hull, True)
        approximated = cv2.approxPolyDP(hull, epsilon, True)

        # Draw approximated contour
        image = cv2.cvtColor(preprocessed, cv2.COLOR_GRAY2BGR)
        cv2.drawContours(image, [approximated], -1, (255, 0, 0), 1)
        cv2.imwrite("perspective_stages/1_approximated_contour_{}.png".format(i), image)

        if len(approximated) == 4:
            image = cv2.cvtColor(preprocessed, cv2.COLOR_GRAY2BGR)
            cv2.drawContours(image, [approximated], -1, (0, 0, 255), 2)
            cv2.imwrite("perspective_stages/2_final_contour.png", image)
            letter_hull = [a[0] for a in approximated]  # Remove extra dimension [[x, y]] -> [x, y]
            break

    # Create placeholder for rectangle points
    rectangle = np.zeros((4, 2), dtype="float32")

    # Top-left point has smallest sum
    # Bottom-right point has largest sum
    # sum = x + y
    s = np.sum(letter_hull, axis=1)
    rectangle[0] = letter_hull[np.argmin(s)]
    rectangle[2] = letter_hull[np.argmax(s)]

    # Top-right point has smallest difference
    # Bottom-left point has largest difference
    # difference = y - x (np.diff subtracts x from y)
    d = np.diff(letter_hull, axis=1)
    rectangle[1] = letter_hull[np.argmin(d)]
    rectangle[3] = letter_hull[np.argmax(d)]

    # Compute width and height of new image based on points
    (top_left, top_right, bottom_right, bottom_left) = rectangle

    # Width may be either top-right to top-left or bottom-right to bottom-left
    # TL;DR: Pythagorean theorem
    width_top = np.sqrt(((top_right[0] - top_left[0]) ** 2) + ((top_right[1] - top_left[1]) ** 2))
    width_bottom = np.sqrt(((bottom_right[0] - bottom_left[0]) ** 2) + ((bottom_right[1] - bottom_left[1]) ** 2))
    width = max(int(width_top), int(width_bottom))

    # Height may be either top-right to bottom-right or top-left to bottom-left
    # TL;DR: Pythagorean theorem
    height_right = np.sqrt(((top_right[0] - bottom_right[0]) ** 2) + ((top_right[1] - bottom_right[1]) ** 2))
    height_left = np.sqrt(((top_left[0] - bottom_left[0]) ** 2) + ((top_left[1] - bottom_left[1]) ** 2))
    height = max(int(height_right), int(height_left))

    # Create destination points
    destination = np.array([
        [0, 0],
        [width - 1, 0],
        [width - 1, height - 1],
        [0, height - 1]
    ], dtype="float32")

    # Compute perspective transform matrix
    matrix = cv2.getPerspectiveTransform(rectangle, destination)
    warped = cv2.warpPerspective(preprocessed, matrix, (width, height))

    # Add padding
    warped = cv2.copyMakeBorder(warped, 10, 10, 10, 10, cv2.BORDER_CONSTANT, value=(0, 0, 0))

    # Return warped image
    return warped
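For reference, the morphological opening that the update mentions swapping out would look something like this (a sketch only; the 10x10 kernel mirrors the erosion step above):

# Hypothetical opening variant: erosion followed by dilation in one call
kernel = np.ones((10, 10), np.uint8)
opened = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)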
Results:
Original
Preprocessed
Finding a quadrilateral
Warped
Inverted and resized for the neural network
UPDATE 2: My code only works for letters without curves, such as the H. When I try to run the code on curved letters (such as the S), I get the following results:

Is there a way of loading images better with OpenCV

I want to load images and use a Hough transformation to capture the black bounded area inside the paper, and then perform a few counting operations inside the boxes. Naturally, this requires that I am able to load the images in relatively good quality.
The issue is that when I load an image with OpenCV's cv2.imread(), I get a very watered-down version of the picture which I can't readily process. To make it worse, I can't render the image using cv2.imshow(); my IDE hangs every time I try to view it. So I have to use matplotlib to render and see the image step by step.
I don't know any other packages for image processing (maybe Pillow, but I don't know whether it will do what I need it to do).
My original image is this:
img = cv2.imread("img1-min.jpg")
Because the cv2.imshow() method results in a window that crashes, I resorted to matplotlib:
plt.imshow(img)
plt.title('my picture')
plt.show()
The result is:
After that:
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray, 75, 150)
lines = cv2.HoughLinesP(edges, 1, np.pi/180, 50, 50, 5)

if lines is not None:
    for line in lines:
        x1, y1, x2, y2 = line[0]
        cv2.line(img, (x1, y1), (x2, y2), (0, 255, 0), 5)

plt.imshow(img)
plt.title('my picture')
plt.show()
The output is:
As you can see, it's very messy. My hunch is that this is because of the way the original image is loaded. Is there a way I can improve the loading process so that it is easier to apply the Hough lines?
I believe the image is loading fine with cv2.imread(), but since it is so large, with dimensions of 2976x3838, your IDE has trouble displaying it. I also believe you're applying cv2.HoughLinesP() incorrectly (its fifth positional argument is an output array, so the minLineLength and maxLineGap values are likely not being set as intended). Instead of using cv2.HoughLinesP(), here's an alternative approach to detect the lines.
The idea is to threshold, then find the bounding box of the board to create a mask. From this mask, we perform a perspective transform to obtain a top-down image. This will allow us to detect lines better.
Once we have the detected board, we can extract the ROI
Then we simply detect vertical and horizontal lines
Result
import cv2
import numpy as np

def perspective_transform(image, corners):
    def order_corner_points(corners):
        # Separate corners into individual points
        # Index 0 - top-right
        #       1 - top-left
        #       2 - bottom-left
        #       3 - bottom-right
        corners = [(corner[0][0], corner[0][1]) for corner in corners]
        top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
        return (top_l, top_r, bottom_r, bottom_l)

    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct destination points to obtain a top-down view, in
    # top_l, top_r, bottom_r, bottom_l order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                           [0, height - 1]], dtype="float32")

    # Convert to Numpy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))

image = cv2.imread('1.jpg')
original = image.copy()

blur = cv2.bilateralFilter(image, 9, 75, 75)
gray = cv2.cvtColor(blur, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU + cv2.THRESH_BINARY_INV)[1]

cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

mask = np.zeros(image.shape, dtype=np.uint8)
for c in cnts:
    area = cv2.contourArea(c)
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)
    if area > 150000 and len(approx) == 4:
        cv2.drawContours(image, [c], 0, (36,255,12), 3)
        cv2.drawContours(mask, [c], 0, (255,255,255), -1)
        transformed = perspective_transform(original, approx)

mask = cv2.bitwise_and(mask, original)

# Detect horizontal lines
gray = cv2.cvtColor(transformed, cv2.COLOR_BGR2GRAY)
board_thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU + cv2.THRESH_BINARY_INV)[1]
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (55,1))
detect_horizontal = cv2.morphologyEx(board_thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
cnts = cv2.findContours(detect_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(transformed, [c], -1, (36,255,12), 9)

# Detect vertical lines
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,55))
detect_vertical = cv2.morphologyEx(board_thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
cnts = cv2.findContours(detect_vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(transformed, [c], -1, (36,255,12), 9)

cv2.imwrite('thresh.png', thresh)
cv2.imwrite('image.png', image)
cv2.imwrite('mask.png', mask)
cv2.imwrite('transformed.png', transformed)
cv2.waitKey()
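As an aside, regarding the cv2.imshow() hang: a 2976x3838 image is simply large to render, so one common workaround (a sketch, separate from the pipeline above; the scale factor is an arbitrary choice) is to downscale a copy purely for display:

# Quarter-size copy for viewing only; processing still uses the full image
preview = cv2.resize(image, None, fx=0.25, fy=0.25, interpolation=cv2.INTER_AREA)
cv2.imshow('preview', preview)
cv2.waitKey()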

How to detect Sudoku grid board in OpenCV

I'm working on a personal project using OpenCV in Python. I want to detect a Sudoku grid.
The original image is:
So far I have created this:
Then I tried to select a big blob. The result should be similar to this:
Instead, I got a black image as the result:
The code is:
import cv2
import numpy as np
def find_biggest_blob(outerBox):
    max = -1
    maxPt = (0, 0)
    h, w = outerBox.shape[:2]
    mask = np.zeros((h + 2, w + 2), np.uint8)
    for y in range(0, h):
        for x in range(0, w):
            if outerBox[y, x] >= 128:
                area = cv2.floodFill(outerBox, mask, (x, y), (0, 0, 64))
    #cv2.floodFill(outerBox, mask, maxPt, (255, 255, 255))
    image_path = 'Images/Results/sudoku-find-biggest-blob.jpg'
    cv2.imwrite(image_path, outerBox)
    cv2.imshow(image_path, outerBox)

def main():
    image = cv2.imread('Images/Test/sudoku-grid-detection.jpg', 0)
    find_biggest_blob(image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

if __name__ == '__main__':
    main()
The code in repl is: https://repl.it/#gmunumel/SudokuSolver
Any idea?
Here's an approach:
Convert image to grayscale and median blur to smooth image
Adaptive threshold to obtain binary image
Find contours and filter for largest contour
Perform perspective transform to obtain top-down view
After converting to grayscale and median blurring, we apply adaptive thresholding to obtain a binary image.
Next we find contours and filter using contour area. Here's the detected board:
Now to get a top-down view of the image, we perform a perspective transform. Here's the result:
import cv2
import numpy as np

def perspective_transform(image, corners):
    def order_corner_points(corners):
        # Separate corners into individual points
        # Index 0 - top-right
        #       1 - top-left
        #       2 - bottom-left
        #       3 - bottom-right
        corners = [(corner[0][0], corner[0][1]) for corner in corners]
        top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
        return (top_l, top_r, bottom_r, bottom_l)

    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct destination points to obtain a top-down view, in
    # top_l, top_r, bottom_r, bottom_l order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                           [0, height - 1]], dtype="float32")

    # Convert to Numpy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))

image = cv2.imread('1.jpg')
original = image.copy()

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.medianBlur(gray, 3)
thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 3)

cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)

for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)
    transformed = perspective_transform(original, approx)
    break

cv2.imshow('transformed', transformed)
cv2.imwrite('board.png', transformed)
cv2.waitKey()
Here is my solution, which will generalize to any image, whether it is warped or not.
Convert the image to grayscale
Apply adaptive thresholding to convert the image to binary
(Adaptive thresholding works better than normal thresholding because the original image can have different lighting in different areas)
Identify the corners of the large square
Perspective transform the image to the final square image
Depending on the amount of skew in the original image, the identified corners may be out of order, so we need to arrange them in the correct order. The method used here is to identify the centroid of the large square and work out the order of the corners from there.
Here is the code:
import cv2
import numpy as np

# Helper functions for getting square image
def euclidian_distance(point1, point2):
    # Calculates the euclidian distance between point1 and point2;
    # used to calculate the length of the four sides of the square
    distance = np.sqrt((point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2)
    return distance

def order_corner_points(corners):
    # The points obtained from contours may not be in order because of the skewness of the image, or
    # because of the camera angle. This function returns a list of corners in the right order
    sort_corners = [(corner[0][0], corner[0][1]) for corner in corners]
    sort_corners = [list(ele) for ele in sort_corners]

    x, y = [], []
    for i in range(len(sort_corners)):
        x.append(sort_corners[i][0])
        y.append(sort_corners[i][1])

    centroid = [sum(x) / len(x), sum(y) / len(y)]

    for _, item in enumerate(sort_corners):
        if item[0] < centroid[0]:
            if item[1] < centroid[1]:
                top_left = item
            else:
                bottom_left = item
        elif item[0] > centroid[0]:
            if item[1] < centroid[1]:
                top_right = item
            else:
                bottom_right = item

    ordered_corners = [top_left, top_right, bottom_right, bottom_left]
    return np.array(ordered_corners, dtype="float32")

def image_preprocessing(image, corners):
    # This function undertakes all the preprocessing of the image and returns the warped square
    ordered_corners = order_corner_points(corners)
    print("ordered corners: ", ordered_corners)
    top_left, top_right, bottom_right, bottom_left = ordered_corners

    # Determine the widths and heights (top and bottom) of the image and find the max of them for transform
    width1 = euclidian_distance(bottom_right, bottom_left)
    width2 = euclidian_distance(top_right, top_left)
    height1 = euclidian_distance(top_right, bottom_right)
    height2 = euclidian_distance(top_left, bottom_left)

    width = max(int(width1), int(width2))
    height = max(int(height1), int(height2))

    # To find the matrix for the warp perspective function we need dimensions and matrix parameters;
    # the destination is a square, so width is used for both sides
    dimensions = np.array([[0, 0], [width, 0], [width, width],
                           [0, width]], dtype="float32")

    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    transformed_image = cv2.warpPerspective(image, matrix, (width, width))

    # Now, chances are, you may want your image in a specific size. If not, you may ignore the following line
    transformed_image = cv2.resize(transformed_image, (252, 252), interpolation=cv2.INTER_AREA)

    return transformed_image

# main function
def get_square_box_from_image(image):
    # This function returns the top-down view of the puzzle in grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.medianBlur(gray, 3)
    adaptive_threshold = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 3)
    corners = cv2.findContours(adaptive_threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    corners = corners[0] if len(corners) == 2 else corners[1]
    corners = sorted(corners, key=cv2.contourArea, reverse=True)
    for corner in corners:
        length = cv2.arcLength(corner, True)
        approx = cv2.approxPolyDP(corner, 0.015 * length, True)
        print(approx)
        puzzle_image = image_preprocessing(image, approx)
        break
    return puzzle_image

# Call the get_square_box_from_image method on any sudoku image to get the top view of the puzzle
original = cv2.imread("large_puzzle.jpg")
sudoku = get_square_box_from_image(original)
Here are the results from the given image and a custom example

How to detect and align tilted images after cropping

I have implemented a cropping algorithm in my solution that works pretty well. The problem is that when the image is tilted, the crop will work, but it will have background space showing, as the images below show.
Cropping flow:
First step:
Second step:
Final result:
I have searched for and tried multiple solutions but could not get a decent result, or perhaps I'm not thinking about it the right way.
The expected result is this:
EDIT [FINAL RESULT]:
import cv2
import numpy as np

def order_corner_points(corners):
    # Separate corners into individual points
    # Index 0 - top-right
    #       1 - top-left
    #       2 - bottom-left
    #       3 - bottom-right
    corners = [(corner[0][0], corner[0][1]) for corner in corners]
    top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
    return (top_l, top_r, bottom_r, bottom_l)

def perspective_transform(image, corners):
    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct destination points to obtain a top-down view, in
    # top_l, top_r, bottom_r, bottom_l order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                           [0, height - 1]], dtype="float32")

    # Convert to Numpy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))

def get_image_width_height(image):
    image_width = image.shape[1]   # current image's width
    image_height = image.shape[0]  # current image's height
    return image_width, image_height

def calculate_scaled_dimension(scale, image):
    image_width, image_height = get_image_width_height(image)
    ratio_of_new_width_to_old = scale / image_width
    dimension = (scale, int(image_height * ratio_of_new_width_to_old))
    return dimension

def scale_image(image, size):
    image_resized_scaled = cv2.resize(
        image,
        calculate_scaled_dimension(size, image),
        interpolation=cv2.INTER_AREA
    )
    return image_resized_scaled

def rotate_image(image, angle):
    # Grab the dimensions of the image and then determine the center
    (h, w) = image.shape[:2]
    (cX, cY) = (w / 2, h / 2)

    # Grab the rotation matrix (applying the negative of the
    # angle to rotate clockwise), then grab the sine and cosine
    # (i.e., the rotation components of the matrix)
    M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])

    # Compute the new bounding dimensions of the image
    nW = int((h * sin) + (w * cos))
    nH = int((h * cos) + (w * sin))

    # Adjust the rotation matrix to take into account translation
    M[0, 2] += (nW / 2) - cX
    M[1, 2] += (nH / 2) - cY

    # Perform the actual rotation and return the image
    return cv2.warpAffine(image, M, (nW, nH))

image = cv2.imread('images/damina_cc_back.jpg')
original_image = image.copy()
image = scale_image(image, 500)

# Convert the image to grayscale, blur it, and find edges in the image
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 17, 17)
edged = cv2.Canny(gray, 30, 200)

cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:10]
screen_cnt = None

# Loop over our contours
for c in cnts:
    # Approximate the contour
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)
    if len(approx) == 4:
        screen_cnt = approx
        transformed = perspective_transform(image, screen_cnt)
        break

# Draw ROI
cv2.drawContours(image, [screen_cnt], -1, (0, 255, 0), 1)

(h, w) = transformed.shape[:2]
if h > w:
    rotated = rotate_image(transformed, 90)
else:
    rotated = transformed

cv2.imshow("image", original_image)
cv2.imshow("ROI", image)
cv2.imshow("transformed", transformed)
cv2.imshow("rotated", rotated)
cv2.waitKey(0)
To align your image after cropping, we can use a perspective transformation. To begin, we separate the four corners of the rectangle into individual points given to us by cv2.approxPolyDP(). We reorder the points into a clockwise orientation (top-left, top-right, bottom-right, bottom-left) using this function:
def order_corner_points(corners):
    # Separate corners into individual points
    # Index 0 - top-right
    #       1 - top-left
    #       2 - bottom-left
    #       3 - bottom-right
    corners = [(corner[0][0], corner[0][1]) for corner in corners]
    top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
    return (top_l, top_r, bottom_r, bottom_l)
This function gives us the bounding box coordinates of the ROI.
Now with the isolated corner points, we can obtain the transformation matrix using cv2.getPerspectiveTransform() and actually obtain the transformed image using cv2.warpPerspective().
def perspective_transform(image, corners):
    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct destination points to obtain a top-down view, in
    # top_l, top_r, bottom_r, bottom_l order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                           [0, height - 1]], dtype="float32")

    # Convert to Numpy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))
Here's the result
We can rotate the image with this function
def rotate_image(image, angle):
    # Grab the dimensions of the image and then determine the center
    (h, w) = image.shape[:2]
    (cX, cY) = (w / 2, h / 2)

    # Grab the rotation matrix (applying the negative of the
    # angle to rotate clockwise), then grab the sine and cosine
    # (i.e., the rotation components of the matrix)
    M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])

    # Compute the new bounding dimensions of the image
    nW = int((h * sin) + (w * cos))
    nH = int((h * cos) + (w * sin))

    # Adjust the rotation matrix to take into account translation
    M[0, 2] += (nW / 2) - cX
    M[1, 2] += (nH / 2) - cY

    # Perform the actual rotation and return the image
    return cv2.warpAffine(image, M, (nW, nH))
The final result after rotating:
Full code
import cv2
import numpy as np

def order_corner_points(corners):
    # Separate corners into individual points
    # Index 0 - top-right
    #       1 - top-left
    #       2 - bottom-left
    #       3 - bottom-right
    corners = [(corner[0][0], corner[0][1]) for corner in corners]
    top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
    return (top_l, top_r, bottom_r, bottom_l)

def perspective_transform(image, corners):
    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct destination points to obtain a top-down view, in
    # top_l, top_r, bottom_r, bottom_l order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                           [0, height - 1]], dtype="float32")

    # Convert to Numpy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))

def rotate_image(image, angle):
    # Grab the dimensions of the image and then determine the center
    (h, w) = image.shape[:2]
    (cX, cY) = (w / 2, h / 2)

    # Grab the rotation matrix (applying the negative of the
    # angle to rotate clockwise), then grab the sine and cosine
    # (i.e., the rotation components of the matrix)
    M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])

    # Compute the new bounding dimensions of the image
    nW = int((h * sin) + (w * cos))
    nH = int((h * cos) + (w * sin))

    # Adjust the rotation matrix to take into account translation
    M[0, 2] += (nW / 2) - cX
    M[1, 2] += (nH / 2) - cY

    # Perform the actual rotation and return the image
    return cv2.warpAffine(image, M, (nW, nH))

image = cv2.imread('1.PNG')
original_image = image.copy()

# Convert the image to grayscale, blur it, and find edges in the image
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 17, 17)
edged = cv2.Canny(gray, 30, 200)

cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:10]
screen_cnt = None

# Loop over our contours
for c in cnts:
    # Approximate the contour
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)
    if len(approx) == 4:
        screen_cnt = approx
        transformed = perspective_transform(original_image, screen_cnt)
        break

# Draw ROI
cv2.drawContours(image, [screen_cnt], -1, (0, 255, 0), 3)

# Rotate image
rotated = rotate_image(transformed, -90)

cv2.imshow("image", original_image)
cv2.imshow("ROI", image)
cv2.imshow("transformed", transformed)
cv2.imshow("rotated", rotated)
cv2.waitKey(0)
I assume you're looking for the minimum and maximum u and v positions where an edge was found (or maybe certain quantiles) to find the cropped rectangle. That is, go over all image pixels that were marked as an edge and update the u/v min/max values.
If computation time is not an issue for you, you can simply keep the algorithm as is and additionally loop over a number of rotations, updating separate boundary values for each. Pseudocode:
for v
    for u
        if (u,v) is edge
            for rotation_matrix
                (ur, vr) = rotation_matrix * (u,v)
                update boundary for given rotation matrix
In the end, you can select the rotation matrix that yields the smallest bounding box.
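Here is a minimal Python sketch of that idea (assuming edges is a binary Canny edge map; the input file name and the candidate angle range are placeholders). It vectorizes the per-pixel loop with NumPy, which is equivalent but much faster:

import cv2
import numpy as np

edges = cv2.Canny(cv2.imread('card.jpg', 0), 30, 200)  # 'card.jpg' is a placeholder
ys, xs = np.nonzero(edges)                             # coordinates of all edge pixels
points = np.vstack([xs, ys]).astype(np.float64)        # 2 x N array of (u, v) points

best_angle, best_area = None, None
for angle in np.arange(-45, 45, 1.0):                  # candidate rotations in degrees
    theta = np.deg2rad(angle)
    rot = np.array([[np.cos(theta), -np.sin(theta)],
                    [np.sin(theta),  np.cos(theta)]])
    ur, vr = rot @ points                              # rotate every edge pixel at once
    area = (ur.max() - ur.min()) * (vr.max() - vr.min())  # bounding-box area
    if best_area is None or area < best_area:
        best_area, best_angle = area, angle

print('angle with tightest bounding box:', best_angle)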
If the above algorithm is too slow for your use case, you can also try extracting the major axes using the OpenCV HoughLinesP function. This will of course not work for all types of images, but it may be good enough for the case of ID cards.
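A sketch of that alternative, again assuming edges is the Canny output; the threshold and length parameters are guesses that would need tuning per image:

# Detect long line segments; the dominant segment angle approximates the card's tilt
lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=100,
                        minLineLength=200, maxLineGap=10)
if lines is not None:
    angles = [np.degrees(np.arctan2(y2 - y1, x2 - x1))
              for x1, y1, x2, y2 in lines[:, 0]]
    tilt = np.median(angles)  # rotate by -tilt to straighten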
Finally, to apply the rotation correction, refer to this tutorial.

How to compute coordinates of a rectangle and then do perspective transformation using opencv and python

Original pic
Pic after Canny
Pic after perspective transform
Hi,
I am doing a small OCR POC. See the original pic; I only care about the content inside the nameplate. Before recognising characters, I need to do a perspective transformation on these pictures to increase accuracy. See the second picture; I have already applied Canny to get the contours of the rectangle.
I want to get the coordinates of the 4 corners of the rectangle (labeled in red), so that I can derive the matrix and do the perspective transformation. The last picture is the output I want.
I am new to OpenCV... can anybody give me some idea of how to get the coordinates of the 4 corners? I have googled and learned some terms, such as the Hough transformation.
Is it a good approach to detect lines and then compute the position of the corners?
It would be great if somebody could show me some Python code to do this. Thanks in advance.
Below is my current code:
# coding:utf8
import cv2
import numpy as np
import sys

if __name__ == '__main__':
    imagePath = sys.argv[1]
    img = cv2.imread(imagePath)
    img = cv2.GaussianBlur(img, (3,3), 0)
    canny = cv2.Canny(img, 50, 150)
    #element2 = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 4))
    #dilation = cv2.dilate(canny, element2, iterations = 1)
    cv2.imwrite("canny.jpg", canny)  # was "dilation", which is commented out above
    cv2.waitKey(0)
    cv2.destroyAllWindows()
I adapted the code from pyimagesearch to work with Python 3.5 and OpenCV 3.3:
import os
import cv2
import imutils
import numpy as np
import pytesseract
from PIL import Image

def order_points(pts):
    # Initialize a list of coordinates that will be ordered
    # such that the first entry in the list is the top-left,
    # the second entry is the top-right, the third is the
    # bottom-right, and the fourth is the bottom-left
    rect = np.zeros((4, 2), dtype="float32")

    # The top-left point will have the smallest sum, whereas
    # the bottom-right point will have the largest sum
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    # Now, compute the difference between the points; the
    # top-right point will have the smallest difference,
    # whereas the bottom-left will have the largest difference
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]

    # Return the ordered coordinates
    return rect

def four_point_transform(image, pts):
    # Obtain a consistent order of the points and unpack them
    # individually
    rect = order_points(pts)
    (tl, tr, br, bl) = rect

    # Compute the width of the new image, which will be the
    # maximum distance between bottom-right and bottom-left
    # x-coordinates or the top-right and top-left x-coordinates
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))

    # Compute the height of the new image, which will be the
    # maximum distance between the top-right and bottom-right
    # y-coordinates or the top-left and bottom-left y-coordinates
    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(heightA), int(heightB))

    # Now that we have the dimensions of the new image, construct
    # the set of destination points to obtain a "birds eye view"
    # (i.e. top-down view) of the image, again specifying points
    # in the top-left, top-right, bottom-right, and bottom-left
    # order
    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype="float32")

    # Compute the perspective transform matrix and then apply it
    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))

    # Return the warped image
    return warped

def image_process(image_path):
    # Open image
    image = cv2.imread(image_path)
    ratio = image.shape[0] / 500.0
    orig = image.copy()
    image = imutils.resize(image, height=500)

    # Canny edge detect
    edged = cv2.Canny(image, 75, 200)

    # Find the contours
    img, cnts, hierarchy = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    # Find the contours that are the largest (not sure if this applies to this project) and have four points (i.e. a rectangle)
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]
    for c in cnts:
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)
        if len(approx) == 4:
            screenCnt = approx
            break

    # Scale the detected contour back up to the original image size before warping
    warped = four_point_transform(orig, screenCnt.reshape(4, 2) * ratio)
    return warped

def main():
    image_path = None  # You're going to need to change this
    image = image_process(image_path)
    cv2.imshow('image', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

if __name__ == '__main__':
    main()
