I've been trying to do a 4-point perspective transform in order to start doing some OCR.
Starting with the following image, I can detect the number plate
and crop it out, with the green box being the bounding box and the red dots being the corners of the rectangle I want to square up.
This is the output of the transform.
At first glance it seems to have done the transform inside out (taking the parts on either side rather than between the points).
I'm using the imutils package to do the transform, working from this and this as a guide. I'm sure it's something relatively simple I'm missing.
#!/usr/bin/python
import numpy as np
import cv2
import imutils
from imutils import contours
from imutils.perspective import four_point_transform
img = cv2.imread("sample7-smaller.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blurred = cv2.bilateralFilter(gray,15,75,75)
v = np.median(blurred)
lower = int(max(0, (1.0 - 0.33) * v))
upper = int(min(255, (1.0 + 0.33) * v))
edged = cv2.Canny(blurred, lower, upper)
conts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
conts = imutils.grab_contours(conts)
conts = sorted(conts, key=cv2.contourArea, reverse=True)
for cnt in conts:
    approx = cv2.approxPolyDP(cnt, 0.01 * cv2.arcLength(cnt, True), True)
    if len(approx) == 4:
        x, y, w, h = cv2.boundingRect(cnt)
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        for i in approx:
            cv2.circle(img, (i[0][0], i[0][1]), 2, (0, 0, 255), thickness=4)
        warped = four_point_transform(img, approx.reshape(4, 2))
        cv2.imshow("crop", img[y:y + h, x:x + w])
        cv2.imshow("warped", warped)
        cv2.waitKey(0)
I would recommend using OpenCV's perspective transform methods to get the desired result for the given image.
First, mark the positions of the source points:
src_pts = np.array([[8, 136], [415, 52], [420, 152], [14, 244]], dtype=np.float32)
Suppose you want to fit the number plate into a 50x200 image; the destination points would then be:
dst_pts = np.array([[0, 0], [200, 0], [200, 50], [0, 50]], dtype=np.float32)
Find the perspective transform matrix:
M = cv2.getPerspectiveTransform(src_pts, dst_pts)
warp = cv2.warpPerspective(img, M, (200, 50))
EDIT: Since you didn't want to hard-code the final width and height of the plate, you can make the calculation more flexible by deriving them from the 4 marker points:
def get_euler_distance(pt1, pt2):
    return ((pt1[0] - pt2[0])**2 + (pt1[1] - pt2[1])**2)**0.5

src_pts = np.array([[8, 136], [415, 52], [420, 152], [14, 244]], dtype=np.float32)

# Distance between adjacent corners gives the plate's width and height.
width = int(get_euler_distance(src_pts[0], src_pts[1]))
height = int(get_euler_distance(src_pts[0], src_pts[3]))

dst_pts = np.array([[0, 0], [width, 0], [width, height], [0, height]], dtype=np.float32)

M = cv2.getPerspectiveTransform(src_pts, dst_pts)
warp = cv2.warpPerspective(img, M, (width, height))
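Note that cv2.approxPolyDP() returns the 4 corners in an arbitrary order, while the dst_pts above assume top-left, top-right, bottom-right, bottom-left. If the source points aren't in that same order, the warp can come out mirrored or "inside out", as in the question. Here is a minimal sketch of the usual sum/difference trick for ordering the corners, assuming approx is the 4-point contour from the question's code:

def order_corners(pts):
    # Order 4 points as top-left, top-right, bottom-right, bottom-left.
    pts = np.asarray(pts, dtype=np.float32).reshape(4, 2)
    s = pts.sum(axis=1)        # x + y: min at top-left, max at bottom-right
    d = pts[:, 1] - pts[:, 0]  # y - x: min at top-right, max at bottom-left
    return np.array([pts[np.argmin(s)], pts[np.argmin(d)],
                     pts[np.argmax(s)], pts[np.argmax(d)]], dtype=np.float32)

src_pts = order_corners(approx)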
Related
I'd like to know whether it's possible with OpenCV to take an input image, for instance this one,
and, based on the input image, generate another 3D image with the same shape as the input, but as an empty 3D room.
I tried the following steps:
Load the image
Convert it to grayscale
Apply Gaussian blur to reduce noise
Apply a Canny edge detector
Draw white (255, 255, 255) contours on the detected edges
Fill the rest of the space with a gray (43, 43, 43) color
Add white (255, 255, 255) borders on the corners
I thought that if I could detect all the edges in the correct order, I could just connect them with lines or contours and that would produce the 3D image (but I didn't achieve that, because I couldn't sort the edges into the correct order).
I know that in order to produce a 3D image I need x, y, z coordinates, but I don't know the correct way to get them.
As a result image I need to generate something like this one (of course the shape will depend on the input image).
helper.py
import cv2
import numpy as np


def apply_canny_edge_detector(sobel_gray_image_with_gaussian_blur, threshold1=0.33, threshold2=0.5, weak_th=None, strong_th=None):
    return cv2.Canny(sobel_gray_image_with_gaussian_blur, threshold1, threshold2)


def calc_gaussian_kernel(img_size, sigma=1):
    size = int(img_size) // 2
    x, y = np.mgrid[-size:size + 1, -size:size + 1]
    normal = 1 / (2.0 * np.pi * sigma ** 2)
    g = np.exp(-(x ** 2 + y ** 2) / (2.0 * sigma ** 2)) * normal
    return g


def apply_contours(original_img, gray_img, color=(0, 0, 255)):
    _, threshold = cv2.threshold(gray_img, 127, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(threshold, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    for contour in contours:
        approx = cv2.approxPolyDP(
            contour, 0.01 * cv2.arcLength(contour, True), True
        )
        cv2.drawContours(original_img, [approx], -2, color, 1, cv2.LINE_8)
    return original_img


def fill_space_with_color(pixels, color=(197, 194, 199)):
    pixels[np.all(pixels != (10, 255, 255), axis=-1)] = color
    return pixels


def apply_borders(image, color=(53, 11, 248)):
    bordered = cv2.copyMakeBorder(image, 1, 1, 1, 1, cv2.BORDER_CONSTANT, value=color)
    return bordered
main.py
from cv2_scripts.helper import *

if __name__ == '__main__':
    for i in range(10, 11):
        # Read the image
        img_file = "p/0000{0}.jpg".format(str(i))
        original_img = cv2.imread(img_file)

        sobel_gray_image = cv2.cvtColor(original_img, cv2.COLOR_BGR2GRAY)
        gaussian_kernel = calc_gaussian_kernel(11, 2)
        sobel_gray_image_with_gaussian_blur = cv2.filter2D(sobel_gray_image, -1, gaussian_kernel)

        canny_image = apply_canny_edge_detector(sobel_gray_image_with_gaussian_blur, 100)
        contoured_image = apply_contours(original_img, canny_image, (255, 255, 255))
        filled_image = fill_space_with_color(contoured_image, [43, 43, 43])
        room_shape = apply_borders(filled_image, [255, 255, 255])

        cv2.imshow("Room Shape", room_shape)
        cv2.waitKey(0)

    cv2.destroyAllWindows()
I want to crop images according to their right frame. I have about 10,000 hand X-ray images to preprocess, and what I have done so far:
Apply Gaussian Blur and Threshold (Binary + Otsu) on the image.
Apply dilation to get a single object (in this case a hand).
Used cv2.findContours() to draw an outline along the edges of the hand.
Used cv2.boundingRect() to find the right frame, and then cv2.minAreaRect() and cv2.boxPoints() to get the points for the bounding box.
Used cv2.warpPerspective() to adjust the image according to its height and width.
The code below describes the above:
import os
import cv2
import numpy as np
from matplotlib import pyplot as plt
# Load image, create mask, grayscale, Gaussian blur, Otsu's threshold
img_path = "sample_image.png"
image = cv2.imread(img_path)
original = image.copy()
blank = np.zeros(image.shape[:2], dtype = np.uint8)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (33,33), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
# Dilate to merge the hand region into a single contour
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
dilate = cv2.dilate(thresh, kernel, iterations = 3)
# Find contours
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key = lambda x: cv2.boundingRect(x)[0])
for c in cnts:
    # Filter using contour area and aspect ratio (x1 = width, y1 = height)
    x, y, x1, y1 = cv2.boundingRect(c)
    if (x1 > 500) and (y1 > 700):
        rect = cv2.minAreaRect(c)
        box = cv2.boxPoints(rect)
        box = np.int0(box)
        width = int(rect[1][0])
        height = int(rect[1][1])
        src_pts = box.astype("float32")
        dst_pts = np.array([[0, height-1], [0, 0],
                            [width-1, 0], [width-1, height-1]], dtype="float32")
        M = cv2.getPerspectiveTransform(src_pts, dst_pts)
        warped = cv2.warpPerspective(image, M, (width, height))
        plt.imshow(warped)
If you have a look at some of the images in the folder, those are the inputs. When I run these images through the code above, I get an output like this. Some of them are cropped nicely (straightened); however, some of them come out rotated by 90 degrees. Is there a way to counter the '90-degree rotated output' problem?
Here are some images:
Image Inputs: Four X-ray examples
Image Outputs: Returns images that are 90 degrees rotated
Image Outputs wanted: Straightened image (I just used Photoshop to straighten them. I don't want to do this for 10000 images...)
UPDATE:
I edited the code according to the suggestions mentioned below. After running some samples, it now returns images that are slanted 90 degrees to the right.
Input images:
Output images:
I doubt it's because of the quality of the images. Maybe it has to do with OpenCV's minAreaRect()? Or boxPoints()?
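For what it's worth, cv2.minAreaRect() is a common source of exactly this: it has no fixed notion of which side is the "width" and which is the "height", and (depending on the OpenCV version) reports an angle in [-90, 0) or [0, 90), so the same physical rectangle can come back with its sides swapped. A minimal sketch of one common normalisation, assuming the pre-4.5 angle convention of [-90, 0):

def normalize_rect(rect):
    # rect comes from cv2.minAreaRect(): ((cx, cy), (w, h), angle)
    (cx, cy), (w, h), angle = rect
    # With the [-90, 0) convention, an angle below -45 means the "width"
    # side is closer to vertical: swap the sides and adjust the angle.
    if angle < -45:
        w, h = h, w
        angle += 90
    return ((cx, cy), (w, h), angle)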
FINAL UPDATE:
Following @Prashant Maurya's answer, the code was updated with a function added to detect whether the position of the hand is left or right, and src_pts are then mapped to the right dst_pts. The full code is shown below.
Hi, there are two changes which will correct the output:
The width and height taken in the code are in the wrong order (i.e. width: 1470 & height: 1118); just switch the values.
Map src_pts to the right dst_pts: the current code maps the top-left corner to the bottom-left, so the image is being rotated.
A function was added to detect whether the image is tilted left or right, and it is rotated accordingly.
Full code with changes is:
import os
import cv2
import numpy as np
from matplotlib import pyplot as plt
# Load image, create mask, grayscale, Gaussian blur, Otsu's threshold
img_path = "xray1.png"
image = cv2.imread(img_path)
cv2.imshow("image original", image)
cv2.waitKey(10000)
original = image.copy()
blank = np.zeros(image.shape[:2], dtype = np.uint8)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (33,33), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
# Dilate to merge the hand region into a single contour
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
dilate = cv2.dilate(thresh, kernel, iterations = 3)
# Find contours
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key = lambda x: cv2.boundingRect(x)[0])
def get_tilt(box):
    tilt = "Left"
    x_list = [coord[0] for coord in box]
    y_list = [coord[1] for coord in box]
    print(x_list)
    print(y_list)
    x_list = sorted(x_list)
    y_list = sorted(y_list)
    print(x_list)
    print(y_list)
    for coord in box:
        if coord[0] == x_list[0]:
            index = y_list.index(coord[1])
            print("Index: ", index)
            if index == 1:
                tilt = "Left"
            else:
                tilt = "Right"
    return tilt

for c in cnts:
    # Filter using contour area and aspect ratio (x1 = width, y1 = height)
    x, y, x1, y1 = cv2.boundingRect(c)
    if (x1 > 500) and (y1 > 700):
        rect = cv2.minAreaRect(c)
        print("rect", rect)
        box = cv2.boxPoints(rect)
        box = np.int0(box)
        # print("rect:", box)
        tilt = get_tilt(box)
        src_pts = box.astype("float32")

        if tilt == "Left":
            width = int(rect[1][1])
            height = int(rect[1][0])
            dst_pts = np.array([[0, 0],
                                [width-1, 0], [width-1, height-1], [0, height-1]], dtype="float32")
        else:
            width = int(rect[1][0])
            height = int(rect[1][1])
            dst_pts = np.array([[0, height-1], [0, 0],
                                [width-1, 0], [width-1, height-1]], dtype="float32")

        print("Src pts:", src_pts)
        print("Dst pts:", dst_pts)
        M = cv2.getPerspectiveTransform(src_pts, dst_pts)
        warped = cv2.warpPerspective(image, M, (width, height))
        print("Showing image ..")
        # plt.imshow(warped)
        cv2.imshow("image crop", warped)
        cv2.waitKey(10000)
Does anyone know whether it's possible to transform image A into image B's contour if their shapes are random, using OpenCV or any other Python libraries that work with images?
Here is what I have so far with 2 images:
I've been able to find and draw the contours of the bulb and insert the fox into it using the bitwise_and method, but what that does is crop the second image, whereas I need it to transform its shape to fit the bulb contour.
import cv2
src1 = cv2.imread('fox.png')
src2 = cv2.imread('bulb-contour-filled.png')
src2 = cv2.resize(src2, src1.shape[1::-1])
dst = cv2.bitwise_and(src1, src2)
cv2.imwrite('img_fin.jpg', dst)
Bulb original image:
Fox original image:
Bulb contour:
The Concept
For this we are going to need to slice the image into triangles and warp each triangle individually. The starting points of the image should be along the outline of the original image, and the ending points should be along the outline of the destination shape.
Although below I have hard-coded the 2 sets of points, you'll just need to figure out the optimal processing to retrieve the contours of the 2 images (each needs to have the same number of points, in the same order). Also, I have programmed an interactive OpenCV program that will allow us to easily retrieve the coordinates.
The Code
import cv2
import numpy as np
def triangles(points):
    points = np.where(points, points, 1)
    subdiv = cv2.Subdiv2D((*points.min(0), *points.max(0)))
    for pt in points:
        subdiv.insert(tuple(map(int, pt)))
    for pts in subdiv.getTriangleList().reshape(-1, 3, 2):
        yield [np.where(np.all(points == pt, 1))[0][0] for pt in pts]

def crop(img, pts):
    x, y, w, h = cv2.boundingRect(pts)
    img_cropped = img[y: y + h, x: x + w]
    pts[:, 0] -= x
    pts[:, 1] -= y
    return img_cropped, pts

def warp(img1, img2, pts1, pts2):
    img2 = img2.copy()
    for indices in triangles(pts1):
        img1_cropped, triangle1 = crop(img1, pts1[indices])
        img2_cropped, triangle2 = crop(img2, pts2[indices])
        transform = cv2.getAffineTransform(np.float32(triangle1), np.float32(triangle2))
        img2_warped = cv2.warpAffine(img1_cropped, transform, img2_cropped.shape[:2][::-1], None, cv2.INTER_LINEAR, cv2.BORDER_REFLECT_101)
        mask = np.zeros_like(img2_cropped)
        cv2.fillConvexPoly(mask, np.int32(triangle2), (1, 1, 1), 16, 0)
        img2_cropped *= 1 - mask
        img2_cropped += img2_warped * mask
    return img2

def resize(img, size):
    h, w = img.shape[:2]
    return cv2.resize(img, (int(w * size), int(h * size)))
img1 = resize(cv2.imread("dog.png"), 0.8)
img2 = resize(cv2.imread("bulb.png"), 0.8)
pts1 = np.array([[322, 508], [390, 475], [413, 425], [440, 367], [453, 305], [458, 289], [446, 202], [434, 139], [392, 104], [324, 94], [246, 97], [194, 101], [111, 127], [98, 185], [88, 240], [95, 306], [90, 363], [123, 431], [160, 487], [223, 508]])
pts2 = np.array([[459, 793], [513, 715], [541, 580], [552, 470], [583, 398], [633, 323], [643, 233], [616, 144], [557, 71], [470, 28], [354, 27], [264, 72], [206, 138], [179, 225], [178, 302], [236, 401], [266, 480], [278, 564], [297, 707], [357, 792]])
cv2.imshow("result", warp(img1, img2, pts1, pts2))
cv2.waitKey(0)
cv2.destroyAllWindows()
The Output
The Explanation
Import the necessary libraries:
import cv2
import numpy as np
Define a function, triangles, that will take in an array of coordinates, points, and yield lists of 3 indices into that array, one list per triangle of a triangulation that covers the area outlined by the coordinates:
def triangles(points):
    points = np.where(points, points, 1)
    subdiv = cv2.Subdiv2D((*points.min(0), *points.max(0)))
    for pt in points:
        subdiv.insert(tuple(map(int, pt)))
    for pts in subdiv.getTriangleList().reshape(-1, 3, 2):
        yield [np.where(np.all(points == pt, 1))[0][0] for pt in pts]
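For example (my own quick check, not part of the original answer), the four corners of a square yield two triangles, each as a list of indices into the original array:

pts = np.array([[10, 10], [100, 10], [100, 100], [10, 100]])
for tri in triangles(pts):
    print(tri)  # e.g. [0, 1, 2] and [0, 2, 3]; the exact split can vary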
Define a function, crop, that will take in an image array, img, and an array of three coordinates, pts. It will return a rectangular segment of the image just large enough to fit the triangle formed by the three points, along with the three coordinates shifted to be relative to the top-left corner of the cropped image:
def crop(img, pts):
    x, y, w, h = cv2.boundingRect(pts)
    img_cropped = img[y: y + h, x: x + w]
    pts[:, 0] -= x
    pts[:, 1] -= y
    return img_cropped, pts
Define a function, warp, that will take in 2 image arrays, img1 and img2, and 2 arrays of coordinates, pts1 and pts2. It will use the triangles function defined before to iterate through the triangles of the first array of coordinates, the crop function defined before to crop both images at the coordinates corresponding to each triangle's indices, and the cv2.warpAffine() method to warp the image at the current triangle of the iteration:
def warp(img1, img2, pts1, pts2):
    img2 = img2.copy()
    for indices in triangles(pts1):
        img1_cropped, triangle1 = crop(img1, pts1[indices])
        img2_cropped, triangle2 = crop(img2, pts2[indices])
        transform = cv2.getAffineTransform(np.float32(triangle1), np.float32(triangle2))
        img2_warped = cv2.warpAffine(img1_cropped, transform, img2_cropped.shape[:2][::-1], None, cv2.INTER_LINEAR, cv2.BORDER_REFLECT_101)
        mask = np.zeros_like(img2_cropped)
        cv2.fillConvexPoly(mask, np.int32(triangle2), (1, 1, 1), 16, 0)
        img2_cropped *= 1 - mask
        img2_cropped += img2_warped * mask
    return img2
Read in your images. Note that I have resized the images to better fit my screen. If you remove the resizing part, you'll need to use the program below to re-adjust the points:
def resize(img, size):
    h, w = img.shape[:2]
    return cv2.resize(img, (int(w * size), int(h * size)))

img1 = resize(cv2.imread("dog.png"), 0.8)
img2 = resize(cv2.imread("bulb.png"), 0.8)
Finally, define the 2 sets of points, the first outlining the first image and the second outlining the second. Use the warp function defined before to warp img1 so that its keypoints overlap with the keypoints of img2, and show the resulting image:
pts1 = np.array([[0, 0], [286, 0], [286, 198], [174, 198], [158, 116], [0, 97]])
pts2 = np.array([[80, 37], [409, 42], [416, 390], [331, 384], [291, 119], [111, 311]])
cv2.imshow("result", warp(img1, img2, pts1, pts2))
cv2.waitKey(0)
cv2.destroyAllWindows()
Tools
Use the program below to manually drag the points onto each image and see the warp effect in real-time. Of course, rather than doing this manually, you can detect the contours of the two images (make sure that they have the same number of points and are in the same order):
import cv2
import numpy as np
def triangles(points):
    points = np.where(points, points, 1)
    subdiv = cv2.Subdiv2D((*points.min(0), *points.max(0)))
    for pt in points:
        subdiv.insert(tuple(map(int, pt)))
    for pts in subdiv.getTriangleList().reshape(-1, 3, 2):
        yield [np.where(np.all(points == pt, 1))[0][0] for pt in pts]

def crop(img, pts):
    x, y, w, h = cv2.boundingRect(pts)
    img_cropped = img[y: y + h, x: x + w]
    pts[:, 0] -= x
    pts[:, 1] -= y
    return img_cropped, pts

def warp(img1, img2, pts1, pts2):
    img2 = img2.copy()
    for indices in triangles(pts1):
        img1_cropped, triangle1 = crop(img1, pts1[indices])
        img2_cropped, triangle2 = crop(img2, pts2[indices])
        transform = cv2.getAffineTransform(np.float32(triangle1), np.float32(triangle2))
        img2_warped = cv2.warpAffine(img1_cropped, transform, img2_cropped.shape[:2][::-1], None, cv2.INTER_LINEAR, cv2.BORDER_REFLECT_101)
        mask = np.zeros_like(img2_cropped)
        cv2.fillConvexPoly(mask, np.int32(triangle2), (1, 1, 1), 16, 0)
        img2_cropped *= 1 - mask
        img2_cropped += img2_warped * mask
    return img2

def draw_circle(event, x, y, flags, param):
    pts = param
    if event == cv2.EVENT_LBUTTONDOWN:
        for pt in pts:
            dist = (pt[0] - x) ** 2 + (pt[1] - y) ** 2
            if dist < 225:
                active_pt[:] = pt
    elif event == cv2.EVENT_LBUTTONUP:
        active_pt[:] = 0
    elif event == cv2.EVENT_MOUSEMOVE:
        if np.any(active_pt):
            for pt in pts:
                if np.all(active_pt == pt):
                    pt[:] = active_pt[:] = x, y

def draw_circles(img, pts):
    img = img.copy()
    for i, (x, y) in enumerate(pts):
        cv2.circle(img, (x, y), 15, (0, 0, 255), -1)
        cv2.putText(img, str(i), (x - 10, y + 10), cv2.FONT_HERSHEY_COMPLEX, 0.8, (0, 0, 0), 2)
    return img

def resize(img, size):
    h, w = img.shape[:2]
    return cv2.resize(img, (int(w * size), int(h * size)))

img1 = resize(cv2.imread("dog.png"), 0.8)
img2 = resize(cv2.imread("bulb.png"), 0.8)

pts_count = 20
pts1 = np.arange(pts_count * 2).reshape((pts_count, 2))
pts2 = np.arange(pts_count * 2).reshape((pts_count, 2))
active_pt = np.array([0, 0])

cv2.namedWindow("image 1")
cv2.setMouseCallback('image 1', draw_circle, pts1)
cv2.namedWindow("image 2")
cv2.setMouseCallback('image 2', draw_circle, pts2)

pause = False
while True:
    cv2.imshow('image 1', draw_circles(img1, pts1))
    cv2.imshow('image 2', draw_circles(img2, pts2))
    if not pause:
        try:
            cv2.imshow("result", warp(img1, img2, pts1, pts2))
        except:
            pass
    key = cv2.waitKey(20)
    if key & 0xFF == ord("q"):
        break
    if key & 0xFF == ord("p"):
        pause = not pause

cv2.waitKey(0)
cv2.destroyAllWindows()
Here is a rough demonstration of how it works (sped up x4):
If the real-time warping is too slow on your computer, simply press the p key to pause the warp updating, and press it again to resume it.
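If you'd rather not drag the points by hand, one way to get two outlines with the same number of points in the same order (my own sketch, not part of the answer above, assuming OpenCV 4's findContours return signature) is to take the largest contour of each mask and resample it to a fixed number of points roughly evenly spaced along its perimeter:

def resample_contour(mask, n_points=20):
    # Largest external contour of a binary mask, resampled to
    # n_points roughly evenly spaced along its perimeter.
    cnts, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    cnt = max(cnts, key=cv2.contourArea).reshape(-1, 2).astype(np.float64)
    seg = np.linalg.norm(np.diff(np.vstack([cnt, cnt[:1]]), axis=0), axis=1)
    dist = np.concatenate([[0], np.cumsum(seg)])[:-1]  # arc length at each point
    targets = np.linspace(0, dist[-1], n_points, endpoint=False)
    idx = np.clip(np.searchsorted(dist, targets), 0, len(cnt) - 1)
    return cnt[idx].astype(int)

You would still need to align the starting point and winding direction of the two resampled contours (for instance, start both at the topmost point and walk in the same direction) before passing them as pts1 and pts2.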
I would like to photograph a LEGO building instruction from different perspectives and determine the perspective of the entire sheet based on the LEGO logo depicted on it.
As a first approach, I detected the square of the logo using a mask in the color space in order to then recognize the edges and use the edges to display the entire image from the correct perspective. (The goal later is to be able to read the ID reliably, regardless of the perspective from which the picture is taken.)
https://i.stack.imgur.com/U5ZkE.png
def filterLegoLogo(image):
    img = image.copy()
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    # Red wraps around the hue axis, so combine two hue ranges.
    lower1 = np.array([0, 100, 20])
    upper1 = np.array([10, 255, 255])
    lower2 = np.array([170, 200, 20])
    upper2 = np.array([179, 255, 255])
    lower_mask = cv2.inRange(hsv, lower1, upper1)
    upper_mask = cv2.inRange(hsv, lower2, upper2)
    full_mask = lower_mask + upper_mask
    return full_mask
Output:
https://i.stack.imgur.com/SL2Op.png
What I want to achieve:
https://i.stack.imgur.com/1DmDj.png
Later I want to transform the recognized area and read out the ID.
I use this to transform the image:
import cv2
import numpy as np
from matplotlib import pyplot as plt

image = cv2.imread('images/lego.png')
h, w, _ = np.shape(image)  # rows, columns

pts1 = np.float32([[170, 337], [714, 379], [199, 600], [841, 621]])
# Size of transformed image
pts2 = np.float32([[200, 200], [500, 200], [200, 500], [500, 500]])

for val in pts1:
    cv2.circle(image, (int(val[0]), int(val[1])), 5, (0, 255, 0), -1)

M = cv2.getPerspectiveTransform(pts1, pts2)
dst = cv2.warpPerspective(image, M, (w, h))
plt.imshow(dst)
plt.title('Transformed Image')
plt.show()
And as a result I get this:
https://i.stack.imgur.com/4473V.png
The points for pts1 are entered manually. I would like to automate edge detection.
Does anyone have an idea?
Thank you in advance!
Here is one way to do that in Python/OpenCV.
Read the input
Threshold using inRange()
Apply morphology to clean it up
Get the convex hull
Get the 4 corner points using approxPolyDP()
Specify the corresponding output corner points
Warp the image
Save the results
Input:
import cv2
import numpy as np
# read image
img = cv2.imread('lego.png')
hh, ww = img.shape[:2]
# threshold on red color
lower=(90,100,170)
upper=(130,150,220)
thresh = cv2.inRange(img, lower, upper)
# apply morphology
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
morph = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7,7))
morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)
# get convex hull
points = np.column_stack(np.where(morph.transpose() > 0))
hull = cv2.convexHull(points)
peri = cv2.arcLength(hull, True)
hullimg = img.copy()
hullimg = cv2.polylines(hullimg, [hull], True, (0,255,0), 1)
# get 4 corners
poly = cv2.approxPolyDP(hull, 0.01 * peri, True)
plist = poly.tolist()
print(plist)
# plist ordered cw from bottom right
cornerimg = img.copy()
cv2.polylines(cornerimg, [np.int32(poly)], True, (0,0,255), 1, cv2.LINE_AA)
# specify control point pairs
# order pts cw from bottom right
inpts = np.float32(poly)
outpts = np.float32([[500, 500], [200, 500], [200, 200], [500, 200]])
# warp image
M = cv2.getPerspectiveTransform(inpts, outpts)
warpimg = cv2.warpPerspective(img, M, (ww,hh))
# save results
cv2.imwrite("lego_thresh.png", thresh)
cv2.imwrite("lego_morph.png", morph)
cv2.imwrite("lego_convexhull.png", hullimg)
cv2.imwrite("lego_corners.png", cornerimg)
cv2.imwrite("lego_warped.png", warpimg)
cv2.imshow('thresh',thresh)
cv2.imshow('morph',morph)
cv2.imshow('hullimg',hullimg)
cv2.imshow('cornerimg',cornerimg)
cv2.imshow('warpimg',warpimg)
cv2.waitKey(0)
cv2.destroyAllWindows()
Threshold Image:
Morphology Cleaned Image:
Convex Hull Image:
4 Corner Image:
Warped Result:
The mask image gives a good starting point. Extract the contours and find a way to filter out the interesting one (based on size, length, straightness or whatever criterion works). Then find the point farthest from the center, and the point farthest from that first point (these define the longest diagonal). Next find the two points farthest from the diagonal, one on each side. Now you have the four interesting corners.
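A minimal sketch of that corner-finding idea (my own illustration of the suggestion above, assuming contour is an (N, 2) array of points from the filtered mask and numpy is imported as np):

def four_corners(contour):
    pts = np.asarray(contour, dtype=np.float64).reshape(-1, 2)
    center = pts.mean(axis=0)
    # Farthest point from the center, then the farthest point from that one:
    # together they define the longest diagonal.
    p1 = pts[np.argmax(np.linalg.norm(pts - center, axis=1))]
    p2 = pts[np.argmax(np.linalg.norm(pts - p1, axis=1))]
    # Signed distance of every point to the diagonal p1-p2:
    d = p2 - p1
    signed = (d[0] * (pts[:, 1] - p1[1]) - d[1] * (pts[:, 0] - p1[0])) / np.linalg.norm(d)
    # Farthest point on each side of the diagonal:
    p3 = pts[np.argmax(signed)]
    p4 = pts[np.argmin(signed)]
    return np.array([p1, p3, p2, p4])  # the four corners, in order around the shape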
I'm working on a school project where I need to create a semantic map from a camera picture. The picture is of agricultural land. The goal is to create the map without the help of CNNs or other learning methods, using only classical detection methods.
I searched for similar methods and algorithms; this is my code and the result, but the algorithm is not good at detecting the edges of the various fields in the image.
from __future__ import print_function
import cv2 as cv
import numpy as np
import imutils
import argparse
import random as rng
rng.seed(12345)
src = cv.imread('field.jpg')
src = imutils.resize(src, width=600)
# Show source image
cv.imshow('Source Image', src)
src[np.all(src == 255, axis=2)] = 0
# Show output image
cv.imshow('Black Background Image', src)
kernel = np.array([[1, 1, 1], [1, -8, 1], [1, 1, 1]], dtype=np.float32)
# do the laplacian filtering as it is
# well, we need to convert everything in something more deeper then CV_8U
# because the kernel has some negative values,
# and we can expect in general to have a Laplacian image with negative values
# BUT a 8bits unsigned int (the one we are working with) can contain values from 0 to 255
# so the possible negative number will be truncated
imgLaplacian = cv.filter2D(src, cv.CV_32F, kernel)
sharp = np.float32(src)
imgResult = sharp - imgLaplacian
# convert back to 8bits gray scale
imgResult = np.clip(imgResult, 0, 255)
imgResult = imgResult.astype('uint8')
imgLaplacian = np.clip(imgLaplacian, 0, 255)
imgLaplacian = np.uint8(imgLaplacian)
#cv.imshow('Laplace Filtered Image', imgLaplacian)
cv.imshow('New Sharped Image', imgResult)
bw = cv.cvtColor(imgResult, cv.COLOR_BGR2GRAY)
_, bw = cv.threshold(bw, 125, 255, cv.THRESH_BINARY | cv.THRESH_OTSU)
cv.imshow('Binary Image', bw)
dist = cv.distanceTransform(bw, cv.DIST_L2, 3)
# Normalize the distance image for range = {0.0, 1.0}
# so we can visualize and threshold it
cv.normalize(dist, dist, 0, 1.0, cv.NORM_MINMAX)
cv.imshow('Distance Transform Image', dist)
_, dist = cv.threshold(dist, 0.4, 1.0, cv.THRESH_BINARY)
# Dilate a bit the dist image
kernel1 = np.ones((3,3), dtype=np.uint8)
dist = cv.dilate(dist, kernel1)
cv.imshow('Peaks', dist)
dist_8u = dist.astype('uint8')
# Find total markers
contours = cv.findContours(dist_8u, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
# Create the marker image for the watershed algorithm
markers = np.zeros(dist.shape, dtype=np.int32)
# Draw the foreground markers
for i in range(len(contours)):
    cv.drawContours(markers, contours, i, (i+1), -1)
# Draw the background marker
cv.circle(markers, (5,5), 3, (255,255,255), -1)
cv.imshow('Markers', markers*10000)
cv.watershed(imgResult, markers)
#mark = np.zeros(markers.shape, dtype=np.uint8)
mark = markers.astype('uint8')
mark = cv.bitwise_not(mark)
# uncomment this if you want to see how the mark
# image looks like at that point
#cv.imshow('Markers_v2', mark)
# Generate random colors
colors = []
for contour in contours:
    colors.append((rng.randint(0,256), rng.randint(0,256), rng.randint(0,256)))
# Create the result image
dst = np.zeros((markers.shape[0], markers.shape[1], 3), dtype=np.uint8)
# Fill labeled objects with random colors
for i in range(markers.shape[0]):
    for j in range(markers.shape[1]):
        index = markers[i,j]
        if index > 0 and index <= len(contours):
            dst[i,j,:] = colors[index-1]
# Visualize the final image
cv.imshow('Final Result', dst)
cv.waitKey()
Result: https://i.stack.imgur.com/qpE4s.jpg