Pick random pixels' centroids within raster area - Python + GDAL

I have a raster file in the WGS84 projection and I am trying to get the coordinates of random pixels within the raster (GeoTIFF) area, shown at the lower left in the picture. First I calculate the coordinates of each pixel's centroid (again in WGS84), then I pick 100 of them at random and export them to a CSV.
Problem: I expect the points to fall within the raster area (lower left in the picture), but they are way off of it. Is it a projection error or a coordinate miscalculation? What is wrong in my code?
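For reference, GDAL's affine geotransform maps a (column, row) pixel index to georeferenced coordinates, and comparing against it can expose scaling or axis-swap mistakes. A minimal sketch of the centroid formula (assuming a north-up raster, so gt[2] and gt[4] are zero):
def pixel_centroid(gt, col, row):
    # gt = (originX, pixelWidth, rowRotation, originY, colRotation, -pixelHeight)
    x = gt[0] + (col + 0.5) * gt[1] + (row + 0.5) * gt[2]  # longitude in WGS84
    y = gt[3] + (col + 0.5) * gt[4] + (row + 0.5) * gt[5]  # latitude in WGS84
    return x, y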
Here is my code:
# Get coordinates for each pixel centroid
geotiff = gdal.Open(path)
gt = geotiff.GetGeoTransform()
column_numbers, row_numbers, band_numbers = geotiff.RasterXSize, geotiff.RasterYSize, geotiff.RasterCount
minx = gt[0]
miny = gt[3] + column_numbers*gt[4] + row_numbers*gt[5]
maxx = gt[0] + column_numbers*gt[1] + row_numbers*gt[2]
maxy = gt[3]
pixelWidth = gt[1]
pixelHeight = -gt[5]
lonPxSz = (maxy - miny) / row_numbers
latPxSz = (maxx - minx) / column_numbers
total = np.array(geotiff.ReadAsArray())
res = []
for i in range(row_numbers):
    for j in range(column_numbers):
        res.append([[i, j]] + [data[i][j] for data in total])
coords = pd.DataFrame(res, columns=['Pair', 'Col1', 'Col2', 'Col3', 'Col4', 'Col5', 'Col6'])
coords[['Lat', 'Lon']] = pd.DataFrame(coords['Pair'].tolist(), index=coords.index)
coords["Lat"] = (coords["Lat"] + 0.5) * 10 * latPxSz + miny
coords["Lon"] = (coords["Lon"] + 0.5) * 10 * lonPxSz + minx
coords = coords.sample(n = 100)
coords[['Lat', 'Lon']].to_csv("coords.csv", sep=";")

If you only want to pick 100 random points on the image:
from osgeo import gdal
import numpy as np
import pandas as pd
import random
path = "image.tif"
geotiff = gdal.Open(path)
gt = geotiff.GetGeoTransform()
column_numbers, row_numbers, band_numbers = geotiff.RasterXSize, geotiff.RasterYSize, geotiff.RasterCount
minx = gt[0]
miny = gt[3] + column_numbers * gt[4] + row_numbers * gt[5]
maxx = gt[0] + column_numbers * gt[1] + row_numbers * gt[2]
maxy = gt[3]
pixelWidth = gt[1]
pixelHeight = -gt[5]
halfPixelWidth = pixelWidth / 2
halfPixelHeight = pixelHeight / 2
rand_point_x = random.sample([i for i in range(column_numbers)], 100)
rand_point_y = random.sample([i for i in range(row_numbers)], 100)
rand_points = np.vstack((rand_point_y, rand_point_x)).T
coords = pd.DataFrame(rand_points, columns=['Lat', 'Lon'])
coords["Lat"] = miny + (coords["Lat"] * pixelHeight) + halfPixelHeight
coords["Lon"] = minx + (coords["Lon"] * pixelWidth) + halfPixelWidth
coords.to_csv("coords.csv", sep=',')
You may use the coordinates of these random points to retrieve pixel values afterward.
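For example, a minimal sketch of that lookup (assuming a north-up raster, so inverting the geotransform reduces to a division; the band index is illustrative):
# Hedged sketch: map each (lon, lat) centroid back to a (col, row) index
# and read the value from band 1 (the band choice is an example).
data = geotiff.GetRasterBand(1).ReadAsArray()
for lat, lon in coords[['Lat', 'Lon']].itertuples(index=False):
    col = int((lon - minx) / pixelWidth)   # inverse of the x mapping
    row = int((maxy - lat) / pixelHeight)  # pixel rows count downward from maxy
    value = data[row, col]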

You can try using image processing techniques to get the coordinates of the raster. For example, here is how it can be done with the cv2 (OpenCV) library (the purpose of each function is commented in the code):
import cv2
import numpy as np
def process(img):  # Function to process image for optimal contour detection
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_blur = cv2.GaussianBlur(img_gray, (5, 5), 1)
    img_canny = cv2.Canny(img_blur, 350, 150)
    kernel = np.ones((3, 3))
    img_dilate = cv2.dilate(img_canny, kernel, iterations=1)
    return cv2.erode(img_dilate, kernel, iterations=1)

def get_raster(img):  # Function that uses process function to detect contour of raster
    contours, _ = cv2.findContours(process(img), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
    cnt = max(contours, key=cv2.contourArea)
    peri = cv2.arcLength(cnt, True)
    approx = cv2.approxPolyDP(cnt, 0.05 * peri, True)
    return cv2.boundingRect(approx)

def get_random(img, num=100):  # Function that uses get_raster to get random points within raster
    x, y, w, h = get_raster(img)
    return np.vstack((np.random.randint(x, x + w, num),
                      np.random.randint(y, y + h, num))).T

img = cv2.imread("map.png")  # Read in image
pts = get_random(img)  # Get random points within raster
cv2.drawContours(img, pts[:, None], -1, (0, 255, 0), 2)  # Draw points onto image
cv2.imshow("Image", img)
cv2.waitKey(0)
Output:
As you can see, randomly positioned green points have been drawn onto the image, within the raster area. If you only need the bounding coordinates of the raster, you can just do x, y, w, h = get_raster(img).

Related

Correct the object orientation in the image. Calculate the correct angle of rotation and correct the alignment of the object in the image

I have cropped images of electronic meter readings. The readings are captured at arbitrary orientations, and I need the orientation of the object (not the image) in each image to be aligned.
1. Contour detection is not working reliably: lots of contours are formed in the image, and to calculate the angle I need to select the right one. Sometimes no contour is formed at all.
2. I want a set of rotated images as shown in the figure above. I tried some OpenCV image-rotation code, but because there are two types of use case (the code cannot tell which of the two reading styles an image uses), the images turn out as shown below.
Using the code below I am able to find the angle of rotation, but only for one of the cases. I need it to work automatically for both types of cases. Also see the attached data set for examples of the other type.
import cv2
import numpy as np
import matplotlib.pyplot as plt

debug = True

# Display image
def display(img, frameName="OpenCV Image"):
    if not debug:
        return
    h, w = img.shape[0:2]
    neww = 800
    newh = int(neww * (h / w))
    img = cv2.resize(img, (neww, newh))
    plt.imshow(img)
    plt.show()
    # cv2.imshow(frameName, img)
    # cv2.waitKey(0)

# Rotate the image with the given theta value
def rotate(img, theta):
    rows, cols = img.shape[0], img.shape[1]
    image_center = (cols / 2, rows / 2)
    M = cv2.getRotationMatrix2D(image_center, theta, 1)
    abs_cos = abs(M[0, 0])
    abs_sin = abs(M[0, 1])
    bound_w = int(rows * abs_sin + cols * abs_cos)
    bound_h = int(rows * abs_cos + cols * abs_sin)
    M[0, 2] += bound_w / 2 - image_center[0]
    M[1, 2] += bound_h / 2 - image_center[1]
    # rotate original image to show transformation
    rotated = cv2.warpAffine(img, M, (bound_w, bound_h), borderValue=(255, 255, 255))
    return rotated

def slope(x1, y1, x2, y2):
    if x1 == x2:
        return 0
    slope = (y2 - y1) / (x2 - x1)
    theta = np.rad2deg(np.arctan(slope))
    return theta

def main(filePath):
    img = cv2.imread(filePath)
    (hi, wi) = img.shape[:2]
    textImg = img.copy()
    small = cv2.cvtColor(textImg, cv2.COLOR_BGR2GRAY)
    # find the gradient map
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    grad = cv2.morphologyEx(small, cv2.MORPH_GRADIENT, kernel)
    display(grad)
    # Binarize the gradient image
    _, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    display(bw)
    # connect horizontally oriented regions
    # kernel value (9,1) can be changed to improve the text detection
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 1))
    connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)
    display(connected)
    # using RETR_EXTERNAL instead of RETR_CCOMP
    # _, contours, hierarchy = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    contours, hierarchy = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)  # opencv >= 4.0
    mask = np.zeros(bw.shape, dtype=np.uint8)
    display(mask)
    # cumulative theta value
    cummTheta = 0
    # number of detected text regions
    ct = 0
    flag = False
    for idx in range(len(contours)):
        x, y, w, h = cv2.boundingRect(contours[idx])
        mask[y:y + h, x:x + w] = 0
        # fill the contour
        cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
        display(mask)
        # ratio of non-zero pixels in the filled region
        r = float(cv2.countNonZero(mask[y:y + h, x:x + w])) / (w * h)
        # assume at least 45% of the area is filled if it contains text
        # if r > 0.39 and w > 8 and h > 8:
        if (h / hi) > 0.4 and (w / wi) > 0.4:
            flag = True
            print(r, w, h)
            # cv2.rectangle(textImg, (x1, y), (x+w-1, y+h-1), (0, 255, 0), 2)
            rect = cv2.minAreaRect(contours[idx])
            box = cv2.boxPoints(rect)
            box = np.int0(box)
            cv2.drawContours(textImg, [box], 0, (0, 0, 255), 2)
            center = (int(rect[0][0]), int(rect[0][1]))
            width = int(rect[1][0])
            height = int(rect[1][1])
            angle = int(rect[2])
            print(angle)
            print(width, height)
            if width < height:
                angle = 90 + angle
                print(angle, 'final')
            # we can filter theta as an outlier based on the other theta values;
            # this helps exclude the rare text region whose orientation differs from the usual value
            theta = slope(box[0][0], box[0][1], box[1][0], box[1][1])
            cummTheta += theta
            ct += 1
            # print("Theta", theta)
            # find the average of all cumulative theta values
            # orientation = cummTheta / ct
            print("Image orientation in degrees: ", angle)
            finalImage = rotate(img, angle)
            display(textImg, "Detected text minimum bounding box")
            display(finalImage)
            out_path = 'cropped_corrected/rotated/' + filePath.split('\\')[-1]
            print(out_path)
            cv2.imwrite(out_path, finalImage)
            print('image saved here in rotated')
            break
    if not flag:
        out_path = 'cropped_corrected/not_rotated/' + filePath.split('\\')[-1]
        print(out_path)
        cv2.imwrite(out_path, img)
        print('image saved here without rotation')

if __name__ == "__main__":
    filePath = 'cropped/N3963001963.jpg'
    main(filePath)
I am attaching some sample images that need to be rotated and the object inside the image needs to be aligned:

Compute angle between two objects in an image

I have some specific images of two objects (a phone and a TV remote) and I want to calculate the angle between two intersecting edges of these objects. I used Canny to detect the edges and a Hough line transform for the angle, but the hough_line() function found too many lines whose angles don't match the requirement.
Original image:
This is the requirement:
And this is what I made:
My code:
import cv2
from skimage.transform import hough_line, hough_line_peaks
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
def edge_detection(img, blur_ksize=5, threshold1=100, threshold2=200):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_gaussian = cv2.GaussianBlur(gray, (blur_ksize, blur_ksize), 0)
    img_canny = cv2.Canny(img_gaussian, threshold1, threshold2)
    return img_canny
image = edge_detection(cv2.imread('img1.png'))
h, theta, d = hough_line(image)
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
ax = axes.ravel()
ax[0].imshow(image)
ax[0].set_title('Input image')
ax[0].set_axis_off()
ax[1].imshow(image, cmap=cm.gray)
for _, angle, dist in zip(*hough_line_peaks(h, theta, d)):
    y0 = (dist - 0 * np.cos(angle)) / np.sin(angle)
    y1 = (dist - image.shape[1] * np.cos(angle)) / np.sin(angle)
    ax[1].plot((0, image.shape[1]), (y0, y1), '-r')
ax[1].set_xlim((0, image.shape[1]))
ax[1].set_ylim((image.shape[0], 0))
ax[1].set_axis_off()
ax[1].set_title('Detected lines')
plt.tight_layout()
plt.show()
angle = []
dist = []
for _, a, d in zip(*hough_line_peaks(h, theta, d)):
    angle.append(a)
    dist.append(d)
angle = [a*180/np.pi for a in angle]
print(angle)
Is there any way to detect and calculate exactly the one angle I need in OpenCV? Thanks a lot.
Update
I tried different values of blur_ksize, threshold1 and threshold2 in the Canny detection; it seems I could remove the redundant lines, but now the angles that hough_line_peaks() returns are negative. Can anyone explain this to me? I would also like to label the peaks in the plot with their angle values, to see which angle has which value.
Here is a sample solution, but I don't know whether it works for all images; you have to tune the Hough transform parameters.
import cv2
import numpy as np
import matplotlib.pyplot as plt
def edge_detection(img, blur_ksize=5, threshold1=70, threshold2=200):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img_gaussian = cv2.GaussianBlur(gray, (blur_ksize, blur_ksize), 0)
    img_canny = cv2.Canny(img_gaussian, threshold1, threshold2)
    return img_canny
img = cv2.imread('stack.png')
image = edge_detection(img)
minLineLength = 300
maxLineGap = 80
lines = cv2.HoughLinesP(image, 1, np.pi/180, 50, minLineLength=minLineLength, maxLineGap=maxLineGap)
equations = []
for line in lines:
    x1, y1, x2, y2 = line[0]
    equations.append(np.cross([x1, y1, 1], [x2, y2, 1]))
    cv2.line(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
font = cv2.FONT_HERSHEY_SIMPLEX
thetas = []
N = len(equations)
for ii in range(1, N):
    a1, b1, c1 = equations[0]
    a2, b2, c2 = equations[ii]
    # intersection point
    pt = np.cross([a1, b1, c1], [a2, b2, c2])
    pt = np.int16(pt / pt[-1])
    # angle between two lines
    num = a1 * b2 - b1 * a2
    den = a1 * a2 + b1 * b2
    if den != 0:
        theta = abs(np.arctan(num / den)) * 180 / np.pi
        # show angle and intersection point
        cv2.circle(img, (pt[0], pt[1]), 5, (255, 0, 0), -1)
        cv2.putText(img, str(round(theta, 1)), (pt[0] - 20, pt[1] - 20), font, 0.8, (255, 0, 0), 2, 0)
        thetas.append(theta)
plt.imshow(img)
plt.show()

How to detect Sudoku grid board in OpenCV

I'm working on a personal project using OpenCV in Python. I want to detect a Sudoku grid.
The original image is:
So far I have created this:
Then I tried to select a big blob. The result should be similar to this:
Instead, I got a black image as the result:
The code is:
import cv2
import numpy as np
def find_biggest_blob(outerBox):
    max = -1
    maxPt = (0, 0)
    h, w = outerBox.shape[:2]
    mask = np.zeros((h + 2, w + 2), np.uint8)
    for y in range(0, h):
        for x in range(0, w):
            if outerBox[y, x] >= 128:
                area = cv2.floodFill(outerBox, mask, (x, y), (0, 0, 64))
    #cv2.floodFill(outerBox, mask, maxPt, (255, 255, 255))
    image_path = 'Images/Results/sudoku-find-biggest-blob.jpg'
    cv2.imwrite(image_path, outerBox)
    cv2.imshow(image_path, outerBox)

def main():
    image = cv2.imread('Images/Test/sudoku-grid-detection.jpg', 0)
    find_biggest_blob(image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

if __name__ == '__main__':
    main()
The code in repl is: https://repl.it/#gmunumel/SudokuSolver
Any idea?
Here's an approach:
Convert image to grayscale and median blur to smooth image
Adaptive threshold to obtain binary image
Find contours and filter for largest contour
Perform perspective transform to obtain top-down view
After converting to grayscale and median blurring, we adaptively threshold to obtain a binary image.
Next we find contours and filter using contour area. Here's the detected board
Now to get a top-down view of the image, we perform a perspective transform. Here's the result
import cv2
import numpy as np
def perspective_transform(image, corners):
    def order_corner_points(corners):
        # Separate corners into individual points
        # Index 0 - top-right
        #       1 - top-left
        #       2 - bottom-left
        #       3 - bottom-right
        corners = [(corner[0][0], corner[0][1]) for corner in corners]
        top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
        return (top_l, top_r, bottom_r, bottom_l)

    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct new points to obtain top-down view of image in
    # top_r, top_l, bottom_l, bottom_r order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                           [0, height - 1]], dtype="float32")

    # Convert to Numpy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))

image = cv2.imread('1.jpg')
original = image.copy()

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.medianBlur(gray, 3)
thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 3)

cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)

for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)
    transformed = perspective_transform(original, approx)
    break
cv2.imshow('transformed', transformed)
cv2.imwrite('board.png', transformed)
cv2.waitKey()
Here is my solution, which will generalize to any image, whether it is warped or not.
Convert the image to grayscale
Apply adaptive thresholding to convert the image to binary
(Adaptive thresholding works better than normal thresholding because the original image can have different lighting in different areas)
Identify the corners of the large square
Perspective transform the image to the final square image
Depending on the amount of skew in the original image, the identified corners may be out of order, so we need to arrange them in the correct order. The method used here is to identify the centroid of the large square and determine the order of the corners from there.
Here is the code:
import cv2
import numpy as np
# Helper functions for getting square image
def euclidian_distance(point1, point2):
    # Calculates the Euclidean distance between point1 and point2,
    # used to compute the lengths of the four sides of the square
    distance = np.sqrt((point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2)
    return distance

def order_corner_points(corners):
    # The points obtained from contours may not be in order because of the skewness of the image, or
    # because of the camera angle. This function returns a list of corners in the right order
    sort_corners = [(corner[0][0], corner[0][1]) for corner in corners]
    sort_corners = [list(ele) for ele in sort_corners]
    x, y = [], []
    for i in range(len(sort_corners[:])):
        x.append(sort_corners[i][0])
        y.append(sort_corners[i][1])
    centroid = [sum(x) / len(x), sum(y) / len(y)]
    for _, item in enumerate(sort_corners):
        if item[0] < centroid[0]:
            if item[1] < centroid[1]:
                top_left = item
            else:
                bottom_left = item
        elif item[0] > centroid[0]:
            if item[1] < centroid[1]:
                top_right = item
            else:
                bottom_right = item
    ordered_corners = [top_left, top_right, bottom_right, bottom_left]
    return np.array(ordered_corners, dtype="float32")

def image_preprocessing(image, corners):
    # This function undertakes all the preprocessing of the image and returns the transformed image
    ordered_corners = order_corner_points(corners)
    print("ordered corners: ", ordered_corners)
    top_left, top_right, bottom_right, bottom_left = ordered_corners

    # Determine the widths and heights (top and bottom) of the image and find the max of them for the transform
    width1 = euclidian_distance(bottom_right, bottom_left)
    width2 = euclidian_distance(top_right, top_left)
    height1 = euclidian_distance(top_right, bottom_right)
    height2 = euclidian_distance(top_left, bottom_left)
    width = max(int(width1), int(width2))
    height = max(int(height1), int(height2))

    # To find the matrix for the warp perspective function we need dimensions and matrix parameters
    dimensions = np.array([[0, 0], [width, 0], [width, width],
                           [0, width]], dtype="float32")
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    transformed_image = cv2.warpPerspective(image, matrix, (width, width))

    # Now, chances are, you may want to return your image in a specific size. If not, you may ignore the following line
    transformed_image = cv2.resize(transformed_image, (252, 252), interpolation=cv2.INTER_AREA)
    return transformed_image

# main function
def get_square_box_from_image(image):
    # This function returns the top-down view of the puzzle
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.medianBlur(gray, 3)
    adaptive_threshold = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 3)
    corners = cv2.findContours(adaptive_threshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    corners = corners[0] if len(corners) == 2 else corners[1]
    corners = sorted(corners, key=cv2.contourArea, reverse=True)
    for corner in corners:
        length = cv2.arcLength(corner, True)
        approx = cv2.approxPolyDP(corner, 0.015 * length, True)
        print(approx)
        puzzle_image = image_preprocessing(image, approx)
        break
    return puzzle_image

# Call the get_square_box_from_image method on any sudoku image to get the top view of the puzzle
original = cv2.imread("large_puzzle.jpg")
sudoku = get_square_box_from_image(original)
Here are the results from the given image and a custom example

How to detect and align tilted images after cropping

I have implemented a cropping algorithm in my solution that works pretty well. The problem is that when the image is tilted, the crop still works, but background space shows around it, as the images below demonstrate.
Cropping flow:
First step:
Second step:
Final result:
I have searched for and tried multiple solutions, but could not get a decent result, or I'm not thinking about it the right way.
The expected result is this:
EDIT [FINAL RESULT]:
import cv2
import numpy as np
def order_corner_points(corners):
    # Separate corners into individual points
    # Index 0 - top-right
    #       1 - top-left
    #       2 - bottom-left
    #       3 - bottom-right
    corners = [(corner[0][0], corner[0][1]) for corner in corners]
    top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
    return (top_l, top_r, bottom_r, bottom_l)

def perspective_transform(image, corners):
    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct new points to obtain top-down view of image in
    # top_r, top_l, bottom_l, bottom_r order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                           [0, height - 1]], dtype="float32")

    # Convert to Numpy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))

def get_image_width_height(image):
    image_width = image.shape[1]   # current image's width
    image_height = image.shape[0]  # current image's height
    return image_width, image_height

def calculate_scaled_dimension(scale, image):
    image_width, image_height = get_image_width_height(image)
    ratio_of_new_width_to_old = scale / image_width
    dimension = (scale, int(image_height * ratio_of_new_width_to_old))
    return dimension

def scale_image(image, size):
    image_resized_scaled = cv2.resize(
        image,
        calculate_scaled_dimension(size, image),
        interpolation=cv2.INTER_AREA
    )
    return image_resized_scaled

def rotate_image(image, angle):
    # Grab the dimensions of the image and then determine the center
    (h, w) = image.shape[:2]
    (cX, cY) = (w / 2, h / 2)

    # Grab the rotation matrix (applying the negative of the
    # angle to rotate clockwise), then grab the sine and cosine
    # (i.e., the rotation components of the matrix)
    M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])

    # Compute the new bounding dimensions of the image
    nW = int((h * sin) + (w * cos))
    nH = int((h * cos) + (w * sin))

    # Adjust the rotation matrix to take into account translation
    M[0, 2] += (nW / 2) - cX
    M[1, 2] += (nH / 2) - cY

    # Perform the actual rotation and return the image
    return cv2.warpAffine(image, M, (nW, nH))

image = cv2.imread('images/damina_cc_back.jpg')
original_image = image.copy()
image = scale_image(image, 500)

# Convert the image to grayscale, blur it, and find edges in the image
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 17, 17)
edged = cv2.Canny(gray, 30, 200)

cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:10]
screen_cnt = None

# Loop over our contours
for c in cnts:
    # Approximate the contour
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)
    if len(approx) == 4:
        screen_cnt = approx
        transformed = perspective_transform(image, screen_cnt)
        break

# Draw ROI
cv2.drawContours(image, [screen_cnt], -1, (0, 255, 0), 1)

(h, w) = transformed.shape[:2]
if h > w:
    rotated = rotate_image(transformed, 90)
else:
    rotated = transformed

cv2.imshow("image", original_image)
cv2.imshow("ROI", image)
cv2.imshow("transformed", transformed)
cv2.imshow("rotated", rotated)
cv2.waitKey(0)
To align your image after cropping, we can use a perspective transformation. To begin, we separate the four corners of the rectangle, given to us by cv2.approxPolyDP(), into individual points. We reorder the points into a clockwise orientation (top-left, top-right, bottom-right, bottom-left) using this function:
def order_corner_points(corners):
    # Separate corners into individual points
    # Index 0 - top-right
    #       1 - top-left
    #       2 - bottom-left
    #       3 - bottom-right
    corners = [(corner[0][0], corner[0][1]) for corner in corners]
    top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
    return (top_l, top_r, bottom_r, bottom_l)
This function gives us the bounding box coordinates of the ROI.
Now with the isolated corner points, we can obtain the transformation matrix using cv2.getPerspectiveTransform() and actually obtain the transformed image using cv2.warpPerspective().
def perspective_transform(image, corners):
    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct new points to obtain top-down view of image in
    # top_r, top_l, bottom_l, bottom_r order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                           [0, height - 1]], dtype="float32")

    # Convert to Numpy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))
Here's the result
We can rotate the image with this function
def rotate_image(image, angle):
    # Grab the dimensions of the image and then determine the center
    (h, w) = image.shape[:2]
    (cX, cY) = (w / 2, h / 2)

    # Grab the rotation matrix (applying the negative of the
    # angle to rotate clockwise), then grab the sine and cosine
    # (i.e., the rotation components of the matrix)
    M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])

    # Compute the new bounding dimensions of the image
    nW = int((h * sin) + (w * cos))
    nH = int((h * cos) + (w * sin))

    # Adjust the rotation matrix to take into account translation
    M[0, 2] += (nW / 2) - cX
    M[1, 2] += (nH / 2) - cY

    # Perform the actual rotation and return the image
    return cv2.warpAffine(image, M, (nW, nH))
The final result after rotating:
Full code
import cv2
import numpy as np
def order_corner_points(corners):
    # Separate corners into individual points
    # Index 0 - top-right
    #       1 - top-left
    #       2 - bottom-left
    #       3 - bottom-right
    corners = [(corner[0][0], corner[0][1]) for corner in corners]
    top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
    return (top_l, top_r, bottom_r, bottom_l)

def perspective_transform(image, corners):
    # Order points in clockwise order
    ordered_corners = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered_corners

    # Determine width of new image which is the max distance between
    # (bottom right and bottom left) or (top right and top left) x-coordinates
    width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
    width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
    width = max(int(width_A), int(width_B))

    # Determine height of new image which is the max distance between
    # (top right and bottom right) or (top left and bottom left) y-coordinates
    height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
    height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
    height = max(int(height_A), int(height_B))

    # Construct new points to obtain top-down view of image in
    # top_r, top_l, bottom_l, bottom_r order
    dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
                           [0, height - 1]], dtype="float32")

    # Convert to Numpy format
    ordered_corners = np.array(ordered_corners, dtype="float32")

    # Find perspective transform matrix
    matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)

    # Return the transformed image
    return cv2.warpPerspective(image, matrix, (width, height))

def rotate_image(image, angle):
    # Grab the dimensions of the image and then determine the center
    (h, w) = image.shape[:2]
    (cX, cY) = (w / 2, h / 2)

    # Grab the rotation matrix (applying the negative of the
    # angle to rotate clockwise), then grab the sine and cosine
    # (i.e., the rotation components of the matrix)
    M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])

    # Compute the new bounding dimensions of the image
    nW = int((h * sin) + (w * cos))
    nH = int((h * cos) + (w * sin))

    # Adjust the rotation matrix to take into account translation
    M[0, 2] += (nW / 2) - cX
    M[1, 2] += (nH / 2) - cY

    # Perform the actual rotation and return the image
    return cv2.warpAffine(image, M, (nW, nH))

image = cv2.imread('1.PNG')
original_image = image.copy()

# Convert the image to grayscale, blur it, and find edges in the image
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 17, 17)
edged = cv2.Canny(gray, 30, 200)

cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:10]
screen_cnt = None

# Loop over our contours
for c in cnts:
    # Approximate the contour
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.015 * peri, True)
    if len(approx) == 4:
        screen_cnt = approx
        transformed = perspective_transform(original_image, screen_cnt)
        break

# Draw ROI
cv2.drawContours(image, [screen_cnt], -1, (0, 255, 0), 3)

# Rotate image
rotated = rotate_image(transformed, -90)

cv2.imshow("image", original_image)
cv2.imshow("ROI", image)
cv2.imshow("transformed", transformed)
cv2.imshow("rotated", rotated)
cv2.waitKey(0)
I assume you're looking for the minimum and maximum u and v positions where an edge was found (or maybe certain quantiles) to find the cropped rectangle. That is, you go over all image pixels that were marked as an edge and update the u/v min/max values.
If computation time is not an issue for you, you can simply keep the algorithm as is and additionally loop over a number of rotations, updating separate extremal values for each. Pseudocode:
for v
    for u
        if (u, v) is edge
            for rotation_matrix
                (ur, vr) = rotation_matrix * (u, v)
                update boundary for given rotation matrix
In the end you can select the rotation matrix that yields the smallest bounding box.
If the above algorithm is too slow for your use case, you can also try extracting the major axes using the OpenCV HoughLinesP function, as in the rough sketch below. This will of course not work for all types of images, but it may be good enough for the case of ID cards.
Finally, to apply the rotation correction, refer to this tutorial.
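As an illustration of the HoughLinesP idea, here is a minimal sketch (the input path and the Canny/Hough parameters are placeholders you would tune for your images):
import cv2
import numpy as np

img = cv2.imread('card.jpg')  # hypothetical input path
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray, 50, 150)

# Detect line segments; thresholds and lengths are placeholders to tune
lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=80,
                        minLineLength=100, maxLineGap=10)
if lines is not None:
    # Take the longest segment as the card's major axis
    x1, y1, x2, y2 = max(lines[:, 0],
                         key=lambda l: np.hypot(l[2] - l[0], l[3] - l[1]))
    angle = np.degrees(np.arctan2(y2 - y1, x2 - x1))
    # Rotate by the measured tilt to level the major axis
    # (the sign may need flipping depending on the image)
    (h, w) = img.shape[:2]
    M = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1.0)
    corrected = cv2.warpAffine(img, M, (w, h))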

How to write a script for image rotation-angle calculation?

I have over 100 images, each at a different angle. I have written a basic Python script to rotate each image from an arbitrary angle back to zero degrees (that is, to make them flat). The attached code is simple, and unfortunately it does not find the angle automatically, nor does it make the result exactly zero. For each image I have to find the angle myself and run the code several times to zero it out (sometimes I am not able to make it exactly flat, or what we call zero degrees). In the attached images, image1 is a sample input and image_2 is the rotated image that I want as output.
I would appreciate help modifying the current code, or new Python code (which I would prefer), so that I can rotate my images from any angle to zero degrees.
Please feel free to ask for more explanation if needed.
My opencv-python code is:
import cv2
import numpy as np
img = cv2.imread('image1.tif')
num_rows, num_cols = img.shape[:2]
rotation_matrix = cv2.getRotationMatrix2D((num_cols/2, num_rows/2),69.4, 1)
img_rotation = cv2.warpAffine(img, rotation_matrix, (num_cols, num_rows))
cv2.imshow('Rotation', img_rotation)
cv2.imwrite('image_2.tif',img_rotation)
cv2.waitKey()
Note: input and output images deleted.
It's definitely not the most robust method, but perhaps an option would be to:
assume that the boundary is all black
identify the top-most (x0,y0) / right-most (x1,y1) corners of the image
calculate the angle of rotation as alpha = math.atan2(x1-x0,y1-y0)
I downloaded your figure (it was converted to png on imgur) and tested the procedure with:
#!/usr/bin/env python
import cv2
import math
import numpy as np

img = cv2.imread('test.png')
H, W = img.shape[:2]

x0, y0 = None, None
x1, y1 = None, None

# scan all rows starting with the first
for i in range(0, H):
    row = img[i].sum(axis=1)
    s = np.sum(row)
    if s:
        # if there is at least one non-black pixel, mark its position
        x0 = np.max(np.where(row > 0))
        y0 = i
        break

# scan all columns starting with the right-most one
for j in range(W - 1, -1, -1):
    col = img[:, j, :].sum(axis=1)
    s = np.sum(col)
    if s:
        # mark the position of the first non-black pixel
        x1 = j
        y1 = np.min(np.where(col > 0))
        break

dx = x1 - x0
dy = y1 - y0
alpha = math.atan2(dx, dy) / math.pi * 180

rotation_matrix = cv2.getRotationMatrix2D((W / 2, H / 2), -alpha, 1)
img_rotation = cv2.warpAffine(img, rotation_matrix, (W, H))
cv2.imwrite('image_2.tif', img_rotation)
EDIT:
The previous method can be inaccurate if the "corner" pixel is itself black, in which case the calculated angle is biased. A slightly more accurate approach could be as follows:
determine the "upper" boundary of the rectangle (i.e., the coordinates of the pixels defining the edges)
take the edge whose projection onto the x-axis is longer
fit the coordinates in order to calculate the slope of the line defining the edge
The implementation:
#!/usr/bin/env python
import cv2
import math
import numpy as np

img = cv2.imread('test.png')
H, W = img.shape[:2]

# collect, for each non-empty column, the row index of its top-most non-black pixel
data = []
for j in range(0, W):
    col = img[:, j, :].sum(axis=1)
    s = np.sum(col)
    if not s:
        continue
    for i in range(0, H):
        if col[i] > 0:
            data.append((j, i))
            break

# find the position of the top-most point (the corner)
y_min, min_pos = None, None
for idx, (x, y) in enumerate(data):
    if y_min is None or y < y_min:
        y_min = y
        min_pos = idx

# keep the longer of the two edges meeting at the corner
N = len(data)
if min_pos > N - min_pos:
    data = data[:min_pos]
else:
    data = data[min_pos:]

data = np.asarray(data).T
coeffs = np.polyfit(data[0], data[1], 1)
alpha = math.atan(coeffs[0]) / math.pi * 180
print(alpha)

rotation_matrix = cv2.getRotationMatrix2D((W / 2, H / 2), alpha, 1)
img_rotation = cv2.warpAffine(img, rotation_matrix, (W, H))
cv2.imwrite('image_2.tif', img_rotation)
Another way to find the angle (assuming that the image is on a black background):
Convert the image to grayscale
Segmentate the image using a threshold
Find the contours of the image
Find the parameters of the ellipse that fit the contour
import cv2
import numpy as np
image = cv2.imread("DlYEa.png")
gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
num_rows, num_cols = image.shape[:2]
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY)
contours, hier = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)  # opencv >= 4.0; in 3.x this returns (img, contours, hier)
cnt = contours[0]
(x, y), (Ma, ma), angle = cv2.fitEllipse(cnt)
angle = int(angle - 90)
rotation_matrix = cv2.getRotationMatrix2D((num_cols/2, num_rows/2), angle, 1)
img_rotation = cv2.warpAffine(image, rotation_matrix, (num_cols, num_rows))
cv2.imshow("rotation", img_rotation)
cv2.waitKey()
cv2.destroyAllWindows()
