Perspective Transform OMR Sheet - OpenCV Python - python

I am trying to make a perspective transformation with OpenCV in python. I want to align the image and find the coordinates of top left top right and bottom left and bottom right contours. here is my code so far where I can identify all the contours.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray,(3,3),0)
edges = cv2.Canny(blur,50,100)
contours, hierarchy = cv2.findContours(edges,cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) #find contours
cv2.drawContours(img,contours,-1,(0,255,0),2)
cv2.imshow('Contours',img)
cv2.waitKey(0)
Here is an image of what I get.
I'd be very thankful if you could try to help me solve this problem!
Original Image
Demo Inputs:
Demo Inputs
Demo Input 1:
Demo Input 1
Demo Input 2:
Demo Input 2
Demo Input 3:
Demo Input 3
Demo Input 4:
Demo Input 4
Desired Output:
Desired Output

Here is one way to do that in Python/OpenCV.
Read the input
Read the template (for its dimension)
Convert the input to gray and threshold
Pad the threshold (to preserve the corners when apply morphology)
Apply morphology close
Remove the padding
Get the largest external contour
Get its perimeter and approximate to 4 corners as the input corners for the warping
Get the output corners for the warping from the dimensions of the template
Get the perspective transformation matrix
Warp the input to match the template
Save the results
Input:
Template:
import cv2
import numpy as np
# read image
img = cv2.imread("omr_test.jpg")
hh, ww = img.shape[:2]
# read template
template = cv2.imread("omr_template.jpg")
ht, wd = template.shape[:2]
# convert img to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# do otsu threshold on gray image
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]
# pad thresh with black to preserve corners when apply morphology
pad = cv2.copyMakeBorder(thresh, 20, 20, 20, 20, borderType=cv2.BORDER_CONSTANT, value=0)
# apply morphology
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15,15))
morph = cv2.morphologyEx(pad, cv2.MORPH_CLOSE, kernel)
# remove padding
morph = morph[20:hh+20, 20:ww+20]
# get largest external contour
contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
big_contour = max(contours, key=cv2.contourArea)
# get perimeter and approximate a polygon
peri = cv2.arcLength(big_contour, True)
corners = cv2.approxPolyDP(big_contour, 0.04 * peri, True)
# draw polygon on input image from detected corners
polygon = img.copy()
cv2.polylines(polygon, [corners], True, (0,255,0), 2, cv2.LINE_AA)
# Alternate: cv2.drawContours(page,[corners],0,(0,0,255),1)
# print the number of found corners and the corner coordinates
# They seem to be listed counter-clockwise from the top most corner
print(len(corners))
print(corners)
# reformat input corners to x,y list
icorners = []
for corner in corners:
pt = [ corner[0][0],corner[0][1] ]
icorners.append(pt)
icorners = np.float32(icorners)
# get corresponding output corners form width and height
ocorners = [ [0,0], [0,ht], [wd,ht], [wd,0] ]
ocorners = np.float32(ocorners)
# get perspective tranformation matrix
M = cv2.getPerspectiveTransform(icorners, ocorners)
# do perspective
warped = cv2.warpPerspective(img, M, (wd, ht))
# write results
cv2.imwrite("omr_test_thresh.jpg", thresh)
cv2.imwrite("omr_test_morph.jpg", morph)
cv2.imwrite("omr_test_polygon.jpg", polygon)
cv2.imwrite("omr_test_warped.jpg", warped)
# display it
cv2.imshow("thresh", thresh)
cv2.imshow("pad", pad)
cv2.imshow("morph", morph)
cv2.imshow("polygon", polygon)
cv2.imshow("warped", warped)
cv2.waitKey(0)
Threshold Image:
Morphology Image:
Polygon Image:
Warped Input:

Here is a correction that solves whether the input image is rotated cw or ccw in Python/OpenCV.
Read the input
Read the template (for its dimensions)
Convert the input to gray and threshold
Pad the threshold (to preserve the corners when applying morphology)
Apply morphology close
Remove the padding
Get the largest external contour
Get its perimeter and approximate to 4 corners as the input corners for the warping
Sort the corners by Y and test the first two sorted corners for X. If diff=X2-X1 is negative, then the output corners need to be list in a slightly differently order.
Get the output corners for the warping from the dimensions of the template and the diff of the sorted corners.
Get the perspective transformation matrix
Warp the input to match the template
Save the results
Input 1 (cw rotated):
:
Input 2 (ccw rotated):
import cv2
import numpy as np
# read image
#img = cv2.imread("omr_test.jpg")
img = cv2.imread("omr_test2.jpg")
hh, ww = img.shape[:2]
# read template
template = cv2.imread("omr_template.jpg")
ht, wd = template.shape[:2]
# convert img to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# do otsu threshold on gray image
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]
# pad thresh with black to preserve corners when apply morphology
pad = cv2.copyMakeBorder(thresh, 20, 20, 20, 20, borderType=cv2.BORDER_CONSTANT, value=0)
# apply morphology
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15,15))
morph = cv2.morphologyEx(pad, cv2.MORPH_CLOSE, kernel)
# remove padding
morph = morph[20:hh+20, 20:ww+20]
# get largest external contour
contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
big_contour = max(contours, key=cv2.contourArea)
# get perimeter and approximate a polygon
peri = cv2.arcLength(big_contour, True)
corners = cv2.approxPolyDP(big_contour, 0.04 * peri, True)
# draw polygon on input image from detected corners
polygon = img.copy()
cv2.polylines(polygon, [corners], True, (0,255,0), 2, cv2.LINE_AA)
# print the number of found corners and the corner coordinates
# They seem to be listed counter-clockwise from the top most corner
print(len(corners))
print(corners)
# reformat input corners to x,y list
sortcorners = []
for corner in corners:
pt = [ corner[0][0],corner[0][1] ]
sortcorners.append(pt)
icorners = np.float32(sortcorners)
# sort corners on y
def takeSecond(elem):
return elem[1]
sortcorners.sort(key=takeSecond)
# check if second corner x is left or right of first corner x
x1 = sortcorners[0][0]
x2 = sortcorners[1][0]
diff = x2 - x1
print(x1, x2)
# get corresponding output corners from width and height
if diff >= 0:
ocorners = [ [0,0], [0,ht], [wd,ht], [wd,0] ]
else:
ocorners = [ [wd,0], [0,0], [0,ht], [wd,ht]]
ocorners = np.float32(ocorners)
# get perspective tranformation matrix
M = cv2.getPerspectiveTransform(icorners, ocorners)
# do perspective
warped = cv2.warpPerspective(img, M, (wd, ht))
# write results
cv2.imwrite("omr_test2_thresh.jpg", thresh)
cv2.imwrite("omr_test2_morph.jpg", morph)
cv2.imwrite("omr_test2_polygon.jpg", polygon)
cv2.imwrite("omr_test2_warped.jpg", warped)
# display it
cv2.imshow("thresh", thresh)
cv2.imshow("pad", pad)
cv2.imshow("morph", morph)
cv2.imshow("polygon", polygon)
cv2.imshow("warped", warped)
cv2.waitKey(0)
Result for first input (cw rotated):
Result for second input (ccw rotated):

Related

Crop is not working for this image using OpenCV

i need to crop the below image by detecting co-ordinates of the image using opencv.
i tried below code but it's not working as expected.
import cv2
#reading image
image = cv2.imread("input.PNG")
#converting to gray scale
gray=cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
#applying canny edge detection
edged = cv2.Canny(image, 10, 250)
#finding contours
(_, cnts, _) = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
idx = 0
for c in cnts:
x,y,w,h = cv2.boundingRect(c)
if w>50 and h>50:
idx+=1
new_img=image[y:y+h,x:x+w]
#cropping images
cv2.imwrite("cropped/"+str(idx) + '.png', new_img)
#cv2.imshow("Original Image",image)
#cv2.imshow("Canny Edge",edged)
#cv2.waitKey(0)
Input Image:
Output image:
Cropping the image based on coordinates returned from cv2.boundingRect() will not give you the desired result. You need to obtain the 4 corners of the image and orient it accordingly. This can be done using the perspective transformation matrix
Also, detecting edges and later finding contours is not the right way to go. You need to find a contour large enough to enclose the entire page. To do so, binarize the image and find the largest external contour.
Code:
# read image and binarize
img = cv2.imread(r'C:\Users\524316\Desktop\Stack\dc.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
th = cv2.threshold(gray,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]
# find the largest contour
contours, hierarchy = cv2.findContours(th, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
c = max(contours, key = cv2.contourArea)
# To find the 4 corners of the contour
rect = cv2.minAreaRect(c)
input_corners = cv2.boxPoints(rect)
input_corners = np.int0(input_corners)
# We need 4 new points onto which the 4 input points need to be warped
# which will be done on an image with the same size as the input image
ht, wd = img.shape[:2]
output_corners = [[0,0], [wd,0], [wd,ht], [0,ht]]
# converting to float data type
input_corners = np.float32(input_corners)
output_corners = np.float32(output_corners)
# get the transformation matrix
M = cv2.getPerspectiveTransform(input_corners, output_corners)
# perform warping
warped = cv2.warpPerspective(img, M, (wd, ht))
cv2.imshow('Warped output', warped)
Result:

Get the location of all contours present in image using opencv, but skipping text

I want to retrieve all contours of the image below, but ignore text.
Image:
When I try to find the contours of the current image I get the following:
I have no idea how to go about this as I am new to using OpenCV and image processing. I want to get ignore the text, how can I achieve this? If ignoring is not possible but making a single bounding box surrounding the text is, than that would be good too.
Edit:
Criteria that I need to match:
The contours may very in size and shape.
The colors from the image may differ.
The colors and size of the text inside the image may differ.
Here is one way to do that in Python/OpenCV.
Read the input
Convert to grayscale
Get Canny edges
Apply morphology close to ensure they are closed
Get all contour hierarchy
Filter contours to keep only those above threshold in perimeter
Draw contours on input
Draw each contour on a black background
Save results
Input:
import numpy as np
import cv2
# read input
img = cv2.imread('short_title.png')
# convert to gray
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# get canny edges
edges = cv2.Canny(gray, 1, 50)
# apply morphology close to ensure they are closed
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)
# get contours
contours = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
contours = contours[0] if len(contours) == 2 else contours[1]
# filter contours to keep only large ones
result = img.copy()
i = 1
for c in contours:
perimeter = cv2.arcLength(c, True)
if perimeter > 500:
cv2.drawContours(result, c, -1, (0,0,255), 1)
contour_img = np.zeros_like(img, dtype=np.uint8)
cv2.drawContours(contour_img, c, -1, (0,0,255), 1)
cv2.imwrite("short_title_contour_{0}.jpg".format(i),contour_img)
i = i + 1
# save results
cv2.imwrite("short_title_gray.jpg", gray)
cv2.imwrite("short_title_edges.jpg", edges)
cv2.imwrite("short_title_contours.jpg", result)
# show images
cv2.imshow("gray", gray)
cv2.imshow("edges", edges)
cv2.imshow("result", result)
cv2.waitKey(0)
Grayscale:
Edges:
All contours on input:
Contour 1:
Contour 2:
Contour 3:
Contour 4:
Here are two options for erasing the text:
Using pytesseract OCR.
Finding white (and small) connected components.
Both solution build a mask, dilate the mask and use cv2.inpaint for erasing the text.
Using pytesseract:
Find text boxes using pytesseract.image_to_boxes.
Fill the boxes in the mask with 255.
Code sample:
import cv2
import numpy as np
from pytesseract import pytesseract, Output
# Tesseract path
pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"
img = cv2.imread('ShortAndInteresting.png')
# https://stackoverflow.com/questions/20831612/getting-the-bounding-box-of-the-recognized-words-using-python-tesseract
boxes = pytesseract.image_to_boxes(img, lang='eng', config=' --psm 6') # Run tesseract, returning the bounding boxes
h, w, _ = img.shape # assumes color image
mask = np.zeros((h, w), np.uint8)
# Fill the bounding boxes on the image
for b in boxes.splitlines():
b = b.split(' ')
mask = cv2.rectangle(mask, (int(b[1]), h - int(b[2])), (int(b[3]), h - int(b[4])), 255, -1)
mask = cv2.dilate(mask, np.ones((5, 5), np.uint8)) # Dilate the boxes in the mask
clean_img = cv2.inpaint(img, mask, 2, cv2.INPAINT_NS) # Remove the text using inpaint (replace the masked pixels with the neighbor pixels).
# Show mask and clean_img for testing
cv2.imshow('mask', mask)
cv2.imshow('clean_img', clean_img)
cv2.waitKey()
cv2.destroyAllWindows()
Mask:
Finding white (and small) connected components:
Use mask = cv2.inRange(img, (230, 230, 230), (255, 255, 255)) for finding the text (assume the text is white).
Finding connected components in the mask using cv2.connectedComponentsWithStats(mask, 4)
Remove large components from the mask - fill components with large area with zeros.
Code sample:
import cv2
import numpy as np
img = cv2.imread('ShortAndInteresting.png')
mask = cv2.inRange(img, (230, 230, 230), (255, 255, 255))
nlabel, labels, stats, centroids = cv2.connectedComponentsWithStats(mask, 4) # Finding connected components with statistics
# Remove large components from the mask (fill components with large area with zeros).
for i in range(1, nlabel):
area = stats[i, cv2.CC_STAT_AREA] # Get area
if area > 1000:
mask[labels == i] = 0 # Remove large connected components from the mask (fill with zero)
mask = cv2.dilate(mask, np.ones((5, 5), np.uint8)) # Dilate the text in the maks
cv2.imwrite('mask2.png', mask)
clean_img = cv2.inpaint(img, mask, 2, cv2.INPAINT_NS) # Remove the text using inpaint (replace the masked pixels with the neighbor pixels).
# Show mask and clean_img for testing
cv2.imshow('mask', mask)
cv2.imshow('clean_img', clean_img)
cv2.waitKey()
cv2.destroyAllWindows()
Mask:
Clean image:
Note:
My assumption is that you know how to split the image into contours, and the only issue is the present of the text.
I would recommend using flood fill, find the seed point for each color region, flood fill it to ignore the text values within. Hope that helps!
Refer to example of using floodfill here: https://www.programcreek.com/python/example/89425/cv2.floodFill
Example below copied from link above
def fillhole(input_image):
'''
input gray binary image get the filled image by floodfill method
Note: only holes surrounded in the connected regions will be filled.
:param input_image:
:return:
'''
im_flood_fill = input_image.copy()
h, w = input_image.shape[:2]
mask = np.zeros((h + 2, w + 2), np.uint8)
im_flood_fill = im_flood_fill.astype("uint8")
cv.floodFill(im_flood_fill, mask, (0, 0), 255)
im_flood_fill_inv = cv.bitwise_not(im_flood_fill)
img_out = input_image | im_flood_fill_inv
return img_out

OpenCV Segmentation of Largest contour in Breast Mammograms

This might be a bit too "general" question, but how do I perform GRAYSCALE image segmentation and keep the largest contour? I am trying to remove background noise (i.e. labels) from breast mammograms, but I am not successful. Here is the original image:
First, I applied AGCWD algorithm (based on paper "Efficient Contrast Enhancement Using Adaptive Gamma Correction With Weighting Distribution") in order to get better contrast of the image pixels, like so:
Afterwards, I tried executing following steps:
Image segmentation using OpenCV's KMeans clustering algorithm:
enhanced_image_cpy = enhanced_image.copy()
reshaped_image = np.float32(enhanced_image_cpy.reshape(-1, 1))
number_of_clusters = 10
stop_criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.1)
ret, labels, clusters = cv2.kmeans(reshaped_image, number_of_clusters, None, stop_criteria, 10, cv2.KMEANS_RANDOM_CENTERS)
clusters = np.uint8(clusters)
Canny Edge Detection:
removed_cluster = 1
canny_image = np.copy(enhanced_image_cpy).reshape((-1, 1))
canny_image[labels.flatten() == removed_cluster] = [0]
canny_image = cv2.Canny(canny_image,100,200).reshape(enhanced_image_cpy.shape)
show_images([canny_image])
Find and Draw Contours:
initial_contours_image = np.copy(canny_image)
initial_contours_image_bgr = cv2.cvtColor(initial_contours_image, cv2.COLOR_GRAY2BGR)
_, thresh = cv2.threshold(initial_contours_image, 50, 255, 0)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cv2.drawContours(initial_contours_image_bgr, contours, -1, (255,0,0), cv2.CHAIN_APPROX_SIMPLE)
show_images([initial_contours_image_bgr])
Here is how image looks after I draw 44004 contours:
I am not sure how can I get one BIG contour, instead of 44004 small ones. Any ideas how to fix my approach, or possibly any ideas on using alternative approach to get rid of label in top right corner.
Thanks in advance!
Here is one way to do that in Python OpenCV
Read the image
Threshold and invert so the borders are black
Remove the borders of the image as follows (so as to make it easier to get the relevant contours later):
Count the number of non-zero pixels in each column and find the first and last column that have counts greater than 0
Count the number of non-zero pixels in each row and find the first and last row that have counts greater than 0
Crop the image to remove the borders
Crop thresh1 and invert to make thresh2
Get the external contours from thresh2
Find the largest contour and draw as white filled on a black background as a mask
Make all pixels in the cropped image black where the mask is black
Save the results -
Input:
import cv2
import numpy as np
# read image
img = cv2.imread('xray3.png')
# convert to gray
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# threshold and invert
thresh1 = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY)[1]
thresh1 = 255 - thresh1
# remove borders
# count number of white pixels in columns as new 1D array
count_cols = np.count_nonzero(thresh1, axis=0)
# get first and last x coordinate where black
first_x = np.where(count_cols>0)[0][0]
last_x = np.where(count_cols>0)[0][-1]
print(first_x,last_x)
# count number of white pixels in rows as new 1D array
count_rows = np.count_nonzero(thresh1, axis=1)
# get first and last y coordinate where black
first_y = np.where(count_rows>0)[0][0]
last_y = np.where(count_rows>0)[0][-1]
print(first_y,last_y)
# crop image
crop = img[first_y:last_y+1, first_x:last_x+1]
# crop thresh1 and invert
thresh2 = thresh1[first_y:last_y+1, first_x:last_x+1]
thresh2 = 255 - thresh2
# get external contours and keep largest one
contours = cv2.findContours(thresh2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
big_contour = max(contours, key=cv2.contourArea)
# make mask from contour
mask = np.zeros_like(thresh2 , dtype=np.uint8)
cv2.drawContours(mask, [big_contour], 0, 255, -1)
# make crop black everywhere except where largest contour is white in mask
result = crop.copy()
result[mask==0] = (0,0,0)
# write result to disk
cv2.imwrite("xray3_thresh1.jpg", thresh1)
cv2.imwrite("xray3_crop.jpg", crop)
cv2.imwrite("xray3_thresh2.jpg", thresh2)
cv2.imwrite("xray3_mask.jpg", mask)
cv2.imwrite("xray3_result.png", result)
# display it
cv2.imshow("thresh1", thresh1)
cv2.imshow("crop", crop)
cv2.imshow("thresh2", thresh2)
cv2.imshow("mask", mask)
cv2.imshow("result", result)
cv2.waitKey(0)
Threshold 1 image:
Cropped image:
Threshold 2 image:
Mask image:
Result:

to detect patches in binary images using opencv python

I want to detect all the patches in the enter image description hereimage, I attached the code used to detect them:
import cv2
import numpy as np
import matplotlib.pyplot as plt
image=cv2.imread("bw2.jpg",0)
# convert to RGB
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
# create a binary thresholded image
_, binary = cv2.threshold(gray, 0, 500, cv2.THRESH_BINARY_INV)
# show it
plt.imshow(gray, cmap="gray")
plt.show()
# find the contours from the thresholded image
contours, hierarchy = cv2.findContours(gray, cv2.RETR_TREE,
cv2.CHAIN_APPROX_SIMPLE)
print("contours:",contours)
# draw all contours
for c in contours:
if cv2.contourArea(c) < 3000:
continue
(x, y, w, h) = cv2.boundingRect(c)
#cv2.rectangle(image, (x,y), (x+w,y+h), (0, 255, 0), 2)
## BEGIN - draw rotated rectangle
rect = cv2.minAreaRect(c)
box = cv2.boxPoints(rect)
box = np.int0(box)
cv2.drawContours(image,[box],0,(255,51,255),2)
# show the image with the drawn contours
plt.imshow(image)
#plt.imshow(im3)
cv2.imwrite("detectImg2.png",image)
plt.show()
I get output image as hereenter image description here
I want to detect all of them, can anyone tell me how to achieve this I new to image processing
Here is how I would extract and rotate each blob in your image using Python OpenCV.
Read the input
Convert to gray
Threshold
Apply morphology open and close to clean small spots
Get all the external contours
Loop over each contour and do the following:
Draw the contour on a copy of the input image
Get the rotated rectangle of the contour and extract its center, dimensions and rotation angle
Get the corners of the rotated rectangle
Draw the rotated rectangle on another copy of the input
Correct the rotation angle for image unrotation
Generate a mask image with the filled rotated rectangle
Apply the mask image to the morphology cleaned image to remove near-by other white regions
Get the affine warp matrix using the center and corrected rotation angle
Unrotated the the masked image using warpAffine
Get the contour of the one blob in the unrotated image
Get the contours bounding box
Crop the masked image (or alternately crop the input image)
Save the cropped image
Exit the loop
Save the contour and rotrect images
Input:
import cv2
import numpy as np
image = cv2.imread("bw2.jpg")
hh, ww = image.shape[:2]
# convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# create a binary thresholded image
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]
# apply morphology
kernel = np.ones((7,7), np.uint8)
clean = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = np.ones((13,13), np.uint8)
clean = cv2.morphologyEx(clean, cv2.MORPH_CLOSE, kernel)
# get external contours
contours = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
contour_img = image.copy()
rotrect_img = image.copy()
i = 1
for c in contours:
# draw contour on input
cv2.drawContours(contour_img,[c],0,(0,0,255),2)
# get rotated rectangle from contour
# get its dimensions
# get angle relative to horizontal from rotated rectangle
rotrect = cv2.minAreaRect(c)
(center), (width,height), angle = rotrect
box = cv2.boxPoints(rotrect)
boxpts = np.int0(box)
# draw rotated rectangle on copy of image
cv2.drawContours(rotrect_img,[boxpts],0,(0,255,0),2)
# from https://www.pyimagesearch.com/2017/02/20/text-skew-correction-opencv-python/
# the `cv2.minAreaRect` function returns values in the
# range [-90, 0); as the rectangle rotates clockwise the
# returned angle tends to 0 -- in this special case we
# need to add 90 degrees to the angle
if angle < -45:
angle = -(90 + angle)
# otherwise, check width vs height
else:
if width > height:
angle = -(90 + angle)
else:
angle = -angle
# negate the angle for deskewing
neg_angle = -angle
# draw mask as filled rotated rectangle on black background the size of the input
mask = np.zeros_like(clean)
cv2.drawContours(mask,[boxpts],0,255,-1)
# apply mask to cleaned image
blob_img = cv2.bitwise_and(clean, mask)
# Get rotation matrix
#center = (width // 2, height // 2)
M = cv2.getRotationMatrix2D(center, neg_angle, scale=1.0)
#print('m: ',M)
# deskew (unrotate) the rotated rectangle
deskewed = cv2.warpAffine(blob_img, M, (ww, hh), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
# threshold it again
deskewed = cv2.threshold(deskewed, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]
# get bounding box of contour of deskewed rectangle
cntrs = cv2.findContours(deskewed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cntrs = cntrs[0] if len(cntrs) == 2 else cntrs[1]
cntr = cntrs[0]
x,y,w,h = cv2.boundingRect(cntr)
# crop to white region
crop = deskewed[y:y+h, x:x+w]
# alternately crop the input
#crop = image[y:y+h, x:x+w]
# save deskewed image
cv2.imwrite("bw2_deskewed_{0}.png".format(i),crop)
print("")
i = i + 1
# save contour and rot rect images
cv2.imwrite("bw2_contours.png",contour_img)
cv2.imwrite("bw2_rotrects.png",rotrect_img)
# display result, though it won't show transparency
cv2.imshow("thresh", thresh)
cv2.imshow("clean", clean)
cv2.imshow("contours", contour_img)
cv2.imshow("rectangles", rotrect_img)
cv2.waitKey(0)
cv2.destroyAllWindows()
Contour image:
Rotated rectangles images:
First 3 unrotated images:
Affine warp rotation angles:
13.916877746582031
-42.87890625
18.8118896484375
-44.333797454833984
-38.65980911254883
-37.25965881347656
8.806793212890625
14.931419372558594
-37.405357360839844
-34.99202346801758
35.537681579589844
-35.350345611572266
-42.3245735168457
50.12316131591797
-42.969085693359375
52.750038146972656
45.0
your code is correct for detecting those patches, only a minor mistake is here
if cv2.contourArea(c) < 3000:
continue
reduce 3000 to 100 or below values, because your are giving a condition as contours below 3000 to be neglect

How to detect text on an X-Ray image with OpenCV

I want to detect text on x-ray images. The goal is to extract the oriented bounding boxes as a matrix where each row is a detected bounding box and each row contains the coordinates of all four edges i.e. [x1, x2, y1, y2]. I'm using python 3 and OpenCV 4.2.0.
Here is a sample image:
The string "test word", "a" and "b" should be detected.
I followed this OpenCV tutorial about creating rotated boxes for contours and this stackoverflow answer about detecting a text area in an image.
The resulting boundary boxes should look something like this:
I was able to detect the text, but the result included a lot of boxes without text.
Here is what I tried so far:
img = cv2.imread(file_name)
## Open the image, convert it into grayscale and blur it to get rid of the noise.
img2gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
ret, mask = cv2.threshold(img2gray, 180, 255, cv2.THRESH_BINARY)
image_final = cv2.bitwise_and(img2gray, img2gray, mask=mask)
ret, new_img = cv2.threshold(image_final, 180, 255, cv2.THRESH_BINARY) # for black text , cv.THRESH_BINARY_INV
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
dilated = cv2.dilate(new_img, kernel, iterations=6)
canny_output = cv2.Canny(dilated, 100, 100 * 2)
cv2.imshow('Canny', canny_output)
## Finds contours and saves them to the vectors contour and hierarchy.
contours, hierarchy = cv2.findContours(canny_output, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# Find the rotated rectangles and ellipses for each contour
minRect = [None] * len(contours)
for i, c in enumerate(contours):
minRect[i] = cv2.minAreaRect(c)
# Draw contours + rotated rects + ellipses
drawing = np.zeros((canny_output.shape[0], canny_output.shape[1], 3), dtype=np.uint8)
for i, c in enumerate(contours):
color = (255, 0, 255)
# contour
cv2.drawContours(drawing, contours, i, color)
# rotated rectangle
box = cv2.boxPoints(minRect[i])
box = np.intp(box) # np.intp: Integer used for indexing (same as C ssize_t; normally either int32 or int64)
cv2.drawContours(img, [box], 0, color)
cv2.imshow('Result', img)
cv2.waitKey()
Do I need to run the results through OCR to make sure whether it is text or not? What other approaches should I try?
PS: I'm quite new to computer vision and not familiar with most concepts yet.
Here's a simple approach:
Obtain binary image. Load image, create blank mask, convert to grayscale, Gaussian blur, then Otsu's threshold
Merge text into a single contour. Since we want to extract the text as one piece, we perform morphological operations to connect individual text contours into a single contour.
Extract text. We find contours then filter using contour area with cv2.contourArea and aspect ratio using cv2.arcLength + cv2.approxPolyDP. If a contour passes the filter, we find the rotated bounding box and draw this onto our mask.
Isolate text. We perform an cv2.bitwise_and operation to extract the text.
Here's a visualization of the process. Using this screenshotted input image (since your provided input image was connected as one image):
Input image -> Binary image
Morph close -> Detected text
Isolated text
Results with the other image
Input image -> Binary image + morph close
Detected text -> Isolated text
Code
import cv2
import numpy as np
# Load image, create mask, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread('1.png')
original = image.copy()
blank = np.zeros(image.shape[:2], dtype=np.uint8)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
# Merge text into a single contour
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
close = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=3)
# Find contours
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
# Filter using contour area and aspect ratio
x,y,w,h = cv2.boundingRect(c)
area = cv2.contourArea(c)
ar = w / float(h)
if (ar > 1.4 and ar < 4) or ar < .85 and area > 10 and area < 500:
# Find rotated bounding box
rect = cv2.minAreaRect(c)
box = cv2.boxPoints(rect)
box = np.int0(box)
cv2.drawContours(image,[box],0,(36,255,12),2)
cv2.drawContours(blank,[box],0,(255,255,255),-1)
# Bitwise operations to isolate text
extract = cv2.bitwise_and(thresh, blank)
extract = cv2.bitwise_and(original, original, mask=extract)
cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.imshow('close', close)
cv2.imshow('extract', extract)
cv2.waitKey()
I removed the text using the following comand (after the code of above):
gray2 = cv2.cvtColor(extract, cv2.COLOR_BGR2GRAY)
blur2 = cv2.GaussianBlur(gray2, (5,5), 0)
thresh2 = cv2.threshold(blur2, 0, 255, cv2.THRESH_BINARY)[1]
test = cv2.inpaint(original, thresh2, 7, cv2.INPAINT_TELEA)

Categories

Resources