Finding text between two lines using Python OpenCV

I want to identify and highlight / crop the text between two lines using Python (cv2).
One line is a wavy line at the top, and the second line is somewhere further down the page. The second line can appear at any height, ranging from just after the first line of text to just before the last.
An example,
I believe I need to use HoughLinesP() somehow with proper parameters for this.
I've tried some examples involving a combination of erode + dilate + HoughLinesP.
e.g.
import cv2
import numpy as np

# `image` is the path of the page being processed
img = cv2.imread(image)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
kernel_size = 5
blur_gray = cv2.GaussianBlur(gray, (kernel_size, kernel_size), 0)

# erode / dilate
erode_kernel_param = (5, 200)  # (5, 50)
dilate_kernel_param = (5, 5)   # (5, 75)
img_erode = cv2.erode(blur_gray, np.ones(erode_kernel_param))
img_dilate = cv2.dilate(img_erode, np.ones(dilate_kernel_param))

# %% Second, run edge detection using Canny.
low_threshold = 50
high_threshold = 150
edges = cv2.Canny(img_dilate, low_threshold, high_threshold)

# %% Then, use HoughLinesP to get the lines.
# Adjust the parameters for better performance.
rho = 1                # distance resolution in pixels of the Hough grid
theta = np.pi / 180    # angular resolution in radians of the Hough grid
threshold = 15         # min number of votes (intersections in a Hough grid cell)
min_line_length = 600  # min number of pixels making up a line
max_line_gap = 20      # max gap in pixels between connectable line segments
line_image = np.copy(img) * 0  # blank image to draw lines on

# %% Run Hough on the edge-detected image.
# Output "lines" is an array containing endpoints of detected line segments.
lines = cv2.HoughLinesP(edges, rho, theta, threshold, np.array([]),
                        min_line_length, max_line_gap)
if lines is not None:
    for line in lines:
        for x1, y1, x2, y2 in line:
            cv2.line(line_image, (x1, y1), (x2, y2), (255, 0, 0), 5)

# %% Draw the lines on the image.
lines_edges = cv2.addWeighted(img, 0.8, line_image, 1, 0)
However, in many cases the lines don't get identified properly. Some examples of the errors:
Too many lines being identified (ones in the text as well)
Lines not being identified completely
Lines not being identified at all
Am I on the right track? Do I just need to hit the correct combination of parameters for this purpose, or is there a simpler way or trick that will let me reliably crop the text between these two lines?
In case it's relevant, I need to do this for ~450 pages.
Here's the link to the book, in case someone wants to examine more examples of pages.
https://archive.org/details/in.ernet.dli.2015.553713/page/n13/mode/2up
Thank you.
Solution
I've made minor modifications to the answer by Ari (thank you) and made the code a bit more comprehensible for my own sake. Here's my code.
The core idea is:
Find the contours and their bounding rectangles.
The two "widest" contours will represent the two lines.
Thereafter, take the lower side of the top rectangle and the upper side of the bottom rectangle to bound the area (text) we are interested in.
import cv2

# `images` is the list of page image paths
for image in images:
    base_img = cv2.imread(image)
    height, width, channels = base_img.shape
    img = cv2.cvtColor(base_img, cv2.COLOR_BGR2GRAY)
    ret, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    img = cv2.bitwise_not(img)
    contours, hierarchy = cv2.findContours(
        img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
    )

    # Get the bounding rectangle of each contour.
    # A rectangle is (x, y, w, h); the top-left point of the image
    # is (0, 0), X grows rightwards, Y grows downwards.
    rects = [cv2.boundingRect(contour) for contour in contours]

    # Sort the rectangles, biggest width first
    rects.sort(key=lambda r: r[2], reverse=True)
    # Get the 2 "widest" rectangles and order them top to bottom
    line_rects = rects[:2]
    line_rects.sort(key=lambda r: r[1])

    # If at least two rectangles (contours) were found
    if len(line_rects) >= 2:
        top_x, top_y, top_w, top_h = line_rects[0]
        bot_x, bot_y, bot_w, bot_h = line_rects[1]

        # Crop between the bottom y of the upper rectangle (i.e. top_y + top_h)
        # and the top y of the lower rectangle (i.e. bot_y)
        crop_img = base_img[top_y + top_h:bot_y]

        # Highlight the area by drawing a rectangle.
        # For full width, 0 and width can be used, while
        # for exact width (erroneous) top_x and bot_x + bot_w can be used
        rect_img = cv2.rectangle(
            base_img,
            pt1=(0, top_y + top_h),
            pt2=(width, bot_y),
            color=(0, 255, 0),
            thickness=2
        )
        cv2.imwrite(image.replace('.jpg', '.rect.jpg'), rect_img)
        cv2.imwrite(image.replace('.jpg', '.crop.jpg'), crop_img)
    else:
        print(f"Insufficient contours in {image}")

You can find the contours, and then take the two with the biggest width.
import cv2

base_img = cv2.imread('a.png')
img = cv2.cvtColor(base_img, cv2.COLOR_BGR2GRAY)
ret, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
img = cv2.bitwise_not(img)
cnts, hierarchy = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# findContours may return a tuple; make it a sortable list
cnts = list(cnts)
# sort the cnts, biggest width first
cnts.sort(key=lambda c: cv2.boundingRect(c)[2], reverse=True)
# get the 2 big lines
lines = [cv2.boundingRect(cnts[0]), cv2.boundingRect(cnts[1])]
# higher line first
lines.sort(key=lambda c: c[1])
# crop the img between the two lines
crop_img = base_img[lines[0][1]:lines[1][1]]

Related

How can I rotate the bounding boxes from findcontours function in Python OpenCV?

I have the following image:
I am using OpenCV to find the contours in this image in order to separate the "122" into "1", "2", and "2". I am then using OCR to classify the numbers.
The code I am using to do this is as follows:
import cv2
import numpy as np
import imutils
from imutils.contours import sort_contours
from matplotlib import pyplot as plt

# `image` is the input image, loaded beforehand, e.g. image = cv2.imread(...)
invert = cv2.bitwise_not(image)
gray = cv2.cvtColor(invert, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (5, 5), 0)

# perform edge detection, find contours in the edge map, and sort the
# resulting contours from left-to-right
edged = cv2.Canny(blurred, 30, 150)
cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL,
                        cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
cnts = sort_contours(cnts, method="left-to-right")[0]

# initialize the list of contour bounding boxes and associated
# characters that we'll be OCR'ing
chars = []
preds = []
for c in cnts:
    # compute the bounding box of the contour
    (x, y, w, h) = cv2.boundingRect(c)
    # filter out bounding boxes, ensuring they are neither too small
    # nor too large
    if (w >= 5 and w <= 150) and (h >= 15 and h <= 120):
        # extract the character and threshold it to make the character
        # appear as *white* (foreground) on a *black* background, then
        # grab the width and height of the thresholded image
        roi = gray[y:y + h, x:x + w]
        thresh = cv2.threshold(roi, 0, 255,
                               cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
        (tH, tW) = thresh.shape
        # if the width is greater than the height, resize along the
        # width dimension
        if tW > tH:
            thresh = imutils.resize(thresh, width=32)
        # otherwise, resize along the height
        else:
            thresh = imutils.resize(thresh, height=32)
        # re-grab the image dimensions (now that it's been resized)
        # and then determine how much we need to pad the width and
        # height such that our image will be 32x32
        (tH, tW) = thresh.shape
        dX = int(max(0, 32 - tW) / 2.0)
        dY = int(max(0, 32 - tH) / 2.0)
        # pad the image and force 32x32 dimensions
        padded = cv2.copyMakeBorder(thresh, top=dY, bottom=dY,
                                    left=dX, right=dX,
                                    borderType=cv2.BORDER_CONSTANT,
                                    value=(0, 0, 0))
        padded = cv2.resize(padded, (28, 28))
        # prepare the padded image for classification via our
        # handwriting OCR model
        padded = padded.astype("float32") / 255.0
        padded = np.expand_dims(padded, axis=-1)
        # update our list of characters that will be OCR'd
        chars.append((padded, (x, y, w, h)))

x, y, w, h = cv2.boundingRect(c)
roi = image[y:y + h, x:x + w]
plt.imshow(roi)
This code works great for numbers that are not written at an angle and are spaced generously apart. However, in this image the "1" is angled slightly, and the resulting bounding box around the "1" also includes a portion of the adjacent "2".
Does anyone have a suggestion on how I can slightly rotate the bounding box to exclude the portion of the "2"?
It's hard to give specific recommendations without understanding how the bounding box will be used downstream.
The easiest method would be to use the boxPoints function, which returns the corner coordinates of the minimum-area bounding box around the contour. Alternatively, you could fit a line to the contour and use the angle of the line to rotate your bounding box.
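For illustration, here is a minimal sketch of the boxPoints suggestion, assuming image and the contour c come from the question's code above; the upright-crop step via getRotationMatrix2D and getRectSubPix is my own addition, not from the original post:
import cv2
import numpy as np

rect = cv2.minAreaRect(c)              # rotated rectangle: ((cx, cy), (w, h), angle)
box = cv2.boxPoints(rect)              # its 4 corner points
box = np.round(box).astype(np.int32)   # integer coordinates for drawing
cv2.drawContours(image, [box], 0, (0, 255, 0), 2)

# one way to extract the character upright: rotate the whole image
# around the box center by the box angle, then crop the box region
(cx, cy), (w, h), angle = rect
M = cv2.getRotationMatrix2D((cx, cy), angle, 1.0)
rotated = cv2.warpAffine(image, M, (image.shape[1], image.shape[0]))
upright = cv2.getRectSubPix(rotated, (int(w), int(h)), (cx, cy))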

Finding length of each line segment passing through centroid , and how to constrict the line till outer contour

Input Image
Processed Image
import numpy as np
import cv2

img = cv2.imread('Image(i).png', 0)
ret, img = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY)
img_bw = img <= 120
img_bw = img_bw.astype('uint8')

# Fit the ellipses
contours0, hierarchy = cv2.findContours(img.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
outer_ellipse = [cv2.approxPolyDP(contours0[0], 0.1, True)]
inner_ellipse = [cv2.approxPolyDP(contours0[0], 0.1, True)]
ref = np.zeros_like(img_bw)
out = img.copy()
h, w = img.shape[:2]
vis = np.zeros((h, w, 3), np.uint8)
cv2.drawContours(vis, outer_ellipse, -1, (255, 0, 0), 1)
cv2.drawContours(vis, inner_ellipse, -1, (0, 0, 255), 1)

# Extract contour of ellipses
cnt_outer = np.vstack(outer_ellipse).squeeze()
cnt_inner = np.vstack(inner_ellipse).squeeze()

# Determine centroid
M = cv2.moments(cnt_inner)
cx = int(M['m10'] / M['m00'])
cy = int(M['m01'] / M['m00'])
print(cx, cy)

# Draw full segment lines
#cv2.line(vis, (cx, 0), (cx, w), (150, 0, 0), 1)
width = img.shape[1]
height = img.shape[0]
N = 20
for i in range(N):
    tmp = np.zeros_like(img_bw)
    theta = i * (360 / N)
    theta *= np.pi / 180.0
    cv2.line(tmp, (cx, cy),
             (int(cx - np.cos(theta) * w),
              int(cy + np.sin(theta) * h)), (150, 0, 0), 1)
    (row, col) = np.nonzero(np.logical_and(tmp, ref))
    #cv2.line(out, (cx, cy), (col, row), (255, 0, 0), 1)

# Show the image
cv2.imshow('Output', out)
cv2.waitKey(0)
cv2.destroyAllWindows()
As seen in the processed image, the lines passing through the centroid are not constrained to the outer contour and pass through it.
I want the lines to stop at the outer contour so that I can measure the distance from the centroid to the outer contour.
The first image is the input image and the second shows the line segments passing through the centroid.
Here's a possible approach:
draw your outer contour filled with black on a white background
You now have a black ellipse. Then, without actually drawing anything:
use skimage.draw.line to get the list of points along all your radii
use NumPy argmax() to get the first white pixel along each radius
Here is the code:
#!/usr/bin/env python3
import cv2
import math
from skimage.draw import line
import numpy as np

# Load image as greyscale
img = cv2.imread('ellipses.png', cv2.IMREAD_GRAYSCALE)
_, img = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY)
h, w = img.shape

# Fit the ellipses
contours, hierarchy = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
outer_ellipse = [cv2.approxPolyDP(contours[0], 0.1, True)]

# Draw outer contour filled with black on white background
vis = np.zeros_like(img) + 255
cnt = cv2.drawContours(vis, outer_ellipse, -1, 0, -1)

# Centroid by existing method
cx, cy = 365, 335

maxThickness = 0
# Take 10 points along top
for x in range(0, w, int(w / 10)):
    # ... and bottom
    for y in 0, h - 1:
        # Get y and x of all pixels between centroid and top and bottom edge
        yy, xx = line(cy, cx, y, x)
        firstWhiteIndex = np.argmax(vis[yy, xx])
        fx, fy = xx[firstWhiteIndex], yy[firstWhiteIndex]
        # Get length of this radial line
        length = np.sqrt((cx - fx)**2 + (cy - fy)**2)
        # Remember if longer than all others so far seen
        if length > maxThickness:
            maxThickness = length
            fxMax, fyMax = fx, fy

# Take 10 points down left side
for y in range(0, h, int(h / 10)):
    # ... and right
    for x in 0, w - 1:
        # Get y and x of all pixels between centroid and left and right edge
        yy, xx = line(cy, cx, y, x)
        firstWhiteIndex = np.argmax(vis[yy, xx])
        fx, fy = xx[firstWhiteIndex], yy[firstWhiteIndex]
        # Get length of this radial line
        length = np.sqrt((cx - fx)**2 + (cy - fy)**2)
        # Remember if longer than all others so far seen
        if length > maxThickness:
            maxThickness = length
            fxMax, fyMax = fx, fy

print(f'Max thickness: {maxThickness}')
# Draw thickest radius in mid-grey
cv2.line(img, (cx, cy), (fxMax, fyMax), 128, 5)
cv2.imwrite('result.png', img)
I have an approach that is not the best, but it is what I can think of for now.
While drawing lines in the above image, modify the code and do the following:
Before the for loop, draw a binary image of the same size containing only the outer contour circle. Save this image for later use.
Now in the for loop, draw each line in a separate blank binary image. Thus, you will have two images: the first having only the outer circle, and the second containing only the line.
Now perform a bitwise_and operation on these 2 images.
The result will have white pixels only at the point of intersection of the line and the outer circle.
Now find the coordinates of that white pixel, and you will have the coordinates of the point of intersection.
Obviously this is not the most efficient way, but it is real time. Also, keep in mind that the outer circle width should be at least 2 in the image and the lines should be of width 1. You may get more than one point of intersection in some cases; take any one of them, as they will differ by only 1-2 pixels, which can be neglected.
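As a rough sketch of this idea (the variable names are mine: outer_mask is assumed to be the saved binary image holding only the outer contour, and cx, cy, theta, w, h come from the question's loop):
import cv2
import numpy as np

line_mask = np.zeros_like(outer_mask)
cv2.line(line_mask, (cx, cy),
         (int(cx - np.cos(theta) * w), int(cy + np.sin(theta) * h)), 255, 1)
# white pixels present in both masks are the intersection point(s)
intersection = cv2.bitwise_and(outer_mask, line_mask)
rows, cols = np.nonzero(intersection)
if len(rows) > 0:
    px, py = cols[0], rows[0]              # take any one; they differ by 1-2 pixels
    distance = np.hypot(px - cx, py - cy)  # centroid-to-contour distance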

How should I remove noise in this image using OpenCV?

I'm trying to use cv2.HoughLines to identify the skew angle of the words in this image.
However, after edge detection, it clearly has too much noise.
I've tried using cv2.medianBlur to remove the noise.
However, there is even more noise.
This means that I'm unable to set the minimum line length threshold for hough transform.
What other functions should I be looking at?
Image:
After edge detection:
Edit: After Rotem's help, my code now identifies images with skew angles from 90 to -90 degrees (including 90 but excluding -90).
import cv2
import numpy as np
import imutils
import math
import pytesseract

img = cv2.imread('omezole.jpg')
resized = imutils.resize(img, width=300)
gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
th3 = cv2.threshold(gray, 80, 255, cv2.THRESH_BINARY_INV)[1]

minLineLength = 50
maxLineGap = 3
lines = cv2.HoughLinesP(th3, rho=1, theta=np.pi/180, threshold=100,
                        minLineLength=minLineLength, maxLineGap=maxLineGap)
colLineCopy = cv2.cvtColor(th3, cv2.COLOR_GRAY2BGR)

# Draw but remove all vertical lines, add corresponding angle to ls
ls = []
for line in lines:
    if line is None:
        angle = 0
    else:
        x1, y1, x2, y2 = line[0].tolist()
        print(line)
        # check for vertical lines since you can't find tan(90)
        if (x2 - x1 == 0):
            ls.append(-90)
        else:
            ls.append(math.degrees(math.atan((y2 - y1) / (x2 - x1))))
        cv2.line(colLineCopy, (x1, y1), (x2, y2), (0, 0, 250), 2)

# special case of strictly vertical words: if more than 0.2 of the lines
# are vertical, assume the words are vertical
if ls.count(-90) > len(ls) // 5:
    angle = 90
else:
    # drop near-vertical outliers, then average the rest
    ls = [a for a in ls if a >= -80]
    angle = sum(ls) / len(ls)
rotated = imutils.rotate_bound(resized, -angle)

cv2.imshow("HoughLinesP", colLineCopy)
cv2.imshow("rotated", rotated)

gray = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)
threshINV = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY_INV)[1]
cv2.imshow("final", threshINV)

# Run OCR
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
custom_config = r'--psm 11'
print(pytesseract.image_to_string(threshINV, config=custom_config))
cv2.waitKey(0)
cv2.destroyAllWindows()
A useful way of removing the "noise" before edge detection is to apply a threshold that converts the image from grayscale to binary.
Finding the correct threshold (automatically) is not always an easy task, so I manually set the threshold value to 50.
Solution using HoughLinesP code sample:
import numpy as np
import cv2

# Read input image
img = cv2.imread('omezole.jpg')

# Convert from BGR to grayscale.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Apply threshold - all values below 50 go to 0, and values above 50 go to 255.
ret, thresh_gray = cv2.threshold(gray, 50, 255, cv2.THRESH_BINARY)

# https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_imgproc/py_houghlines/py_houghlines.html
edges = cv2.Canny(thresh_gray, 50, 150, apertureSize=3)

minLineLength = 100
maxLineGap = 5
lines = cv2.HoughLinesP(edges, rho=1, theta=np.pi/180, threshold=100,
                        minLineLength=minLineLength, maxLineGap=maxLineGap)

# Draw lines
for line in lines:
    x1, y1, x2, y2 = line[0].tolist()
    cv2.line(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

cv2.imwrite('houghlines.png', img)
Result:
The HoughLines solution is not so robust.
I suggest another solution using findContours:
img = cv2.imread('omezole.jpg')

# Threshold at 50 as above, then inverse polarity:
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh_gray = cv2.threshold(gray, 50, 255, cv2.THRESH_BINARY)
thresh_gray = 255 - thresh_gray

# Use "open" morphological operation to remove some rough edges
thresh_gray = cv2.morphologyEx(thresh_gray, cv2.MORPH_OPEN, np.ones((5, 5)))

# Find contours over thresh_gray
cnts = cv2.findContours(thresh_gray, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

# Iterate contours
for c in cnts:
    # Only if contour area is large enough:
    if cv2.contourArea(c) > 2000:
        rect = cv2.minAreaRect(c)
        box = cv2.boxPoints(rect)
        # convert all floating-point coordinates to int
        box = np.int0(box)
        cv2.drawContours(img, [box], 0, (0, 255, 0), thickness=2)
        angle = rect[2]
        print('angle = ' + str(angle))

cv2.imwrite('findcontours.png', img)

# Show result (for testing).
cv2.imshow('thresh_gray', thresh_gray)
cv2.imshow('img', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
angle = -21.801406860351562
angle = -21.44773292541504
angle = -21.370620727539062
angle = -21.801406860351562
angle = -22.520565032958984
angle = -22.56700897216797
angle = -23.198591232299805
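Not part of the answer above, but one possible follow-up: if each rect[2] is collected into a list inside the loop (a hypothetical angles list), the image can be deskewed by the median angle:
import numpy as np
import imutils

# angles collected inside the loop above, e.g. angles.append(rect[2])
skew = float(np.median(angles))              # roughly -22 degrees for this image
deskewed = imutils.rotate_bound(img, -skew)  # sign may need flipping depending on convention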

Difficulty in detecting the outer circle with cv2.HoughCircles

I am trying to detect the outer boundary of the circular object in the images below:
I tried OpenCV's Hough Circle transform, but the code does not work for every image, even after adjusting parameters such as minRadius and maxRadius.
The aim is to detect the object from the image and crop it.
Expected output:
Source code:
import imutils
import cv2
import numpy as np
from matplotlib import pyplot as plt

image = cv2.imread("path to the image i have provided")
r = 600.0 / image.shape[1]
dim = (600, int(image.shape[0] * r))
resized = cv2.resize(image, dim, interpolation=cv2.INTER_AREA)
cv2.imwrite("path to where we want to save downscaled image", resized)

image = cv2.imread('path of downscaled image')
image1 = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
image2 = cv2.GaussianBlur(image1, (5, 5), 0)
edged = cv2.Canny(image2, 30, 150)
img = cv2.medianBlur(image2, 5)
cimg = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

circles = cv2.HoughCircles(edged, cv2.HOUGH_GRADIENT, 1, 20,
                           param1=50, param2=30, minRadius=200, maxRadius=280)
circles = np.uint16(np.around(circles))
max_circle = max(circles[0, :], key=lambda x: x[2])
# print(max_circle)

# Create mask
height, width = image1.shape
mask = np.zeros((height, width), np.uint8)
for i in [max_circle]:
    cv2.circle(mask, (i[0], i[1]), i[2], (255, 255, 255), thickness=-1)
masked_data = cv2.bitwise_and(image, image, mask=mask)
_, thresh = cv2.threshold(mask, 1, 255, cv2.THRESH_BINARY)

# Find contour
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
x, y, w, h = cv2.boundingRect(contours[0])

# Crop masked_data
crop = masked_data[y:y+h, x:x+w]

# Code to close windows
cv2.imshow('OG', image)
cv2.imshow('Cropped ROI', crop)
cv2.imwrite("path to save roi image", crop)
cv2.waitKey(0)
cv2.destroyAllWindows()
Second Answer: an approach based on color segmentation.
While I was editing the question to improve its readability, inserting and resizing all the images from the link you shared so everyone can visualize what you are trying to do, it occurred to me that this problem might be a better candidate for an approach based on segmentation by color:
This simpler (but clever) approach assumes that the reel appears pretty much in the same location and has more or less the same dimensions every time:
To discover the approximate color of the reel in the image, define a list of Regions of Interest (ROIs) to sample pixels from and determine the min and max color of that area in the HSV color space. The location and size of each ROI are derived from the size of the image. In the images below, you can see the ROIs drawn as blue-ish rectangles:
Once the min and max HSV colors have been found, a threshold operation with cv2.inRange() can be executed to segment the reel:
Then, iterate through all the contours in the binary image and assume that the largest one represents the reel. Use this contour and draw it in a separate mask to be able to extract the pixels from the original image:
At this stage, it is also possible to compute a bounding box for the contour and extract its precise location, to be able to perform a crop operation later and completely isolate the reel in the image:
This approach works for EVERY image shared on the question.
Source code:
import cv2
import numpy as np
import sys

# initialize global H, S, V values
min_global_h = 179
min_global_s = 255
min_global_v = 255
max_global_h = 0
max_global_s = 0
max_global_v = 0

# load input image from the cmd-line
filename = sys.argv[1]
img = cv2.imread(sys.argv[1])
if (img is None):
    print('!!! Failed imread')
    sys.exit(-1)

# create an auxiliary image for debugging purposes
dbg_img = img.copy()

# initialize a list of Regions of Interest that need to be scanned to
# identify good HSV values to threshold by color
w = img.shape[1]
h = img.shape[0]
roi_w = int(w * 0.10)
roi_h = int(h * 0.10)
roi_list = []
roi_list.append((int(w*0.25), int(h*0.15), roi_w, roi_h))
roi_list.append((int(w*0.25), int(h*0.60), roi_w, roi_h))

# convert image to HSV color space
hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# iterate through the ROIs to determine the min/max HSV color of the reel
for rect in roi_list:
    x, y, w, h = rect
    x2 = x + w
    y2 = y + h
    print('ROI rect=', rect)
    cropped_hsv_img = hsv_img[y:y+h, x:x+w]
    h, s, v = cv2.split(cropped_hsv_img)

    min_h = np.min(h)
    min_s = np.min(s)
    min_v = np.min(v)
    if (min_h < min_global_h):
        min_global_h = min_h
    if (min_s < min_global_s):
        min_global_s = min_s
    if (min_v < min_global_v):
        min_global_v = min_v

    max_h = np.max(h)
    max_s = np.max(s)
    max_v = np.max(v)
    if (max_h > max_global_h):
        max_global_h = max_h
    if (max_s > max_global_s):
        max_global_s = max_s
    if (max_v > max_global_v):
        max_global_v = max_v

    # debug: draw ROI in original image
    cv2.rectangle(dbg_img, (x, y), (x2, y2), (255, 165, 0), 4)  # blue-ish

cv2.imshow('ROIs', cv2.resize(dbg_img, dsize=(0, 0), fx=0.5, fy=0.5))
#cv2.waitKey(0)
cv2.imwrite(filename[:-4] + '_rois.png', dbg_img)

# define min/max color for threshold (from the global values gathered above)
low_hsv = np.array([min_global_h, min_global_s, min_global_v])
max_hsv = np.array([max_global_h, max_global_s, max_global_v])
#print('low_hsv=', low_hsv)
#print('max_hsv=', max_hsv)

# threshold image by color
img_bin = cv2.inRange(hsv_img, low_hsv, max_hsv)
cv2.imshow('binary', cv2.resize(img_bin, dsize=(0, 0), fx=0.5, fy=0.5))
cv2.imwrite(filename[:-4] + '_binary.png', img_bin)
#cv2.imshow('img_bin', cv2.resize(img_bin, dsize=(0, 0), fx=0.5, fy=0.5))
#cv2.waitKey(0)

# create a mask to store the contour of the reel (hopefully)
mask = np.zeros((img_bin.shape[0], img_bin.shape[1]), np.uint8)
crop_x, crop_y, crop_w, crop_h = (0, 0, 0, 0)

# iterate through all the contours in the binary image:
# assume that the first contour with an area larger than 100k belongs to the reel
contours, hierarchy = cv2.findContours(img_bin, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for contourIdx, cnt in enumerate(contours):
    area = cv2.contourArea(contours[contourIdx])
    print('contourIdx=', contourIdx, 'area=', area)
    # draw potential reel blob on the mask (in white)
    if (area > 100000):
        crop_x, crop_y, crop_w, crop_h = cv2.boundingRect(cnt)
        centers, radius = cv2.minEnclosingCircle(cnt)
        cv2.circle(mask, (int(centers[0]), int(centers[1])), int(radius), (255), -1)  # fill with white
        break

cv2.imshow('mask', cv2.resize(mask, dsize=(0, 0), fx=0.5, fy=0.5))
cv2.imwrite(filename[:-4] + '_mask.png', mask)

# copy just the reel area into its own image
reel_img = cv2.bitwise_and(img, img, mask=mask)
cv2.imshow('reel_img', cv2.resize(reel_img, dsize=(0, 0), fx=0.5, fy=0.5))
cv2.imwrite(filename[:-4] + '_reel.png', reel_img)

# crop the reel to a smaller image
if (crop_w != 0 and crop_h != 0):
    cropped_reel_img = reel_img[crop_y:crop_y+crop_h, crop_x:crop_x+crop_w]
    cv2.imshow('cropped_reel_img', cv2.resize(cropped_reel_img, dsize=(0, 0), fx=0.5, fy=0.5))
    output_filename = filename[:-4] + '_crop.png'
    cv2.imwrite(output_filename, cropped_reel_img)

cv2.waitKey(0)
First answer: an approach based on pre-processing the image and executing an adaptiveThreshold operation.
There might be other ways of solving this problem that are not based on Hough Circles. Here is the result of an approach that is not:
Preprocess the image! Decreasing the size of the image and executing a blur helps with segmentation:
The segmentation method uses cv2.adaptiveThreshold() to create a binary image that preserves the most important objects: the center of the reel and the external edge of the reel. This is an important step since we are only interested in what exists between these two objects. However, life is not perfect and neither is this segmentation: the shadow of the reel on the table became part of the detected binary objects, and the outer edge is not fully connected, as you can see in the resulting image on the right (look at the top left of the circumference):
To join broken segments, a morphological operation can be executed:
Finally, the entire reel area can be exposed by iterating through the contours of the image above and discarding those whose area is larger than what is expected for a reel. The resulting binary image (on the left) can then be used as a mask to identify the reel location on the original image:
Keep in mind that I'm not trying to find a universal solution for your problem. I'm merely showing that there might be other solutions that don't depend on Hough Circles.
Also, this code might need some adjustments to work on a larger number of cases.
Source code:
import cv2
import numpy as np
import sys
img = cv2.imread("test_images/reel.jpg")
if (img is None):
print('!!! Failed imread')
sys.exit(-1)
# create output image
output_img = img.copy()
# 1. Preprocess the image: downscale to speed up processing and execute a blur
SCALE_FACTOR = 0.5
smaller_img = cv2.resize(img, dsize=(0, 0), fx=SCALE_FACTOR, fy=SCALE_FACTOR)
blur_img = cv2.medianBlur(smaller_img, 9)
cv2.imwrite('reel1_blur_img.png', blur_img)
# 2. Segment the image to identify the 2 most important contours: the center of the reel and the outter edge
gray_img = cv2.cvtColor(blur_img, cv2.COLOR_BGR2GRAY)
img_bin = cv2.adaptiveThreshold(gray_img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 19, 4)
cv2.imwrite('reel2_img_bin.png', img_bin)
green_mask = np.zeros((img_bin.shape[0], img_bin.shape[1]), np.uint8)
#green_mask = cv2.cvtColor(img_bin, cv2.COLOR_GRAY2RGB) # debug
contours, hierarchy = cv2.findContours(img_bin, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for contourIdx, cnt in enumerate(contours):
x, y, w, h = cv2.boundingRect(cnt)
area = cv2.contourArea(contours[contourIdx])
#print('contourIdx=', contourIdx, 'w=', w, 'h=', h, 'area=', area)
# filter out tiny segments
if (area < 5000):
#cv2.fillPoly(green_mask, pts=[cnt], color=(0, 0, 255)) # red
continue
# draw green contour (filled)
#cv2.fillPoly(green_mask, pts=[cnt], color=(0, 255, 0)) # green
cv2.fillPoly(green_mask, pts=[cnt], color=(255)) # white
# debug:
#cv2.imshow('green_mask', green_mask)
#cv2.waitKey(0)
cv2.imshow('green_mask', green_mask)
cv2.imwrite('reel2_green_mask.png', green_mask)
# 3. Fix mask: join segments nearby
kernel = np.ones((3,3), np.uint8)
img_dilation = cv2.dilate(green_mask, kernel, iterations=1)
green_mask = cv2.erode(img_dilation, kernel, iterations=1)
cv2.imshow('fixed green_mask', green_mask)
cv2.imwrite('reel3_img.png', green_mask)
# 4. Extract the reel area from the green mask
reel_mask = np.zeros((green_mask.shape[0], green_mask.shape[1]), np.uint8)
#reel_mask = cv2.cvtColor(green_mask, cv2.COLOR_GRAY2RGB) # debug
contours, hierarchy = cv2.findContours(green_mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for contourIdx, cnt in enumerate(contours):
x, y, w, h = cv2.boundingRect(cnt)
area = cv2.contourArea(contours[contourIdx])
print('contourIdx=', contourIdx, 'w=', w, 'h=', h, 'area=', area)
# filter out smaller segments
if (area > 110000):
#cv2.fillPoly(reel_mask, pts=[cnt], color=(0, 0, 255)) # red
continue
# draw green contour (filled)
#cv2.fillPoly(reel_mask, pts=[cnt], color=(0, 255, 0)) # green
cv2.fillPoly(reel_mask, pts=[cnt], color=(255)) # white
# debug:
#cv2.imshow('reel_mask', reel_mask)
#cv2.waitKey(0)
cv2.imshow('reel_mask', reel_mask)
cv2.imwrite('reel4_reel_mask.png', reel_mask)
# 5. Draw the reel area on the original image
contours, hierarchy = cv2.findContours(reel_mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for contourIdx, cnt in enumerate(contours):
centers, radius = cv2.minEnclosingCircle(cnt)
# rescale these values back to the original image size
centers_orig = (centers[0] // SCALE_FACTOR, centers[1] // SCALE_FACTOR)
radius_orig = radius // SCALE_FACTOR
print('centers=', centers_orig, 'radius=', radius_orig)
cv2.circle(output_img, (int(centers_orig[0]), int(centers_orig[1])), int(radius_orig), (128,0,255), 5) # magenta
cv2.imshow('output_img', output_img)
cv2.imwrite('reel5_output.png', output_img)
# display just the pixels from the original image
larger_reel_mask = cv2.resize(reel_mask, (int(img.shape[1]), int(img.shape[0])))
output_reel_img = cv2.bitwise_and(img, img, mask=larger_reel_mask)
cv2.imshow('output_reel_img', output_reel_img)
cv2.imwrite('reel5_output_reel.png', output_reel_img)
cv2.waitKey(0)
At this point, it's possible to use larger_reel_mask and compute a minimal enclosing circle, draw it over this mask to make it a little bit more round, and allow us to retrieve the area of the reel more accurately:
But the 4 lines of code that achieve this improvement I leave as an exercise for the reader.
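For what it's worth, here is one possible take on those 4 lines, assuming larger_reel_mask from the code above and that its largest contour is the reel:
cnts, _ = cv2.findContours(larger_reel_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
center, radius = cv2.minEnclosingCircle(max(cnts, key=cv2.contourArea))
cv2.circle(larger_reel_mask, (int(center[0]), int(center[1])), int(radius), 255, -1)  # rounder mask
output_reel_img = cv2.bitwise_and(img, img, mask=larger_reel_mask)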

Calculating center of an object in an image

I was reading this post on calculating the center of an image using OpenCV, which uses moments. But I am trying to calculate the center of an object I detected using HoughLinesP. Is there a way I could do this with OpenCV?
Here is the image for which I am trying to calculate the centers.
The line segments were found and the output image looks like:
import cv2
import numpy as np
import math

img = cv2.imread("./images/octa.jpg")
b, g, r = cv2.split(img)
smoothed = cv2.GaussianBlur(g, (3, 3), 0)
edges = cv2.Canny(smoothed, 15, 60, apertureSize=3)
lines = cv2.HoughLinesP(edges, 1, np.pi/180, 35, 30, 20)
print("length of lines detected ", lines.shape)
for line in lines:
    for x1, y1, x2, y2 in line:
        cv2.line(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
        print("x1,y1", x1, ",", y1, " --- ", "x2,y2", x2, ",", y2)
cv2.imshow('detected', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
Using the coordinates, how could I calculate the center of this image? How could I use moments here?
One constraint I have is that I cannot use the contour methods included with OpenCV.
The following code was used with cv2 version 3.3.1.
I closely followed the OpenCV docs and it worked fine.
import cv2

img = cv2.imread("octa.jpg", 0)
ret, thresh = cv2.threshold(img, 100, 255, 0)
im2, contours, hierarchy = cv2.findContours(thresh, 1, 2)
cnt = contours[0]
M = cv2.moments(cnt)
cx = int(M['m10'] / M['m00'])
cy = int(M['m01'] / M['m00'])
im2 = cv2.cvtColor(im2, cv2.COLOR_GRAY2RGB)
cv2.polylines(im2, cnt, True, (0, 0, 255), 2)
cv2.circle(im2, (cx, cy), 5, (0, 0, 255), 1)
cv2.imshow("res", im2)
Two notes:
you need to add the argument 0 to imread otherwise the contour finding would not work
I set the threshold just a little bit lower, so only the contours of the octagon were found
Result:
If you use a different version of cv2, you can just change the docs to your version; the documentation is really good.
You also may want to blur your image a bit or do some other preprocessing, but in this case, there was no need for it.
EDIT Without contour:
I took the helpful comments from this post and tinkered around a bit. This does not use contours; it finds lines and uses them to locate the center.
import cv2
import numpy as np

img = cv2.imread('octa.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
kernel_size = 5
blur_gray = cv2.GaussianBlur(gray, (kernel_size, kernel_size), 0)
ret, thresh = cv2.threshold(blur_gray, 100, 255, 0)

low_threshold = 50
high_threshold = 150
edges = cv2.Canny(thresh, low_threshold, high_threshold)

rho = 1               # distance resolution in pixels of the Hough grid
theta = np.pi / 180   # angular resolution in radians of the Hough grid
threshold = 15        # minimum number of votes (intersections in a Hough grid cell)
min_line_length = 50  # minimum number of pixels making up a line
max_line_gap = 50     # maximum gap in pixels between connectable line segments
line_image = np.copy(img) * 0  # blank image to draw lines on

# Run Hough on edge detected image
# Output "lines" is an array containing endpoints of detected line segments
lines = cv2.HoughLinesP(edges, rho, theta, threshold, np.array([]),
                        min_line_length, max_line_gap)
for line in lines:
    for x1, y1, x2, y2 in line:
        cv2.line(line_image, (x1, y1), (x2, y2), (255, 0, 0), 2)

lines_edges = cv2.addWeighted(img, 0.5, line_image, 1, 0)
line_image_gray = cv2.cvtColor(line_image, cv2.COLOR_RGB2GRAY)
M = cv2.moments(line_image_gray)
cx = int(M['m10'] / M['m00'])
cy = int(M['m01'] / M['m00'])
cv2.circle(lines_edges, (cx, cy), 5, (0, 0, 255), 1)
cv2.imshow("res", lines_edges)
Result:
Found lines are drawn in blue; the center in red
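A further simplification worth noting (my own addition, not from the post): for a roughly symmetric shape like this octagon, averaging all the segment endpoints returned by HoughLinesP approximates the center without computing moments at all:
# lines has shape (N, 1, 4); flatten it to endpoints as (x, y) rows
pts = lines.reshape(-1, 2)
cx, cy = pts.mean(axis=0).astype(int)  # approximate center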
