Apologies as I'm very new to OpenCV and the world of image processing in general.
I'm using OpenCV in Python to detect contours/boxes in this image.
It almost manages to detect all contours, but for some odd reason it doesn't pick up the last row and column which are obvious contours. This image shows the bounding boxes for contours it manages to identify.
Not entirely sure why it's not able to easily pick up the remaining contours. I've researched similar questions but haven't found a suitable answer.
Here's my code.
import numpy as np
import cv2
import math
import matplotlib.pyplot as plt
#load image
img = cv2.imread(path)
#remove noise
img = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21)
#convert to gray scale
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#make pixels darker
_, img = cv2.threshold(img, 240, 255, cv2.THRESH_TOZERO)
#thresholding the image to a binary image
thresh, img_bin = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
#inverting the image
img_bin = 255 - img_bin
# countcol(width) of kernel as 100th of total width
kernel_len = np.array(img).shape[1]//100
# Defining a vertical kernel to detect all vertical lines of image
ver_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_len))
# Defining a horizontal kernel to detect all horizontal lines of image
hor_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_len, 1))
# A kernel of 2x2
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
#Use vertical kernel to detect and save the vertical lines in a jpg
image_1 = cv2.erode(img_bin, ver_kernel, iterations = 3)
vertical_lines = cv2.dilate(image_1, np.ones((10, 4),np.uint8), iterations = 30)
vertical_lines = cv2.erode(vertical_lines, np.ones((10, 4),np.uint8), iterations = 29)
#Use horizontal kernel to detect and save the horizontal lines in a jpg
image_2 = cv2.erode(img_bin, np.ones((1, 5),np.uint8), iterations = 5)
horizontal_lines = cv2.dilate(image_2, np.ones((2, 40),np.uint8), iterations = 20)
horizontal_lines = cv2.erode(horizontal_lines, np.ones((2, 39),np.uint8), iterations = 19)
# Combine horizontal and vertical lines in a new third image, with both having same weight.
img_vh = cv2.addWeighted(vertical_lines, 0.5, horizontal_lines, 0.5, 0.0)
rows, cols = img_vh.shape
#shift image so the enhanced lines overlap with original image
M = np.float32([[1,0,-30],[0,1,-21]])
img_vh = cv2.warpAffine(img_vh ,M,(cols,rows))
#Eroding and thesholding the image
img_vh = cv2.erode(~img_vh, kernel, iterations = 2)
thresh, img_vh = cv2.threshold(img_vh, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
bitxor = cv2.bitwise_xor(img, img_vh)
bitnot = cv2.bitwise_not(bitxor)
#find contours
contours, _ = cv2.findContours(img_vh, cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)
#create list empty list to append with contours less than a specified area
new_contours = []
for contour in contours:
if cv2.contourArea(contour) < 4000000:
new_contours.append(contour)
#get bounding boxes
bounding_boxes = [cv2.boundingRect(contour) for contour in new_contours]
#plot detected bounding boxes
img_og = cv2.imread(path)
for bounding_box in bounding_boxes:
x,y,w,h = bounding_box
img_plot = cv2.rectangle(img_og, (x, y), (x+w, y+h), (255, 0, 0) , 2)
plotting = plt.imshow(img_plot, cmap='gray')
plt.show()
Like #ypnos was suggesting, the dilation and erosion has most likely pushed the last line off the image in the "saving horizontal lines" section. So the image_vh wouldn't have the last row when it was being searched for contours. I tested (Note:1) this by viewing the image after each of your transformations.
Specifically, the number of iterations had been too much. You had used a reasonably sized kernel as it is. It gave perfect results with iterations = 2 on lines 43 and 44 of your code.
After modifying them to :
horizontal_lines = cv2.dilate(image_2, np.ones((2, 40), np.uint8), iterations=2)
horizontal_lines = cv2.erode(horizontal_lines, np.ones((2, 39), np.uint8), iterations=2)
the bounding box rectangles had shifted off the image a bit. That was fixed by changing line 51 of the code to:
M = np.float32([[1, 0, -30], [0, 1, -5]])
This was the result.
Note:
I test/debug using this function usually.
def test(image, title):
cv2.imshow(title, image)
cv2.waitKey(0)
cv2.destroyWindow(title)
The variable position and the handy waitkey calms me down.
Related
I want to remove the text on the edged in the image
I have used the following code but it does not work it also remove the text in the center
Input:
Output:
import cv2
import matplotlib.pyplot as plt
import glob
import os
import numpy as np
def crop_buttom_text(img):
""" Remove the text from the bottom edge of img """
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#blur = cv2.GaussianBlur(gray, (9,9), 0) # No need for blurring
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Create rectangular structuring element and dilate
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 1)) # Use horizontal line as kernel - dilate horizontally.
dilate = cv2.dilate(thresh, kernel, iterations=1)
#kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10,10))
#dilate = cv2.morphologyEx(dilate, cv2.MORPH_OPEN, kernel) # No need for opening
# Find contours and draw rectangle
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2] # [-2] indexing takes return value before last (due to OpenCV compatibility issues).
#cnts = cnts[0] if len(cnts) == 2 else cnts[1] # [-2] is shorter....
res_img = img.copy() # Copy img to res_img - in case there is no edges text.
for c in cnts:
x, y, w, h = cv2.boundingRect(c)
y2 = y + h # Bottom y coordinate of the bounding rectangle
if (y2 >= img.shape[0]):
# If the rectangle touches the bottom of the img
res_img = res_img[0:y-1, :].copy() # Crop rows from first row to row y-1
return res_img
def remove_lines(image_path,outdir):
image = cv2.imread(image_path)
img1 = crop_buttom_text(image)
img2 = crop_buttom_text(np.rot90(img1)) # Rotate by 90 degrees and crop.
img3 = crop_buttom_text(np.rot90(img2)) # Rotate by 90 degrees and crop.
img4 = crop_buttom_text(np.rot90(img3)) # Rotate by 90 degrees and crop.
output_img = np.rot90(img4)
cv2.imwrite(os.path.join(outdir,os.path.basename(image_path)), output_img)
for jpgfile in glob.glob(r'/content/Dataset/*'):
print(jpgfile)
remove_lines(jpgfile,r'/content/output')
How can i modify the above code to remove the text around the edge
How do you remove the text which is at the edge? Here is a rough way to attack the problem: remove every connected component which touches the border.
To solve this, you can add a 1 pixel border, extract the connected components and then use the one corresponding to the border as a binary mask.
The mask becomes
The result is
Notice how a 7 and few commas get removed as well.
from cv2 import cv2
import numpy as np
# Load original image
img = cv2.imread('kShDc.jpg', cv2.IMREAD_GRAYSCALE)
# Save original dimensions
h, w = img.shape[:2]
# Ensure only bilevel image with white as foreground
_, bimg = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)
# Add one pixel border
bimg = cv2.copyMakeBorder(bimg, 1, 1, 1, 1, cv2.BORDER_CONSTANT, None, 255)
# Extract connected components (Spaghetti is the fastest algorithm)
nlabels, label_image = cv2.connectedComponentsWithAlgorithm(bimg, 8, cv2.CV_32S, cv2.CCL_SPAGHETTI)
# Make a mask with the edge label (0 is background, 1 is the first encountered label, i.e. the border)
ccedge = np.uint8((label_image != 1)*255)
cv2.imwrite("mask.png", ccedge, [cv2.IMWRITE_PNG_BILEVEL, 1])
# Zero every pixel touching the border
bimg = cv2.bitwise_and(bimg, ccedge)
# Remove border and invert again
bimg = 255 - bimg[1:h+1, 1:w+1]
# Save result
cv2.imwrite("result.png", bimg, [cv2.IMWRITE_PNG_BILEVEL, 1])
I applied the floodfill function in opencv to extract the foreground from the background but some of the objects in the image were not recognized by the algorithm so I would like to know how I can improve my detections and what modifications are necessary.
image = cv2.imread(args["image"])
image = cv2.resize(image, (800, 800))
h,w,chn = image.shape
ratio = image.shape[0] / 800.0
orig = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (5, 5), 0)
edged = cv2.Canny(gray, 75, 200)
# show the original image and the edge detected image
print("STEP 1: Edge Detection")
cv2.imshow("Image", image)
cv2.imshow("Edged", edged)
warped1 = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
T = threshold_local(warped1, 11, offset = 10, method = "gaussian")
warped1 = (warped1 > T).astype("uint8") * 255
print("STEP 3: Apply perspective transform")
seed = (10, 10)
foreground, birdEye = floodFillCustom(image, seed)
cv2.circle(birdEye, seed, 50, (0, 255, 0), -1)
cv2.imshow("originalImg", birdEye)
cv2.circle(birdEye, seed, 100, (0, 255, 0), -1)
cv2.imshow("foreground", foreground)
cv2.imshow("birdEye", birdEye)
gray = cv2.cvtColor(foreground, cv2.COLOR_BGR2GRAY)
cv2.imshow("gray", gray)
cv2.imwrite("gray.jpg", gray)
threshImg = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)[1]
h_threshold,w_threshold = threshImg.shape
area = h_threshold*w_threshold
cv2.imshow("threshImg", threshImg)[![enter image description here][1]][1]
The floodFillCustom function is as follows -
def floodFillCustom(originalImage, seed):
originalImage = np.maximum(originalImage, 10)
foreground = originalImage.copy()
cv2.floodFill(foreground, None, seed, (0, 0, 0),
loDiff=(10, 10, 10), upDiff=(10, 10, 10))
return [foreground, originalImage]
A little bit late, but here's an alternative solution for segmenting the tools. It involves converting the image to the CMYK color space and extracting the K (Key) component. This component can be thresholded to get a nice binary mask of the tools, the procedure is very straightforward:
Convert the image to the CMYK color space
Extract the K (Key) component
Threshold the image via Otsu's thresholding
Apply some morphology (a closing) to clean up the mask
(Optional) Get bounding rectangles of all the tools
Let's see the code:
# Imports
import cv2
import numpy as np
# Read image
imagePath = "C://opencvImages//"
inputImage = cv2.imread(imagePath+"DAxhk.jpg")
# Create deep copy for results:
inputImageCopy = inputImage.copy()
# Convert to float and divide by 255:
imgFloat = inputImage.astype(np.float) / 255.
# Calculate channel K:
kChannel = 1 - np.max(imgFloat, axis=2)
# Convert back to uint 8:
kChannel = (255*kChannel).astype(np.uint8)
The first step is to convert the BGR image to CMYK. There's no direct conversion in OpenCV for this, so I applied directly the conversion formula. We can get every color space component from that formula, but we are only interested on the K channel. The conversion is easy, but we need to be careful with the data types. We need to operate on float arrays. After getting the K channel, we convert back the image to an unsigned 8-bit array, this is the resulting image:
Let's threshold this image using Otsu's thresholding method:
# Threshold via Otsu:
_, binaryImage = cv2.threshold(kChannel, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
This yields the following binary image:
Looks very nice! Additionally, we can clean it up a little bit (joining the little gaps) using a morphological closing. Let's apply a rectangular structuring element of size 5 x 5 and use 2 iterations:
# Use a little bit of morphology to clean the mask:
# Set kernel (structuring element) size:
kernelSize = 5
# Set morph operation iterations:
opIterations = 2
# Get the structuring element:
morphKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernelSize, kernelSize))
# Perform closing:
binaryImage = cv2.morphologyEx(binaryImage, cv2.MORPH_CLOSE, morphKernel, None, None, opIterations, cv2.BORDER_REFLECT101)
Which results in this:
Very cool. What follows is optional. We can get the bounding rectangles for every tool by looking for the outer (external) contours:
# Find the contours on the binary image:
contours, hierarchy = cv2.findContours(binaryImage, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Look for the outer bounding boxes (no children):
for _, c in enumerate(contours):
# Get the contours bounding rectangle:
boundRect = cv2.boundingRect(c)
# Get the dimensions of the bounding rectangle:
rectX = boundRect[0]
rectY = boundRect[1]
rectWidth = boundRect[2]
rectHeight = boundRect[3]
# Set bounding rectangle:
color = (0, 0, 255)
cv2.rectangle( inputImageCopy, (int(rectX), int(rectY)),
(int(rectX + rectWidth), int(rectY + rectHeight)), color, 5 )
cv2.imshow("Bounding Rectangles", inputImageCopy)
cv2.waitKey(0)
Which produces the final image:
I have a low resolution concave quadrilateral cursor. My goal is to find the point he points to. So I thought of finding 4 corners and getting the farthest corner from each other. usually works fine but doesn't always detect corners accurately. Using blur gives little results, but I'm not sure it's enough. What would you suggest I do to improve my results?
cursor = cv.bitwise_and(captureHSV, captureHSV, mask=mask)
#resizeCurs = cv.resize(cursor, (0, 0), fx=0.60, fy=0.60)
#blurCurs = blur = cv.blur(cursor, (3,3))
grayCurs = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
corners = cv.goodFeaturesToTrack(grayCurs, 4, 0.01, 1)
corners = np.int0(corners)
coordList = []
for corner in corners:
x, y = corner.ravel()
cv.circle(cursor, (x, y), 1, (255, 0, 0), 1)
coordList.append([x, y])
After detecting the canny edges of your image, converted to grayscale & blurred beforehand of course, you can dilate and erode the edges to get rid of noisy bumps and fill in little gaps.
But that wouldn't be enough to smooth out the results to 4 points, you'll need to use the cv2.approxPolyDP method to approximate the resulting contours.
Here is how it might go:
import cv2
import numpy as np
def process(img):
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img_blur = cv2.GaussianBlur(img_gray, (7, 7), 0)
img_canny = cv2.Canny(img_blur, 50, 50)
kernel = np.ones((0, 0))
img_dilate = cv2.dilate(img_canny, kernel, iterations=1)
img_erode = cv2.erode(img_dilate, kernel, iterations=1)
return img_erode
def get_contours(img):
contours, hierarchies = cv2.findContours(process(img), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
for cnt in contours:
if cv2.contourArea(cnt) > 10:
peri = cv2.arcLength(cnt, True)
approx = cv2.approxPolyDP(cnt, peri * 0.04, True)
cv2.drawContours(img, approx, -1, (0, 0, 255), 8)
img1 = cv2.imread("cursor1.png")
img2 = cv2.imread("cursor2.png")
get_contours(img1)
get_contours(img2)
cv2.imshow("Image 1", img1)
cv2.imshow("Image 2", img2)
cv2.waitKey(0)
Output:
Where the red dots are the parts that were drawn on by the program.
I want to use OCR (pytesseract) to recognize the text located in images like these:
I have thousands of these arrows. Until now the procedure is as follows: I first resize the image (for another process). Then I crop the image to get rid of the most part of the arrow. Next I draw a white rectangle as a frame to remove further noise but still have distance between text and image borders for better text recognition. I resize the image again to ensure a height of capital letters to ~30 px (https://groups.google.com/forum/#!msg/tesseract-ocr/Wdh_JJwnw94/24JHDYQbBQAJ). Finally I binarize the image with a threshold of 150.
Full code:
import cv2
image_file = '001.jpg'
# load the input image and grab the image dimensions
image = cv2.imread(image_file, cv2.IMREAD_GRAYSCALE)
(h_1, w_1) = image.shape[:2]
# resize the image and grab the new image dimensions
image = cv2.resize(image, (int(w_1*320/h_1), 320))
(h_1, w_1) = image.shape
# crop image
image_2 = image[70:h_1-70, 20:w_1-20]
# get image_2 height, width
(h_2, w_2) = image_2.shape
# draw white rectangle as a frame around the number -> remove noise
cv2.rectangle(image_2, (0, 0), (w_2, h_2), (255, 255, 255), 40)
# resize image, that capital letters are ~ 30 px in height
image_2 = cv2.resize(image_2, (int(w_2*50/h_2), 50))
# image binarization
ret, image_2 = cv2.threshold(image_2, 150, 255, cv2.THRESH_BINARY)
# save image to file
cv2.imwrite('processed_' + image_file, image_2)
# tesseract part can be commented out
import pytesseract
config_7 = ("-c tessedit_char_whitelist=0123456789AB --oem 1 --psm 7")
text = pytesseract.image_to_string(image_2, config=config_7)
print("OCR TEXT: " + "{}\n".format(text))
The problem is that the text located in the arrow is never centered. Sometimes I remove part of the text with the method described above (e.g. in image 50A).
Is there a method in image processing to get rid of the arrow in a more elegant way? For instance using contour detection and deletion? I am more interested in the OpenCV part than the tesseract part to recognize the text.
Any help is appreciated.
If you look at the pictures you will see that there is a white arrow in the image which is also the biggest contour (especially if you draw a black border on the image). If you make a blank mask and draw the arrow (biggest contour on the image) then erode it a little bit you can perform a per element bitwise conjunction of the actual image and eroded mask. If it is not clear look at the bottom code and comments and you will see that it is actually pretty simple.
# imports
import cv2
import numpy as np
img = cv2.imread("number.png") # read image
# you can resize the image here if you like - it should still work for both sizes
h, w = img.shape[:2] # get the actual images height and width
img = cv2.resize(img, (int(w*320/h), 320))
h, w = img.shape[:2]
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # transform to grayscale
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1] # perform OTSU threhold
cv2.rectangle(thresh, (0, 0), (w, h), (0, 0, 0), 2)
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[0] # search for contours
max_cnt = max(contours, key=cv2.contourArea) # select biggest one
mask = np.zeros((h, w), dtype=np.uint8) # create a black mask
cv2.drawContours(mask, [max_cnt], -1, (255, 255, 255), -1) # draw biggest contour on the mask
kernel = np.ones((15, 15), dtype=np.uint8) # make a kernel with appropriate values - in both cases (resized and original) 15 is ok
erosion = cv2.erode(mask, kernel, iterations=1) # erode the mask with given kernel
reverse = cv2.bitwise_not(img.copy()) # reversed image of the actual image 0 becomes 255 and 255 becomes 0
img = cv2.bitwise_and(reverse, reverse, mask=erosion) # per-element bit-wise conjunction of the actual image and eroded mask (erosion)
img = cv2.bitwise_not(img) # revers the image again
# save image to file and display
cv2.imwrite("res.png", img)
cv2.imshow("img", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
You can try simple Python script:
import cv2
import numpy as np
img = cv2.imread('mmubS.png', cv2.IMREAD_GRAYSCALE)
thresh = cv2.threshold(img, 200, 255, cv2.THRESH_BINARY_INV )[1]
im_flood_fill = thresh.copy()
h, w = thresh.shape[:2]
im_flood_fill=cv2.rectangle(im_flood_fill, (0,0), (w-1,h-1), 255, 2)
mask = np.zeros((h + 2, w + 2), np.uint8)
cv2.floodFill(im_flood_fill, mask, (0, 0), 0)
im_flood_fill = cv2.bitwise_not(im_flood_fill)
cv2.imshow('clear text', im_flood_fill)
cv2.imwrite('text.png', im_flood_fill)
Result:
I am trying to detect the outer boundary of the circular object in the images below:
I tried OpenCV's Hough Circle, but the code is not working for every image. I also tried to adjust parameters such as minRadius and maxRadius in Hough Circle but its not working on every image.
The aim is to detect the object from the image and crop it.
Expected output:
Source code:
import imutils
import cv2
import numpy as np
from matplotlib import pyplot as plt
image = cv2.imread("path to the image i have provided")
r = 600.0 / image.shape[1]
dim = (600, int(image.shape[0] * r))
resized = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)
cv2.imwrite("path to were we want to save downscaled image", resized)
image = cv2.imread('path of downscaled image')
image1 = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
image2 = cv2.GaussianBlur(image1, (5, 5), 0)
edged = cv2.Canny(image2, 30, 150)
img = cv2.medianBlur(image2,5)
cimg = cv2.cvtColor(img,cv2.COLOR_GRAY2BGR)
circles = cv2.HoughCircles(edged,cv2.HOUGH_GRADIENT,1,20,
param1=50,param2=30,minRadius=200,maxRadius=280)
circles = np.uint16(np.around(circles))
max_circle = max(circles[0,:], key=lambda x:x[2])
# print(max_circle)
# # Create mask
height,width = image1.shape
mask = np.zeros((height,width), np.uint8)
for i in [max_circle]:
cv2.circle(mask,(i[0],i[1]),i[2],(255,255,255),thickness=-1)
masked_data = cv2.bitwise_and(image, image, mask=mask)
_,thresh = cv2.threshold(mask,1,255,cv2.THRESH_BINARY)
# Find Contour
contours = cv2.findContours(thresh,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)[0]
x,y,w,h = cv2.boundingRect(contours[0])
# Crop masked_data
crop = masked_data[y:y+h,x:x+w]
#Code to close Window
cv2.imshow('OG',image)
cv2.imshow('Cropped ROI',crop)
cv2.imwrite("path to save roi image", crop)
cv2.waitKey(0)
cv2.destroyAllWindows()
Second Answer: an approach based on color segmentation.
While I was editing the question to improve it's readability and was inserting and resizing all the images from the link you shared to make it easier for everyone to visualize what you are trying to do, it occurred to me that this problem might be a better candidate for an approach based on segmentation by color:
This simpler (but clever) approach assumes that the reel appears pretty much in the same location and has more or less the same dimensions every time:
To discover the approximate color of the reel in the image, define a list of Regions of Interest (ROIs) to sample pixels from and determine the min and max color of that area in the HSV color space. The location and size of the ROI are values derived from the size of the image. In the images below, you can see the ROIs as draw as blue-ish rectangles:
Once the min and max HSV colors have been found, a threshold operation with cv2.inRange() can be executed to segment the reel:
Then, iterate though all the contours in the binary image and assume that the largest one represents the reel. Use this contour and draw it in a separate mask to be able to extract the pixels from original image:
At this stage, it is also possible to compute a bounding box for the contour and extract it's precise location to be able to perform a crop operation later and completely isolate the reel in the image:
This approach works for EVERY image shared on the question.
Source code:
import cv2
import numpy as np
import sys
# initialize global H, S, V values
min_global_h = 179
min_global_s = 255
min_global_v = 255
max_global_h = 0
max_global_s = 0
max_global_v = 0
# load input image from the cmd-line
filename = sys.argv[1]
img = cv2.imread(sys.argv[1])
if (img is None):
print('!!! Failed imread')
sys.exit(-1)
# create an auxiliary image for debugging purposes
dbg_img = img.copy()
# initiailize a list of Regions of Interest that need to be scanned to identify good HSV values to threhsold by color
w = img.shape[1]
h = img.shape[0]
roi_w = int(w * 0.10)
roi_h = int(h * 0.10)
roi_list = []
roi_list.append( (int(w*0.25), int(h*0.15), roi_w, roi_h) )
roi_list.append( (int(w*0.25), int(h*0.60), roi_w, roi_h) )
# convert image to HSV color space
hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# iterate through the ROIs to determine the min/max HSV color of the reel
for rect in roi_list:
x, y, w, h = rect
x2 = x + w
y2 = y + h
print('ROI rect=', rect)
cropped_hsv_img = hsv_img[y:y+h, x:x+w]
h, s, v = cv2.split(cropped_hsv_img)
min_h = np.min(h)
min_s = np.min(s)
min_v = np.min(v)
if (min_h < min_global_h):
min_global_h = min_h
if (min_s < min_global_s):
min_global_s = min_s
if (min_v < min_global_v):
min_global_v = min_v
max_h = np.max(h)
max_s = np.max(s)
max_v = np.max(v)
if (max_h > max_global_h):
max_global_h = max_h
if (max_s > max_global_s):
max_global_s = max_s
if (max_v > max_global_v):
max_global_v = max_v
# debug: draw ROI in original image
cv2.rectangle(dbg_img, (x, y), (x2, y2), (255,165,0), 4) # red
cv2.imshow('ROIs', cv2.resize(dbg_img, dsize=(0, 0), fx=0.5, fy=0.5))
#cv2.waitKey(0)
cv2.imwrite(filename[:-4] + '_rois.png', dbg_img)
# define min/max color for threshold
low_hsv = np.array([min_h, min_s, min_v])
max_hsv = np.array([max_h, max_s, max_v])
#print('low_hsv=', low_hsv)
#print('max_hsv=', max_hsv)
# threshold image by color
img_bin = cv2.inRange(hsv_img, low_hsv, max_hsv)
cv2.imshow('binary', cv2.resize(img_bin, dsize=(0, 0), fx=0.5, fy=0.5))
cv2.imwrite(filename[:-4] + '_binary.png', img_bin)
#cv2.imshow('img_bin', cv2.resize(img_bin, dsize=(0, 0), fx=0.5, fy=0.5))
#cv2.waitKey(0)
# create a mask to store the contour of the reel (hopefully)
mask = np.zeros((img_bin.shape[0], img_bin.shape[1]), np.uint8)
crop_x, crop_y, crop_w, crop_h = (0, 0, 0, 0)
# iterate throw all the contours in the binary image:
# assume that the first contour with an area larger than 100k belongs to the reel
contours, hierarchy = cv2.findContours(img_bin, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for contourIdx, cnt in enumerate(contours):
area = cv2.contourArea(contours[contourIdx])
print('contourIdx=', contourIdx, 'area=', area)
# draw potential reel blob on the mask (in white)
if (area > 100000):
crop_x, crop_y, crop_w, crop_h = cv2.boundingRect(cnt)
centers, radius = cv2.minEnclosingCircle(cnt)
cv2.circle(mask, (int(centers[0]), int(centers[1])), int(radius), (255), -1) # fill with white
break
cv2.imshow('mask', cv2.resize(mask, dsize=(0, 0), fx=0.5, fy=0.5))
cv2.imwrite(filename[:-4] + '_mask.png', mask)
# copy just the reel area into its own image
reel_img = cv2.bitwise_and(img, img, mask=mask)
cv2.imshow('reel_img', cv2.resize(reel_img, dsize=(0, 0), fx=0.5, fy=0.5))
cv2.imwrite(filename[:-4] + '_reel.png', reel_img)
# crop the reel to a smaller image
if (crop_w != 0 and crop_h != 0):
cropped_reel_img = reel_img[crop_y:crop_y+crop_h, crop_x:crop_x+crop_w]
cv2.imshow('cropped_reel_img', cv2.resize(cropped_reel_img, dsize=(0, 0), fx=0.5, fy=0.5))
output_filename = filename[:-4] + '_crop.png'
cv2.imwrite(output_filename, cropped_reel_img)
cv2.waitKey(0)
First answer: an approach based on pre-processing the image and executing an adaptiveThreshold operation.
There might be other ways of solving this problem that are not based on Hough Circles. Here is the result of an approach that is not:
Preprocess the image! Decreasing the size of the image and executing a blur helps with segmentation:
The segmentation method uses a cv2.adaptiveThreshold() to create a binary image that preserves the most important objects: the center of the reel and the external edge of the reel. This is an important step since we are only interested in what exists between these two objects. However, life is not perfect and neither is this segmentation. The shadow of reel on the table became part of the binary objects detected. Also, the outer edge is not fully connected as you can see on the resulting image on the right (look at the top left of the circumference):
To join broken segments, a morphological operation can be executed:
Finally, the entire reel area can be exposed by iterating through the contours of the image above and discarding those whose area is larger than what is expected for a reel. The resulting binary image (on the left) can then be used as a mask to identify the reel location on the original image:
Keep in mind that I'm not trying to find an universal solution for your problem. I'm merely showing that there might be other solutions that don't depend on Hough Circles.
Also, this code might need some adjustments to work on a larger number of cases.
Source code:
import cv2
import numpy as np
import sys
img = cv2.imread("test_images/reel.jpg")
if (img is None):
print('!!! Failed imread')
sys.exit(-1)
# create output image
output_img = img.copy()
# 1. Preprocess the image: downscale to speed up processing and execute a blur
SCALE_FACTOR = 0.5
smaller_img = cv2.resize(img, dsize=(0, 0), fx=SCALE_FACTOR, fy=SCALE_FACTOR)
blur_img = cv2.medianBlur(smaller_img, 9)
cv2.imwrite('reel1_blur_img.png', blur_img)
# 2. Segment the image to identify the 2 most important contours: the center of the reel and the outter edge
gray_img = cv2.cvtColor(blur_img, cv2.COLOR_BGR2GRAY)
img_bin = cv2.adaptiveThreshold(gray_img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 19, 4)
cv2.imwrite('reel2_img_bin.png', img_bin)
green_mask = np.zeros((img_bin.shape[0], img_bin.shape[1]), np.uint8)
#green_mask = cv2.cvtColor(img_bin, cv2.COLOR_GRAY2RGB) # debug
contours, hierarchy = cv2.findContours(img_bin, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for contourIdx, cnt in enumerate(contours):
x, y, w, h = cv2.boundingRect(cnt)
area = cv2.contourArea(contours[contourIdx])
#print('contourIdx=', contourIdx, 'w=', w, 'h=', h, 'area=', area)
# filter out tiny segments
if (area < 5000):
#cv2.fillPoly(green_mask, pts=[cnt], color=(0, 0, 255)) # red
continue
# draw green contour (filled)
#cv2.fillPoly(green_mask, pts=[cnt], color=(0, 255, 0)) # green
cv2.fillPoly(green_mask, pts=[cnt], color=(255)) # white
# debug:
#cv2.imshow('green_mask', green_mask)
#cv2.waitKey(0)
cv2.imshow('green_mask', green_mask)
cv2.imwrite('reel2_green_mask.png', green_mask)
# 3. Fix mask: join segments nearby
kernel = np.ones((3,3), np.uint8)
img_dilation = cv2.dilate(green_mask, kernel, iterations=1)
green_mask = cv2.erode(img_dilation, kernel, iterations=1)
cv2.imshow('fixed green_mask', green_mask)
cv2.imwrite('reel3_img.png', green_mask)
# 4. Extract the reel area from the green mask
reel_mask = np.zeros((green_mask.shape[0], green_mask.shape[1]), np.uint8)
#reel_mask = cv2.cvtColor(green_mask, cv2.COLOR_GRAY2RGB) # debug
contours, hierarchy = cv2.findContours(green_mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for contourIdx, cnt in enumerate(contours):
x, y, w, h = cv2.boundingRect(cnt)
area = cv2.contourArea(contours[contourIdx])
print('contourIdx=', contourIdx, 'w=', w, 'h=', h, 'area=', area)
# filter out smaller segments
if (area > 110000):
#cv2.fillPoly(reel_mask, pts=[cnt], color=(0, 0, 255)) # red
continue
# draw green contour (filled)
#cv2.fillPoly(reel_mask, pts=[cnt], color=(0, 255, 0)) # green
cv2.fillPoly(reel_mask, pts=[cnt], color=(255)) # white
# debug:
#cv2.imshow('reel_mask', reel_mask)
#cv2.waitKey(0)
cv2.imshow('reel_mask', reel_mask)
cv2.imwrite('reel4_reel_mask.png', reel_mask)
# 5. Draw the reel area on the original image
contours, hierarchy = cv2.findContours(reel_mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for contourIdx, cnt in enumerate(contours):
centers, radius = cv2.minEnclosingCircle(cnt)
# rescale these values back to the original image size
centers_orig = (centers[0] // SCALE_FACTOR, centers[1] // SCALE_FACTOR)
radius_orig = radius // SCALE_FACTOR
print('centers=', centers_orig, 'radius=', radius_orig)
cv2.circle(output_img, (int(centers_orig[0]), int(centers_orig[1])), int(radius_orig), (128,0,255), 5) # magenta
cv2.imshow('output_img', output_img)
cv2.imwrite('reel5_output.png', output_img)
# display just the pixels from the original image
larger_reel_mask = cv2.resize(reel_mask, (int(img.shape[1]), int(img.shape[0])))
output_reel_img = cv2.bitwise_and(img, img, mask=larger_reel_mask)
cv2.imshow('output_reel_img', output_reel_img)
cv2.imwrite('reel5_output_reel.png', output_reel_img)
cv2.waitKey(0)
At this point, its possible to use larger_reel_maskand compute a minimal enclosing circle, draw it over this mask to make it a little bit more round and allow us to retrieve the area of the reel more accurately:
But the 4 lines of code that achieve this improvement I leave as an exercise for the reader.