How to crop rectangular shapes in an image using Python - python

Can anyone give me advice on how to crop the two rectangular boxes and save it?
I already tried this code, but it does not crop very well
import cv2;
import numpy as np;
# Run the code with the image name, keep pressing space bar
# Change the kernel, iterations, Contour Area, position accordingly
# These values work for your present image
img = cv2.imread("your_image.jpg", 0);
h, w = img.shape[:2]
kernel = np.ones((15,15),np.uint8)
e = cv2.erode(img,kernel,iterations = 2)
d = cv2.dilate(e,kernel,iterations = 1)
ret, th = cv2.threshold(d, 150, 255, cv2.THRESH_BINARY_INV)
mask = np.zeros((h+2, w+2), np.uint8)
cv2.floodFill(th, mask, (200,200), 255); # position = (200,200)
out = cv2.bitwise_not(th)
out= cv2.dilate(out,kernel,iterations = 3)
cnt, h = cv2.findContours(out,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
for i in range(len(cnt)):
area = cv2.contourArea(cnt[i])
if(area>10000 and area<100000):
mask = np.zeros_like(img)
cv2.drawContours(mask, cnt, i, 255, -1)
x,y,w,h = cv2.boundingRect(cnt[i])
crop= img[ y:h+y,x:w+x]
cv2.imshow("snip",crop )
if(cv2.waitKey(0))==27:break
cv2.destroyAllWindows()
This is the result. It only crops the smaller box. What I want is for it to crop the two squares.

if you have the coordinates of the rectangles you can try:
cropped = img [y1:y2, x1:x2]
cv2.imwrite('cropped.png', cropped)
The first line crops the image base on the given coordinates assuming (y1

Related

How to crop text from a scanned image using python?

I need to extract the bounding box of text and save it as sub-images of the main image. I am not getting the right code documentation for this task.
Please can anyone provide me code documentation or help links or any python modules which can help to crop text from scanned images.
Below I have attached a scanned image and expected output.
below image scanned copy need to crop text from image.
import cv2
import pytesseract
pytesseract.pytesseract.tesseract_cmd ='C:\\Program Files (x86)\\Tesseract-OCR\\tesseract'
img = cv2.imread("test.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh1 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (18, 18))
dilation = cv2.dilate(thresh1, rect_kernel, iterations = 1)
contours, hierarchy = cv2.findContours(dilation, cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_NONE)
im2 = img.copy()
file = open("recognized.txt", "w+")
file.write("")
file.close()
for cnt in contours:
x, y, w, h = cv2.boundingRect(cnt)
rect = cv2.rectangle(im2, (x, y), (x + w, y + h), (0, 255, 0), 2)
cropped = im2[y:y + h, x:x + w]
file = open("recognized.txt", "a")
text = pytesseract.image_to_string(cropped)
file.write(text)
file.write("\n")
crop_img = img[y:y+h, x:x+w] # just the region you are interested
file.close
second image expected croped image:
Here is one approach in Python/OpenCV.
Read the input
Get the Canny edges
Get the outer contours of the edges
Filter the contours to remove small extraneous spots
Get the convex hull of the main cluster of edges
Draw the convex hull as white filled on a black background as a mask
Mask to black the outside region of the input
Get the rotated rectangle from the convex hull
From the negative angle and center of the rotated rectangle rectify the orientation using perspective warping
Save the results
Input:
import cv2
import numpy as np
# Read image
img = cv2.imread('receipt.jpg')
hh, ww = img.shape[:2]
# get edges
canny = cv2.Canny(img, 50, 200)
# get contours
contours = cv2.findContours(canny, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
# filter out small regions
cimg = np.zeros_like(canny)
for cntr in contours:
area = cv2.contourArea(cntr)
if area > 20:
cv2.drawContours(cimg, [cntr], 0, 255, 1)
# get convex hull and draw on input
points = np.column_stack(np.where(cimg.transpose() > 0))
hull = cv2.convexHull(points)
himg = img.copy()
cv2.polylines(himg, [hull], True, (0,0,255), 1)
# draw convex hull as filled mask
mask = np.zeros_like(cimg, dtype=np.uint8)
cv2.fillPoly(mask, [hull], 255)
# blacken out input using mask
mimg = img.copy()
mimg = cv2.bitwise_and(mimg, mimg, mask=mask)
# get rotate rectangle
rotrect = cv2.minAreaRect(hull)
(center), (width,height), angle = rotrect
box = cv2.boxPoints(rotrect)
boxpts = np.int0(box)
# draw rotated rectangle on copy of input
rimg = img.copy()
cv2.drawContours(rimg, [boxpts], 0, (0,0,255), 1)
# from https://www.pyimagesearch.com/2017/02/20/text-skew-correction-opencv-python/
# the `cv2.minAreaRect` function returns values in the
# range [-90, 0); as the rectangle rotates clockwise the
# returned angle tends to 0 -- in this special case we
# need to add 90 degrees to the angle
if angle < -45:
angle = -(90 + angle)
# otherwise, check width vs height
else:
if width > height:
angle = -(90 + angle)
else:
angle = -angle
# negate the angle to unrotate
neg_angle = -angle
print('unrotation angle:', neg_angle)
print('')
# Get rotation matrix
# center = (width // 2, height // 2)
M = cv2.getRotationMatrix2D(center, neg_angle, scale=1.0)
# unrotate to rectify
result = cv2.warpAffine(mimg, M, (ww, hh), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT, borderValue=(0,0,0))
# save results
cv2.imwrite('receipt_mask.jpg', mask)
cv2.imwrite('receipt_edges.jpg', canny)
cv2.imwrite('receipt_filtered_edges.jpg', cimg)
cv2.imwrite('receipt_hull.jpg', himg)
cv2.imwrite('receipt_rotrect.jpg', rimg)
cv2.imwrite('receipt_masked_result.jpg', result)
cv2.imshow('canny', canny)
cv2.imshow('cimg', cimg)
cv2.imshow('himg', himg)
cv2.imshow('mask', mask)
cv2.imshow('rimg', rimg)
cv2.imshow('result', result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Canny Edges:
Filtered Edges from Contours:
Convex Hull:
Mask:
Rotated Rectangle:
Rectified Result:
In OpenCV you can use cv2.findContours to draw the bounding boxes. See this article which explains how to do that: https://www.geeksforgeeks.org/text-detection-and-extraction-using-opencv-and-ocr/
Then after you have your bounding box locations (your region of interest where text is located, and you want to crop) you can use use slicing to crop the image:
import cv2
img = cv2.imread("lenna.png")
crop_img = img[y:y+h, x:x+w] # just the region you are interested
cv2.imshow("cropped", crop_img)
cv2.waitKey(0)
If you want to extract the text directly, I think you can use tesseract ocr a python package (How to get started: https://pypi.org/project/pytesseract/) . You can also make use of OpenCV built in OCR functions. Read more: https://nanonets.com/blog/ocr-with-tesseract/
from PIL import image
original_image = Image.open(".nameofimage.jpg")
rotate_image = Original_image.rotate(330)
rotate_image.show()
x = 100
y = 80
h = 200
w = 200
cropped_image = rotate_image[y:y+h, x:x+w]
cropped_image.show()

How to remove Edge Text in image using Opencv

I want to remove the text on the edged in the image
I have used the following code but it does not work it also remove the text in the center
Input:
Output:
import cv2
import matplotlib.pyplot as plt
import glob
import os
import numpy as np
def crop_buttom_text(img):
""" Remove the text from the bottom edge of img """
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#blur = cv2.GaussianBlur(gray, (9,9), 0) # No need for blurring
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Create rectangular structuring element and dilate
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 1)) # Use horizontal line as kernel - dilate horizontally.
dilate = cv2.dilate(thresh, kernel, iterations=1)
#kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10,10))
#dilate = cv2.morphologyEx(dilate, cv2.MORPH_OPEN, kernel) # No need for opening
# Find contours and draw rectangle
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2] # [-2] indexing takes return value before last (due to OpenCV compatibility issues).
#cnts = cnts[0] if len(cnts) == 2 else cnts[1] # [-2] is shorter....
res_img = img.copy() # Copy img to res_img - in case there is no edges text.
for c in cnts:
x, y, w, h = cv2.boundingRect(c)
y2 = y + h # Bottom y coordinate of the bounding rectangle
if (y2 >= img.shape[0]):
# If the rectangle touches the bottom of the img
res_img = res_img[0:y-1, :].copy() # Crop rows from first row to row y-1
return res_img
def remove_lines(image_path,outdir):
image = cv2.imread(image_path)
img1 = crop_buttom_text(image)
img2 = crop_buttom_text(np.rot90(img1)) # Rotate by 90 degrees and crop.
img3 = crop_buttom_text(np.rot90(img2)) # Rotate by 90 degrees and crop.
img4 = crop_buttom_text(np.rot90(img3)) # Rotate by 90 degrees and crop.
output_img = np.rot90(img4)
cv2.imwrite(os.path.join(outdir,os.path.basename(image_path)), output_img)
for jpgfile in glob.glob(r'/content/Dataset/*'):
print(jpgfile)
remove_lines(jpgfile,r'/content/output')
How can i modify the above code to remove the text around the edge
How do you remove the text which is at the edge? Here is a rough way to attack the problem: remove every connected component which touches the border.
To solve this, you can add a 1 pixel border, extract the connected components and then use the one corresponding to the border as a binary mask.
The mask becomes
The result is
Notice how a 7 and few commas get removed as well.
from cv2 import cv2
import numpy as np
# Load original image
img = cv2.imread('kShDc.jpg', cv2.IMREAD_GRAYSCALE)
# Save original dimensions
h, w = img.shape[:2]
# Ensure only bilevel image with white as foreground
_, bimg = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)
# Add one pixel border
bimg = cv2.copyMakeBorder(bimg, 1, 1, 1, 1, cv2.BORDER_CONSTANT, None, 255)
# Extract connected components (Spaghetti is the fastest algorithm)
nlabels, label_image = cv2.connectedComponentsWithAlgorithm(bimg, 8, cv2.CV_32S, cv2.CCL_SPAGHETTI)
# Make a mask with the edge label (0 is background, 1 is the first encountered label, i.e. the border)
ccedge = np.uint8((label_image != 1)*255)
cv2.imwrite("mask.png", ccedge, [cv2.IMWRITE_PNG_BILEVEL, 1])
# Zero every pixel touching the border
bimg = cv2.bitwise_and(bimg, ccedge)
# Remove border and invert again
bimg = 255 - bimg[1:h+1, 1:w+1]
# Save result
cv2.imwrite("result.png", bimg, [cv2.IMWRITE_PNG_BILEVEL, 1])

How can I reliably choose only the outer contour of a maksed droplet image with python opencv?

I am writing a program which needs to detect the outer contour of a droplet and fit an ellipse to that shape.
I have a setup that has been working fine:
canny edge detection
find contours
pick longest contour
fit ellipse
But now the process needs to include a mask for the syringe that dispenses the droplet, which splits the outer contour in half.
I know how to mask the detected edges from the array returned from canny but I don't know how to proceed from there.
I need to use the two outer contours to fit the ellipse but I don' know how to reliably extract those.
Minimal working code:
from typing import Tuple
import cv2
import numpy as np
def evaluate_droplet(img, y_base, mask: Tuple[int,int,int,int] = None):
# crop img from baseline down (contains no useful information)
crop_img = img[:y_base,:]
shape = img.shape
height = shape[0]
width = shape[1]
# calculate thrresholds
thresh_high, thresh_im = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
thresh_low = 0.5*thresh_high
bw_edges = cv2.Canny(crop_img, thresh_low, thresh_high)
# block detection of syringe
if (not mask is None):
x,y,w,h = mask
bw_edges[y:y+h, x:x+w] = 0
cv2.imshow('bw',bw_edges)
cv2.waitKey(0)
# for testing purposes:
if __name__ == "__main__":
im = cv2.imread('untitled1.png', cv2.IMREAD_GRAYSCALE)
im = np.reshape(im, im.shape + (1,) )
(h,w,d) = np.shape(im)
try:
drp = evaluate_droplet(im, 250, (int(w/2-40), 0, 80, h))
except Exception as ex:
print(ex)
cv2.imshow('Test',im)
cv2.waitKey(0)
the image used:
I solved it like this for now:
Calculate the area of the bounding rect for each contour and pick largest two
def evaluate_droplet(img, y_base, mask: Tuple[int,int,int,int] = None):
# crop img from baseline down (contains no useful information)
crop_img = img[:y_base,:]
shape = img.shape
height = shape[0]
width = shape[1]
# calculate thrresholds
thresh_high, thresh_im = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
thresh_low = 0.5*thresh_high
bw_edges = cv2.Canny(crop_img, thresh_low, thresh_high)
# block detection of syringe
if (not mask is None):
x,y,w,h = mask
bw_edges[y:y+h, x:x+w] = 0
contours, hierarchy = cv2.findContours(bw_edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
if len(contours) == 0:
print("no contour found!")
cont_area_list = []
# match contours with their bounding rect area
for cont in contours:
x,y,w,h = cv2.boundingRect(cont)
cont_area_list.append((cont, w*h))
# sort combined list by area
cont_areas_sorted = sorted(cont_area_list, key=lambda item: item[1])
# largest 2 contours, assumes mask splits largest contour in the middle
if not mask is None:
largest_conts = [elem[0] for elem in cont_areas_sorted[-2:]]
# merge largest 2 contorus into one for handling purposes
contour = np.concatenate((largest_conts[0], largest_conts[1]))
# if no mask is used use only single largest contour
else:
contour = cont_areas_sorted[-1][0]
# display bounding rect of final contour
x,y,w,h = cv2.boundingRect(contour)
bw_edges = cv2.rectangle(bw_edges, (x,y), (x+w,y+h), (255,255,255))
cv2.imshow('bw',bw_edges)
cv2.waitKey(0)

Preprocess images using OpenCV for pytesseract OCR

I want to use OCR (pytesseract) to recognize the text located in images like these:
I have thousands of these arrows. Until now the procedure is as follows: I first resize the image (for another process). Then I crop the image to get rid of the most part of the arrow. Next I draw a white rectangle as a frame to remove further noise but still have distance between text and image borders for better text recognition. I resize the image again to ensure a height of capital letters to ~30 px (https://groups.google.com/forum/#!msg/tesseract-ocr/Wdh_JJwnw94/24JHDYQbBQAJ). Finally I binarize the image with a threshold of 150.
Full code:
import cv2
image_file = '001.jpg'
# load the input image and grab the image dimensions
image = cv2.imread(image_file, cv2.IMREAD_GRAYSCALE)
(h_1, w_1) = image.shape[:2]
# resize the image and grab the new image dimensions
image = cv2.resize(image, (int(w_1*320/h_1), 320))
(h_1, w_1) = image.shape
# crop image
image_2 = image[70:h_1-70, 20:w_1-20]
# get image_2 height, width
(h_2, w_2) = image_2.shape
# draw white rectangle as a frame around the number -> remove noise
cv2.rectangle(image_2, (0, 0), (w_2, h_2), (255, 255, 255), 40)
# resize image, that capital letters are ~ 30 px in height
image_2 = cv2.resize(image_2, (int(w_2*50/h_2), 50))
# image binarization
ret, image_2 = cv2.threshold(image_2, 150, 255, cv2.THRESH_BINARY)
# save image to file
cv2.imwrite('processed_' + image_file, image_2)
# tesseract part can be commented out
import pytesseract
config_7 = ("-c tessedit_char_whitelist=0123456789AB --oem 1 --psm 7")
text = pytesseract.image_to_string(image_2, config=config_7)
print("OCR TEXT: " + "{}\n".format(text))
The problem is that the text located in the arrow is never centered. Sometimes I remove part of the text with the method described above (e.g. in image 50A).
Is there a method in image processing to get rid of the arrow in a more elegant way? For instance using contour detection and deletion? I am more interested in the OpenCV part than the tesseract part to recognize the text.
Any help is appreciated.
If you look at the pictures you will see that there is a white arrow in the image which is also the biggest contour (especially if you draw a black border on the image). If you make a blank mask and draw the arrow (biggest contour on the image) then erode it a little bit you can perform a per element bitwise conjunction of the actual image and eroded mask. If it is not clear look at the bottom code and comments and you will see that it is actually pretty simple.
# imports
import cv2
import numpy as np
img = cv2.imread("number.png") # read image
# you can resize the image here if you like - it should still work for both sizes
h, w = img.shape[:2] # get the actual images height and width
img = cv2.resize(img, (int(w*320/h), 320))
h, w = img.shape[:2]
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # transform to grayscale
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1] # perform OTSU threhold
cv2.rectangle(thresh, (0, 0), (w, h), (0, 0, 0), 2)
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[0] # search for contours
max_cnt = max(contours, key=cv2.contourArea) # select biggest one
mask = np.zeros((h, w), dtype=np.uint8) # create a black mask
cv2.drawContours(mask, [max_cnt], -1, (255, 255, 255), -1) # draw biggest contour on the mask
kernel = np.ones((15, 15), dtype=np.uint8) # make a kernel with appropriate values - in both cases (resized and original) 15 is ok
erosion = cv2.erode(mask, kernel, iterations=1) # erode the mask with given kernel
reverse = cv2.bitwise_not(img.copy()) # reversed image of the actual image 0 becomes 255 and 255 becomes 0
img = cv2.bitwise_and(reverse, reverse, mask=erosion) # per-element bit-wise conjunction of the actual image and eroded mask (erosion)
img = cv2.bitwise_not(img) # revers the image again
# save image to file and display
cv2.imwrite("res.png", img)
cv2.imshow("img", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
You can try simple Python script:
import cv2
import numpy as np
img = cv2.imread('mmubS.png', cv2.IMREAD_GRAYSCALE)
thresh = cv2.threshold(img, 200, 255, cv2.THRESH_BINARY_INV )[1]
im_flood_fill = thresh.copy()
h, w = thresh.shape[:2]
im_flood_fill=cv2.rectangle(im_flood_fill, (0,0), (w-1,h-1), 255, 2)
mask = np.zeros((h + 2, w + 2), np.uint8)
cv2.floodFill(im_flood_fill, mask, (0, 0), 0)
im_flood_fill = cv2.bitwise_not(im_flood_fill)
cv2.imshow('clear text', im_flood_fill)
cv2.imwrite('text.png', im_flood_fill)
Result:

Difficulty in detecting the outer circle with cv2.HoughCircles

I am trying to detect the outer boundary of the circular object in the images below:
I tried OpenCV's Hough Circle, but the code is not working for every image. I also tried to adjust parameters such as minRadius and maxRadius in Hough Circle but its not working on every image.
The aim is to detect the object from the image and crop it.
Expected output:
Source code:
import imutils
import cv2
import numpy as np
from matplotlib import pyplot as plt
image = cv2.imread("path to the image i have provided")
r = 600.0 / image.shape[1]
dim = (600, int(image.shape[0] * r))
resized = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)
cv2.imwrite("path to were we want to save downscaled image", resized)
image = cv2.imread('path of downscaled image')
image1 = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
image2 = cv2.GaussianBlur(image1, (5, 5), 0)
edged = cv2.Canny(image2, 30, 150)
img = cv2.medianBlur(image2,5)
cimg = cv2.cvtColor(img,cv2.COLOR_GRAY2BGR)
circles = cv2.HoughCircles(edged,cv2.HOUGH_GRADIENT,1,20,
param1=50,param2=30,minRadius=200,maxRadius=280)
circles = np.uint16(np.around(circles))
max_circle = max(circles[0,:], key=lambda x:x[2])
# print(max_circle)
# # Create mask
height,width = image1.shape
mask = np.zeros((height,width), np.uint8)
for i in [max_circle]:
cv2.circle(mask,(i[0],i[1]),i[2],(255,255,255),thickness=-1)
masked_data = cv2.bitwise_and(image, image, mask=mask)
_,thresh = cv2.threshold(mask,1,255,cv2.THRESH_BINARY)
# Find Contour
contours = cv2.findContours(thresh,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)[0]
x,y,w,h = cv2.boundingRect(contours[0])
# Crop masked_data
crop = masked_data[y:y+h,x:x+w]
#Code to close Window
cv2.imshow('OG',image)
cv2.imshow('Cropped ROI',crop)
cv2.imwrite("path to save roi image", crop)
cv2.waitKey(0)
cv2.destroyAllWindows()
Second Answer: an approach based on color segmentation.
While I was editing the question to improve it's readability and was inserting and resizing all the images from the link you shared to make it easier for everyone to visualize what you are trying to do, it occurred to me that this problem might be a better candidate for an approach based on segmentation by color:
This simpler (but clever) approach assumes that the reel appears pretty much in the same location and has more or less the same dimensions every time:
To discover the approximate color of the reel in the image, define a list of Regions of Interest (ROIs) to sample pixels from and determine the min and max color of that area in the HSV color space. The location and size of the ROI are values derived from the size of the image. In the images below, you can see the ROIs as draw as blue-ish rectangles:
Once the min and max HSV colors have been found, a threshold operation with cv2.inRange() can be executed to segment the reel:
Then, iterate though all the contours in the binary image and assume that the largest one represents the reel. Use this contour and draw it in a separate mask to be able to extract the pixels from original image:
At this stage, it is also possible to compute a bounding box for the contour and extract it's precise location to be able to perform a crop operation later and completely isolate the reel in the image:
This approach works for EVERY image shared on the question.
Source code:
import cv2
import numpy as np
import sys
# initialize global H, S, V values
min_global_h = 179
min_global_s = 255
min_global_v = 255
max_global_h = 0
max_global_s = 0
max_global_v = 0
# load input image from the cmd-line
filename = sys.argv[1]
img = cv2.imread(sys.argv[1])
if (img is None):
print('!!! Failed imread')
sys.exit(-1)
# create an auxiliary image for debugging purposes
dbg_img = img.copy()
# initiailize a list of Regions of Interest that need to be scanned to identify good HSV values to threhsold by color
w = img.shape[1]
h = img.shape[0]
roi_w = int(w * 0.10)
roi_h = int(h * 0.10)
roi_list = []
roi_list.append( (int(w*0.25), int(h*0.15), roi_w, roi_h) )
roi_list.append( (int(w*0.25), int(h*0.60), roi_w, roi_h) )
# convert image to HSV color space
hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# iterate through the ROIs to determine the min/max HSV color of the reel
for rect in roi_list:
x, y, w, h = rect
x2 = x + w
y2 = y + h
print('ROI rect=', rect)
cropped_hsv_img = hsv_img[y:y+h, x:x+w]
h, s, v = cv2.split(cropped_hsv_img)
min_h = np.min(h)
min_s = np.min(s)
min_v = np.min(v)
if (min_h < min_global_h):
min_global_h = min_h
if (min_s < min_global_s):
min_global_s = min_s
if (min_v < min_global_v):
min_global_v = min_v
max_h = np.max(h)
max_s = np.max(s)
max_v = np.max(v)
if (max_h > max_global_h):
max_global_h = max_h
if (max_s > max_global_s):
max_global_s = max_s
if (max_v > max_global_v):
max_global_v = max_v
# debug: draw ROI in original image
cv2.rectangle(dbg_img, (x, y), (x2, y2), (255,165,0), 4) # red
cv2.imshow('ROIs', cv2.resize(dbg_img, dsize=(0, 0), fx=0.5, fy=0.5))
#cv2.waitKey(0)
cv2.imwrite(filename[:-4] + '_rois.png', dbg_img)
# define min/max color for threshold
low_hsv = np.array([min_h, min_s, min_v])
max_hsv = np.array([max_h, max_s, max_v])
#print('low_hsv=', low_hsv)
#print('max_hsv=', max_hsv)
# threshold image by color
img_bin = cv2.inRange(hsv_img, low_hsv, max_hsv)
cv2.imshow('binary', cv2.resize(img_bin, dsize=(0, 0), fx=0.5, fy=0.5))
cv2.imwrite(filename[:-4] + '_binary.png', img_bin)
#cv2.imshow('img_bin', cv2.resize(img_bin, dsize=(0, 0), fx=0.5, fy=0.5))
#cv2.waitKey(0)
# create a mask to store the contour of the reel (hopefully)
mask = np.zeros((img_bin.shape[0], img_bin.shape[1]), np.uint8)
crop_x, crop_y, crop_w, crop_h = (0, 0, 0, 0)
# iterate throw all the contours in the binary image:
# assume that the first contour with an area larger than 100k belongs to the reel
contours, hierarchy = cv2.findContours(img_bin, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for contourIdx, cnt in enumerate(contours):
area = cv2.contourArea(contours[contourIdx])
print('contourIdx=', contourIdx, 'area=', area)
# draw potential reel blob on the mask (in white)
if (area > 100000):
crop_x, crop_y, crop_w, crop_h = cv2.boundingRect(cnt)
centers, radius = cv2.minEnclosingCircle(cnt)
cv2.circle(mask, (int(centers[0]), int(centers[1])), int(radius), (255), -1) # fill with white
break
cv2.imshow('mask', cv2.resize(mask, dsize=(0, 0), fx=0.5, fy=0.5))
cv2.imwrite(filename[:-4] + '_mask.png', mask)
# copy just the reel area into its own image
reel_img = cv2.bitwise_and(img, img, mask=mask)
cv2.imshow('reel_img', cv2.resize(reel_img, dsize=(0, 0), fx=0.5, fy=0.5))
cv2.imwrite(filename[:-4] + '_reel.png', reel_img)
# crop the reel to a smaller image
if (crop_w != 0 and crop_h != 0):
cropped_reel_img = reel_img[crop_y:crop_y+crop_h, crop_x:crop_x+crop_w]
cv2.imshow('cropped_reel_img', cv2.resize(cropped_reel_img, dsize=(0, 0), fx=0.5, fy=0.5))
output_filename = filename[:-4] + '_crop.png'
cv2.imwrite(output_filename, cropped_reel_img)
cv2.waitKey(0)
First answer: an approach based on pre-processing the image and executing an adaptiveThreshold operation.
There might be other ways of solving this problem that are not based on Hough Circles. Here is the result of an approach that is not:
Preprocess the image! Decreasing the size of the image and executing a blur helps with segmentation:
The segmentation method uses a cv2.adaptiveThreshold() to create a binary image that preserves the most important objects: the center of the reel and the external edge of the reel. This is an important step since we are only interested in what exists between these two objects. However, life is not perfect and neither is this segmentation. The shadow of reel on the table became part of the binary objects detected. Also, the outer edge is not fully connected as you can see on the resulting image on the right (look at the top left of the circumference):
To join broken segments, a morphological operation can be executed:
Finally, the entire reel area can be exposed by iterating through the contours of the image above and discarding those whose area is larger than what is expected for a reel. The resulting binary image (on the left) can then be used as a mask to identify the reel location on the original image:
Keep in mind that I'm not trying to find an universal solution for your problem. I'm merely showing that there might be other solutions that don't depend on Hough Circles.
Also, this code might need some adjustments to work on a larger number of cases.
Source code:
import cv2
import numpy as np
import sys
img = cv2.imread("test_images/reel.jpg")
if (img is None):
print('!!! Failed imread')
sys.exit(-1)
# create output image
output_img = img.copy()
# 1. Preprocess the image: downscale to speed up processing and execute a blur
SCALE_FACTOR = 0.5
smaller_img = cv2.resize(img, dsize=(0, 0), fx=SCALE_FACTOR, fy=SCALE_FACTOR)
blur_img = cv2.medianBlur(smaller_img, 9)
cv2.imwrite('reel1_blur_img.png', blur_img)
# 2. Segment the image to identify the 2 most important contours: the center of the reel and the outter edge
gray_img = cv2.cvtColor(blur_img, cv2.COLOR_BGR2GRAY)
img_bin = cv2.adaptiveThreshold(gray_img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 19, 4)
cv2.imwrite('reel2_img_bin.png', img_bin)
green_mask = np.zeros((img_bin.shape[0], img_bin.shape[1]), np.uint8)
#green_mask = cv2.cvtColor(img_bin, cv2.COLOR_GRAY2RGB) # debug
contours, hierarchy = cv2.findContours(img_bin, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for contourIdx, cnt in enumerate(contours):
x, y, w, h = cv2.boundingRect(cnt)
area = cv2.contourArea(contours[contourIdx])
#print('contourIdx=', contourIdx, 'w=', w, 'h=', h, 'area=', area)
# filter out tiny segments
if (area < 5000):
#cv2.fillPoly(green_mask, pts=[cnt], color=(0, 0, 255)) # red
continue
# draw green contour (filled)
#cv2.fillPoly(green_mask, pts=[cnt], color=(0, 255, 0)) # green
cv2.fillPoly(green_mask, pts=[cnt], color=(255)) # white
# debug:
#cv2.imshow('green_mask', green_mask)
#cv2.waitKey(0)
cv2.imshow('green_mask', green_mask)
cv2.imwrite('reel2_green_mask.png', green_mask)
# 3. Fix mask: join segments nearby
kernel = np.ones((3,3), np.uint8)
img_dilation = cv2.dilate(green_mask, kernel, iterations=1)
green_mask = cv2.erode(img_dilation, kernel, iterations=1)
cv2.imshow('fixed green_mask', green_mask)
cv2.imwrite('reel3_img.png', green_mask)
# 4. Extract the reel area from the green mask
reel_mask = np.zeros((green_mask.shape[0], green_mask.shape[1]), np.uint8)
#reel_mask = cv2.cvtColor(green_mask, cv2.COLOR_GRAY2RGB) # debug
contours, hierarchy = cv2.findContours(green_mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for contourIdx, cnt in enumerate(contours):
x, y, w, h = cv2.boundingRect(cnt)
area = cv2.contourArea(contours[contourIdx])
print('contourIdx=', contourIdx, 'w=', w, 'h=', h, 'area=', area)
# filter out smaller segments
if (area > 110000):
#cv2.fillPoly(reel_mask, pts=[cnt], color=(0, 0, 255)) # red
continue
# draw green contour (filled)
#cv2.fillPoly(reel_mask, pts=[cnt], color=(0, 255, 0)) # green
cv2.fillPoly(reel_mask, pts=[cnt], color=(255)) # white
# debug:
#cv2.imshow('reel_mask', reel_mask)
#cv2.waitKey(0)
cv2.imshow('reel_mask', reel_mask)
cv2.imwrite('reel4_reel_mask.png', reel_mask)
# 5. Draw the reel area on the original image
contours, hierarchy = cv2.findContours(reel_mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for contourIdx, cnt in enumerate(contours):
centers, radius = cv2.minEnclosingCircle(cnt)
# rescale these values back to the original image size
centers_orig = (centers[0] // SCALE_FACTOR, centers[1] // SCALE_FACTOR)
radius_orig = radius // SCALE_FACTOR
print('centers=', centers_orig, 'radius=', radius_orig)
cv2.circle(output_img, (int(centers_orig[0]), int(centers_orig[1])), int(radius_orig), (128,0,255), 5) # magenta
cv2.imshow('output_img', output_img)
cv2.imwrite('reel5_output.png', output_img)
# display just the pixels from the original image
larger_reel_mask = cv2.resize(reel_mask, (int(img.shape[1]), int(img.shape[0])))
output_reel_img = cv2.bitwise_and(img, img, mask=larger_reel_mask)
cv2.imshow('output_reel_img', output_reel_img)
cv2.imwrite('reel5_output_reel.png', output_reel_img)
cv2.waitKey(0)
At this point, its possible to use larger_reel_maskand compute a minimal enclosing circle, draw it over this mask to make it a little bit more round and allow us to retrieve the area of the reel more accurately:
But the 4 lines of code that achieve this improvement I leave as an exercise for the reader.

Categories

Resources