Related
I want to retrieve all contours of the image below, but ignore text.
Image:
When I try to find the contours of the current image I get the following:
I have no idea how to go about this as I am new to using OpenCV and image processing. I want to ignore the text; how can I achieve this? If ignoring it is not possible but drawing a single bounding box around the text is, then that would be good too.
Edit:
Criteria that I need to match:
The contours may vary in size and shape.
The colors from the image may differ.
The colors and size of the text inside the image may differ.
Here is one way to do that in Python/OpenCV.
Read the input
Convert to grayscale
Get Canny edges
Apply morphology close to ensure they are closed
Get all contour hierarchy
Filter contours to keep only those above threshold in perimeter
Draw contours on input
Draw each contour on a black background
Save results
Input:
import numpy as np
import cv2
# read input (cv2.imread returns None instead of raising when the file cannot be read)
img = cv2.imread('short_title.png')
if img is None:
    raise FileNotFoundError("could not read 'short_title.png'")

# convert to gray
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# get canny edges (hysteresis thresholds 1 and 50)
edges = cv2.Canny(gray, 1, 50)

# apply morphology close with a 3x3 ellipse to ensure the edges are closed
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)

# get contours; findContours returns 2 or 3 values depending on the OpenCV
# version, and the contour list is first of two or second of three
contours = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
contours = contours[0] if len(contours) == 2 else contours[1]

# keep only contours whose closed perimeter exceeds this many pixels;
# the short contours that are dropped are the text glyphs
MIN_PERIMETER = 500

result = img.copy()
i = 1
for c in contours:
    perimeter = cv2.arcLength(c, True)
    if perimeter > MIN_PERIMETER:
        # drawContours expects a LIST of contours: passing the bare array
        # makes OpenCV treat every point as its own one-point contour
        cv2.drawContours(result, [c], -1, (0, 0, 255), 1)

        # also draw this contour alone on a black background and save it
        contour_img = np.zeros_like(img, dtype=np.uint8)
        cv2.drawContours(contour_img, [c], -1, (0, 0, 255), 1)
        cv2.imwrite("short_title_contour_{0}.jpg".format(i), contour_img)
        i = i + 1

# save results
cv2.imwrite("short_title_gray.jpg", gray)
cv2.imwrite("short_title_edges.jpg", edges)
cv2.imwrite("short_title_contours.jpg", result)

# show images and wait for a key press
cv2.imshow("gray", gray)
cv2.imshow("edges", edges)
cv2.imshow("result", result)
cv2.waitKey(0)
Grayscale:
Edges:
All contours on input:
Contour 1:
Contour 2:
Contour 3:
Contour 4:
Here are two options for erasing the text:
Using pytesseract OCR.
Finding white (and small) connected components.
Both solutions build a mask, dilate the mask, and use cv2.inpaint to erase the text.
Using pytesseract:
Find text boxes using pytesseract.image_to_boxes.
Fill the boxes in the mask with 255.
Code sample:
import cv2
import numpy as np
from pytesseract import pytesseract, Output
# Location of the Tesseract executable
pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"

img = cv2.imread('ShortAndInteresting.png')

# https://stackoverflow.com/questions/20831612/getting-the-bounding-box-of-the-recognized-words-using-python-tesseract
# Run tesseract; the result is text with one box per line
boxes = pytesseract.image_to_boxes(img, lang='eng', config=' --psm 6')

h, w, _ = img.shape  # assumes color image
mask = np.zeros((h, w), np.uint8)

# Paint every reported box into the mask as a filled white rectangle.
# Fields 1..4 of each line are used as x1, y1, x2, y2; the y values are
# flipped with (h - y) before drawing.
for box_line in boxes.splitlines():
    fields = box_line.split(' ')
    corner_a = (int(fields[1]), h - int(fields[2]))
    corner_b = (int(fields[3]), h - int(fields[4]))
    mask = cv2.rectangle(mask, corner_a, corner_b, 255, -1)

# Grow the boxes with a 5x5 dilation
grow_kernel = np.ones((5, 5), np.uint8)
mask = cv2.dilate(mask, grow_kernel)

# Erase the text: inpaint replaces the masked pixels using their neighbours
clean_img = cv2.inpaint(img, mask, 2, cv2.INPAINT_NS)

# Show mask and clean_img for testing
for window_name, picture in (('mask', mask), ('clean_img', clean_img)):
    cv2.imshow(window_name, picture)
cv2.waitKey()
cv2.destroyAllWindows()
Mask:
Finding white (and small) connected components:
Use mask = cv2.inRange(img, (230, 230, 230), (255, 255, 255)) for finding the text (assume the text is white).
Finding connected components in the mask using cv2.connectedComponentsWithStats(mask, 4)
Remove large components from the mask - fill components with large area with zeros.
Code sample:
import cv2
import numpy as np
img = cv2.imread('ShortAndInteresting.png')

# Candidate text pixels: every channel in [230, 255] (assumes the text is white)
mask = cv2.inRange(img, (230, 230, 230), (255, 255, 255))

# Label the 4-connected components of the mask and collect their statistics
nlabel, labels, stats, centroids = cv2.connectedComponentsWithStats(mask, 4)

# Clear every component whose area exceeds 1000 pixels in one step.
# Label 0 is never cleared (the original loop starts at label 1).
is_large = stats[:, cv2.CC_STAT_AREA] > 1000
is_large[0] = False
mask[is_large[labels]] = 0

# Dilate the remaining (small) components with a 5x5 kernel
mask = cv2.dilate(mask, np.ones((5, 5), np.uint8))
cv2.imwrite('mask2.png', mask)

# Erase the text: inpaint replaces the masked pixels using their neighbours
clean_img = cv2.inpaint(img, mask, 2, cv2.INPAINT_NS)

# Show mask and clean_img for testing
for window_name, picture in (('mask', mask), ('clean_img', clean_img)):
    cv2.imshow(window_name, picture)
cv2.waitKey()
cv2.destroyAllWindows()
Mask:
Clean image:
Note:
My assumption is that you know how to split the image into contours, and the only issue is the presence of the text.
I would recommend using flood fill, find the seed point for each color region, flood fill it to ignore the text values within. Hope that helps!
Refer to example of using floodfill here: https://www.programcreek.com/python/example/89425/cv2.floodFill
Example below copied from link above
def fillhole(input_image):
    """Fill the holes of a gray/binary image using flood fill.

    A copy of the image is flood-filled with 255 starting at the top-left
    pixel (0, 0). Whatever the fill did not reach is inverted and OR-ed
    back onto the input, so only holes enclosed by connected regions are
    filled.

    NOTE(review): this presumably expects pixel (0, 0) to be background and
    the foreground to be 255 - confirm against the caller.

    :param input_image: 2-D image array; it is not modified.
    :return: the input OR-ed with the inverted flood-filled copy.
    """
    # floodFill modifies its image in place, so work on a uint8 copy
    im_flood_fill = input_image.copy().astype("uint8")
    h, w = input_image.shape[:2]
    # floodFill needs a mask 2 pixels larger than the image in each dimension
    mask = np.zeros((h + 2, w + 2), np.uint8)
    cv2.floodFill(im_flood_fill, mask, (0, 0), 255)
    # after inversion only the pixels the fill did not reach (and that were
    # not already 255) are non-zero
    im_flood_fill_inv = cv2.bitwise_not(im_flood_fill)
    return input_image | im_flood_fill_inv
I want to detect the center of a cross. But since the two rectangles are connected, I don't know how to find it. I have these images for example:
Cross 1
Cross 2
I would like to find the "red dot".
The idea is that the point where a vertical and horizontal line touch is the intersection. A potential approach is:
Obtain binary image. Load image, convert to grayscale, Gaussian blur, then Otsu's threshold.
Obtain horizontal and vertical line masks. Create horizontal and vertical structuring elements with cv2.getStructuringElement then perform cv2.morphologyEx to isolate the lines.
Find joints. We cv2.bitwise_and the two masks together to get the joints.
Find centroid on joint mask. We find contours then calculate the centroid to get the intersection point.
Input image -> Horizontal mask -> Vertical mask -> Joints
Detected intersection in green
Results for the other image
Input image -> Horizontal mask -> Vertical mask -> Joints
Detected intersection in green
Code
import cv2
import numpy as np
# Load image, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread('4.PNG')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3, 3), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

# Find horizontal lines: opening with a wide, flat kernel keeps only
# structures at least as wide as the kernel
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (150, 5))
horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)

# Find vertical lines: same idea with a tall, narrow kernel
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 150))
vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)

# Find joints: pixels that belong to both a horizontal and a vertical line
joints = cv2.bitwise_and(horizontal, vertical)

# Find centroid of the joints
cnts = cv2.findContours(joints, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    # Find centroid and draw center point
    M = cv2.moments(c)
    if M['m00'] != 0:
        cx = int(M['m10'] / M['m00'])
        cy = int(M['m01'] / M['m00'])
    else:
        # A degenerate joint (single pixel or 1-px-wide line) has zero area,
        # so m00 is 0; use the mean of the contour points instead of dividing
        cx, cy = (int(v) for v in c.reshape(-1, 2).mean(axis=0))
    cv2.circle(image, (cx, cy), 15, (36, 255, 12), -1)

cv2.imshow('horizontal', horizontal)
cv2.imshow('vertical', vertical)
cv2.imshow('joints', joints)
cv2.imshow('image', image)
cv2.waitKey()
Here's a possible solution. It is based on my answer here: How can i get the inner contour points without redundancy in OpenCV - Python. The main idea is to convolve the image with a special kernel that identifies intersections. After this operation, you create a mask with possible intersection points, apply some morphology and get the coordinates.
You did not provide your input image, I'm testing this algorithm with the "cross" image you posted. This is the code:
# Imports:
import cv2
import numpy as np
# Image path (the directory and the file name are concatenated as-is below,
# so the directory string has to end with a separator):
path = "D://opencvImages//"
fileName = "cross.png" # Your "cross" image
# Read the image with cv2.imread's default flags:
inputImage = cv2.imread(path + fileName)
# Copy of the input; the detected point is drawn on this copy later:
inputImageCopy = inputImage.copy()
# Grayscale conversion (assumes imread returned a 3-channel BGR image):
grayscaleImage = cv2.cvtColor(inputImage, cv2.COLOR_BGR2GRAY)
Now, the convolution must receive an image where the shapes have been reduced to a 1 pixel width. This can be done computing the skeleton of the image. The skeleton is a version of the binary image where lines have been normalized to have a width of 1 pixel. We can then convolve the image with a 3 x 3 kernel and look for specific pixel patterns.
Before computing the skeleton, we will add a border around the image. This prevents some artifacts that the skeleton yields if a shape extends all the way to the borders of the image:
# Add a 1-pixel black border on all four sides to prevent skeleton artifacts.
# Every coordinate measured after this point is shifted by borderThickness
# relative to the original image.
borderThickness = 1
borderColor = (0, 0, 0)
grayscaleImage = cv2.copyMakeBorder(grayscaleImage, borderThickness, borderThickness, borderThickness, borderThickness,
                                    cv2.BORDER_CONSTANT, None, borderColor)
# Compute the skeleton.
# NOTE(review): cv2.ximgproc presumably needs the opencv-contrib build, and the
# third argument (1) presumably selects the Guo-Hall thinning variant - confirm.
skeleton = cv2.ximgproc.thinning(grayscaleImage, None, 1)
This is the skeleton, free of artifacts:
Now, let's find the intersections. The approach is based on Mark Setchell's info on this post. The post mainly shows the method for finding end-points of a shape, but I extended it to also identify line intersections. The main idea is that the convolution yields a very specific value where patterns of black and white pixels are found in the input image. Refer to the post for the theory behind this idea, but here, we are looking for a value of 130:
# Threshold the skeleton so that white pixels get a value of 10 and
# black pixels a value of 0:
_, binaryImage = cv2.threshold(skeleton, 128, 10, cv2.THRESH_BINARY)

# Set the intersections kernel: weight 10 for the centre pixel and weight 1
# for each of its 8 neighbours:
h = np.array([[1, 1, 1],
              [1, 10, 1],
              [1, 1, 1]])

# Convolve the image with the kernel (-1 keeps the input depth):
imgFiltered = cv2.filter2D(binaryImage, -1, h)

(height, width) = binaryImage.shape

# A response of 130 = 10 * 10 (white centre pixel) + 3 * 10 (exactly three
# white neighbours); this is the pattern treated as an intersection:
thresh = 130

# Build the points mask directly from the filtered image: 255 where the
# response equals the target value, 0 elsewhere. No pre-allocated mask is
# needed because np.where creates the array. The result is converted to a
# uint8 height x width x channels numpy array:
pointsMask = np.where(imgFiltered == thresh, 255, 0).astype(np.uint8)
pointsMask = pointsMask.reshape(height, width, 1)
This is the pointsMask image:
If we apply some morphology we can join individual pixels into blobs. Here, a dilation will do:
# Set kernel (structuring element) size:
kernelSize = 7
# Set operation iterations:
opIterations = 3
# Get the structuring element (a kernelSize x kernelSize rectangle):
morphKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernelSize, kernelSize))
# Dilate pointsMask opIterations times so that neighbouring candidate pixels
# merge into one blob (the two None arguments are dst and anchor):
pointsMask = cv2.morphologyEx(pointsMask, cv2.MORPH_DILATE, morphKernel, None, None, opIterations, cv2.BORDER_REFLECT101)
This is the result of applying the dilation:
Now, we can find the coordinates of the white pixels and compute their mean values (or centroids):
# Get the coordinates of the white (intersection candidate) pixels:
(Y, X) = np.where(pointsMask == 255)
if Y.size == 0:
    # The mean of an empty array is NaN, which int() cannot convert
    raise ValueError("no intersection candidates found in pointsMask")
# Get the centroid. pointsMask was computed on the bordered image, so subtract
# the border to express the point in the coordinates of the original input:
y = int(np.mean(Y)) - borderThickness
x = int(np.mean(X)) - borderThickness
Let's draw a circle using these coordinates on the original image:
# Draw the intersection point:
# Set circle color (BGR order, so this is red):
color = (0, 0, 255)
# Draw a filled circle (thickness -1) of radius 3 at (x, y) on the copy of the input:
cv2.circle(inputImageCopy, (x, y), 3, color, -1)
# Show the image and block until a key is pressed:
cv2.imshow("Intersections", inputImageCopy)
cv2.waitKey(0)
This is the final result:
Apologies as I'm very new to OpenCV and the world of image processing in general.
I'm using OpenCV in Python to detect contours/boxes in this image.
It almost manages to detect all contours, but for some odd reason it doesn't pick up the last row and column which are obvious contours. This image shows the bounding boxes for contours it manages to identify.
Not entirely sure why it's not able to easily pick up the remaining contours. I've researched similar questions but haven't found a suitable answer.
Here's my code.
import numpy as np
import cv2
import math
import matplotlib.pyplot as plt
#load image (`path` is not defined in this snippet; it has to be set beforehand)
img = cv2.imread(path)
#remove noise
img = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21)
#convert to gray scale
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#make pixels darker: with THRESH_TOZERO every pixel that is not above 240 becomes 0
_, img = cv2.threshold(img, 240, 255, cv2.THRESH_TOZERO)
#thresholding the image to a binary image (THRESH_OTSU picks the threshold
#automatically and returns it as `thresh`)
thresh, img_bin = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
#inverting the image
img_bin = 255 - img_bin
# kernel length is 1/100 of the image width
kernel_len = np.array(img).shape[1]//100
# Defining a vertical kernel to detect all vertical lines of image
ver_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_len))
# Defining a horizontal kernel to detect all horizontal lines of image
# NOTE(review): hor_kernel is not used below; the horizontal pass uses np.ones kernels
hor_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_len, 1))
# A kernel of 2x2
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
#Use vertical kernel to isolate the vertical lines, then dilate (30x) and
#erode (29x) them with a 10x4 kernel
image_1 = cv2.erode(img_bin, ver_kernel, iterations = 3)
vertical_lines = cv2.dilate(image_1, np.ones((10, 4),np.uint8), iterations = 30)
vertical_lines = cv2.erode(vertical_lines, np.ones((10, 4),np.uint8), iterations = 29)
#Isolate the horizontal lines with a 1x5 erosion, then dilate (20x, 2x40 kernel)
#and erode (19x, 2x39 kernel) them
image_2 = cv2.erode(img_bin, np.ones((1, 5),np.uint8), iterations = 5)
horizontal_lines = cv2.dilate(image_2, np.ones((2, 40),np.uint8), iterations = 20)
horizontal_lines = cv2.erode(horizontal_lines, np.ones((2, 39),np.uint8), iterations = 19)
# Combine horizontal and vertical lines in a new third image, with both having same weight.
img_vh = cv2.addWeighted(vertical_lines, 0.5, horizontal_lines, 0.5, 0.0)
rows, cols = img_vh.shape
#shift image so the enhanced lines overlap with original image
#(translation of -30 px in x and -21 px in y)
M = np.float32([[1,0,-30],[0,1,-21]])
img_vh = cv2.warpAffine(img_vh ,M,(cols,rows))
#Eroding the inverted image and thresholding it
img_vh = cv2.erode(~img_vh, kernel, iterations = 2)
thresh, img_vh = cv2.threshold(img_vh, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
# NOTE(review): bitxor and bitnot are computed but not used below
bitxor = cv2.bitwise_xor(img, img_vh)
bitnot = cv2.bitwise_not(bitxor)
#find contours
contours, _ = cv2.findContours(img_vh, cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)
#keep only the contours whose area is below 4000000
new_contours = []
for contour in contours:
    if cv2.contourArea(contour) < 4000000:
        new_contours.append(contour)
#get bounding boxes as (x, y, w, h) tuples
bounding_boxes = [cv2.boundingRect(contour) for contour in new_contours]
#plot detected bounding boxes on a freshly loaded copy of the input
img_og = cv2.imread(path)
for bounding_box in bounding_boxes:
    x,y,w,h = bounding_box
    img_plot = cv2.rectangle(img_og, (x, y), (x+w, y+h), (255, 0, 0) , 2)
plotting = plt.imshow(img_plot, cmap='gray')
plt.show()
As @ypnos suggested, the dilation and erosion have most likely pushed the last line off the image in the "saving horizontal lines" section, so img_vh did not contain the last row when it was searched for contours. I tested this (see the Note below) by viewing the image after each of your transformations.
Specifically, the number of iterations was too high; the kernel you used is already reasonably sized. It gave perfect results with iterations = 2 on lines 43 and 44 of your code.
After modifying them to :
# Same kernel sizes as in the question (2x40 dilate, 2x39 erode), but with
# only 2 iterations each:
horizontal_lines = cv2.dilate(image_2, np.ones((2, 40), np.uint8), iterations=2)
horizontal_lines = cv2.erode(horizontal_lines, np.ones((2, 39), np.uint8), iterations=2)
the bounding box rectangles had shifted off the image a bit. That was fixed by changing line 51 of the code to:
# Translation matrix: -30 px in x and -5 px in y (the y shift was -21 in the question's code)
M = np.float32([[1, 0, -30], [0, 1, -5]])
This was the result.
Note:
I test/debug using this function usually.
def test(image, title, delay=0):
    """Show *image* in a window named *title*, wait for a key, then close it.

    :param image: image array handed to cv2.imshow.
    :param title: window name; the same name is used to destroy the window.
    :param delay: value handed to cv2.waitKey; the default 0 waits until a
        key is pressed, which is what the two-argument call always did.
    :return: whatever cv2.waitKey returned (callers may ignore it).
    """
    cv2.imshow(title, image)
    try:
        key = cv2.waitKey(delay)
    finally:
        # close the window even if the wait is interrupted (e.g. Ctrl+C)
        cv2.destroyWindow(title)
    return key
The variable position and the handy waitkey calms me down.
I am trying to detect the outer boundary of the circular object in the images below:
I tried OpenCV's Hough Circle, but the code is not working for every image. I also tried to adjust parameters such as minRadius and maxRadius in Hough Circle, but it's still not working on every image.
The aim is to detect the object from the image and crop it.
Expected output:
Source code:
import imutils
import cv2
import numpy as np
from matplotlib import pyplot as plt
image = cv2.imread("path to the image i have provided")

# Downscale to a width of 600 px, keeping the aspect ratio
r = 600.0 / image.shape[1]
dim = (600, int(image.shape[0] * r))
resized = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)
cv2.imwrite("path to were we want to save downscaled image", resized)

# Continue with the downscaled image
image = cv2.imread('path of downscaled image')
image1 = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
image2 = cv2.GaussianBlur(image1, (5, 5), 0)
edged = cv2.Canny(image2, 30, 150)

circles = cv2.HoughCircles(edged, cv2.HOUGH_GRADIENT, 1, 20,
                           param1=50, param2=30, minRadius=200, maxRadius=280)
# HoughCircles returns None when nothing is detected; stop with a clear
# message instead of failing inside np.around
if circles is None:
    raise SystemExit('No circle with a radius between 200 and 280 px was found')

# Round the detections and keep the circle with the largest radius
circles = np.uint16(np.around(circles))
max_circle = max(circles[0, :], key=lambda x: x[2])
# print(max_circle)
# plain Python ints are accepted by every OpenCV drawing function
cx, cy, radius = (int(v) for v in max_circle)

# Create mask: a filled white disc on a black single-channel image
height, width = image1.shape
mask = np.zeros((height, width), np.uint8)
cv2.circle(mask, (cx, cy), radius, 255, thickness=-1)
masked_data = cv2.bitwise_and(image, image, mask=mask)
_, thresh = cv2.threshold(mask, 1, 255, cv2.THRESH_BINARY)

# Find Contour; findContours returns 2 or 3 values depending on the OpenCV
# version, and the contour list is first of two or second of three
found = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = found[0] if len(found) == 2 else found[1]
x, y, w, h = cv2.boundingRect(contours[0])

# Crop masked_data to the bounding box of the disc
crop = masked_data[y:y+h, x:x+w]

# Show and save the results; wait for a key, then close the windows
cv2.imshow('OG', image)
cv2.imshow('Cropped ROI', crop)
cv2.imwrite("path to save roi image", crop)
cv2.waitKey(0)
cv2.destroyAllWindows()
Second Answer: an approach based on color segmentation.
While I was editing the question to improve its readability and was inserting and resizing all the images from the link you shared to make it easier for everyone to visualize what you are trying to do, it occurred to me that this problem might be a better candidate for an approach based on segmentation by color:
This simpler (but clever) approach assumes that the reel appears pretty much in the same location and has more or less the same dimensions every time:
To discover the approximate color of the reel in the image, define a list of Regions of Interest (ROIs) to sample pixels from and determine the min and max color of that area in the HSV color space. The location and size of the ROI are values derived from the size of the image. In the images below, you can see the ROIs drawn as blue-ish rectangles:
Once the min and max HSV colors have been found, a threshold operation with cv2.inRange() can be executed to segment the reel:
Then, iterate through all the contours in the binary image and assume that the largest one represents the reel. Use this contour and draw it in a separate mask to be able to extract the pixels from original image:
At this stage, it is also possible to compute a bounding box for the contour and extract its precise location to be able to perform a crop operation later and completely isolate the reel in the image:
This approach works for EVERY image shared on the question.
Source code:
import cv2
import numpy as np
import sys
# load input image from the cmd-line
if len(sys.argv) < 2:
    print('!!! Usage: ' + sys.argv[0] + ' <image>')
    sys.exit(-1)

filename = sys.argv[1]
img = cv2.imread(filename)
if (img is None):
    print('!!! Failed imread')
    sys.exit(-1)

# prefix of every output file: the input name without its last 4 characters
# (i.e. without a 3-letter extension such as ".png")
out_prefix = filename[:-4]

# create an auxiliary image for debugging purposes
dbg_img = img.copy()

# initialize a list of Regions of Interest that need to be scanned to identify
# good HSV values to threshold by color; each ROI is (x, y, width, height) and
# spans 10% of the image in each dimension
img_h, img_w = img.shape[:2]
roi_w = int(img_w * 0.10)
roi_h = int(img_h * 0.10)
roi_list = [
    (int(img_w * 0.25), int(img_h * 0.15), roi_w, roi_h),
    (int(img_w * 0.25), int(img_h * 0.60), roi_w, roi_h),
]

# convert image to HSV color space
hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# running min/max HSV over ALL ROIs, initialised to the opposite extremes
# (same start values as before: 179/255/255 for the min, 0/0/0 for the max)
low_hsv = np.array([179, 255, 255], dtype=np.uint8)
max_hsv = np.array([0, 0, 0], dtype=np.uint8)

# iterate through the ROIs to determine the min/max HSV color of the reel
for rect in roi_list:
    x, y, rw, rh = rect
    print('ROI rect=', rect)

    # one row per pixel, columns are H, S, V
    roi_pixels = hsv_img[y:y + rh, x:x + rw].reshape(-1, 3)
    low_hsv = np.minimum(low_hsv, roi_pixels.min(axis=0))
    max_hsv = np.maximum(max_hsv, roi_pixels.max(axis=0))

    # debug: draw ROI in original image
    cv2.rectangle(dbg_img, (x, y), (x + rw, y + rh), (255, 165, 0), 4)  # blue-ish (BGR)

cv2.imshow('ROIs', cv2.resize(dbg_img, dsize=(0, 0), fx=0.5, fy=0.5))
cv2.imwrite(out_prefix + '_rois.png', dbg_img)

# threshold image by color, using the range accumulated over every ROI
# (not only the last ROI's values)
img_bin = cv2.inRange(hsv_img, low_hsv, max_hsv)
cv2.imshow('binary', cv2.resize(img_bin, dsize=(0, 0), fx=0.5, fy=0.5))
cv2.imwrite(out_prefix + '_binary.png', img_bin)

# create a mask to store the contour of the reel (hopefully)
mask = np.zeros(img_bin.shape[:2], np.uint8)
crop_x, crop_y, crop_w, crop_h = (0, 0, 0, 0)

# iterate through all the contours in the binary image:
# assume that the first contour with an area larger than 100k belongs to the reel
found = cv2.findContours(img_bin, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
contours = found[0] if len(found) == 2 else found[1]
for contourIdx, cnt in enumerate(contours):
    area = cv2.contourArea(cnt)
    print('contourIdx=', contourIdx, 'area=', area)

    # draw potential reel blob on the mask (in white)
    if (area > 100000):
        crop_x, crop_y, crop_w, crop_h = cv2.boundingRect(cnt)
        centers, radius = cv2.minEnclosingCircle(cnt)
        cv2.circle(mask, (int(centers[0]), int(centers[1])), int(radius), (255), -1)  # fill with white
        break

cv2.imshow('mask', cv2.resize(mask, dsize=(0, 0), fx=0.5, fy=0.5))
cv2.imwrite(out_prefix + '_mask.png', mask)

# copy just the reel area into its own image
reel_img = cv2.bitwise_and(img, img, mask=mask)
cv2.imshow('reel_img', cv2.resize(reel_img, dsize=(0, 0), fx=0.5, fy=0.5))
cv2.imwrite(out_prefix + '_reel.png', reel_img)

# crop the reel to a smaller image (skipped when no contour was large enough)
if (crop_w != 0 and crop_h != 0):
    cropped_reel_img = reel_img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w]
    cv2.imshow('cropped_reel_img', cv2.resize(cropped_reel_img, dsize=(0, 0), fx=0.5, fy=0.5))
    output_filename = out_prefix + '_crop.png'
    cv2.imwrite(output_filename, cropped_reel_img)

cv2.waitKey(0)
First answer: an approach based on pre-processing the image and executing an adaptiveThreshold operation.
There might be other ways of solving this problem that are not based on Hough Circles. Here is the result of an approach that is not:
Preprocess the image! Decreasing the size of the image and executing a blur helps with segmentation:
The segmentation method uses a cv2.adaptiveThreshold() to create a binary image that preserves the most important objects: the center of the reel and the external edge of the reel. This is an important step since we are only interested in what exists between these two objects. However, life is not perfect and neither is this segmentation. The shadow of reel on the table became part of the binary objects detected. Also, the outer edge is not fully connected as you can see on the resulting image on the right (look at the top left of the circumference):
To join broken segments, a morphological operation can be executed:
Finally, the entire reel area can be exposed by iterating through the contours of the image above and discarding those whose area is larger than what is expected for a reel. The resulting binary image (on the left) can then be used as a mask to identify the reel location on the original image:
Keep in mind that I'm not trying to find a universal solution for your problem. I'm merely showing that there might be other solutions that don't depend on Hough Circles.
Also, this code might need some adjustments to work on a larger number of cases.
Source code:
import cv2
import numpy as np
import sys
img = cv2.imread("test_images/reel.jpg")
if img is None:
    print('!!! Failed imread')
    sys.exit(-1)

# the detected circle is drawn on this copy of the input
output_img = img.copy()

# 1. Preprocess the image: downscale to speed up processing and execute a blur
SCALE_FACTOR = 0.5
smaller_img = cv2.resize(img, dsize=(0, 0), fx=SCALE_FACTOR, fy=SCALE_FACTOR)
blur_img = cv2.medianBlur(smaller_img, 9)
cv2.imwrite('reel1_blur_img.png', blur_img)

# 2. Segment the image to identify the 2 most important contours: the center
#    of the reel and the outer edge
gray_img = cv2.cvtColor(blur_img, cv2.COLOR_BGR2GRAY)
img_bin = cv2.adaptiveThreshold(gray_img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 19, 4)
cv2.imwrite('reel2_img_bin.png', img_bin)

# fill (in white) every contour of the binary image whose area is at least
# 5000; tiny segments are left out
green_mask = np.zeros(img_bin.shape[:2], np.uint8)
contours, hierarchy = cv2.findContours(img_bin, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for cnt in contours:
    if cv2.contourArea(cnt) >= 5000:
        cv2.fillPoly(green_mask, pts=[cnt], color=(255))

cv2.imshow('green_mask', green_mask)
cv2.imwrite('reel2_green_mask.png', green_mask)

# 3. Fix mask: join segments nearby (one dilation followed by one erosion
#    with the same 3x3 kernel)
kernel = np.ones((3, 3), np.uint8)
dilated_mask = cv2.dilate(green_mask, kernel, iterations=1)
green_mask = cv2.erode(dilated_mask, kernel, iterations=1)
cv2.imshow('fixed green_mask', green_mask)
cv2.imwrite('reel3_img.png', green_mask)

# 4. Extract the reel area from the green mask: fill every contour whose area
#    is at most 110000; LARGER contours are discarded
reel_mask = np.zeros(green_mask.shape[:2], np.uint8)
contours, hierarchy = cv2.findContours(green_mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for contourIdx, cnt in enumerate(contours):
    x, y, w, h = cv2.boundingRect(cnt)
    area = cv2.contourArea(cnt)
    print('contourIdx=', contourIdx, 'w=', w, 'h=', h, 'area=', area)
    if area <= 110000:
        cv2.fillPoly(reel_mask, pts=[cnt], color=(255))

cv2.imshow('reel_mask', reel_mask)
cv2.imwrite('reel4_reel_mask.png', reel_mask)

# 5. Draw the reel area on the original image
contours, hierarchy = cv2.findContours(reel_mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for cnt in contours:
    centers, radius = cv2.minEnclosingCircle(cnt)

    # rescale these values back to the original image size
    centers_orig = (centers[0] // SCALE_FACTOR, centers[1] // SCALE_FACTOR)
    radius_orig = radius // SCALE_FACTOR
    print('centers=', centers_orig, 'radius=', radius_orig)

    circle_center = (int(centers_orig[0]), int(centers_orig[1]))
    cv2.circle(output_img, circle_center, int(radius_orig), (128, 0, 255), 5)  # magenta

cv2.imshow('output_img', output_img)
cv2.imwrite('reel5_output.png', output_img)

# display just the pixels from the original image: scale the mask back up to
# the size of the input and use it to cut the reel out
full_size = (int(img.shape[1]), int(img.shape[0]))
larger_reel_mask = cv2.resize(reel_mask, full_size)
output_reel_img = cv2.bitwise_and(img, img, mask=larger_reel_mask)
cv2.imshow('output_reel_img', output_reel_img)
cv2.imwrite('reel5_output_reel.png', output_reel_img)

cv2.waitKey(0)
At this point, it's possible to use larger_reel_mask and compute a minimal enclosing circle, draw it over this mask to make it a little bit more round and allow us to retrieve the area of the reel more accurately:
But the 4 lines of code that achieve this improvement I leave as an exercise for the reader.
I have an input image of a paragraph of text in single line spacing. I'm trying to implement something like the line spacing option to increase/decrease space between text lines in Microsoft Word. The current image is in single space, how can I convert the text into double space? Or say .5 space? Essentially I'm trying to dynamically restructure the spacing between text lines, preferably with an adjustable parameter. Something like this:
Input image
Desired result
My current attempt looks like this. I've been able to increase the spacing slightly but the text detail seems to be eroded and there is random noise in between lines.
Any ideas on how to improve the code or any better approaches?
import numpy as np
import cv2
img = cv2.imread('text.png')
H, W = img.shape[:2]

# Binarize with Otsu; THRESH_BINARY_INV inverts the result
grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
threshed = cv2.threshold(grey, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

# Average value of every row, flattened to one number per row
hist = cv2.reduce(threshed, 1, cv2.REDUCE_AVG).reshape(-1)

# A row is a delimiter when its average is at most `spacing` and the average
# of the next row is above it
spacing = 2
delimeter = [y for y in range(H - 1) if hist[y] <= spacing and hist[y + 1] > spacing]

# Cut the thresholded image into strips between consecutive delimiters; the
# first strip starts at row 0 and the last one ends at row H
bounds = [0] + delimeter + [H]
arr = [threshed[top:bottom, 0:W] for top, bottom in zip(bounds, bounds[1:])]

# Rebuild the image: one leading row of zeros, then 10 rows of zeros in front
# of every strip
space_array = np.zeros((10, W))
pieces = [np.zeros((1, W))]
for im in arr:
    pieces.append(space_array)
    pieces.append(im)
result = np.concatenate(pieces, axis=0)

# Invert back and convert to an 8-bit image
result = (255 - result).astype(np.uint8)

cv2.imshow('result', result)
cv2.waitKey()
Approach #1: Pixel analysis
Obtain binary image. Load the image, convert to grayscale, and Otsu's threshold
Sum row pixels. The idea is that the pixel sum of a row can be used to determine if it corresponds to text or white space
Create new image and add additional white space. We iterate through the pixel array and add additional white space
Binary image
# Load image, grayscale, Otsu's threshold
image = cv2.imread('1.png')
h, w = image.shape[:2]
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# THRESH_BINARY_INV inverts the result; presumably the input is dark text on
# a light page, so text becomes white (255) and the page 0 - confirm
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
Now we iterate through each row and sum the white pixels to generate a pixel array.
We can profile a column of data generated from the sum of all the pixels in each row to determine which rows correspond to text. Sections of the data that equal 0 represent rows of the image that are composed of white space. Here's a visualization of the data array:
# Sum white pixels in each row (a sum of 0 means the row contains no text)
pixels = np.sum(thresh, axis=1).tolist()
# Create the blank space array: white rows inserted at every empty row;
# change the first dimension to change how much space is added
space = np.ones((2, w), dtype=np.uint8) * 255
# Final image, started with ZERO rows so that no black line ends up at the
# top of the result
result = np.zeros((0, w), dtype=np.uint8)
We convert the data to a list and iterate through the data to build the final image. If a row is determined to be white space then we concatenate an empty space array to the final image. By adjusting the size of the empty array, we can change the amount of space to add to the image.
# Iterate through each row and add space if entire row is empty,
# then always add the original row of the image.
# The pieces are collected in a list and joined once at the end: calling
# np.concatenate inside the loop would copy the growing image on every row.
pieces = [result]
for index, value in enumerate(pixels):
    if value == 0:
        pieces.append(space)
    pieces.append(gray[index:index+1, 0:w])
result = np.concatenate(pieces, axis=0)
Here's the result
Code
import cv2
import numpy as np
import matplotlib.pyplot as plt
# import pandas as pd
# Load image, grayscale, Otsu's threshold
image = cv2.imread('1.png')
if image is None:
    # cv2.imread reports an unreadable/missing file by returning None
    # rather than raising, so fail here with a clear message
    raise FileNotFoundError("Could not read image '1.png'")
h, w = image.shape[:2]
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Sum white pixels in each row (0 means the row holds no text pixels)
# Create blank space array and final image
pixels = np.sum(thresh, axis=1).tolist()
space = np.ones((1, w), dtype=np.uint8) * 255
result = np.zeros((0, w), dtype=np.uint8)

# Iterate through each row and add space if the entire row is empty;
# the original row is always added to the final image as well.
# Pieces are gathered in a list and concatenated once, which avoids
# re-copying the growing image on every row.
pieces = [result]
for index, value in enumerate(pixels):
    if value == 0:
        pieces.append(space)
    pieces.append(gray[index:index+1, 0:w])
result = np.concatenate(pieces, axis=0)

# Uncomment for plot visualization
# (also requires enabling the pandas import at the top of the script)
'''
x = range(len(pixels))[::-1]
df = pd.DataFrame({'y': x, 'x': pixels})
df.plot(x='x', y='y', xlim=(-2000,max(pixels) + 2000), legend=None, color='teal')
'''

cv2.imshow('result', result)
cv2.imshow('thresh', thresh)
# Only has something to display when the plot above is enabled
plt.show()
cv2.waitKey()
Approach #2: Individual line extraction
For a more dynamic approach, we can find the contours of each line and then add space in between each contour. We use the same method of appending extra white space as the 1st approach.
Obtain binary image. Load image, grayscale, Gaussian blur, and Otsu's threshold
Connect text contours. We create a horizontal shaped kernel and dilate to connect the words of each line into a single contour
Extract each line contour. We find contours, sort from top-to-bottom using imutils.contours.sort_contours() and extract each line ROI
Append white space in between each line. We create an empty array and build the new image by appending white space between each line contour
Binary image
# Read the image, keep an unmodified copy of it, and note its dimensions
image = cv2.imread('1.png')
original = image.copy()
height, width = image.shape[:2]

# Grayscale -> 3x3 Gaussian blur -> inverse binary threshold chosen by Otsu
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3,3), 0)
otsu_inverted = cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
_, thresh = cv2.threshold(blur, 0, 255, otsu_inverted)

# Complement of the binary mask
invert = 255 - thresh
Create horizontal kernel and dilate
# Use a wide, short rectangular kernel and dilate twice so that the
# text on each line is connected into a single contour
kernel_size = (10, 2)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
dilate = cv2.dilate(thresh, kernel, iterations=2)
Extracted individual line contour highlighted in green
# Extract each line contour
lines = []
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# The return value has either 2 or 3 elements depending on the OpenCV
# version; pick the contour list in both cases
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
# sort_contours cannot handle an empty sequence, so only sort when at
# least one contour was found (e.g. nothing is found on a blank image)
if len(cnts) > 0:
    (cnts, _) = contours.sort_contours(cnts, method="top-to-bottom")
for c in cnts:
    # Only the vertical extent is needed: every line is cropped and
    # highlighted across the full image width
    _, y, _, h = cv2.boundingRect(c)
    cv2.rectangle(image, (0, y), (width, y+h), (36,255,12), 2)
    line = original[y:y+h, 0:width]
    line = cv2.cvtColor(line, cv2.COLOR_BGR2GRAY)
    lines.append(line)
Append white space in between each line. Here's the result with a 1-pixel-tall space array
Result with a 5-pixel-tall space array
# Assemble the output as: separator, line, separator, line, ..., separator
# where the separator is a single all-white row spanning the image width
space = np.full((1, width), 255, dtype=np.uint8)
stack = [np.zeros((0, width), dtype=np.uint8), space]
for line in lines:
    stack.append(line)
    stack.append(space)
result = np.concatenate(stack, axis=0)
Full code
import cv2
import numpy as np
from imutils import contours
# Load image, grayscale, blur, Otsu's threshold
image = cv2.imread('1.png')
if image is None:
    # cv2.imread reports an unreadable/missing file by returning None
    # rather than raising, so fail here with a clear message
    raise FileNotFoundError("Could not read image '1.png'")
original = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3,3), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
invert = 255 - thresh
height, width = image.shape[:2]

# Dilate with a horizontal kernel to connect text contours
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10,2))
dilate = cv2.dilate(thresh, kernel, iterations=2)

# Extract each line contour
lines = []
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# The return value has either 2 or 3 elements depending on the OpenCV
# version; pick the contour list in both cases
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
# sort_contours cannot handle an empty sequence, so only sort when at
# least one contour was found (e.g. nothing is found on a blank image)
if len(cnts) > 0:
    (cnts, _) = contours.sort_contours(cnts, method="top-to-bottom")
for c in cnts:
    # Only the vertical extent is needed: every line is cropped and
    # highlighted across the full image width
    _, y, _, h = cv2.boundingRect(c)
    cv2.rectangle(image, (0, y), (width, y+h), (36,255,12), 2)
    line = original[y:y+h, 0:width]
    line = cv2.cvtColor(line, cv2.COLOR_BGR2GRAY)
    lines.append(line)

# Append white space in between each line: the pieces are gathered in a
# list (separator, line, separator, ...) and concatenated in one call
space = np.ones((1, width), dtype=np.uint8) * 255
pieces = [np.zeros((0, width), dtype=np.uint8), space]
for line in lines:
    pieces.append(line)
    pieces.append(space)
result = np.concatenate(pieces, axis=0)

cv2.imshow('result', result)
cv2.imshow('image', image)
cv2.imshow('dilate', dilate)
cv2.waitKey()