How to extract only outer contours from an image (OpenCV) - python

I am trying to extract digits from the image below using a simple OpenCV contours approach, but I am getting overlapping bounding boxes around the contours.
cv2.RETR_EXTERNAL should return only the outer contours in the hierarchy, but it's not working, as can be seen from the output below.
Code:
from matplotlib import pyplot as plt
import cv2
# Read the image as single-channel grayscale (flag 0).
img = cv2.imread('image.png', 0)
# Three-value unpacking matches the OpenCV 3.x findContours signature
# (image, contours, hierarchy) pinned in the requirements below.
# NOTE: contours are searched directly on the grayscale image, with no
# blur/threshold step beforehand.
_, contours, _ = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Convert a copy to 3 channels so the green rectangles are visible.
imgRGB = cv2.cvtColor(img.copy(), cv2.COLOR_GRAY2RGB)
for c in contours:
    # Axis-aligned bounding box of the current contour.
    x, y, w, h = cv2.boundingRect(c)
    cv2.rectangle(imgRGB, (x, y), (x+w, y+h), (0,255,0), 2)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.imshow(imgRGB, cmap='gray')
Requirements:
opencv-python==3.4.5.20
matplotlib==3.1.2

You need to blur then apply a threshold before finding contours. You need to do this because if you find contours directly on the grayscale image, there are tiny particles which are picked up as contours. Here's a simple process:
Load image, grayscale, Gaussian blur, Otsu's threshold
Find contours and sort using imutils.contours.sort_contours() with the left-to-right parameter
Obtain bounding box then extract ROI using Numpy slicing
Here's the detected bounding boxes highlighted in green
Extracted/saved ROIs
Code
import cv2
from imutils import contours
# Load image, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread('1.png')
original = image.copy()  # untouched copy to crop ROIs from
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3,3), 0)
# Threshold the *blurred* image: blurring first is what suppresses the tiny
# particles that would otherwise be picked up as separate contours.
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

# Find external contours; findContours returns 2 values in OpenCV 2.x/4.x
# and 3 values in 3.x, so pick the contour list by tuple length.
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
(cnts, _) = contours.sort_contours(cnts, method="left-to-right")

# Draw each bounding box and save the matching crop as ROI_<n>.png
for num, c in enumerate(cnts):
    x, y, w, h = cv2.boundingRect(c)
    cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 1)
    # NumPy slicing is rows (y) first, then columns (x).
    ROI = original[y:y+h, x:x+w]
    cv2.imwrite('ROI_{}.png'.format(num), ROI)

cv2.imshow('image', image)
cv2.waitKey()

Related

How to split an image using edge detection in python

My images are stored in a NumPy array. I want to split each image into individual images, each containing a single character.
import cv2
# Load image, grayscale, Gaussian blur, Otsu's threshold, dilate
image = arr #numpy_arr_containing 200X200 image
original = image.copy()  # untouched copy to crop ROIs from
gray = image  # the array is used directly as the grayscale input
blur = cv2.GaussianBlur(gray, (5,5), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15,15))
dilate = cv2.dilate(thresh, kernel, iterations=2)

# Find contours, obtain bounding box coordinates, and extract ROI
# (findContours returns 2 or 3 values depending on the OpenCV version)
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for image_number, c in enumerate(cnts):
    x, y, w, h = cv2.boundingRect(c)
    cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 3)
    ROI = original[y:y+h, x:x+w]
    cv2.imwrite("ROI_{}.png".format(image_number), ROI)

# cv2.imshow('image', image)
# cv2.imshow('thresh', thresh)
# cv2.imshow('dilate', dilate)
# cv2.waitKey()

How to detect diagram region and extract(crop) it from a research paper's Image

I want to extract all the figures/diagrams from this research paper image. How can I do that?
I just want the figure part. Is there any way I could crop out that part?
To extract only the figures/diagram here's an approach using OpenCV:
Obtain binary image. Load the image, convert to grayscale and Otsu's threshold to get a binary image.
Connect text contours. We take advantage of the observation that text is structured in paragraphs, so we can dilate with a horizontal kernel to connect individual words into a single contour.
Remove non-diagram contours. We find contours and filter using aspect ratio and contour area. We effectively remove non-diagram contours by filling in the contour.
Form a single bounding box. Iterate through remaining contours and determine the bounding box coordinates
Extract ROI. Crop/Extract the diagram using Numpy slicing.
Here's a visualization of each step:
Threshold image
Dilate with horizontal kernel
Filtering to remove non-diagram contours
Detected diagram bounding box
Extracted ROI
Note: This approach is with the assumption that there is only one diagram in the image. If there are multiple, then you can remove step #4 to get multiple ROIs and save each individual ROI as a separate image. I'm sure it would be an easy change :)
Code
import cv2
import numpy as np
# Load image, grayscale, Otsu's threshold
image = cv2.imread('1.png')
original = image.copy()  # untouched copy to crop the ROI from
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Dilate with horizontal kernel
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20,10))
dilate = cv2.dilate(thresh, kernel, iterations=2)

# Find contours and remove non-diagram contours
# (findContours returns 2 or 3 values depending on the OpenCV version)
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    x, y, w, h = cv2.boundingRect(c)
    area = cv2.contourArea(c)
    # Wide, large contours are treated as text blocks; fill them with black
    # so they disappear from the mask.
    if w/h > 2 and area > 10000:
        cv2.drawContours(dilate, [c], -1, (0,0,0), -1)

# Iterate through diagram contours and form single bounding box
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
boxes = []
for c in cnts:
    x, y, w, h = cv2.boundingRect(c)
    boxes.append([x, y, x + w, y + h])  # stored as (x1, y1, x2, y2)

# With no remaining contours the column indexing below would raise an
# IndexError; stop with a clear message instead.
if not boxes:
    raise SystemExit('No diagram contours found')

boxes = np.asarray(boxes)
# Union of all boxes, converted to plain Python ints for drawing/slicing.
x = int(np.min(boxes[:,0]))
y = int(np.min(boxes[:,1]))
w = int(np.max(boxes[:,2])) - x
h = int(np.max(boxes[:,3])) - y

# Extract ROI
cv2.rectangle(image, (x,y), (x + w,y + h), (36,255,12), 3)
ROI = original[y:y+h, x:x+w]

cv2.imshow('image', image)
cv2.imshow('thresh', thresh)
cv2.imshow('dilate', dilate)
cv2.imshow('ROI', ROI)
cv2.waitKey()

How to detect paragraphs in a text document image for a non-consistent text structure in Python OpenCV

I am trying to identify paragraphs of text in a .pdf document by first converting it into an image then using OpenCV. But I am getting bounding boxes on lines of text instead of paragraphs. How can I set some threshold or some other limit to get paragraphs instead of lines?
Here is the sample input image:
Here is the output I am getting for the above sample:
I am trying to get a single bounding box on the paragraph in the middle. I am using this code.
import cv2
import numpy as np
large = cv2.imread('sample image.png')
rgb = cv2.pyrDown(large)  # work on a downsampled copy
small = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)

# kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
kernel = np.ones((5, 5), np.uint8)
grad = cv2.morphologyEx(small, cv2.MORPH_GRADIENT, kernel)

_, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

# Close with a wide, 1-pixel-high kernel: joins neighbours horizontally only.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 1))
connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)

# using RETR_EXTERNAL instead of RETR_CCOMP
# Taking the last two return values works whether findContours returns
# (contours, hierarchy) or (image, contours, hierarchy).
contours, hierarchy = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]

mask = np.zeros(bw.shape, dtype=np.uint8)

for idx, contour in enumerate(contours):
    x, y, w, h = cv2.boundingRect(contour)
    # Reset the box area, then draw only this contour filled in.
    mask[y:y+h, x:x+w] = 0
    cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
    # Fraction of the bounding box covered by the filled contour.
    r = float(cv2.countNonZero(mask[y:y+h, x:x+w])) / (w * h)

    if r > 0.45 and w > 8 and h > 8:
        cv2.rectangle(rgb, (x, y), (x+w-1, y+h-1), (0, 255, 0), 2)

cv2.imshow('rects', rgb)
cv2.waitKey(0)
This is a classic situation for dilate. Whenever you want to connect multiple items together, you can dilate them to join adjacent contours into a single contour. Here's a simple approach:
Obtain binary image. Load the image, convert to grayscale, Gaussian blur, then Otsu's threshold to obtain a binary image.
Connect adjacent words together. We create a rectangular kernel and dilate to merge individual contours together.
Detect paragraphs. From here we find contours, obtain the rectangular bounding rectangle coordinates and highlight the rectangular contours.
Otsu's threshold to obtain a binary image
Here's where the magic happens. We can assume that a paragraph is a section of words that are close together, to achieve this we dilate to connect adjacent words
Result
import cv2
import numpy as np
# Load image, grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (7,7), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Create rectangular structuring element and dilate
# (repeated dilation merges adjacent words into one connected region)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
dilate = cv2.dilate(thresh, kernel, iterations=4)

# Find contours and draw rectangle
# (findContours returns 2 or 3 values depending on the OpenCV version)
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    x, y, w, h = cv2.boundingRect(c)
    cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 2)

cv2.imshow('thresh', thresh)
cv2.imshow('dilate', dilate)
cv2.imshow('image', image)
cv2.waitKey()

How to extract multiple objects from an image using Python OpenCV?

I am trying to extract objects from an image by color using OpenCV. I have tried inverse thresholding and grayscale combined with cv2.findContours(), but I am unable to use it recursively. Furthermore, I can't figure out how to "cut out" the match from the original image and save it to a single file.
EDIT
~
import cv2
import numpy as np
# load the images
empty = cv2.imread("empty.jpg")
full = cv2.imread("test.jpg")

# save color copy for visualization
full_c = full.copy()

# convert to grayscale
empty_g = cv2.cvtColor(empty, cv2.COLOR_BGR2GRAY)
full_g = cv2.cvtColor(full, cv2.COLOR_BGR2GRAY)

empty_g = cv2.GaussianBlur(empty_g, (51, 51), 0)
full_g = cv2.GaussianBlur(full_g, (51, 51), 0)
# NOTE(review): this is plain array subtraction; on unsigned 8-bit images
# negative differences wrap around instead of clamping -- confirm intended.
diff = full_g - empty_g

# thresholding
diff_th = cv2.adaptiveThreshold(full_g, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                cv2.THRESH_BINARY, 11, 2)

# combine the difference image and the inverse threshold
zone = cv2.bitwise_and(diff, diff_th, None)

# threshold to get the mask instead of gray pixels
# (reads `zone` and produces `bag`; the original read `bag` before it was
# ever assigned)
_, bag = cv2.threshold(zone, 100, 255, 0)

# dilate to account for the blurring in the beginning
kernel = np.ones((15, 15), np.uint8)
bag = cv2.dilate(bag, kernel, iterations=1)

# find contours, sort and draw the biggest one
# ([-2:] keeps (contours, hierarchy) whether 2 or 3 values are returned)
contours, _ = cv2.findContours(bag, cv2.RETR_TREE,
                               cv2.CHAIN_APPROX_SIMPLE)[-2:]
# keep the three largest contours by area
contours = sorted(contours, key=cv2.contourArea, reverse=True)[:3]

for i, contour in enumerate(contours):
    x, y, width, height = cv2.boundingRect(contour)
    roi = full_c[y:y+height, x:x+width]
    cv2.imwrite("piece"+str(i)+".png", roi)
Here, empty is just a white image of size 1500 * 1000, the same size as the one above, and test is the image shown above.
This is what I came up with. The only downside is that I now get a third image, showing a shadow zone, instead of only the 2 expected.
Here's a simple approach:
Obtain binary image. Load the image, grayscale, Gaussian blur, Otsu's threshold, then dilate to obtain a binary black/white image.
Extract ROI. Find contours, obtain bounding boxes, extract ROI using Numpy slicing, and save each ROI
Binary image (Otsu's thresholding + dilation)
Detected ROIs highlighted in green
To extract each ROI, you can find the bounding box coordinates using cv2.boundingRect(), crop the desired region, then save the image
# Bounding rectangle of contour `c`, unpacked as x, y, width, height
x,y,w,h = cv2.boundingRect(c)
# Crop with NumPy slicing: rows (y range) first, then columns (x range)
ROI = original[y:y+h, x:x+w]
First object
Second object
import cv2
# Load image, grayscale, Gaussian blur, Otsu's threshold, dilate
image = cv2.imread('1.jpg')
original = image.copy()  # untouched copy to crop ROIs from
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7,7))
dilate = cv2.dilate(thresh, kernel, iterations=1)

# Find contours, obtain bounding box coordinates, and extract ROI
# (findContours returns 2 or 3 values depending on the OpenCV version)
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for image_number, c in enumerate(cnts):
    x, y, w, h = cv2.boundingRect(c)
    cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 2)
    # Crop from the untouched copy so the green box is not in the saved ROI.
    ROI = original[y:y+h, x:x+w]
    cv2.imwrite("ROI_{}.png".format(image_number), ROI)

cv2.imshow('image', image)
cv2.imshow('thresh', thresh)
cv2.imshow('dilate', dilate)
cv2.waitKey()

Extract all bounding boxes using OpenCV Python

I have an image that contains more than one bounding box.
I need to extract everything that has bounding boxes in them. So far, from this site I've gotten this answer:
# Crop the region described by bx, by, bw, bh (rows first, then columns);
# these names and `img` are defined outside this fragment
y = img[by:by+bh, bx:bx+bw]
# Save the crop as a PNG; `string` is also defined outside this fragment
cv2.imwrite(string + '.png', y)
It works, however, it only gets one. How should I modify the code? I tried putting it in the loop for contours but it still spews out one image instead of multiple ones.
Thank you so much in advance.
there you go:
import cv2
im = cv2.imread('c:/data/ph.jpg')
gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
# [-2:] keeps (contours, hierarchy) whether findContours returns 2 or 3 values.
contours, hierarchy = cv2.findContours(gray, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2:]
# Output files are numbered from 1, one per contour.
for idx, cnt in enumerate(contours, start=1):
    x, y, w, h = cv2.boundingRect(cnt)
    roi = im[y:y+h, x:x+w]
    cv2.imwrite(str(idx) + '.jpg', roi)
    #cv2.rectangle(im,(x,y),(x+w,y+h),(200,0,0),2)
cv2.imshow('img', im)
cv2.waitKey(0)
A simple approach is to find contours, obtain the bounding rectangle coordinates using cv2.boundingRect(),
then extract the ROI using Numpy slicing. We can keep a counter to number each ROI, then save it with cv2.imwrite(). Here's a working example:
Input image:
Detected ROIs to extract highlighted in green
Saved ROIs
Code
import cv2
import numpy as np
# Load image, grayscale, Otsu's threshold
image = cv2.imread('1.png')
original = image.copy()  # untouched copy to crop ROIs from
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Find contours, obtain bounding box, extract and save ROI
# (findContours returns 2 or 3 values depending on the OpenCV version)
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for ROI_number, c in enumerate(cnts):
    x, y, w, h = cv2.boundingRect(c)
    cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 2)
    # Crop from the untouched copy so the green box is not in the saved ROI.
    ROI = original[y:y+h, x:x+w]
    cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)

cv2.imshow('image', image)
cv2.waitKey()

Categories

Resources