Extract all bounding boxes using OpenCV Python - python

I have an image that contains more than one bounding box.
I need to extract everything that has bounding boxes in them. So far, from this site I've gotten this answer:
y = img[by:by+bh, bx:bx+bw]
cv2.imwrite(string + '.png', y)
It works, however, it only gets one. How should I modify the code? I tried putting it in the loop for contours but it still spews out one image instead of multiple ones.
Thank you so much in advance.

there you go:
import cv2
im = cv2.imread('c:/data/ph.jpg')
gray=cv2.cvtColor(im,cv2.COLOR_BGR2GRAY)
contours, hierarchy = cv2.findContours(gray,cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)[-2:]
idx =0
for cnt in contours:
idx += 1
x,y,w,h = cv2.boundingRect(cnt)
roi=im[y:y+h,x:x+w]
cv2.imwrite(str(idx) + '.jpg', roi)
#cv2.rectangle(im,(x,y),(x+w,y+h),(200,0,0),2)
cv2.imshow('img',im)
cv2.waitKey(0)

A simple approach is to find contours, obtain the bounding rectangle coordinates using cv2.boundingRect()
then extract the ROI using Numpy slicing. We can keep a counter to save each ROI then save it with cv2.imwrite(). Here's a working example:
Input image:
Detected ROIs to extract highlighted in green
Saved ROIs
Code
import cv2
import numpy as np
# Load image, grayscale, Otsu's threshold
image = cv2.imread('1.png')
original = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Find contours, obtain bounding box, extract and save ROI
ROI_number = 0
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
x,y,w,h = cv2.boundingRect(c)
cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 2)
ROI = original[y:y+h, x:x+w]
cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)
ROI_number += 1
cv2.imshow('image', image)
cv2.waitKey()

Related

Extracting and saving characters from an image

I am following up from this post: How to extract only characters from image?
This solution works perfectly for me (with some tweaking) for its desired purpose. However, I am attempting to take it 1 step further by saving each of the characters. So in the example of this post, I wish to save the characters K, N, and M as their own individual images. I attempted iterating through the nested if loop with a cv2.imwrite function with the rect object although the final output is 7 images containing the overall image with just an additional rectangle to highlight the next contour each time.
Here's a simple approach:
Obtain binary image. Load image, grayscale, Otsu's threshold
Extract ROIs. Find contours
and sort from left-to-right to ensure we have the contours in the correct order with imutils.contours.sort_contours. We filter using contour area then extract and save each ROI using Numpy slicing.
Input
Binary image
Detected characters highlighted in green
Extracted ROIs
Code
import cv2
from imutils import contours
# Load image, grayscale, Otsu's threshold
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU + cv2.THRESH_BINARY_INV)[1]
# Find contours, sort from left-to-right, then crop
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts, _ = contours.sort_contours(cnts, method="left-to-right")
# Filter using contour area and extract ROI
ROI_number = 0
for c in cnts:
area = cv2.contourArea(c)
if area > 10:
x,y,w,h = cv2.boundingRect(c)
ROI = image[y:y+h, x:x+w]
cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)
cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 2)
ROI_number += 1
cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.waitKey()

Recognize single characters on a page with Tesseract

this image returns empty string;
basically I am trying to make a bot for WOW game, but I am really new to this OCR thing. I cannot make tesseract to read this image; I want an unordered list of characters and if possible coordinates of each square containing them. Is there anyway to do this?
Thank you for your time!
here is my code:
from PIL import Image
import cv2
from pytesseract import image_to_string
column = Image.open('photo.png')
gray = column.convert('L')
blackwhite = gray.point(lambda x: 255 if x < 200 else 0, '1')
blackwhite.save("code_bw.jpg")
print(image_to_string(cv2.imread("code_bw.jpg")))
You need to do some preprocessing to isolate the text characters. A simple approach is to Otsu's threshold to obtain a binary image then we can find contours and filter using aspect ratio + contour area. This will give us the bounding box coordinates of the text where we can draw this onto a mask. We bitwise-and the mask with the input image to get our cleaned image then throw it into OCR. Here's the result:
Detected text characters
Result
Result from OCR
A
A R
P
Code
import cv2
import pytesseract
import numpy as np
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# Load image, grayscale, Otsu's threshold
image = cv2.imread('1.jpg')
original = image.copy()
mask = np.zeros(image.shape, dtype=np.uint8)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Find contours and filter using aspect ratio and area
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
area = cv2.contourArea(c)
x,y,w,h = cv2.boundingRect(c)
ar = w / float(h)
if area > 1000 and ar > .85 and ar < 1.2:
cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 2)
cv2.rectangle(mask, (x, y), (x + w, y + h), (255,255,255), -1)
ROI = original[y:y+h, x:x+w]
# Bitwise-and to isolate characters
result = cv2.bitwise_and(original, mask)
result[mask==0] = 255
# OCR
data = pytesseract.image_to_string(result, lang='eng',config='--psm 6')
print(data)
cv2.imshow('image', image)
cv2.imshow('thresh', thresh)
cv2.imshow('result', result)
cv2.waitKey()

How to extract only outer contours from an image (OpenCV)

I am trying to extract digits from the below image using simple OpenCV contours approach, but I am getting overlapping bounding boxes over contours
cv2.RETR_EXTERNAL should return only outer contours in the hierarchy but it’s not working as can be seen from the below output
Code:
from matplotlib import pyplot as plt
import cv2
img = cv2.imread('image.png', 0)
_, contours, _ = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
imgRGB = cv2.cvtColor(img.copy(), cv2.COLOR_GRAY2RGB)
for c in contours:
x,y,w,h = cv2.boundingRect(c)
cv2.rectangle(imgRGB, (x, y), (x+w, y+h), (0,255,0), 2)
fig = plt.figure()
ax = fig.add_subplot(111)
ax.imshow(imgRGB, cmap='gray')
Requirements:
opencv-python==3.4.5.20
matplotlib==3.1.2
You need to blur then apply a threshold before finding contours. You need to do this because if you find contours directly on the grayscale image, there are tiny particles which are picked up as contours. Here's a simple process:
Load image, grayscale, Gaussian blur, Otsu's threshold
Find contours and sort using imutils.contours.sort_contours() with the left-to-right parameter
Obtain bounding box then extract ROI using Numpy slicing
Here's the detected bounding boxes highlighted in green
Extracted/saved ROIs
Code
import cv2
from imutils import contours
image = cv2.imread('1.png')
original = image.copy()
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3,3), 0)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
(cnts, _) = contours.sort_contours(cnts, method="left-to-right")
num = 0
for c in cnts:
x,y,w,h = cv2.boundingRect(c)
cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 1)
ROI = original[y:y+h, x:x+w]
cv2.imwrite('ROI_{}.png'.format(num), ROI)
num += 1
cv2.imshow('image', image)
cv2.waitKey()

How to detect diagram region and extract(crop) it from a research paper's Image

I want to extract all the figures/diagrams from this research paper image. How can I do that?
I just want the figure part is there any way I could crop out that part?
To extract only the figures/diagram here's an approach using OpenCV:
Obtain binary image. Load the image, convert to grayscale and Otsu's threshold to get a binary image.
Connect text contours. We take advantage of the observation that text is structured in paragraphs so we can dilate with a horizontal contour to connect individual words into a single contour.
Remove non-diagram contours. We find contours and filter using aspect ration and contour area. We effectively remove non-diagram contours by filling in the contour.
Form a single bounding box. Iterate through remaining contours and determine the bounding box coordinates
Extract ROI. Crop/Extract the diagram using Numpy slicing.
Here's a visualization of each step:
Threshold image
Dilate with horizontal kernel
Filtering to remove non-diagram contours
Detected diagram bounding box
Extracted ROI
Note: This approach is with the assumption that there is only one diagram in the image. If there are multiple, then you can remove step #4 to get multiple ROIs and save each individual ROI as a separate image. I'm sure it would be an easy change :)
Code
import cv2
import numpy as np
# Load image, grayscale, Otsu's threshold
image = cv2.imread('1.png')
original = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Dilate with horizontal kernel
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20,10))
dilate = cv2.dilate(thresh, kernel, iterations=2)
# Find contours and remove non-diagram contours
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
x,y,w,h = cv2.boundingRect(c)
area = cv2.contourArea(c)
if w/h > 2 and area > 10000:
cv2.drawContours(dilate, [c], -1, (0,0,0), -1)
# Iterate through diagram contours and form single bounding box
boxes = []
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
x, y, w, h = cv2.boundingRect(c)
boxes.append([x,y, x+w,y+h])
boxes = np.asarray(boxes)
x = np.min(boxes[:,0])
y = np.min(boxes[:,1])
w = np.max(boxes[:,2]) - x
h = np.max(boxes[:,3]) - y
# Extract ROI
cv2.rectangle(image, (x,y), (x + w,y + h), (36,255,12), 3)
ROI = original[y:y+h, x:x+w]
cv2.imshow('image', image)
cv2.imshow('thresh', thresh)
cv2.imshow('dilate', dilate)
cv2.imshow('ROI', ROI)
cv2.waitKey()

How to extract multiple objects from an image using Python OpenCV?

I am trying to extract object from an image using the color using OpenCV, I have tried by inverse thresholding and grayscale combined with cv2.findContours() but I am unable to use it recursively. Furthermore I can't figure out how to "cut out" the match from the original image and save it to a single file.
EDIT
~
import cv2
import numpy as np
# load the images
empty = cv2.imread("empty.jpg")
full = cv2.imread("test.jpg")
# save color copy for visualization
full_c = full.copy()
# convert to grayscale
empty_g = cv2.cvtColor(empty, cv2.COLOR_BGR2GRAY)
full_g = cv2.cvtColor(full, cv2.COLOR_BGR2GRAY)
empty_g = cv2.GaussianBlur(empty_g, (51, 51), 0)
full_g = cv2.GaussianBlur(full_g, (51, 51), 0)
diff = full_g - empty_g
# thresholding
diff_th =
cv2.adaptiveThreshold(full_g,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY,11,2)
# combine the difference image and the inverse threshold
zone = cv2.bitwise_and(diff, diff_th, None)
# threshold to get the mask instead of gray pixels
_, zone = cv2.threshold(bag, 100, 255, 0)
# dilate to account for the blurring in the beginning
kernel = np.ones((15, 15), np.uint8)
bag = cv2.dilate(bag, kernel, iterations=1)
# find contours, sort and draw the biggest one
contours, _ = cv2.findContours(bag, cv2.RETR_TREE,
cv2.CHAIN_APPROX_SIMPLE)
contours = sorted(contours, key=cv2.contourArea, reverse=True)[:3]
i = 0
while i < len(contours):
x, y, width, height = cv2.boundingRect(contours[i])
roi = full_c[y:y+height, x:x+width]
cv2.imwrite("piece"+str(i)+".png", roi)
i += 1
Where empty is just a white image size 1500 * 1000 as the one above and test is the one above.
This is what I came up with, only downside, I have a third image instead of only the 2 expected showing a shadow zone now...
Here's a simple approach:
Obtain binary image. Load the image, grayscale, Gaussian blur, Otsu's threshold, then dilate to obtain a binary black/white image.
Extract ROI. Find contours, obtain bounding boxes, extract ROI using Numpy slicing, and save each ROI
Binary image (Otsu's thresholding + dilation)
Detected ROIs highlighted in green
To extract each ROI, you can find the bounding box coordinates using cv2.boundingRect(), crop the desired region, then save the image
x,y,w,h = cv2.boundingRect(c)
ROI = original[y:y+h, x:x+w]
First object
Second object
import cv2
# Load image, grayscale, Gaussian blur, Otsu's threshold, dilate
image = cv2.imread('1.jpg')
original = image.copy()
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7,7))
dilate = cv2.dilate(thresh, kernel, iterations=1)
# Find contours, obtain bounding box coordinates, and extract ROI
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
image_number = 0
for c in cnts:
x,y,w,h = cv2.boundingRect(c)
cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 2)
ROI = original[y:y+h, x:x+w]
cv2.imwrite("ROI_{}.png".format(image_number), ROI)
image_number += 1
cv2.imshow('image', image)
cv2.imshow('thresh', thresh)
cv2.imshow('dilate', dilate)
cv2.waitKey()

Categories

Resources