My images are stored in a NumPy array. I want to split each image into individual images, each containing a single character.
import cv2
# Binarise the input: 5x5 Gaussian blur -> inverted Otsu threshold -> 15x15 dilation (x2).
image = arr  # numpy array containing the 200x200 image
original = image.copy()  # clean copy to crop from; `image` gets rectangles drawn on it
gray = image
blur = cv2.GaussianBlur(gray, (5, 5), 0)
_, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 15))
dilate = cv2.dilate(thresh, kernel, iterations=2)

# Outer contours only. findContours returns 2 or 3 values depending on the
# OpenCV version; the contour list is element 0 or 1 respectively.
found = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = found[0] if len(found) == 2 else found[1]

# One bounding box per contour: outline it on `image`, crop it from `original`,
# and write the crop to ROI_<n>.png.
for image_number, c in enumerate(cnts):
    x, y, w, h = cv2.boundingRect(c)
    top_left, bottom_right = (x, y), (x + w, y + h)
    cv2.rectangle(image, top_left, bottom_right, (36, 255, 12), 3)
    ROI = original[y:y + h, x:x + w]
    cv2.imwrite("ROI_{}.png".format(image_number), ROI)

# cv2.imshow('image', image)
# cv2.imshow('thresh', thresh)
# cv2.imshow('dilate', dilate)
# cv2.waitKey()
Related
# 5x5 rectangular structuring element used by the morphological close below.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
# Morphological close (two iterations) on `thresh`, then invert by subtracting from 255.
# NOTE: `thresh` is not defined in this fragment; it must already exist.
close = 255 - cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=2)
# NOTE(review): this shows `thresh`, not the `close` image computed above — confirm which was intended.
cv2.imshow('thresh', thresh)
I am assuming you wish to have only the middle part of the image and remove everything else from the image. One simple way is to search for contours, select the bounding box of the biggest contour and draw it out on a newly created mask.
Example code:
import cv2
import numpy as np
# Keep only the bounding box of the largest contour; everything else is white.
img = cv2.imread("1.png")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU + cv2.THRESH_BINARY_INV)[1]

# findContours expects (image, mode, method): the retrieval mode comes first,
# the approximation method second. It returns 2 or 3 values depending on the
# OpenCV version, so pick the contour list by the tuple length.
found = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
contours = found[0] if len(found) == 2 else found[1]
if len(contours) == 0:
    raise ValueError("no contours found in the thresholded image")

# Largest contour by enclosed area.
cnt = max(contours, key=cv2.contourArea)

# White single-channel canvas with the same height/width as the input.
mask = np.ones(img.shape[:2], np.uint8) * 255

# Copy the grayscale pixels inside the contour's bounding box onto the canvas.
x, y, w, h = cv2.boundingRect(cnt)
mask[y:y + h, x:x + w] = gray[y:y + h, x:x + w]

cv2.imwrite("mask.png", mask)
cv2.imshow("mask", mask)
cv2.waitKey(0)
cv2.destroyAllWindows()
EDIT:
This is how I would try to make it based on your input image
import cv2
import numpy as np
# Build two masks from the largest contour: `mask` (white canvas with the
# contour filled in black) and `mask2` (black canvas with the grayscale pixels
# of the contour's bounding box copied in).
img = cv2.imread('1.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3, 3), 0)
thresh = cv2.threshold(
    blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# findContours expects (image, mode, method): the retrieval mode comes first,
# the approximation method second. It returns 2 or 3 values depending on the
# OpenCV version, so pick the contour list by the tuple length.
found = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
contours = found[0] if len(found) == 2 else found[1]
if len(contours) == 0:
    raise ValueError("no contours found in the thresholded image")

# Largest contour by enclosed area.
cnt = max(contours, key=cv2.contourArea)

mask = np.ones(img.shape[:2], np.uint8) * 255
mask2 = np.zeros(img.shape[:2], dtype=np.uint8)

# thickness=-1 fills the contour interior.
cv2.drawContours(mask, [cnt], -1, (0, 0, 0), -1)

x, y, w, h = cv2.boundingRect(cnt)
mask2[y:y + h, x:x + w] = gray[y:y + h, x:x + w]

cv2.imshow('mask2', mask2)
cv2.imshow('mask', mask)
cv2.waitKey(0)
cv2.destroyAllWindows()
I am trying to locate the X&Y of all horizontal lines in a PDF document.
I was using the code here:
code to detect horizontal lines
This code marks the horizontal lines very well, but I am not able to extract their coordinates in the document.
This is my code:
def DetectLine(pageNum):
    """Outline the horizontal lines of one page image and display the result.

    Reads the JPEG at ``outpath + 'page_' + str(pageNum) + '.jpg'`` (``outpath``
    is a module-level name defined outside this function), draws every detected
    line contour onto the image and shows it in a window named
    ``image_<pageNum>``. Nothing is returned.
    """
    page_label = str(pageNum)
    img = cv2.imread(outpath + 'page_' + page_label + '.jpg')

    # Grayscale, then inverted Otsu threshold to get a binary image.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Morphological opening with a 15x1 kernel to isolate horizontal lines.
    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 1))
    detected_lines = cv2.morphologyEx(
        thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)

    # findContours returns 2 or 3 values depending on the OpenCV version.
    found = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = found[0] if len(found) == 2 else found[1]

    # Draw each line contour onto the page image.
    for c in cnts:
        cv2.drawContours(img, [c], -1, (36, 255, 12), 3)

    cv2.imshow('image_' + page_label, img)
This function gets the pagenumber and reads a pre-prepared JPG of the specific page.
How can I return the Xs & Ys?
If you only need the points:
you can extract it with:
Point 1: c[0][0] or cnts[num][0][0]
Point 2: c[1][0] or cnts[num][1][0]
where num is the index of the contour
Middle point
The solution will be:
(cnts[0][1][0][0]+cnts[0][0][0][0])//2,cnts[0][0][0][1]
Since each line (contour) found has two points, you can calculate the middle point by averaging their x-coordinates.
e.g:
x1=10 and x2=90, the middle point then is (10+90)/2
Here is the complete code:
import cv2
# Detect horizontal lines, print the mid-point of each one and mark it on the image.
image = cv2.imread('2.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Morphological opening with a 15x1 kernel to isolate horizontal lines.
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 1))
detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)

# findContours returns 2 or 3 values depending on the OpenCV version.
cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

for c in cnts:
    # Take the mid-point from the bounding box instead of from contour points
    # 0 and 1: a line thicker than one pixel may be stored as its corner
    # points (so the first two points can share the same x), and a
    # single-point contour has no second point to index.
    left, top, w, h = cv2.boundingRect(c)
    # (w - 1) // 2 matches the average of the two end x-coordinates for a
    # one-pixel-high line, since boundingRect widths are inclusive.
    x, y = left + (w - 1) // 2, top + (h - 1) // 2
    print(f'The middle point is: x: {x}, y: {y}')
    cv2.drawContours(image, [c], -1, (36, 255, 12), 3)
    cv2.circle(image, (x, y), radius=5, color=(0, 0, 255), thickness=-1)

cv2.imshow('thresh', thresh)
cv2.imshow('detected_lines', detected_lines)
cv2.imshow('image', image)
cv2.waitKey()
The result image is the following:
I am trying to extract digits from the below image using simple OpenCV contours approach, but I am getting overlapping bounding boxes over contours
cv2.RETR_EXTERNAL should return only outer contours in the hierarchy but it’s not working as can be seen from the below output
Code:
from matplotlib import pyplot as plt
import cv2
# Draw a bounding box around every external contour and plot the result.
img = cv2.imread('image.png', cv2.IMREAD_GRAYSCALE)

# findContours returns (image, contours, hierarchy) on OpenCV 3 and
# (contours, hierarchy) on other versions; select the contour list by the
# tuple length so the script runs on either.
found = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = found[0] if len(found) == 2 else found[1]

# cvtColor allocates a new array, so `img` itself is left untouched.
imgRGB = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
for c in contours:
    x, y, w, h = cv2.boundingRect(c)
    cv2.rectangle(imgRGB, (x, y), (x + w, y + h), (0, 255, 0), 2)

fig = plt.figure()
ax = fig.add_subplot(111)
ax.imshow(imgRGB, cmap='gray')
Requirements:
opencv-python==3.4.5.20
matplotlib==3.1.2
You need to blur then apply a threshold before finding contours. You need to do this because if you find contours directly on the grayscale image, there are tiny particles which are picked up as contours. Here's a simple process:
Load image, grayscale, Gaussian blur, Otsu's threshold
Find contours and sort using imutils.contours.sort_contours() with the left-to-right parameter
Obtain bounding box then extract ROI using Numpy slicing
Here's the detected bounding boxes highlighted in green
Extracted/saved ROIs
Code
import cv2
from imutils import contours
# Extract each digit as its own image, ordered left to right.
image = cv2.imread('1.png')
original = image.copy()  # clean copy to crop from; `image` gets rectangles drawn on it
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3, 3), 0)

# Threshold the blurred image (not the raw grayscale) so that tiny specks are
# smoothed away before contours are searched.
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

# findContours returns 2 or 3 values depending on the OpenCV version.
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

# `contours` here is the imutils.contours module.
(cnts, _) = contours.sort_contours(cnts, method="left-to-right")

# Outline each box on `image`, crop it from `original`, save it as ROI_<n>.png.
for num, c in enumerate(cnts):
    x, y, w, h = cv2.boundingRect(c)
    cv2.rectangle(image, (x, y), (x + w, y + h), (36, 255, 12), 1)
    ROI = original[y:y + h, x:x + w]
    cv2.imwrite('ROI_{}.png'.format(num), ROI)

cv2.imshow('image', image)
cv2.waitKey()
I am trying to identify paragraphs of text in a .pdf document by first converting it into an image then using OpenCV. But I am getting bounding boxes on lines of text instead of paragraphs. How can I set some threshold or some other limit to get paragraphs instead of lines?
Here is the sample input image:
Here is the output I am getting for the above sample:
I am trying to get a single bounding box on the paragraph in the middle. I am using this code.
import cv2
import numpy as np
# Downscale the page once and work on its grayscale version.
large = cv2.imread('sample image.png')
rgb = cv2.pyrDown(large)
small = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)

# Morphological gradient, then Otsu threshold to get a binary image.
# kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
kernel = np.ones((5, 5), np.uint8)
grad = cv2.morphologyEx(small, cv2.MORPH_GRADIENT, kernel)
_, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

# Close with a 9x1 kernel to join horizontally adjacent regions.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 1))
connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)

# using RETR_EXTERNAL instead of RETR_CCOMP
contours, hierarchy = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#For opencv 3+ comment the previous line and uncomment the following line
#_, contours, hierarchy = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

mask = np.zeros(bw.shape, dtype=np.uint8)

for idx, contour in enumerate(contours):
    x, y, w, h = cv2.boundingRect(contour)

    # Clear the box area, fill in this contour, and measure what fraction of
    # the bounding box the filled contour covers.
    mask[y:y+h, x:x+w] = 0
    cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
    r = float(cv2.countNonZero(mask[y:y+h, x:x+w])) / (w * h)

    # Skip sparse or small boxes.
    if r <= 0.45 or w <= 8 or h <= 8:
        continue
    cv2.rectangle(rgb, (x, y), (x+w-1, y+h-1), (0, 255, 0), 2)

cv2.imshow('rects', rgb)
cv2.waitKey(0)
This is a classic situation for dilate. Whenever you want to connect multiple items together, you can dilate them to join adjacent contours into a single contour. Here's a simple approach:
Obtain binary image. Load the image, convert to grayscale, Gaussian blur, then Otsu's threshold to obtain a binary image.
Connect adjacent words together. We create a rectangular kernel and dilate to merge individual contours together.
Detect paragraphs. From here we find contours, obtain the rectangular bounding rectangle coordinates and highlight the rectangular contours.
Otsu's threshold to obtain a binary image
Here's where the magic happens. We can assume that a paragraph is a section of words that are close together, to achieve this we dilate to connect adjacent words
Result
import cv2
import numpy as np
# Binary image: grayscale -> 7x7 Gaussian blur -> inverted Otsu threshold.
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (7, 7), 0)
_, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Dilate four times with a 5x5 rectangular kernel so nearby words merge.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
dilate = cv2.dilate(thresh, kernel, iterations=4)

# Outer contours of the merged blobs. findContours returns 2 or 3 values
# depending on the OpenCV version.
found = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = found[0] if len(found) == 2 else found[1]

# Outline the bounding box of every blob on the input image.
for c in cnts:
    x, y, w, h = cv2.boundingRect(c)
    top_left, bottom_right = (x, y), (x + w, y + h)
    cv2.rectangle(image, top_left, bottom_right, (36, 255, 12), 2)

for title, picture in (('thresh', thresh), ('dilate', dilate), ('image', image)):
    cv2.imshow(title, picture)
cv2.waitKey()
I am trying to extract objects from an image by color using OpenCV. I have tried inverse thresholding on a grayscale image combined with cv2.findContours(), but I am unable to apply it recursively. Furthermore, I can't figure out how to "cut out" each match from the original image and save it to its own file.
EDIT
~
import cv2
import numpy as np
# load the images
empty = cv2.imread("empty.jpg")
full = cv2.imread("test.jpg")

# keep a colour copy to crop the pieces from
full_c = full.copy()

# convert to grayscale and blur with a 51x51 Gaussian kernel
empty_g = cv2.cvtColor(empty, cv2.COLOR_BGR2GRAY)
full_g = cv2.cvtColor(full, cv2.COLOR_BGR2GRAY)
empty_g = cv2.GaussianBlur(empty_g, (51, 51), 0)
full_g = cv2.GaussianBlur(full_g, (51, 51), 0)

# Absolute difference: subtracting uint8 arrays with `-` wraps around modulo
# 256 whenever the result would be negative.
diff = cv2.absdiff(full_g, empty_g)

# adaptive thresholding of the blurred image
diff_th = cv2.adaptiveThreshold(
    full_g, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)

# combine the difference image and the adaptive threshold
zone = cv2.bitwise_and(diff, diff_th, None)

# threshold to get a binary mask instead of gray pixels
_, bag = cv2.threshold(zone, 100, 255, cv2.THRESH_BINARY)

# dilate to account for the blurring in the beginning
kernel = np.ones((15, 15), np.uint8)
bag = cv2.dilate(bag, kernel, iterations=1)

# find contours (findContours returns 2 or 3 values depending on the OpenCV
# version) and keep the three largest by area
found = cv2.findContours(bag, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours = found[0] if len(found) == 2 else found[1]
contours = sorted(contours, key=cv2.contourArea, reverse=True)[:3]

# crop each kept contour's bounding box from the colour copy and save it
for i, contour in enumerate(contours):
    x, y, width, height = cv2.boundingRect(contour)
    roi = full_c[y:y + height, x:x + width]
    cv2.imwrite("piece"+str(i)+".png", roi)
Where empty is just a white image size 1500 * 1000 as the one above and test is the one above.
This is what I came up with, only downside, I have a third image instead of only the 2 expected showing a shadow zone now...
Here's a simple approach:
Obtain binary image. Load the image, grayscale, Gaussian blur, Otsu's threshold, then dilate to obtain a binary black/white image.
Extract ROI. Find contours, obtain bounding boxes, extract ROI using Numpy slicing, and save each ROI
Binary image (Otsu's thresholding + dilation)
Detected ROIs highlighted in green
To extract each ROI, you can find the bounding box coordinates using cv2.boundingRect(), crop the desired region, then save the image
# Bounding box of contour `c`: top-left corner (x, y), width w, height h.
x,y,w,h = cv2.boundingRect(c)
# NumPy slicing is [rows, columns], i.e. [y-range, x-range].
ROI = original[y:y+h, x:x+w]
First object
Second object
import cv2
# Binary image: grayscale -> 5x5 Gaussian blur -> inverted Otsu threshold -> 7x7 dilation.
image = cv2.imread('1.jpg')
original = image.copy()  # clean copy to crop from; `image` gets rectangles drawn on it
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
_, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
dilate = cv2.dilate(thresh, kernel, iterations=1)

# Outer contours only. findContours returns 2 or 3 values depending on the
# OpenCV version; the contour list is element 0 or 1 respectively.
found = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = found[0] if len(found) == 2 else found[1]

# Outline each bounding box on `image`, crop it from `original`, and save the
# crop as ROI_<n>.png.
for image_number, c in enumerate(cnts):
    x, y, w, h = cv2.boundingRect(c)
    top_left, bottom_right = (x, y), (x + w, y + h)
    cv2.rectangle(image, top_left, bottom_right, (36, 255, 12), 2)
    ROI = original[y:y + h, x:x + w]
    cv2.imwrite("ROI_{}.png".format(image_number), ROI)

for title, picture in (('image', image), ('thresh', thresh), ('dilate', dilate)):
    cv2.imshow(title, picture)
cv2.waitKey()