How to separate images using watershed algorithm in Python - python

How can I separate the individual images from a composite image after image segmentation using the watershed algorithm in Python?
The attached image consists of 4 images; we need to apply image segmentation and separate each individual image from those 4.

We will flood fill it first
import cv2
import numpy as np

# Read the image as a single-channel grayscale array.
im_in = cv2.imread("2SNAT.jpg", cv2.IMREAD_GRAYSCALE)
if im_in is None:
    # cv2.imread reports a missing or unreadable file by returning None.
    raise FileNotFoundError("could not read image: 2SNAT.jpg")

# Inverse binary threshold:
#   values above 220 become 0,
#   values of 220 or below become 255.
th, im_th = cv2.threshold(im_in, 220, 255, cv2.THRESH_BINARY_INV)

# floodFill works in place, so operate on a copy of the thresholded image.
im_floodfill = im_th.copy()

# The flood-fill mask must be 2 pixels larger than the image in each dimension.
h, w = im_th.shape[:2]
mask = np.zeros((h + 2, w + 2), np.uint8)

# Flood fill with 255 starting from the top-left corner (0, 0).
cv2.floodFill(im_floodfill, mask, (0, 0), 255)

# Invert the flood-filled image so that only the pixels the fill could not
# reach are set.
im_floodfill_inv = cv2.bitwise_not(im_floodfill)

# Combine the two images to get the foreground.
im_out = im_th | im_floodfill_inv
Then find contour and crop out
# cv2.findContours returns (image, contours, hierarchy) on OpenCV 3.x but
# (contours, hierarchy) on 2.x and 4.x; the last two items are the same on all.
contours, hierarchy = cv2.findContours(
    im_out.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
)[-2:]

# Keep only contours whose area exceeds 1000 px; smaller ones are dropped.
final_contours = [
    contour for contour in contours if cv2.contourArea(contour) > 1000
]
Crop out step, also drawing rectangle on original image
# Crop from an untouched copy: rectangles are drawn on im_in inside the loop,
# and cropping from the same array would bake those borders into the crops.
im_clean = im_in.copy()

for counter, c in enumerate(final_contours, start=1):
    # Simplify the contour, then take the bounding box of the simplified shape.
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.01 * peri, True)
    x, y, w, h = cv2.boundingRect(approx)
    print(x, y, w, h)

    # Only keep boxes whose width/height ratio lies in [0.8, 4].
    aspect_ratio = w / float(h)
    if 0.8 <= aspect_ratio <= 4:
        cv2.rectangle(im_in, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.imwrite('splitted_{}.jpg'.format(counter), im_clean[y:y + h, x:x + w])

# Save the annotated overview once all rectangles have been drawn.
cv2.imwrite('rectangled_split.jpg', im_in)

Instead of using watershed, here's a simple approach using thresholding + morphological operations. The idea is to obtain a binary image then perform morph close to combine each object as a single contour. We then find contours and extract/save each ROI using Numpy slicing.
Here's each individual object highlighted in green
Individual saved object
Code
import cv2
# Read the input; one array is annotated, the untouched copy is cropped from.
annotated = cv2.imread('1.jpg')
pristine = annotated.copy()

# Binarise: grayscale followed by an inverted Otsu threshold.
grey = cv2.cvtColor(annotated, cv2.COLOR_BGR2GRAY)
_, binary = cv2.threshold(grey, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Morphological close (7x7 rectangle, three iterations) to merge each object
# into a single blob.
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, rect_kernel, iterations=3)

# findContours returns two or three values depending on the OpenCV version;
# pick out the contour list in either case.
found = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
outlines = found[1] if len(found) != 2 else found[0]

# Outline every object and save its bounding-box crop.
for index, outline in enumerate(outlines):
    left, top, width, height = cv2.boundingRect(outline)
    cv2.rectangle(annotated, (left, top), (left + width, top + height), (36, 255, 12), 2)
    cv2.imwrite('ROI_{}.png'.format(index), pristine[top:top + height, left:left + width])

cv2.imshow('image', annotated)
cv2.waitKey()

Related

Unable to segment handwritten characters

I am trying to extract handwritten numbers and letters from an image; for that I followed this Stack Overflow link. It works fine for most images where the letters are written with a marker, but it fails badly on images where the text is written with a pen. I need some help to fix this.
Below is my code:
import cv2
import imutils
from imutils import contours
# Load the image and resize it to a fixed width of 350 px.
image = cv2.imread('xxx/pic_crop_7.png')
image = imutils.resize(image, width=350)
img = image.copy()  # separate canvas for drawing; crops are taken from `image`

# Remove border lines. Closing with a 1x50 / 50x1 kernel keeps only long
# vertical / horizontal dark structures (assuming dark ink on a light
# background); inverting them and adding them back saturates those lines.
kernel_vertical = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 50))
temp1 = 255 - cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel_vertical)
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (50, 1))
temp2 = 255 - cv2.morphologyEx(image, cv2.MORPH_CLOSE, horizontal_kernel)
temp3 = cv2.add(temp1, temp2)
result = cv2.add(temp3, image)

# Convert to grayscale, blur, and apply an inverted Otsu threshold.
gray = cv2.cvtColor(result, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (5, 5), 0)
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)

# Find contours. findContours returns (contours, hierarchy) or
# (image, contours, hierarchy) depending on the OpenCV version, so the
# contour list is element 0 of a 2-tuple and element 1 of a 3-tuple.
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

# Keep contours above the minimum area and outline them on the canvas.
MIN_AREA = 45
digit_contours = [c for c in cnts if cv2.contourArea(c) > MIN_AREA]
for c in digit_contours:
    x, y, w, h = cv2.boundingRect(c)
    cv2.rectangle(img, (x, y), (x + w, y + h), (36, 255, 12), 2)

# NOTE(review): imutils' sort_contours appears to fail on an empty list, so
# skip the call when nothing passed the area filter — confirm against imutils.
if digit_contours:
    sorted_digit_contours = contours.sort_contours(digit_contours, method='left-to-right')[0]
else:
    sorted_digit_contours = ()

# Save each character crop in left-to-right order.
for contour_number, c in enumerate(sorted_digit_contours):
    x, y, w, h = cv2.boundingRect(c)
    ROI = image[y:y + h, x:x + w]
    cv2.imwrite('xxx/segment_{}.png'.format(contour_number), ROI)

cv2.imshow('thresh', thresh)
cv2.imshow('img', img)
cv2.waitKey()
It is correctly able to extract the numbers when written using marker.
Below is an example:
Original Image
Correctly extracted characters
Image where it fails to read.
Original Image
Incorrectly Extracting
In this case, you only need to adjust your parameter.
Because there are no vertical lines in the background of your handwritten characters, I decided to delete the vertical-line removal step.
# Remove border
# Only the horizontal pass is kept here: a morphological close with a wide
# 50x1 rectangular kernel, inverted with `255 - ...`.
# NOTE(review): presumably this isolates long horizontal dark lines on a light
# background — confirm on the actual input.
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (50,1))
temp2 = 255 - cv2.morphologyEx(image, cv2.MORPH_CLOSE, horizontal_kernel)
# cv2.add saturates at 255 instead of wrapping around.
result = cv2.add(temp2, image)
And it works.
The solution that CodingPeter has given is perfectly fine, except that it may not be generic apropos the two test images you have posted. So, here's my take on it that might work on both of your test images, albeit with a little lesser accuracy.
import numpy as np
import cv2
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (20, 20)
plt.rcParams["image.cmap"] = 'gray'

# NOTE: despite its name, img_rgb holds BGR data — that is what cv2.imread returns.
img_rgb = cv2.imread('path/to/your/image.jpg')
img = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
th = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)

# Horizontal line candidates: open with a wide 15x1 kernel, then erase every
# component narrower than 20 px so only long runs remain.
# `[-2]` selects the contour list on every OpenCV version.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 1))
horiz = cv2.morphologyEx(th, cv2.MORPH_OPEN, kernel, iterations=3)
ctrs = cv2.findContours(horiz, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
for ctr in ctrs:
    x, y, w, h = cv2.boundingRect(ctr)
    if w < 20:
        cv2.drawContours(horiz, [ctr], 0, 0, cv2.FILLED)

# Vertical line candidates: open with a tall 1x10 kernel, then erase every
# component shorter than 25 px.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 10))
vert = cv2.morphologyEx(th, cv2.MORPH_OPEN, kernel, iterations=3)
ctrs = cv2.findContours(vert, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
for ctr in ctrs:
    x, y, w, h = cv2.boundingRect(ctr)
    if h < 25:
        cv2.drawContours(vert, [ctr], 0, 0, cv2.FILLED)

# Remove the detected lines from the binary image. cv2.subtract saturates at 0,
# whereas a plain uint8 `-` would wrap around on any pixel set only in the
# line mask.
th = cv2.subtract(th, horiz | vert)

ctrs = cv2.findContours(th, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
min_ctr_area = 400  # Min character bounding box area
for ctr in ctrs:
    x, y, w, h = cv2.boundingRect(ctr)
    # Filter contours based on bounding-box size
    if w * h > min_ctr_area and w < 100 and h < 100:
        cv2.rectangle(img_rgb, (x, y), (x + w, y + h), (0, 255, 0), 1)

# matplotlib expects RGB channel order, so convert from OpenCV's BGR first.
plt.imshow(cv2.cvtColor(img_rgb, cv2.COLOR_BGR2RGB))
Of course some of the parameters here are hard-coded for filtering, which compare the contour height and width to ascertain whether it is a part of a line or maybe a character. With different images you may have to smartly change these values.

Removing background color from image opencv python

I have many images of specimen which have uncontrollable background color. Some of them have black background. Some of them have white background. Some of them have green background, etc.
I would like to remove the background color of a given image, where the only object in the image is a single specimen. I tried this code, but it does not work as I expect.
def get_holes(image, thresh):
    """Return a binary mask of the dark regions of *image* with holes filled.

    Args:
        image: BGR image; it is converted with ``cv2.COLOR_BGR2GRAY``.
        thresh: Gray level passed to ``cv2.threshold`` with ``THRESH_BINARY``;
            pixels strictly brighter than this are treated as background.

    Returns:
        Single-channel mask that is 255 where the gray value is at or below
        ``thresh`` or inside any contour of that region, and 0 elsewhere.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    im_bw = cv2.threshold(gray, thresh, 255, cv2.THRESH_BINARY)[1]
    im_bw_inv = cv2.bitwise_not(im_bw)

    # `[-2]` selects the contour list whether findContours returns
    # (contours, hierarchy) or (image, contours, hierarchy).
    contours = cv2.findContours(im_bw_inv, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Fill contours one at a time.
    # NOTE(review): filling them in a single call would presumably leave
    # nested (hole) contours unfilled — confirm before batching.
    for cnt in contours:
        cv2.drawContours(im_bw_inv, [cnt], 0, 255, -1)

    # OR the filled mask with the inverse of the thresholded image, so every
    # pixel at or below the threshold is set regardless of what was drawn.
    return cv2.bitwise_or(im_bw_inv, cv2.bitwise_not(im_bw))
def remove_background(image, thresh, scale_factor=.25, kernel_range=range(1, 15), border=None):
    """Mask *image* with a smoothed version of the ``get_holes`` mask.

    Args:
        image: Image passed to ``get_holes`` and masked for the result.
        thresh: Threshold forwarded to ``get_holes``.
        scale_factor: Factor used to shrink the mask before the closings.
        kernel_range: Iterable of radii ``i``; a ``(2*i+1) x (2*i+1)``
            elliptical closing is applied for each one in turn.
        border: Padding in pixels added around the shrunken mask; a falsy
            value falls back to the last entry of ``kernel_range``.

    Returns:
        ``image`` with every pixel outside the final mask set to zero.
    """
    if not border:
        border = kernel_range[-1]

    # Work on a shrunken, padded copy of the hole mask.
    shrunk = cv2.resize(get_holes(image, thresh), None, fx=scale_factor, fy=scale_factor)
    padded = cv2.copyMakeBorder(shrunk, border, border, border, border, cv2.BORDER_CONSTANT)

    # Apply one closing per radius, feeding each result into the next.
    for radius in kernel_range:
        side = 2 * radius + 1
        ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (side, side))
        padded = cv2.morphologyEx(padded, cv2.MORPH_CLOSE, ellipse)

    # Strip the padding and scale the mask back to the input size.
    cropped = padded[border: -border, border: -border]
    height, width = image.shape[:2]
    mask = cv2.resize(cropped, (width, height))
    return cv2.bitwise_and(image, image, mask=mask)
# NOTE(review): `your_file_location` is a placeholder that is not defined in
# this snippet; it must be bound to an image path before this runs.
file = your_file_location
img = cv2.imread(file)
# NOTE(review): `dm` is not defined here — presumably the module that holds
# remove_background; confirm.
# NOTE(review): get_holes applies THRESH_BINARY with this threshold, and no
# 8-bit pixel is greater than 255, so with thresh=255 the mask likely ends up
# covering the whole image — try a lower threshold.
nb_img = dm.remove_background(img, 255)
These are some example images
May i have your suggestions?
Here's a simple approach with the assumption that there is only one specimen per image.
Kmeans color quantization. We load the image then perform Kmeans color quantization to segment the image into a specified cluster of colors. For instance with clusters=4, the image will be labeled into four colors.
Obtain binary image. Convert to grayscale, Gaussian blur, adaptive threshold.
Draw largest enclosing circle onto mask. Find contours, sort for largest contour using contour area filtering then draw the largest enclosing circle onto a mask using cv2.minEnclosingCircle.
Bitwise-and. Since we have isolated the desired sections to extract, we simply bitwise-and the mask and input image
Input image -> Kmeans -> Binary image
Detected largest enclosing circle -> Mask -> Result
Here's the output for the second image
Input image -> Kmeans -> Binary image
Detected largest enclosing circle -> Mask -> Result
Code
import cv2
import numpy as np
# Kmeans color segmentation
def kmeans_color_quantization(image, clusters=8, rounds=1):
    """Reduce *image* to ``clusters`` colours using k-means.

    Args:
        image: Array of shape ``(height, width, 3)``.
        clusters: Number of colour clusters to fit.
        rounds: Number of attempts passed to ``cv2.kmeans``.

    Returns:
        uint8 array with the same shape as ``image`` in which every pixel is
        replaced by the centre of the cluster it was assigned to.

    Raises:
        ValueError: If ``image`` is not a 3-channel, 3-dimensional array.
    """
    if image.ndim != 3 or image.shape[2] != 3:
        raise ValueError(
            "expected an image of shape (height, width, 3), got {}".format(image.shape)
        )

    # One float32 row per pixel in row-major order — the same layout the
    # per-pixel Python loop produced, without the interpreter overhead.
    samples = image.reshape(-1, 3).astype(np.float32)

    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10000, 0.0001)
    _, labels, centers = cv2.kmeans(
        samples, clusters, None, criteria, rounds, cv2.KMEANS_RANDOM_CENTERS
    )

    # Map every pixel's label back to its (integer) cluster centre.
    centers = np.uint8(centers)
    return centers[labels.flatten()].reshape(image.shape)
# Read the input, keep an untouched copy, and quantise it to four colours.
canvas = cv2.imread('2.jpg')
source = canvas.copy()
quantized = kmeans_color_quantization(canvas, clusters=4)

# Binarise the quantised image: grayscale -> 3x3 Gaussian blur -> inverted
# adaptive threshold.
grey = cv2.cvtColor(quantized, cv2.COLOR_BGR2GRAY)
smoothed = cv2.GaussianBlur(grey, (3, 3), 0)
binary = cv2.adaptiveThreshold(smoothed, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 21, 2)

# Build a filled-circle mask around the contour with the largest area.
circle_mask = np.zeros(source.shape[:2], dtype=np.uint8)
found = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
outlines = found[1] if len(found) != 2 else found[0]
by_area = sorted(outlines, key=cv2.contourArea, reverse=True)
if by_area:
    (cx, cy), radius = cv2.minEnclosingCircle(by_area[0])
    centre = (int(cx), int(cy))
    cv2.circle(canvas, centre, int(radius), (36, 255, 12), 2)
    cv2.circle(circle_mask, centre, int(radius), 255, -1)

# Keep only the pixels inside the circle and paint everything else white.
extracted = cv2.bitwise_and(source, source, mask=circle_mask)
extracted[circle_mask == 0] = (255, 255, 255)

cv2.imshow('thresh', binary)
cv2.imshow('result', extracted)
cv2.imshow('mask', circle_mask)
cv2.imshow('kmeans', quantized)
cv2.imshow('image', canvas)
cv2.waitKey()

Recognize single characters on a page with Tesseract

this image returns empty string;
Basically, I am trying to make a bot for the game WoW, but I am really new to OCR. I cannot get Tesseract to read this image; I want an unordered list of the characters and, if possible, the coordinates of each square containing them. Is there any way to do this?
Thank you for your time!
here is my code:
from PIL import Image
import cv2
from pytesseract import image_to_string
# Load the screenshot and convert it to 8-bit grayscale.
column = Image.open('photo.png')
gray = column.convert('L')

# Binarise to a 1-bit image: pixels darker than 200 become white, the rest black.
blackwhite = gray.point(lambda x: 255 if x < 200 else 0, '1')

# Still write the binarised image to disk so it can be inspected.
blackwhite.save("code_bw.jpg")

# OCR the in-memory image instead of re-reading the JPEG: JPEG is lossy and
# would add compression artefacts around the sharp black/white edges.
print(image_to_string(blackwhite.convert('L')))
You need to do some preprocessing to isolate the text characters. A simple approach is to Otsu's threshold to obtain a binary image then we can find contours and filter using aspect ratio + contour area. This will give us the bounding box coordinates of the text where we can draw this onto a mask. We bitwise-and the mask with the input image to get our cleaned image then throw it into OCR. Here's the result:
Detected text characters
Result
Result from OCR
A
A R
P
Code
import cv2
import pytesseract
import numpy as np
# Point pytesseract at the Tesseract binary (Windows install path).
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load image, grayscale, Otsu's threshold
image = cv2.imread('1.jpg')
original = image.copy()
mask = np.zeros(image.shape, dtype=np.uint8)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Find contours; the contour list is element 0 of a 2-tuple result and
# element 1 of a 3-tuple result, depending on the OpenCV version.
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

# Keep large, roughly square contours: outline them on the preview image and
# fill their bounding box white on the mask.
for c in cnts:
    area = cv2.contourArea(c)
    x, y, w, h = cv2.boundingRect(c)
    ar = w / float(h)
    if area > 1000 and .85 < ar < 1.2:
        cv2.rectangle(image, (x, y), (x + w, y + h), (36, 255, 12), 2)
        cv2.rectangle(mask, (x, y), (x + w, y + h), (255, 255, 255), -1)

# Bitwise-and to isolate characters, then paint everything outside the mask white.
result = cv2.bitwise_and(original, mask)
result[mask == 0] = 255

# OCR
data = pytesseract.image_to_string(result, lang='eng', config='--psm 6')
print(data)

cv2.imshow('image', image)
cv2.imshow('thresh', thresh)
cv2.imshow('result', result)
cv2.waitKey()

How to detect paragraphs in a text document image for a non-consistent text structure in Python OpenCV

I am trying to identify paragraphs of text in a .pdf document by first converting it into an image then using OpenCV. But I am getting bounding boxes on lines of text instead of paragraphs. How can I set some threshold or some other limit to get paragraphs instead of lines?
Here is the sample input image:
Here is the output I am getting for the above sample:
I am trying to get a single bounding box on the paragraph in the middle. I am using this code.
import cv2
import numpy as np
# Load the page and downsample it once.
large = cv2.imread('sample image.png')
rgb = cv2.pyrDown(large)
small = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)

# Morphological gradient with a 5x5 kernel, binarised with Otsu's threshold.
kernel = np.ones((5, 5), np.uint8)
grad = cv2.morphologyEx(small, cv2.MORPH_GRADIENT, kernel)
_, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

# Close with a wide 9x1 kernel to connect horizontally adjacent components.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 1))
connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)

# Using RETR_EXTERNAL instead of RETR_CCOMP. `[-2:]` yields
# (contours, hierarchy) whether findContours returns two or three values.
contours, hierarchy = cv2.findContours(
    connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
)[-2:]

mask = np.zeros(bw.shape, dtype=np.uint8)
for idx, contour in enumerate(contours):
    x, y, w, h = cv2.boundingRect(contour)
    # Clear the box, fill just this contour, and measure how much of the
    # bounding box the filled contour covers.
    mask[y:y + h, x:x + w] = 0
    cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
    r = float(cv2.countNonZero(mask[y:y + h, x:x + w])) / (w * h)
    if r > 0.45 and w > 8 and h > 8:
        cv2.rectangle(rgb, (x, y), (x + w - 1, y + h - 1), (0, 255, 0), 2)

cv2.imshow('rects', rgb)
cv2.waitKey(0)
This is a classic situation for dilate. Whenever you want to connect multiple items together, you can dilate them to join adjacent contours into a single contour. Here's a simple approach:
Obtain binary image. Load the image, convert to grayscale, Gaussian blur, then Otsu's threshold to obtain a binary image.
Connect adjacent words together. We create a rectangular kernel and dilate to merge individual contours together.
Detect paragraphs. From here we find contours, obtain the rectangular bounding rectangle coordinates and highlight the rectangular contours.
Otsu's threshold to obtain a binary image
Here's where the magic happens. We can assume that a paragraph is a section of words that are close together, to achieve this we dilate to connect adjacent words
Result
import cv2
import numpy as np
# Read the page and binarise it: grayscale -> 7x7 Gaussian blur -> inverted Otsu.
page = cv2.imread('1.png')
grey = cv2.cvtColor(page, cv2.COLOR_BGR2GRAY)
smoothed = cv2.GaussianBlur(grey, (7, 7), 0)
_, binary = cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Dilate with a 5x5 rectangle (four iterations) so neighbouring words merge
# into one connected region.
square = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
merged = cv2.dilate(binary, square, iterations=4)

# Outline the bounding box of every merged region.
found = cv2.findContours(merged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
outlines = found[1] if len(found) != 2 else found[0]
for outline in outlines:
    left, top, width, height = cv2.boundingRect(outline)
    cv2.rectangle(page, (left, top), (left + width, top + height), (36, 255, 12), 2)

cv2.imshow('thresh', binary)
cv2.imshow('dilate', merged)
cv2.imshow('image', page)
cv2.waitKey()

How to extract multiple objects from an image using Python OpenCV?

I am trying to extract objects from an image by color using OpenCV. I have tried inverse thresholding and grayscale combined with cv2.findContours(), but I am unable to use it recursively. Furthermore, I can't figure out how to "cut out" the match from the original image and save it to a single file.
EDIT
~
import cv2
import numpy as np
# load the images
empty = cv2.imread("empty.jpg")
full = cv2.imread("test.jpg")

# save color copy for cropping
full_c = full.copy()

# convert to grayscale and blur heavily
empty_g = cv2.cvtColor(empty, cv2.COLOR_BGR2GRAY)
full_g = cv2.cvtColor(full, cv2.COLOR_BGR2GRAY)
empty_g = cv2.GaussianBlur(empty_g, (51, 51), 0)
full_g = cv2.GaussianBlur(full_g, (51, 51), 0)

# Absolute difference. A plain `full_g - empty_g` on uint8 arrays wraps
# around, so a pixel that is only slightly darker than the reference would
# come out close to 255.
diff = cv2.absdiff(full_g, empty_g)

# adaptive (non-inverted) threshold of the blurred image
diff_th = cv2.adaptiveThreshold(full_g, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                cv2.THRESH_BINARY, 11, 2)

# combine the difference image and the adaptive threshold
zone = cv2.bitwise_and(diff, diff_th, None)

# threshold to get the mask instead of gray pixels
_, bag = cv2.threshold(zone, 100, 255, cv2.THRESH_BINARY)

# dilate to account for the blurring in the beginning
kernel = np.ones((15, 15), np.uint8)
bag = cv2.dilate(bag, kernel, iterations=1)

# Find contours and keep the three largest by area. `[-2]` selects the
# contour list on every OpenCV version.
contours = cv2.findContours(bag, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
contours = sorted(contours, key=cv2.contourArea, reverse=True)[:3]

# Save the bounding-box crop of each kept contour.
for i, contour in enumerate(contours):
    x, y, width, height = cv2.boundingRect(contour)
    roi = full_c[y:y + height, x:x + width]
    cv2.imwrite("piece" + str(i) + ".png", roi)
Where empty is just a white image size 1500 * 1000 as the one above and test is the one above.
This is what I came up with. The only downside is that I now get a third image, showing a shadow zone, instead of only the 2 expected...
Here's a simple approach:
Obtain binary image. Load the image, grayscale, Gaussian blur, Otsu's threshold, then dilate to obtain a binary black/white image.
Extract ROI. Find contours, obtain bounding boxes, extract ROI using Numpy slicing, and save each ROI
Binary image (Otsu's thresholding + dilation)
Detected ROIs highlighted in green
To extract each ROI, you can find the bounding box coordinates using cv2.boundingRect(), crop the desired region, then save the image
# Bounding box of contour `c`: top-left corner (x, y), width w, height h.
x,y,w,h = cv2.boundingRect(c)
# NumPy slicing is [rows, cols], i.e. [y-range, x-range].
ROI = original[y:y+h, x:x+w]
First object
Second object
import cv2
# Read the input; one array is annotated, the untouched copy is cropped from.
annotated = cv2.imread('1.jpg')
pristine = annotated.copy()

# Binarise: grayscale -> 5x5 Gaussian blur -> inverted Otsu threshold.
grey = cv2.cvtColor(annotated, cv2.COLOR_BGR2GRAY)
smoothed = cv2.GaussianBlur(grey, (5, 5), 0)
_, binary = cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Dilate once with a 7x7 rectangle.
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
grown = cv2.dilate(binary, rect_kernel, iterations=1)

# Locate the external contours, outline each one, and save its bounding-box crop.
found = cv2.findContours(grown, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
outlines = found[1] if len(found) != 2 else found[0]
for index, outline in enumerate(outlines):
    left, top, width, height = cv2.boundingRect(outline)
    cv2.rectangle(annotated, (left, top), (left + width, top + height), (36, 255, 12), 2)
    cv2.imwrite("ROI_{}.png".format(index), pristine[top:top + height, left:left + width])

cv2.imshow('image', annotated)
cv2.imshow('thresh', binary)
cv2.imshow('dilate', grown)
cv2.waitKey()

Categories

Resources