I'm building a simple OCR and I'm facing a problem: I'm not able to crop the letters after segmenting them using OpenCV. Can anyone help me with a simple way to crop the letters?
Here's the segmenting code.
import cv2
import numpy as np
mser = cv2.MSER_create()

# Original image. Flag -1 loads it as-is, so it stays 3- or 4-channel like
# the file on disk.
image = cv2.imread('1.jpg', -1)
channel_count = image.shape[2]  # i.e. 3 or 4 depending on your image
ignore_mask_color = (255,) * channel_count

# COLOR_BGR2GRAY rejects 4-channel input, so pick the conversion that
# matches the number of channels actually loaded.
to_gray = cv2.COLOR_BGRA2GRAY if channel_count == 4 else cv2.COLOR_BGR2GRAY
gray = cv2.cvtColor(image, to_gray)
vis = image.copy()

# One convex hull per detected MSER region.
regions = mser.detectRegions(gray)
hulls = [cv2.convexHull(p.reshape(-1, 1, 2)) for p in regions[0]]

# Mask defaulting to black for 3-channel and transparent for 4-channel.
# fillConvexPoly fills ONE polygon per call, so the hulls are filled one by
# one instead of passing the whole list.
mask = np.zeros(image.shape, dtype=np.uint8)
for hull in hulls:
    cv2.fillConvexPoly(mask, hull, ignore_mask_color)

# Keep only the pixels inside the hulls (AND with the mask, not the hulls).
masked_image = cv2.bitwise_and(vis, mask)

# Crop every letter with its own hull: cut the bounding box out of the image
# and black out everything in that box that lies outside the hull.
letters = []
for hull in hulls:
    x, y, w, h = cv2.boundingRect(hull)
    # Shift the hull into the coordinate system of the crop; fillConvexPoly
    # needs int32 points.
    local_hull = hull - np.array([x, y], dtype=np.int32)
    letter_mask = np.zeros((h, w, channel_count), dtype=np.uint8)
    cv2.fillConvexPoly(letter_mask, local_hull, ignore_mask_color)
    letters.append(cv2.bitwise_and(vis[y:y + h, x:x + w], letter_mask))

# To save the crops: cv2.imwrite('img{}.png'.format(m), letters[m])
This results:
I need each letter to be cropped using the same hulls. Any help?
You can't crop and directly save the hulls as you can see them in the example you posted. Or, better, you can crop and paste them in a square/rectangle canvas. But it's not the answer you want for this question.
So, if you have all the text which is computer written, best option to begin is to apply cv2.findContours() to the image. There are also other specific tools you can use, but for now (and relatively to this question) use this.
import cv2
import numpy as np
# import image
image = cv2.imread('image.png')

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
cv2.imshow('gray', gray)

# inverted binary threshold: dark glyphs become white foreground
ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
cv2.imshow('second', thresh)

# dilation; a 1x1 kernel leaves the image unchanged, use a larger kernel to
# merge neighbouring strokes into one blob
kernel = np.ones((1, 1), np.uint8)
img_dilation = cv2.dilate(thresh, kernel, iterations=1)
cv2.imshow('dilated', img_dilation)

# find contours; findContours returns 3 values on OpenCV 3.x and 2 values on
# 2.x/4.x, the last two are always (contours, hierarchy)
ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# sort contours left to right by the x coordinate of their bounding box
sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])

# Draw the boxes on a copy so the green outlines never end up inside the
# saved crops (the ROIs are views into `image`).
marked = image.copy()
for i, ctr in enumerate(sorted_ctrs):
    # Get bounding box
    x, y, w, h = cv2.boundingRect(ctr)

    # Getting ROI
    roi = image[y:y + h, x:x + w]

    cv2.rectangle(marked, (x, y), (x + w, y + h), (0, 255, 0), 2)

    # skip tiny blobs (noise, dots, punctuation)
    if w > 15 and h > 15:
        cv2.imwrite('roi{}.png'.format(i), roi)

cv2.imshow('marked areas', marked)
# imshow only paints once the GUI event loop runs
cv2.waitKey(0)
cv2.destroyAllWindows()
You can tweak the kernel size to make the detected rectangles wider or narrower.
I want to retrieve all contours of the image below, but ignore text.
When I try to find the contours of the current image I get the following:
I have no idea how to go about this, as I am new to OpenCV and image processing. I want to ignore the text; how can I achieve this? If ignoring it is not possible, but drawing a single bounding box around the text is, then that would be good too.
Criteria that I need to match:
The contours may vary in size and shape.
The colors from the image may differ.
The colors and size of the text inside the image may differ.
Here is one way to do that in Python/OpenCV.
Read the input
Convert to grayscale
Get Canny edges
Apply morphology close to ensure they are closed
Get all contour hierarchy
Filter contours to keep only those above threshold in perimeter
Draw contours on input
Draw each contour on a black background
Save results
import numpy as np
import cv2
# read input
img = cv2.imread('short_title.png')

# convert to gray
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# get canny edges
edges = cv2.Canny(gray, 1, 50)

# apply morphology close to ensure they are closed
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)

# get contours (the return tuple has 2 or 3 items depending on OpenCV version)
contours = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
contours = contours[0] if len(contours) == 2 else contours[1]

# filter contours to keep only large ones
result = img.copy()
i = 1
for c in contours:
    perimeter = cv2.arcLength(c, True)
    if perimeter > 500:
        # drawContours expects a LIST of contours; wrap the single contour so
        # it is drawn as one closed outline rather than as separate points
        cv2.drawContours(result, [c], -1, (0, 0, 255), 1)

        # draw this contour alone on a black background and save it
        contour_img = np.zeros_like(img, dtype=np.uint8)
        cv2.drawContours(contour_img, [c], -1, (0, 0, 255), 1)
        cv2.imwrite("short_title_contour_{0}.jpg".format(i), contour_img)
        i = i + 1

# save results
cv2.imwrite("short_title_gray.jpg", gray)
cv2.imwrite("short_title_edges.jpg", edges)
cv2.imwrite("short_title_contours.jpg", result)

# show images
cv2.imshow("gray", gray)
cv2.imshow("edges", edges)
cv2.imshow("result", result)
# imshow only paints once the GUI event loop runs
cv2.waitKey(0)
cv2.destroyAllWindows()
All contours on input:
Contour 1:
Contour 2:
Contour 3:
Contour 4:
Here are two options for erasing the text:
Using pytesseract OCR.
Finding white (and small) connected components.
Both solutions build a mask, dilate the mask, and use cv2.inpaint to erase the text.
Using pytesseract:
Find text boxes using pytesseract.image_to_boxes.
Fill the boxes in the mask with 255.
Code sample:
import cv2
import numpy as np
from pytesseract import pytesseract, Output
# Tesseract path
pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"

img = cv2.imread('ShortAndInteresting.png')

# https://stackoverflow.com/questions/20831612/getting-the-bounding-box-of-the-recognized-words-using-python-tesseract
# Run tesseract, returning one line per recognised character with its box
boxes = pytesseract.image_to_boxes(img, lang='eng', config=' --psm 6')

h, w, _ = img.shape  # assumes color image
mask = np.zeros((h, w), np.uint8)

# Fill the bounding boxes on the mask
for line in boxes.splitlines():
    fields = line.split(' ')
    left, bottom, right, top = (int(v) for v in fields[1:5])
    # y is subtracted from the image height to flip it into OpenCV's
    # top-left-origin row coordinates
    mask = cv2.rectangle(mask, (left, h - bottom), (right, h - top), 255, -1)

# Dilate the boxes in the mask so the inpainting also covers glyph borders
mask = cv2.dilate(mask, np.ones((5, 5), np.uint8))

# Remove the text using inpaint (replace the masked pixels with the neighbor pixels)
clean_img = cv2.inpaint(img, mask, 2, cv2.INPAINT_NS)

# Show mask and clean_img for testing
cv2.imshow('mask', mask)
cv2.imshow('clean_img', clean_img)
# imshow only paints once the GUI event loop runs
cv2.waitKey(0)
cv2.destroyAllWindows()
Finding white (and small) connected components:
Use mask = cv2.inRange(img, (230, 230, 230), (255, 255, 255)) for finding the text (assume the text is white).
Finding connected components in the mask using cv2.connectedComponentsWithStats(mask, 4)
Remove large components from the mask - fill components with large area with zeros.
Code sample:
import cv2
import numpy as np
img = cv2.imread('ShortAndInteresting.png')

# Candidate text pixels: everything that is (nearly) white
mask = cv2.inRange(img, (230, 230, 230), (255, 255, 255))

# Finding connected components with statistics (4-connectivity)
nlabel, labels, stats, centroids = cv2.connectedComponentsWithStats(mask, 4)

# Remove large components from the mask (fill components with large area
# with zeros); label 0 is the background and is skipped.
for i in range(1, nlabel):
    area = stats[i, cv2.CC_STAT_AREA]  # Get area
    if area > 1000:
        mask[labels == i] = 0

# Dilate the text in the mask so the inpainting also covers glyph borders
mask = cv2.dilate(mask, np.ones((5, 5), np.uint8))
cv2.imwrite('mask2.png', mask)

# Remove the text using inpaint (replace the masked pixels with the neighbor pixels)
clean_img = cv2.inpaint(img, mask, 2, cv2.INPAINT_NS)

# Show mask and clean_img for testing
cv2.imshow('mask', mask)
cv2.imshow('clean_img', clean_img)
# imshow only paints once the GUI event loop runs
cv2.waitKey(0)
cv2.destroyAllWindows()
Clean image:
My assumption is that you know how to split the image into contours, and the only issue is the presence of the text.
I would recommend using flood fill, find the seed point for each color region, flood fill it to ignore the text values within. Hope that helps!
Refer to example of using floodfill here: https://www.programcreek.com/python/example/89425/cv2.floodFill
Example below copied from link above
def fillhole(input_image):
    """
    Fill the holes of a gray/binary image with the flood-fill method.

    The background is flood-filled from the top-left corner (0, 0); every
    pixel the fill could not reach is treated as a hole and switched on in
    the result.

    Note: only holes surrounded in the connected regions will be filled.

    :param input_image: gray binary image (foreground non-zero). Assumes the
        pixel at (0, 0) is background -- confirm for your data.
    :return: the input image with its enclosed holes filled.
    """
    import cv2 as cv  # local import keeps the snippet self-contained

    im_flood_fill = input_image.copy()
    h, w = input_image.shape[:2]
    # floodFill requires a mask 2 pixels larger than the image in each dimension
    mask = np.zeros((h + 2, w + 2), np.uint8)
    im_flood_fill = im_flood_fill.astype("uint8")
    cv.floodFill(im_flood_fill, mask, (0, 0), 255)
    # After the fill only the holes are still dark; inverting isolates them
    im_flood_fill_inv = cv.bitwise_not(im_flood_fill)
    img_out = input_image | im_flood_fill_inv
    return img_out
I am trying to use Python, NumPy, and OpenCV to analyze the image below and draw a circle around each object found. The idea here is not to identify the bugs, only to detect any object that is different from the background.
Original Image:
Here is the code that I'm using.
import cv2
import numpy as np
img = cv2.imread('per.jpeg', cv2.IMREAD_GRAYSCALE)

# The detector is constructed differently on OpenCV 2.x and on later
# versions, so exactly one of the two branches may run.
if cv2.__version__.startswith('2.'):
    detector = cv2.SimpleBlobDetector()
else:
    detector = cv2.SimpleBlobDetector_create()

keypoints = detector.detect(img)

# Draw each keypoint in red as a circle whose size matches the blob size
imgKeyPoints = cv2.drawKeypoints(img, keypoints, np.array([]), (0, 0, 255), cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

status = cv2.imwrite('teste.jpeg', imgKeyPoints)
print("Image written to file-system : ", status)
But the problem is that I'm getting only a greyscale image as result without any counting or red circle, as shown below:
Since I'm new to OpenCV and object recognition world I'm not able to identify what is wrong, and any help will be very appreciated.
Here is one way in Python/OpenCV.
Threshold on the bugs color in HSV colorspace. Then use morphology to clean up the threshold. Then get contours. Then find the minimum enclosing circle around each contour. Then bias the radius to make a bit larger and draw the circle around each bug.
import cv2
import numpy as np
# read image
img = cv2.imread('bugs.jpg')

# convert image to hsv colorspace
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# threshold on bugs color
# NOTE(review): the HSV bounds were missing from this snippet, which made
# inRange fail with a NameError. The values below are placeholders -- tune
# them until the threshold image isolates the bugs in your picture.
lower = (0, 90, 10)
upper = (100, 250, 170)
thresh = cv2.inRange(hsv, lower, upper)

# apply morphology to clean up: open removes specks, close fills small gaps
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
morph = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (6, 6))
morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)

# get external contours (return tuple has 2 or 3 items depending on version)
contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

result = img.copy()
bias = 10  # extra radius so the circle does not touch the bug
for cntr in contours:
    center, radius = cv2.minEnclosingCircle(cntr)
    cx = int(round(center[0]))
    cy = int(round(center[1]))
    rr = int(round(radius)) + bias
    cv2.circle(result, (cx, cy), rr, (0, 0, 255), 2)

# save results
cv2.imwrite('bugs_threshold.jpg', thresh)
cv2.imwrite('bugs_cleaned.jpg', morph)
cv2.imwrite('bugs_circled.jpg', result)

# display results
cv2.imshow('thresh', thresh)
cv2.imshow('morph', morph)
cv2.imshow('result', result)
# imshow only paints once the GUI event loop runs
cv2.waitKey(0)
cv2.destroyAllWindows()
Threshold Image:
Morphology Cleaned Image:
Resulting Circles:
I am new to deep learning and try to implement a ML algorithm for image clustering. The problem is that I can't crop the objects in an image in Python using OpenCV.
Here is the code I have implemented and it works for some objects if the color of the object is very different(in RGB values) from the background but it doesn't work for the image I need for ML algorithm. What kind of parameters should I have/change? Any suggestions?
import cv2
import numpy as np
from PIL import Image
import tkinter as tk
from tkinter import filedialog as fd
from tkinter import*
import random
from PIL import Image
import sys
myFile = 'Path' + '/crop.png'
nr_of_im = 1
q = 0
r = 0
x_list = []
y_list = []

img = cv2.imread(myFile, cv2.IMREAD_UNCHANGED)

# Binarise the grayscale image; pixels brighter than 30 become foreground.
# NOTE(review): inspect `thresh` -- if it comes out entirely white, every
# object merges into a single contour.
ret, thresh = cv2.threshold(cv2.cvtColor(img.copy(), cv2.COLOR_BGR2GRAY), 30, 255, cv2.THRESH_BINARY)

# findContours returns 2 or 3 values depending on the OpenCV version; the
# last two are always (contours, hierarchy).
contours, hier = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]

for contour in contours:
    # ignore specks
    if cv2.contourArea(contour) > 80:
        x, y, w, h = cv2.boundingRect(contour)
        q = w
        r = h
        # 10 px margin around the box. The start indices are clamped at 0:
        # a negative start would be read as "from the end" by NumPy and give
        # an empty crop that makes cv2.resize fail.
        ROI = img[max(y - 10, 0):y + 10 + h, max(x - 10, 0):x + 10 + w]
        ROI = cv2.resize(ROI, (300, 300))
        # presumably the destination for this crop -- nothing is written here
        file_all = "/images/%d.jpg" % nr_of_im
        nr_of_im += 1
There are 21 objects in the image but the length of contours returns 1. The image looks like so
Your threshold is too low and produces a totally white image for me. You need to increase your threshold. Always view your thresholding to be sure it is working the way you expect. You can always remove the viewing later.
The following works for me using Otsu thresholding with a threshold value of 97. I get 21 contours.
import cv2
import numpy as np
# read image
img = cv2.imread('blocks.jpg')

# convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# threshold (Otsu picks the level automatically; the 0 passed in is ignored)
ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# apply morphology: close fills holes inside regions, open separates large
# regions and removes small ones
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 15))
morph = cv2.morphologyEx(morph, cv2.MORPH_OPEN, kernel)

# get contours (return tuple has 2 or 3 items depending on OpenCV version)
result = img.copy()
contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

# get count of contours
print("number of contours:", len(contours))

# draw bounding boxes on contours
for cntr in contours:
    x, y, w, h = cv2.boundingRect(cntr)
    cv2.rectangle(result, (x, y), (x + w, y + h), (0, 0, 255), 2)

# save results
cv2.imwrite("blocks_thresh.jpg", thresh)
cv2.imwrite("blocks_morphology.jpg", morph)
cv2.imwrite("blocks_bboxes.jpg", result)

# show thresh and result
cv2.imshow("thresh", thresh)
cv2.imshow("morph", morph)
cv2.imshow("result", result)
# imshow only paints once the GUI event loop runs
cv2.waitKey(0)
cv2.destroyAllWindows()
Threshold image:
Morphology cleaned image:
Resulting bounding boxes from contours:
from pdf2image import convert_from_path
import cv2,numpy,os
def pil_to_cv2(image):
    """Convert a PIL-style image to an array in OpenCV channel order.

    :param image: a PIL image, or anything ``numpy.array`` can turn into an
        ``(H, W)`` or ``(H, W, C)`` array, with colour channels in RGB(A) order.
    :return: a new C-contiguous array. RGB becomes BGR, RGBA becomes BGRA
        (alpha stays last), and 2-D grayscale data is returned as a copy.
    """
    open_cv_image = numpy.array(image)
    if open_cv_image.ndim == 2:
        # single-channel image: there is no channel order to swap
        return open_cv_image.copy()
    if open_cv_image.shape[2] == 4:
        # swap only the colour channels; a plain reversal would move alpha
        # to the front
        return open_cv_image[:, :, [2, 1, 0, 3]].copy()
    # reverse the channel axis (RGB -> BGR); copy() makes the view contiguous
    return open_cv_image[:, :, ::-1].copy()
# Render every page of the PDF as a PIL image and convert each one to an
# OpenCV-ordered array.
images = convert_from_path('test.pdf')
cv_h = [pil_to_cv2(i) for i in images]

# A single-page PDF keeps the name 'modified.png'; for several pages the
# files are numbered so that later pages do not overwrite earlier ones.
for page_no, img in enumerate(cv_h):
    if len(cv_h) == 1:
        out_name = 'modified.png'
    else:
        out_name = 'modified_{}.png'.format(page_no)
    cv2.imwrite(out_name, img)
How can I remove the extra white space from the image (top, sides, bottom) without cutting into the drawing? The drawings from the PDF come in different sizes, so I can't crop the images by a fixed amount.
Ideally,the output would look like this
Here is another way to do that in Python/OpenCV.
Read the image
Convert to gray and invert the polarity
Apply morphology close to fill in holes and make one solid region
Get the outer contour and its bounding box
Use the bounding box to crop the image using Numpy slicing
Save the result
import cv2
import numpy as np
# read image
img = cv2.imread('multipower.png')

# convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# invert gray image so the white page becomes 0 and the drawing non-zero
gray = 255 - gray

# threshold: every pixel that is not pure background becomes 255
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY)[1]

# apply close morphology to merge the drawing into solid regions
kernel = np.ones((75, 75), np.uint8)
mask = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

# get contours (presumably just one around the nonzero pixels)
contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
if len(contours) == 0:
    raise ValueError("no content found: the image is blank")

# If the closing left several separate regions, box all of them together so
# no part of the drawing is cropped away.
cntr = np.vstack(contours)
x, y, w, h = cv2.boundingRect(cntr)

# draw contour on input
contour_img = img.copy()
cv2.drawContours(contour_img, contours, -1, (0, 0, 255), 2)

# crop to bounding rectangle
crop = img[y:y + h, x:x + w]

# save cropped image
cv2.imwrite('multipower_crop.png', crop)

# show the images
cv2.imshow("THRESH", thresh)
cv2.imshow("MASK", mask)
cv2.imshow("CONTOUR", contour_img)
cv2.imshow("CROP", crop)
# imshow only paints once the GUI event loop runs
cv2.waitKey(0)
cv2.destroyAllWindows()
Thresholded Image:
Morphology closed image:
Contour image:
import cv2 as cv
import numpy as np
frame = cv.imread('7dcoI.png')
frame_gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)

# Pixels darker than or equal to 85 become 0 (content), the rest 255
mask = cv.threshold(frame_gray, 85, 255, cv.THRESH_BINARY)[1]
rows, cols = mask.shape

# A column/row is "non empty" when it contains at least one content pixel.
# np.where returns the indices in ascending order.
non_empty_columns = np.where(mask.min(axis=0) == 0)[0]
non_empty_rows = np.where(mask.min(axis=1) == 0)[0]

if non_empty_rows.size == 0:
    # no dark pixel at all: keep the whole frame instead of failing on an
    # empty index array
    cropBox = (0, rows - 1, 0, cols - 1)
else:
    # (first row, last row, first column, last column), all inclusive
    cropBox = (non_empty_rows[0], non_empty_rows[-1],
               non_empty_columns[0], non_empty_columns[-1])

# +1 because the box limits are inclusive while slices are end-exclusive
cropped = frame[cropBox[0]:cropBox[1] + 1, cropBox[2]:cropBox[3] + 1, :]
cv.imwrite('out_mask.png', cropped)
I have a small script (GitHub) (based on this answer) to detect objects on a white background. The script is working fine and detects the objects. For example, this image:
becomes this:
and I crop the boundingRect (red one).
I'll be doing further operations on this image. For example instead of a rectangle crop, I will be cropping just the contour. (Anyway, these are further problems to be faced.)
What I want to do, now, is scale up/grow the contour (green one). I'm not sure if scale and grow means the same thing in this context, because when I think of scale, there's usually a single point of origin/anchor point. With grow, it's relative to the edges. I want to have something like this (created in Photoshop):
So after I detect the object/find contours, I want to grow it by some value/ratio, so that I have some space/pixels to modify which won't affect the object. How can I do that?
Mentioned script:
# drop an image on this script file
img_path = Path(sys.argv[1])

# open image with Pillow and convert it to RGB if the image is CMYK
img = Image.open(str(img_path))
if img.mode == "CMYK":
    img = ImageCms.profileToProfile(img, "Color Profiles\\USWebCoatedSWOP.icc", "Color Profiles\\sRGB_Color_Space_Profile.icm", outputMode="RGB")

# Pillow delivers RGB, OpenCV works in BGR
img = cv2.cvtColor(numpy.array(img), cv2.COLOR_RGB2BGR)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# everything darker than near-white (240) counts as object
threshed = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY_INV)[1]

# close small gaps so the object forms one connected region
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11))
morphed = cv2.morphologyEx(threshed, cv2.MORPH_CLOSE, kernel)

# [-2] selects the contour list on every OpenCV version
contours = cv2.findContours(morphed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

# bounding rectangle of the largest contour
contour = sorted(contours, key=cv2.contourArea)[-1]
x, y, w, h = cv2.boundingRect(contour)

# all contours in green, bounding rectangle in red (drawn onto img itself)
final = cv2.drawContours(img, contours, -1, (0, 255, 0), 2)
cv2.rectangle(final, (x, y), (x + w, y + h), (0, 0, 255), 2)

cv2.imshow("final", final)
# imshow only paints once the GUI event loop runs
cv2.waitKey(0)
cv2.destroyAllWindows()
Images posted here are scaled down to keep the question short. Original images and the script(s) can be found on the mentioned (first paragraph) GitHub page.
Thanks to HansHirse's suggestion (using morphological dilation), I've managed to make it work.
# Path of the image to process, taken from the first command-line argument.
img_path = Path(sys.argv[1])
def cmyk_to_rgb(cmyk_img):
    """Open an image file and return it as a BGR array for OpenCV.

    :param cmyk_img: path of the image file to open with Pillow.
    :return: the image as a NumPy array in BGR channel order.

    CMYK images are converted to RGB through the ICC profiles in the
    "Color Profiles" folder. Any other non-RGB mode (grayscale, palette,
    RGBA, ...) is converted with Pillow so the array always has three
    colour channels.
    """
    img = Image.open(cmyk_img)
    if img.mode == "CMYK":
        img = ImageCms.profileToProfile(img, "Color Profiles\\USWebCoatedSWOP.icc", "Color Profiles\\sRGB_Color_Space_Profile.icm", outputMode="RGB")
    elif img.mode != "RGB":
        img = img.convert("RGB")
    return cv2.cvtColor(numpy.array(img), cv2.COLOR_RGB2BGR)
def cv_threshold(img, thresh=128, maxval=255, type=cv2.THRESH_BINARY):
    """Threshold an image, converting it to grayscale first when needed.

    :param img: grayscale (2-D) or BGR (3-D) image.
    :param thresh: threshold level passed to ``cv2.threshold``.
    :param maxval: value given to the pixels that pass the threshold.
    :param type: OpenCV threshold type flag.
    :return: the thresholded single-channel image.
    """
    # a 3-D array is taken to be a BGR colour image
    if len(img.shape) == 3:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # cv2.threshold returns (level used, image); only the image is needed
    threshed = cv2.threshold(img, thresh, maxval, type)[1]
    return threshed
def find_contours(img, to_gray=None):
    """Return the external contours of a binary image.

    The image is first closed with an 11x11 elliptical kernel so that small
    gaps do not split a shape into several contours.

    :param img: binary single-channel image.
    :param to_gray: unused; kept so existing callers keep working.
    :return: the contours found by ``cv2.findContours``.
    """
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (11, 11))
    morphed = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
    contours = cv2.findContours(morphed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # the contour list is second to last whether 2 or 3 values are returned
    return contours[-2]
def mask_from_contours(ref_img, contours):
    """Build a single-channel mask with the given contours filled in white.

    :param ref_img: image whose height and width the mask takes; it may be
        grayscale or colour.
    :param contours: contours to fill.
    :return: ``uint8`` mask of shape ``(H, W)``: 255 inside the contours,
        0 elsewhere.
    """
    # Draw straight into a one-channel canvas; this gives the same mask as
    # drawing white on a colour canvas and converting it to gray, and also
    # works when ref_img has no channel axis.
    mask = numpy.zeros(ref_img.shape[:2], numpy.uint8)
    # thickness -1 fills the contour interiors
    return cv2.drawContours(mask, contours, -1, 255, -1)
def dilate_mask(mask, kernel_size=10):
    """Grow the white regions of a mask by morphological dilation.

    :param mask: single-channel mask image.
    :param kernel_size: side length, in pixels, of the square structuring
        element; larger values grow the regions further.
    :return: the dilated mask.
    """
    kernel = numpy.ones((kernel_size, kernel_size), numpy.uint8)
    dilated = cv2.dilate(mask, kernel, iterations=1)
    return dilated
def draw_contours(src_img, contours):
    """Draw contours and the bounding box of the largest one on a copy.

    :param src_img: BGR image to draw on; it is not modified.
    :param contours: contours to draw.
    :return: a copy of ``src_img`` with every contour outlined in green and
        the bounding rectangle of the largest contour (by area) in red. With
        no contours the plain copy is returned.
    """
    canvas = src_img.copy()
    if len(contours) == 0:
        return canvas

    cv2.drawContours(canvas, contours, -1, (0, 255, 0), 2)

    # Box the largest contour rather than whichever happens to be last in
    # the list, whose order findContours does not tie to size.
    largest = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest)
    cv2.rectangle(canvas, (x, y), (x + w, y + h), (0, 0, 255), 2)
    return canvas
# --- original object outline -------------------------------------------
orig_img = cmyk_to_rgb(str(img_path))
orig_threshed = cv_threshold(orig_img, 240, type=cv2.THRESH_BINARY_INV)
orig_contours = find_contours(orig_threshed)
orig_output = draw_contours(orig_img, orig_contours)

# --- grown outline: fill the contours, dilate the mask, trace it again ---
orig_mask = mask_from_contours(orig_img, orig_contours)
dilated_mask = dilate_mask(orig_mask, 50)
dilated_contours = find_contours(dilated_mask)
dilated_output = draw_contours(orig_img, dilated_contours)

# --- show both results ----------------------------------------------------
for window_title, picture in (("orig_output", orig_output),
                              ("dilated_output", dilated_output)):
    cv2.imshow(window_title, picture)
I believe the code is self-explanatory enough. An example output:
Full script (again) can be found at show_dilated_contours.py
As a bonus, I later wanted to smooth the contours. I came across this blog post in which the author talks about how to smooth the edges of a shape (in Photoshop). The idea is really simple and can also be applied in OpenCV to smooth the contours. The steps are:
Create a mask from contours (or from the shape)
Blur the mask
Threshold the blurred mask (now, we have a smoother mask than the mask in step 1)
Find the contours again on the blurred + thresholded image. Since the mask/shape is smoother, we'll get smoother contours.
Example code and output:
# ... continuing previous code

# pass 1: blur the dilated mask, then re-threshold it to round off corners
smooth_mask_blurred = cv2.GaussianBlur(dilated_mask, (21, 21), 0)
smooth_mask_threshed1 = cv_threshold(smooth_mask_blurred)

# pass 2: repeat on the result of pass 1 for an even smoother outline
smooth_mask_blurred = cv2.GaussianBlur(smooth_mask_threshed1, (21, 21), 0)
smooth_mask_threshed2 = cv_threshold(smooth_mask_blurred)

# find contours from smoothened mask
smooth_mask_contours = find_contours(smooth_mask_threshed2)

# draw the contours on the original image
smooth_mask_output = draw_contours(orig_img, smooth_mask_contours)

cv2.imshow("dilated_output", dilated_output)
cv2.imshow("smooth_mask_output", smooth_mask_output)
# imshow only paints once the GUI event loop runs
cv2.waitKey(0)
cv2.destroyAllWindows()
Full code at show_smooth_contours.py.