I need to extract the bounding boxes of text and save them as sub-images of the main image. I have not found the right code documentation for this task.
Can anyone provide code documentation, help links, or any Python modules that can help crop text from scanned images?
Below I have attached a scanned image and the expected output.
First image: the scanned copy from which the text needs to be cropped.
import cv2
import pytesseract

pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files (x86)\\Tesseract-OCR\\tesseract'
img = cv2.imread("test.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh1 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (18, 18))
dilation = cv2.dilate(thresh1, rect_kernel, iterations=1)
contours, hierarchy = cv2.findContours(dilation, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_NONE)
im2 = img.copy()

# truncate the output file before appending in the loop
file = open("recognized.txt", "w+")
file.write("")
file.close()

for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    rect = cv2.rectangle(im2, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cropped = im2[y:y + h, x:x + w]
    file = open("recognized.txt", "a")
    text = pytesseract.image_to_string(cropped)
    file.write(text)
    file.write("\n")
    crop_img = img[y:y + h, x:x + w]  # just the region you are interested in
    file.close()
Second image: the expected cropped output.
Here is one approach in Python/OpenCV.
Read the input
Get the Canny edges
Get the outer contours of the edges
Filter the contours to remove small extraneous spots
Get the convex hull of the main cluster of edges
Draw the convex hull as white filled on a black background as a mask
Mask to black the outside region of the input
Get the rotated rectangle from the convex hull
From the negative angle and center of the rotated rectangle, rectify the orientation using affine warping
Save the results
Input:
import cv2
import numpy as np

# Read image
img = cv2.imread('receipt.jpg')
hh, ww = img.shape[:2]

# get edges
canny = cv2.Canny(img, 50, 200)

# get contours
contours = cv2.findContours(canny, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

# filter out small regions
cimg = np.zeros_like(canny)
for cntr in contours:
    area = cv2.contourArea(cntr)
    if area > 20:
        cv2.drawContours(cimg, [cntr], 0, 255, 1)

# get convex hull and draw on input
points = np.column_stack(np.where(cimg.transpose() > 0))
hull = cv2.convexHull(points)
himg = img.copy()
cv2.polylines(himg, [hull], True, (0,0,255), 1)

# draw convex hull as filled mask
mask = np.zeros_like(cimg, dtype=np.uint8)
cv2.fillPoly(mask, [hull], 255)

# blacken out input using mask
mimg = img.copy()
mimg = cv2.bitwise_and(mimg, mimg, mask=mask)

# get rotated rectangle
rotrect = cv2.minAreaRect(hull)
(center), (width,height), angle = rotrect
box = cv2.boxPoints(rotrect)
boxpts = np.int32(box)  # np.int0 was removed in recent NumPy versions

# draw rotated rectangle on copy of input
rimg = img.copy()
cv2.drawContours(rimg, [boxpts], 0, (0,0,255), 1)

# from https://www.pyimagesearch.com/2017/02/20/text-skew-correction-opencv-python/
# the `cv2.minAreaRect` function returns values in the
# range [-90, 0); as the rectangle rotates clockwise the
# returned angle tends to 0 -- in this special case we
# need to add 90 degrees to the angle
if angle < -45:
    angle = -(90 + angle)
# otherwise, check width vs height
else:
    if width > height:
        angle = -(90 + angle)
    else:
        angle = -angle

# negate the angle to unrotate
neg_angle = -angle
print('unrotation angle:', neg_angle)
print('')

# Get rotation matrix
# center = (width // 2, height // 2)
M = cv2.getRotationMatrix2D(center, neg_angle, scale=1.0)

# unrotate to rectify
result = cv2.warpAffine(mimg, M, (ww, hh), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT, borderValue=(0,0,0))

# save results
cv2.imwrite('receipt_mask.jpg', mask)
cv2.imwrite('receipt_edges.jpg', canny)
cv2.imwrite('receipt_filtered_edges.jpg', cimg)
cv2.imwrite('receipt_hull.jpg', himg)
cv2.imwrite('receipt_rotrect.jpg', rimg)
cv2.imwrite('receipt_masked_result.jpg', result)

cv2.imshow('canny', canny)
cv2.imshow('cimg', cimg)
cv2.imshow('himg', himg)
cv2.imshow('mask', mask)
cv2.imshow('rimg', rimg)
cv2.imshow('result', result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Canny Edges:
Filtered Edges from Contours:
Convex Hull:
Mask:
Rotated Rectangle:
Rectified Result:
In OpenCV you can use cv2.findContours to find the text regions and draw their bounding boxes. See this article, which explains how to do that: https://www.geeksforgeeks.org/text-detection-and-extraction-using-opencv-and-ocr/
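A minimal sketch of that idea (the file name and threshold settings are placeholders to adapt to your image):

import cv2

img = cv2.imread("scanned.jpg")  # placeholder file name
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)[1]
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)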
Then, after you have your bounding box locations (your region of interest where text is located and which you want to crop), you can use slicing to crop the image:
import cv2

img = cv2.imread("lenna.png")
x, y, w, h = 100, 80, 200, 200  # example bounding box coordinates
crop_img = img[y:y+h, x:x+w]  # just the region you are interested in
cv2.imshow("cropped", crop_img)
cv2.waitKey(0)
If you want to extract the text directly, I think you can use Tesseract OCR via the pytesseract Python package (how to get started: https://pypi.org/project/pytesseract/). You can also make use of OpenCV's OCR-related functionality. Read more: https://nanonets.com/blog/ocr-with-tesseract/
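For example, a minimal sketch (it assumes the Tesseract binary is installed and visible to pytesseract):

import cv2
import pytesseract  # assumes the Tesseract binary is installed and on the PATH

img = cv2.imread("lenna.png")
text = pytesseract.image_to_string(img)
print(text)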
from PIL import Image

original_image = Image.open("nameofimage.jpg")
rotate_image = original_image.rotate(330)  # pass expand=True to avoid clipping the corners
rotate_image.show()

# PIL images are cropped with crop((left, upper, right, lower)),
# not with NumPy-style slicing
x = 100
y = 80
h = 200
w = 200
cropped_image = rotate_image.crop((x, y, x + w, y + h))
cropped_image.show()
For this image, I tried to use a Hough circle to find the center of the "black hole".
After playing with the parameters of cv2.HoughCircles for a long time, the following is the best I could get.
raw image:
# reproducible code for stackoverflow
import cv2
from matplotlib import pyplot as plt
import numpy as np

FILE = "raw_image.jpg"  # placeholder path

# read image and convert it to gray
img = cv2.imread(FILE)
cimg = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
plt.figure(figsize=(18, 18))
plt.imshow(cimg, cmap="gray")

# removing noise
element = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
closing = cv2.morphologyEx(img_gray, cv2.MORPH_CLOSE, element, iterations=7)
plt.figure(figsize=(12, 12))
plt.imshow(closing, cmap="gray")

# try to find the circles
circles = cv2.HoughCircles(closing, cv2.HOUGH_GRADIENT, 3, 50,
                           param1=50, param2=30, minRadius=20, maxRadius=50)
circles = np.uint16(np.around(circles))
for i in circles[0, :]:
    # draw the outer circle
    cv2.circle(cimg, (i[0], i[1]), i[2], (0, 255, 0), 2)
    # draw the center of the circle
    cv2.circle(cimg, (i[0], i[1]), 2, (0, 0, 255), 3)
plt.figure(figsize=(12, 12))
plt.imshow(cimg)
Update:
The one with Canny:
edges = cv2.Canny(closing, 100, 300)
plt.figure(figsize=(12, 12))
plt.imshow(edges, cmap="gray")

circles = cv2.HoughCircles(edges, cv2.HOUGH_GRADIENT, 2, 50,
                           param1=50, param2=30, minRadius=20, maxRadius=60)
circles = np.uint16(np.around(circles))
cimg = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
for i in circles[0, :]:
    # draw the outer circle
    cv2.circle(cimg, (i[0], i[1]), i[2], (0, 255, 0), 2)
    # draw the center of the circle
    cv2.circle(cimg, (i[0], i[1]), 2, (0, 0, 255), 3)
plt.figure(figsize=(12, 12))
plt.imshow(cimg)
Still not the circle that is wanted.
Update:
@crackanddie
Sometimes there is a 6 or 9 in the identity number.
The circle in the 6 or 9 is not very round.
Is there any way to filter that out?
This is an alternative method if you do not want to implement or fiddle with Hough's parameters. You must be sure there's at least one circle visible in your picture. The idea is to create a segmentation mask based on the CMYK color space and filter the blobs of interest by circularity and area. These are the steps:
Convert the image from BGR to CMYK
Threshold the K channel to get a binary mask
Filter blobs by circularity and area
Approximate the filtered blobs as circles
I'm choosing the CMYK color space because the circle is mostly black. The K (key) channel (in this case, black) should do a good job of representing the blob of interest, albeit with some noise, as usual. Let's see the code:
# Imports:
import cv2
import numpy as np
# image path
path = "D://opencvImages//"
fileName = "dyj3O.jpg"
# load image
bgr = cv2.imread(path + fileName)
Alright, we need to convert the image from BGR to CMYK. OpenCV does not offer the conversion, so we need to do it manually. The formula is very straightforward. I'm only interested in the K channel, so I calculate it like this:
# Make float and divide by 255:
# (np.float was removed from NumPy; use np.float64)
bgrFloat = bgr.astype(np.float64) / 255.

# Calculate K as (1 - whatever is biggest out of bgrFloat)
kChannel = 1 - np.max(bgrFloat, axis=2)

# Convert back to uint 8:
kChannel = 255 * kChannel
kChannel = kChannel.astype(np.uint8)
Gotta keep an eye on the data types, because there are float operations going on. This is the result:
As you see, the hole is almost 100% white, that's cool, we can threshold this image via Otsu like this:
# Compute binary mask of the hole via Otsu:
_, binaryImage = cv2.threshold(kChannel, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
Which gives you this nice binary mask:
Now, here comes the laborious part. Let's find contours on this image. For every contour/blob, compute circularity and area. Use this info to filter noise and get the contour of interest; keep in mind that a perfect circle should have circularity close to 1.0. Once you get a contour of interest, approximate a circle to it. This is the process:
# Find the big contours/blobs on the filtered image:
contours, hierarchy = cv2.findContours(binaryImage, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)

# Store the detected circles here:
detectedCircles = []

# Look for the potential contours of interest:
for _, c in enumerate(contours):
    # Get the blob's area and perimeter:
    contourArea = cv2.contourArea(c)
    contourPerimeter = cv2.arcLength(c, True)

    # Compute circularity:
    if contourPerimeter > 0:
        circularity = (4 * 3.1416 * contourArea) / (pow(contourPerimeter, 2))
    else:
        circularity = 0.0

    # Set the min threshold values to identify the
    # blob of interest:
    minCircularity = 0.7
    minArea = 2000

    if circularity >= minCircularity and contourArea >= minArea:
        # Approximate the contour to a circle:
        (x, y), radius = cv2.minEnclosingCircle(c)

        # Compute the center and radius:
        center = (int(x), int(y))

        # Cast radius to int:
        radius = int(radius)

        # Store the center and radius:
        detectedCircles.append([center, radius])

        # Draw the circles:
        cv2.circle(bgr, center, radius, (0, 255, 0), 2)

cv2.imshow("Detected Circles", bgr)
print("Circles Found: " + str(len(detectedCircles)))
Additionally, I have stored the circle (center and radius) in the detectedCircles list. This is the final result:
Circles Found: 1
Here it is:
import numpy as np
import cv2

def threshold_gray_const(image_, rang: tuple):
    return cv2.inRange(image_, rang[0], rang[1])

def binary_or(image_1, image_2):
    return cv2.bitwise_or(image_1, image_2)

def negate_image(image_):
    return cv2.bitwise_not(image_)

def particle_filter(image_, power):
    # Abdrakov's particle filter
    nb_components, output, stats, centroids = cv2.connectedComponentsWithStats(image_, connectivity=8)
    sizes = stats[1:, -1]
    nb_components = nb_components - 1
    min_size = power
    img2 = np.zeros(output.shape, dtype=np.uint8)
    for i in range(0, nb_components):
        if sizes[i] >= min_size:
            img_to_compare = threshold_gray_const(output, (i + 1, i + 1))
            img2 = binary_or(img2, img_to_compare)
    img2 = img2.astype(np.uint8)
    return img2

def reject_borders(image_):
    # Abdrakov's border rejecter
    out_image = image_.copy()
    h, w = image_.shape[:2]
    for row in range(h):
        if out_image[row, 0] == 255:
            cv2.floodFill(out_image, None, (0, row), 0)
        if out_image[row, w - 1] == 255:
            cv2.floodFill(out_image, None, (w - 1, row), 0)
    for col in range(w):
        if out_image[0, col] == 255:
            cv2.floodFill(out_image, None, (col, 0), 0)
        if out_image[h - 1, col] == 255:
            cv2.floodFill(out_image, None, (col, h - 1), 0)
    return out_image

src = cv2.imread("your_image")
img_gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
element = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
closing = cv2.morphologyEx(img_gray, cv2.MORPH_CLOSE, element, iterations=2)
tv, thresh = cv2.threshold(closing, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
neg = negate_image(thresh)
rej = reject_borders(neg)
filtered = particle_filter(rej, 300)
edges = cv2.Canny(filtered, 100, 200)
circles = cv2.HoughCircles(edges, cv2.HOUGH_GRADIENT, 3, 50, param1=50, param2=30, minRadius=20, maxRadius=50)
circles = np.uint16(np.around(circles))
for i in circles[0, :]:
    # draw the outer circle
    cv2.circle(src, (i[0], i[1]), i[2], (0, 255, 0), 2)
    # draw the center of the circle
    cv2.circle(src, (i[0], i[1]), 2, (0, 0, 255), 3)
cv2.imshow("closing", closing)
cv2.imshow("edges", edges)
cv2.imshow("out", src)
cv2.waitKey(0)
I changed the cv2.morphologyEx parameters a bit, because they were too strong. After this noise removal I made a binary image using the cv2.THRESH_OTSU flag, negated it, rejected borders and filtered it a bit. Then I used cv2.Canny to find edges and passed this 'cannied' image into cv2.HoughCircles. If any questions - ask me :)
If you want to use a "thinking out of the box" solution then check this solution out. Remember this might have a few false positives in some cases and would only work in cases where the circle contour is complete or joined.
import numpy as np
import cv2
import matplotlib.pyplot as plt
from math import pi

pi_eps = 0.1

rgb = cv2.imread('/path/to/your/image/find_circle.jpg')
gray = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)
th = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 21, 5)
contours, hier = cv2.findContours(th.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

out_img = rgb.copy()
for i in range(len(contours)):
    x, y, w, h = cv2.boundingRect(contours[i])
    ar = min(w, h) / max(w, h)
    # For a circle aspect ratio is close to 1.0
    # In your use case circle diameter is between 40px-100px
    if ar < 0.9 or \
            w < 40 or w > 100:
        continue
    # P = 2 * PI * r
    perimeter = cv2.arcLength(contours[i], True)
    if perimeter == 0:
        continue
    # Second level confirmation could be done using PI = P * P / (4 * A)
    # A = PI * r * r
    area = cv2.contourArea(contours[i])
    if area == 0:
        continue
    # d = (w + h) / 2 average diameter
    # A contour is a circle if (P / d) = PI
    ctr_pi = perimeter / ((w + h) / 2)
    if abs(ctr_pi - pi) < pi_eps * pi:
        cv2.circle(out_img, (int(x + w/2), int(y + h/2)), int(max(w, h)/2), (0, 255, 0), 1)
        print("Center of the circle: ", x + w/2, y + h/2)
plt.imshow(out_img)
I want to detect all the patches in the image below. I attached the code used to detect them:
import cv2
import numpy as np
import matplotlib.pyplot as plt

# read the image in color (reading with flag 0 gives a single-channel
# image that cannot be converted with COLOR_BGR2RGB)
image = cv2.imread("bw2.jpg")
# convert to RGB
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
# create a binary thresholded image (max value must be 255 for 8-bit images)
_, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV)
# show it
plt.imshow(gray, cmap="gray")
plt.show()
# find the contours from the thresholded image (not the grayscale one)
contours, hierarchy = cv2.findContours(binary, cv2.RETR_TREE,
                                       cv2.CHAIN_APPROX_SIMPLE)
print("contours:", contours)
# draw all contours
for c in contours:
    if cv2.contourArea(c) < 3000:
        continue
    (x, y, w, h) = cv2.boundingRect(c)
    #cv2.rectangle(image, (x,y), (x+w,y+h), (0, 255, 0), 2)
    ## BEGIN - draw rotated rectangle
    rect = cv2.minAreaRect(c)
    box = cv2.boxPoints(rect)
    box = np.int32(box)
    cv2.drawContours(image, [box], 0, (255, 51, 255), 2)
# show the image with the drawn contours
plt.imshow(image)
#plt.imshow(im3)
cv2.imwrite("detectImg2.png", image)
plt.show()
I get the output image shown here.
I want to detect all of them. Can anyone tell me how to achieve this? I am new to image processing.
Here is how I would extract and rotate each blob in your image using Python OpenCV.
Read the input
Convert to gray
Threshold
Apply morphology open and close to clean small spots
Get all the external contours
Loop over each contour and do the following:
Draw the contour on a copy of the input image
Get the rotated rectangle of the contour and extract its center, dimensions and rotation angle
Get the corners of the rotated rectangle
Draw the rotated rectangle on another copy of the input
Correct the rotation angle for image unrotation
Generate a mask image with the filled rotated rectangle
Apply the mask image to the morphology-cleaned image to remove nearby other white regions
Get the affine warp matrix using the center and corrected rotation angle
Unrotate the masked image using warpAffine
Get the contour of the one blob in the unrotated image
Get the contour's bounding box
Crop the masked image (or alternately crop the input image)
Save the cropped image
Exit the loop
Save the contour and rotrect images
Input:
import cv2
import numpy as np

image = cv2.imread("bw2.jpg")
hh, ww = image.shape[:2]

# convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# create a binary thresholded image
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]

# apply morphology
kernel = np.ones((7,7), np.uint8)
clean = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = np.ones((13,13), np.uint8)
clean = cv2.morphologyEx(clean, cv2.MORPH_CLOSE, kernel)

# get external contours
contours = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

contour_img = image.copy()
rotrect_img = image.copy()
i = 1
for c in contours:
    # draw contour on input
    cv2.drawContours(contour_img,[c],0,(0,0,255),2)

    # get rotated rectangle from contour
    # get its dimensions
    # get angle relative to horizontal from rotated rectangle
    rotrect = cv2.minAreaRect(c)
    (center), (width,height), angle = rotrect
    box = cv2.boxPoints(rotrect)
    boxpts = np.int32(box)  # np.int0 was removed in recent NumPy versions

    # draw rotated rectangle on copy of image
    cv2.drawContours(rotrect_img,[boxpts],0,(0,255,0),2)

    # from https://www.pyimagesearch.com/2017/02/20/text-skew-correction-opencv-python/
    # the `cv2.minAreaRect` function returns values in the
    # range [-90, 0); as the rectangle rotates clockwise the
    # returned angle tends to 0 -- in this special case we
    # need to add 90 degrees to the angle
    if angle < -45:
        angle = -(90 + angle)
    # otherwise, check width vs height
    else:
        if width > height:
            angle = -(90 + angle)
        else:
            angle = -angle

    # negate the angle for deskewing
    neg_angle = -angle

    # draw mask as filled rotated rectangle on black background the size of the input
    mask = np.zeros_like(clean)
    cv2.drawContours(mask,[boxpts],0,255,-1)

    # apply mask to cleaned image
    blob_img = cv2.bitwise_and(clean, mask)

    # Get rotation matrix
    #center = (width // 2, height // 2)
    M = cv2.getRotationMatrix2D(center, neg_angle, scale=1.0)
    #print('m: ',M)

    # deskew (unrotate) the rotated rectangle
    deskewed = cv2.warpAffine(blob_img, M, (ww, hh), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)

    # threshold it again
    deskewed = cv2.threshold(deskewed, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1]

    # get bounding box of contour of deskewed rectangle
    cntrs = cv2.findContours(deskewed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cntrs = cntrs[0] if len(cntrs) == 2 else cntrs[1]
    cntr = cntrs[0]
    x,y,w,h = cv2.boundingRect(cntr)

    # crop to white region
    crop = deskewed[y:y+h, x:x+w]

    # alternately crop the input
    #crop = image[y:y+h, x:x+w]

    # save deskewed image
    cv2.imwrite("bw2_deskewed_{0}.png".format(i),crop)
    i = i + 1

# save contour and rot rect images
cv2.imwrite("bw2_contours.png",contour_img)
cv2.imwrite("bw2_rotrects.png",rotrect_img)

# display result, though it won't show transparency
cv2.imshow("thresh", thresh)
cv2.imshow("clean", clean)
cv2.imshow("contours", contour_img)
cv2.imshow("rectangles", rotrect_img)
cv2.waitKey(0)
cv2.destroyAllWindows()
Contour image:
Rotated rectangles image:
First 3 unrotated images:
Affine warp rotation angles:
13.916877746582031
-42.87890625
18.8118896484375
-44.333797454833984
-38.65980911254883
-37.25965881347656
8.806793212890625
14.931419372558594
-37.405357360839844
-34.99202346801758
35.537681579589844
-35.350345611572266
-42.3245735168457
50.12316131591797
-42.969085693359375
52.750038146972656
45.0
Your code is correct for detecting those patches; there is only a minor mistake here:
if cv2.contourArea(c) < 3000:
    continue
Reduce 3000 to 100 or lower, because this condition skips every contour with an area below 3000.
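For example, reusing contours and image from your code (MIN_AREA is just a starting value to tune):

MIN_AREA = 100  # tune for your image; the original 3000 skipped most patches

for c in contours:
    if cv2.contourArea(c) < MIN_AREA:
        continue
    rect = cv2.minAreaRect(c)
    box = np.int32(cv2.boxPoints(rect))
    cv2.drawContours(image, [box], 0, (255, 51, 255), 2)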
I am trying to detect the outer boundary of the circular object in the images below:
I tried OpenCV's Hough Circle, but the code does not work for every image. I also tried to adjust parameters such as minRadius and maxRadius in Hough Circle, but it's not working on every image.
The aim is to detect the object from the image and crop it.
Expected output:
Source code:
import imutils
import cv2
import numpy as np
from matplotlib import pyplot as plt
image = cv2.imread("path to the image i have provided")
r = 600.0 / image.shape[1]
dim = (600, int(image.shape[0] * r))
resized = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)
cv2.imwrite("path to were we want to save downscaled image", resized)
image = cv2.imread('path of downscaled image')
image1 = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
image2 = cv2.GaussianBlur(image1, (5, 5), 0)
edged = cv2.Canny(image2, 30, 150)
img = cv2.medianBlur(image2,5)
cimg = cv2.cvtColor(img,cv2.COLOR_GRAY2BGR)
circles = cv2.HoughCircles(edged,cv2.HOUGH_GRADIENT,1,20,
param1=50,param2=30,minRadius=200,maxRadius=280)
circles = np.uint16(np.around(circles))
max_circle = max(circles[0,:], key=lambda x:x[2])
# print(max_circle)
# # Create mask
height,width = image1.shape
mask = np.zeros((height,width), np.uint8)
for i in [max_circle]:
    cv2.circle(mask, (i[0], i[1]), i[2], (255, 255, 255), thickness=-1)
masked_data = cv2.bitwise_and(image, image, mask=mask)
_,thresh = cv2.threshold(mask,1,255,cv2.THRESH_BINARY)
# Find Contour
contours = cv2.findContours(thresh,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)[0]
x,y,w,h = cv2.boundingRect(contours[0])
# Crop masked_data
crop = masked_data[y:y+h,x:x+w]
#Code to close Window
cv2.imshow('OG',image)
cv2.imshow('Cropped ROI',crop)
cv2.imwrite("path to save roi image", crop)
cv2.waitKey(0)
cv2.destroyAllWindows()
Second Answer: an approach based on color segmentation.
While I was editing the question to improve its readability and was inserting and resizing all the images from the link you shared to make it easier for everyone to visualize what you are trying to do, it occurred to me that this problem might be a better candidate for an approach based on segmentation by color:
This simpler (but clever) approach assumes that the reel appears pretty much in the same location and has more or less the same dimensions every time:
To discover the approximate color of the reel in the image, define a list of Regions of Interest (ROIs) to sample pixels from and determine the min and max color of that area in the HSV color space. The location and size of each ROI are values derived from the size of the image. In the images below, you can see the ROIs drawn as blue-ish rectangles:
Once the min and max HSV colors have been found, a threshold operation with cv2.inRange() can be executed to segment the reel:
Then, iterate through all the contours in the binary image and assume that the largest one represents the reel. Use this contour and draw it in a separate mask to be able to extract the pixels from the original image:
At this stage, it is also possible to compute a bounding box for the contour and extract its precise location to be able to perform a crop operation later and completely isolate the reel in the image:
This approach works for EVERY image shared on the question.
Source code:
import cv2
import numpy as np
import sys
# initialize global H, S, V values
min_global_h = 179
min_global_s = 255
min_global_v = 255
max_global_h = 0
max_global_s = 0
max_global_v = 0
# load input image from the cmd-line
filename = sys.argv[1]
img = cv2.imread(sys.argv[1])
if (img is None):
print('!!! Failed imread')
sys.exit(-1)
# create an auxiliary image for debugging purposes
dbg_img = img.copy()
# initiailize a list of Regions of Interest that need to be scanned to identify good HSV values to threhsold by color
w = img.shape[1]
h = img.shape[0]
roi_w = int(w * 0.10)
roi_h = int(h * 0.10)
roi_list = []
roi_list.append( (int(w*0.25), int(h*0.15), roi_w, roi_h) )
roi_list.append( (int(w*0.25), int(h*0.60), roi_w, roi_h) )
# convert image to HSV color space
hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# iterate through the ROIs to determine the min/max HSV color of the reel
for rect in roi_list:
x, y, w, h = rect
x2 = x + w
y2 = y + h
print('ROI rect=', rect)
cropped_hsv_img = hsv_img[y:y+h, x:x+w]
h, s, v = cv2.split(cropped_hsv_img)
min_h = np.min(h)
min_s = np.min(s)
min_v = np.min(v)
if (min_h < min_global_h):
min_global_h = min_h
if (min_s < min_global_s):
min_global_s = min_s
if (min_v < min_global_v):
min_global_v = min_v
max_h = np.max(h)
max_s = np.max(s)
max_v = np.max(v)
if (max_h > max_global_h):
max_global_h = max_h
if (max_s > max_global_s):
max_global_s = max_s
if (max_v > max_global_v):
max_global_v = max_v
# debug: draw ROI in original image
cv2.rectangle(dbg_img, (x, y), (x2, y2), (255,165,0), 4) # red
cv2.imshow('ROIs', cv2.resize(dbg_img, dsize=(0, 0), fx=0.5, fy=0.5))
#cv2.waitKey(0)
cv2.imwrite(filename[:-4] + '_rois.png', dbg_img)
# define min/max color for threshold
low_hsv = np.array([min_h, min_s, min_v])
max_hsv = np.array([max_h, max_s, max_v])
#print('low_hsv=', low_hsv)
#print('max_hsv=', max_hsv)
# threshold image by color
img_bin = cv2.inRange(hsv_img, low_hsv, max_hsv)
cv2.imshow('binary', cv2.resize(img_bin, dsize=(0, 0), fx=0.5, fy=0.5))
cv2.imwrite(filename[:-4] + '_binary.png', img_bin)
#cv2.imshow('img_bin', cv2.resize(img_bin, dsize=(0, 0), fx=0.5, fy=0.5))
#cv2.waitKey(0)
# create a mask to store the contour of the reel (hopefully)
mask = np.zeros((img_bin.shape[0], img_bin.shape[1]), np.uint8)
crop_x, crop_y, crop_w, crop_h = (0, 0, 0, 0)
# iterate throw all the contours in the binary image:
# assume that the first contour with an area larger than 100k belongs to the reel
contours, hierarchy = cv2.findContours(img_bin, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for contourIdx, cnt in enumerate(contours):
area = cv2.contourArea(contours[contourIdx])
print('contourIdx=', contourIdx, 'area=', area)
# draw potential reel blob on the mask (in white)
if (area > 100000):
crop_x, crop_y, crop_w, crop_h = cv2.boundingRect(cnt)
centers, radius = cv2.minEnclosingCircle(cnt)
cv2.circle(mask, (int(centers[0]), int(centers[1])), int(radius), (255), -1) # fill with white
break
cv2.imshow('mask', cv2.resize(mask, dsize=(0, 0), fx=0.5, fy=0.5))
cv2.imwrite(filename[:-4] + '_mask.png', mask)
# copy just the reel area into its own image
reel_img = cv2.bitwise_and(img, img, mask=mask)
cv2.imshow('reel_img', cv2.resize(reel_img, dsize=(0, 0), fx=0.5, fy=0.5))
cv2.imwrite(filename[:-4] + '_reel.png', reel_img)
# crop the reel to a smaller image
if (crop_w != 0 and crop_h != 0):
cropped_reel_img = reel_img[crop_y:crop_y+crop_h, crop_x:crop_x+crop_w]
cv2.imshow('cropped_reel_img', cv2.resize(cropped_reel_img, dsize=(0, 0), fx=0.5, fy=0.5))
output_filename = filename[:-4] + '_crop.png'
cv2.imwrite(output_filename, cropped_reel_img)
cv2.waitKey(0)
First answer: an approach based on pre-processing the image and executing an adaptiveThreshold operation.
There might be other ways of solving this problem that are not based on Hough Circles. Here is the result of an approach that is not:
Preprocess the image! Decreasing the size of the image and executing a blur helps with segmentation:
The segmentation method uses cv2.adaptiveThreshold() to create a binary image that preserves the most important objects: the center of the reel and the external edge of the reel. This is an important step since we are only interested in what exists between these two objects. However, life is not perfect and neither is this segmentation. The shadow of the reel on the table became part of the binary objects detected. Also, the outer edge is not fully connected, as you can see on the resulting image on the right (look at the top left of the circumference):
To join broken segments, a morphological operation can be executed:
Finally, the entire reel area can be exposed by iterating through the contours of the image above and discarding those whose area is larger than what is expected for a reel. The resulting binary image (on the left) can then be used as a mask to identify the reel location on the original image:
Keep in mind that I'm not trying to find a universal solution for your problem. I'm merely showing that there might be other solutions that don't depend on Hough Circles.
Also, this code might need some adjustments to work on a larger number of cases.
Source code:
import cv2
import numpy as np
import sys

img = cv2.imread("test_images/reel.jpg")
if (img is None):
    print('!!! Failed imread')
    sys.exit(-1)

# create output image
output_img = img.copy()

# 1. Preprocess the image: downscale to speed up processing and execute a blur
SCALE_FACTOR = 0.5
smaller_img = cv2.resize(img, dsize=(0, 0), fx=SCALE_FACTOR, fy=SCALE_FACTOR)
blur_img = cv2.medianBlur(smaller_img, 9)
cv2.imwrite('reel1_blur_img.png', blur_img)

# 2. Segment the image to identify the 2 most important contours: the center of the reel and the outer edge
gray_img = cv2.cvtColor(blur_img, cv2.COLOR_BGR2GRAY)
img_bin = cv2.adaptiveThreshold(gray_img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 19, 4)
cv2.imwrite('reel2_img_bin.png', img_bin)

green_mask = np.zeros((img_bin.shape[0], img_bin.shape[1]), np.uint8)
#green_mask = cv2.cvtColor(img_bin, cv2.COLOR_GRAY2RGB) # debug

contours, hierarchy = cv2.findContours(img_bin, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for contourIdx, cnt in enumerate(contours):
    x, y, w, h = cv2.boundingRect(cnt)
    area = cv2.contourArea(contours[contourIdx])
    #print('contourIdx=', contourIdx, 'w=', w, 'h=', h, 'area=', area)

    # filter out tiny segments
    if (area < 5000):
        #cv2.fillPoly(green_mask, pts=[cnt], color=(0, 0, 255)) # red
        continue

    # draw green contour (filled)
    #cv2.fillPoly(green_mask, pts=[cnt], color=(0, 255, 0)) # green
    cv2.fillPoly(green_mask, pts=[cnt], color=(255)) # white

    # debug:
    #cv2.imshow('green_mask', green_mask)
    #cv2.waitKey(0)

cv2.imshow('green_mask', green_mask)
cv2.imwrite('reel2_green_mask.png', green_mask)

# 3. Fix mask: join segments nearby
kernel = np.ones((3,3), np.uint8)
img_dilation = cv2.dilate(green_mask, kernel, iterations=1)
green_mask = cv2.erode(img_dilation, kernel, iterations=1)
cv2.imshow('fixed green_mask', green_mask)
cv2.imwrite('reel3_img.png', green_mask)

# 4. Extract the reel area from the green mask
reel_mask = np.zeros((green_mask.shape[0], green_mask.shape[1]), np.uint8)
#reel_mask = cv2.cvtColor(green_mask, cv2.COLOR_GRAY2RGB) # debug

contours, hierarchy = cv2.findContours(green_mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for contourIdx, cnt in enumerate(contours):
    x, y, w, h = cv2.boundingRect(cnt)
    area = cv2.contourArea(contours[contourIdx])
    print('contourIdx=', contourIdx, 'w=', w, 'h=', h, 'area=', area)

    # filter out larger segments
    if (area > 110000):
        #cv2.fillPoly(reel_mask, pts=[cnt], color=(0, 0, 255)) # red
        continue

    # draw green contour (filled)
    #cv2.fillPoly(reel_mask, pts=[cnt], color=(0, 255, 0)) # green
    cv2.fillPoly(reel_mask, pts=[cnt], color=(255)) # white

    # debug:
    #cv2.imshow('reel_mask', reel_mask)
    #cv2.waitKey(0)

cv2.imshow('reel_mask', reel_mask)
cv2.imwrite('reel4_reel_mask.png', reel_mask)

# 5. Draw the reel area on the original image
contours, hierarchy = cv2.findContours(reel_mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for contourIdx, cnt in enumerate(contours):
    centers, radius = cv2.minEnclosingCircle(cnt)

    # rescale these values back to the original image size
    centers_orig = (centers[0] // SCALE_FACTOR, centers[1] // SCALE_FACTOR)
    radius_orig = radius // SCALE_FACTOR
    print('centers=', centers_orig, 'radius=', radius_orig)
    cv2.circle(output_img, (int(centers_orig[0]), int(centers_orig[1])), int(radius_orig), (128,0,255), 5) # magenta

cv2.imshow('output_img', output_img)
cv2.imwrite('reel5_output.png', output_img)

# display just the pixels from the original image
larger_reel_mask = cv2.resize(reel_mask, (int(img.shape[1]), int(img.shape[0])))
output_reel_img = cv2.bitwise_and(img, img, mask=larger_reel_mask)
cv2.imshow('output_reel_img', output_reel_img)
cv2.imwrite('reel5_output_reel.png', output_reel_img)
cv2.waitKey(0)
At this point, it's possible to use larger_reel_mask and compute a minimal enclosing circle, then draw it over this mask to make it a little bit more round and allow us to retrieve the area of the reel more accurately:
But the 4 lines of code that achieve this improvement I leave as an exercise for the reader.
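One possible sketch of those 4 lines (my reading of the hint, not necessarily the author's exact intent; it reuses img and larger_reel_mask from the listing above and assumes OpenCV 4's two-value findContours):

# fit a minimal enclosing circle to the largest blob in the mask and redraw it filled
cnts, _ = cv2.findContours(larger_reel_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
(cx, cy), radius = cv2.minEnclosingCircle(max(cnts, key=cv2.contourArea))
cv2.circle(larger_reel_mask, (int(cx), int(cy)), int(radius), 255, -1)
output_reel_img = cv2.bitwise_and(img, img, mask=larger_reel_mask)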
I am working with an image with distorted/rotated texts. I need to rotate these text blobs back to the horizontal level (0 degrees) before I can run OCR on them. I managed to fix the rotation issue, but now I need to find a way to copy the contents of the original contour to the rotated matrix.
Here are a few things I've done to extract and fix the rotation issue:
Find contour
Apply heavy dilation and remove non-text lines
Find the contour angle and do angle correction in the polar space.
I have tried using an affine transformation to rotate the rectangular text blobs, but it ended up cropping out some of the text because some of the text blobs are irregular. Result here.
Blue dots in the contours are centroids, and the numbers are contour angles. How can I copy the contents of the unrotated contours, rotate them, and copy them to a new image?
Code
def getContourCenter(contour):
    M = cv2.moments(contour)
    if M["m00"] != 0:
        cx = int(M['m10'] / M['m00'])
        cy = int(M['m01'] / M['m00'])
    else:
        return 0, 0
    return int(cx), int(cy)

def rotateContour(contour, center: tuple, angle: float):

    def cart2pol(x, y):
        theta = np.arctan2(y, x)
        rho = np.hypot(x, y)
        return theta, rho

    def pol2cart(theta, rho):
        x = rho * np.cos(theta)
        y = rho * np.sin(theta)
        return x, y

    # Translate the contour by subtracting the center from all the points
    norm = contour - [center[0], center[1]]

    # Convert the points to polar co-ordinates, add the rotation, and convert back to Cartesian co-ordinates.
    coordinates = norm[:, 0, :]
    xs, ys = coordinates[:, 0], coordinates[:, 1]
    thetas, rhos = cart2pol(xs, ys)

    thetas = np.rad2deg(thetas)
    thetas = (thetas + angle) % 360
    thetas = np.deg2rad(thetas)

    # Convert the new polar coordinates to cartesian co-ordinates
    xs, ys = pol2cart(thetas, rhos)
    norm[:, 0, 0] = xs
    norm[:, 0, 1] = ys

    rotated = norm + [center[0], center[1]]
    rotated = rotated.astype(np.int32)
    return rotated

def straightenText(image, vis):
    # create a new black mat
    mask = np.zeros([image.shape[0], image.shape[1], 3], dtype=np.uint8)

    # invert pixel values and dilate aggressively
    # (ImageUtils.box is a custom helper that builds a rectangular kernel)
    dilate = cv2.dilate(~image, ImageUtils.box(33, 1))

    # find contours (OpenCV 3 signature; in OpenCV 4, drop the first return value)
    _, contours, hierarchy = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    for contour in contours:
        [x, y, w, h] = cv2.boundingRect(contour)
        if w > h:
            # find contour angle and centers
            (x, y), (w, h), angle = cv2.minAreaRect(contour)
            cx, cy = getContourCenter(contour)

            # fix angle returned
            if w < h:
                angle = 90 + angle

            # fix contour angle
            rotatedContour = rotateContour(contour, (cx, cy), 0 - angle)

            cv2.drawContours(vis, contour, -1, (0, 255, 0), 2)
            cv2.drawContours(mask, rotatedContour, -1, (255, 0, 0), 2)
            cv2.circle(vis, (cx, cy), 2, (0, 0, 255), 2, 8)  # centroid
            cv2.putText(vis, str(round(angle, 2)), (cx, cy), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)
Here is one way, which is the simplest way I can think to do it in Python/OpenCV, though it may not be optimal in speed.
Create a white empty image for the desired output. (So we have black text on white background in case you need to do OCR.)
Get the rotated bounding rectangle of your contour in your input.
Get the normal bounding rectangle of your contour in the output.
Get the 4 bounding box corners for each.
Compute an affine transform matrix between the two sets of 4 corner points.
Warp the (whole) input image to the same size (non-optimal).
Use the output bounding box dimensions and upper left corner with numpy slicing to transfer the region in the warped image to the same region in the white output image.
Repeat for each text contour, using the resulting image in place of the original white image as the new destination image.
So here is a simulation to show you how.
Source Text Image:
Source Text Image with Red Rotated Rectangle:
Desired Bounding Rectangle in White Destination Image:
Text Transferred To White Image into Desired Rectangle Region:
Code:
import cv2
import numpy as np

# Read source text image.
src = cv2.imread('text_on_white.png')
hs, ws, cs = src.shape

# Read same text image with red rotated bounding box drawn.
src2 = cv2.imread('text2_on_white.png')

# create white destination image
dst = np.full((hs,ws,cs), (255,255,255), dtype=np.uint8)

# define coordinates of bounding box in src
src_pts = np.float32([[51,123], [298,102], [300,135], [54,157]])

# size and placement of text in dst (i.e. bounding box):
xd = 50
yd = 200
wd = 249
hd = 123
dst_pts = np.float32([[50,200], [298,200], [298,234], [50,234]])

# get rigid affine transform (no skew)
# use estimateRigidTransform rather than getAffineTransform so we can use all 4 points
# (estimateRigidTransform was removed in OpenCV 4; use cv2.estimateAffinePartial2D there)
matrix = cv2.estimateRigidTransform(src_pts, dst_pts, 0)

# warp the source image
src_warped = cv2.warpAffine(src, matrix, (ws,hs), flags=cv2.INTER_AREA, borderValue=(255,255,255))

# do numpy slicing on warped source and place in white destination
dst[yd:yd+hd, xd:xd+wd] = src_warped[yd:yd+hd, xd:xd+wd]

# show results
cv2.imshow('SRC', src)
cv2.imshow('SRC2', src2)
cv2.imshow('SRC_WARPED', src_warped)
cv2.imshow('DST', dst)
cv2.waitKey(0)
cv2.destroyAllWindows()

# save results
cv2.imwrite('text_on_white_transferred.png', dst)
To extract ONLY the content of a single contour, and not its larger bounding box, you can create a mask by drawing a filled contour and then applying that to the original image. In your case you would need something like this:
# prepare the target image
resX, resY = image.shape[1], image.shape[0]
target = np.zeros((resY, resX, 3), dtype=np.uint8)
target.fill(255)  # make it entirely white

# find the contours
allContours, hierarchy = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

# then perform rotation, etc, per contour
for contour in allContours:
    # create empty mask
    mask = np.zeros((resY, resX, 1), dtype=np.uint8)

    # draw the contour filled into the mask
    cv2.drawContours(mask, [contour], -1, (255), thickness=cv2.FILLED)

    # copy the relevant part into a new image
    # (you might want to use bounding box here for more efficiency)
    single = cv2.bitwise_and(image, image, mask=mask)

    # then apply your rotation operations both on the mask and the result
    single = doContourSpecificOperation(single)
    mask = doContourSpecificOperation(mask)

    # then, put the result into your target image (which was originally white)
    # convert the single-channel result to 3 channels to match the target,
    # and pass target as dst so pixels outside the mask keep their values
    single_bgr = cv2.cvtColor(single, cv2.COLOR_GRAY2BGR)
    target = cv2.bitwise_and(target, single_bgr, dst=target, mask=mask)