I'm trying to extract the corner points of a rectangular section containing bubbles from an OMR sheet, so that I can later use those points with warpPerspective to get a bird's-eye view of that section, but I am not getting the expected results.
Following is the OMR sheet image: OMRsheet.jpg
Code:
import cv2
import numpy as np
def extract_rect(contours):  # Function to extract rectangular contours above a certain area
    rect_contours = []
    for c in contours:
        if cv2.contourArea(c) > 10000:
            perimeter = cv2.arcLength(c, True)
            approx = cv2.approxPolyDP(c, 0.02*perimeter, True)  # Approximates a curve or polygon with another curve/polygon with fewer vertices, so that the distance between them is at most the specified precision. Uses the Douglas-Peucker algorithm
            if len(approx) == 4:
                rect_contours.append(c)
    rect_contours = sorted(rect_contours, key=cv2.contourArea, reverse=True)  # Sort the contours by area, from large to small
    return rect_contours
def rect_points(rect_contour):  # Function to find the corner points of the contour passed. Something is wrong with this: it is not giving the expected results and is messing up the warping of the image
    perimeter = cv2.arcLength(rect_contour, True)
    approx = cv2.approxPolyDP(rect_contour, 0.02*perimeter, True)
    print("APPROX")
    print(type(approx))
    print(approx)

    cv2.drawContours(img, approx, -1, (100, 10, 55), 18)  # Rechecking whether the contour passed to this function is the correct one
    cv2.drawContours(img, rect_contour, -1, (100, 10, 55), 1)

    x, y, w, h = cv2.boundingRect(rect_contour)  # I suspect a logical error in this line, as it returns corner points for the outer rectangle instead of the contour passed to it
    print("printing x y w h")
    print(x, y, w, h)

    # Corner points of the rectangle, to be used later to warp the rectangular section
    point_1 = np.array([x, y])
    point_2 = np.array([x+w, y])
    point_3 = np.array([x, y+h])
    point_4 = np.array([w, h])

    corner_list = np.ndarray(shape=(4, 2), dtype=np.int32)
    np.append(corner_list, point_1)
    np.append(corner_list, point_2)
    np.append(corner_list, point_3)
    np.append(corner_list, point_4)
    print("corners list")
    print(corner_list)

    myPointsNew = np.zeros((4, 1, 2), np.int32)
    add = corner_list.sum(1)
    # print(add)
    # print(np.argmax(add))
    myPointsNew[0] = corner_list[np.argmin(add)]  # [0, 0]  Setting up the points in a coordinate system
    myPointsNew[3] = corner_list[np.argmax(add)]  # [w, h]
    diff = np.diff(corner_list, axis=1)
    myPointsNew[1] = corner_list[np.argmin(diff)]  # [w, 0]
    myPointsNew[2] = corner_list[np.argmax(diff)]  # [0, h]
    print("mypointsnew")
    print(myPointsNew.shape)
    return myPointsNew
img_path = 'OMRsheet.jpg'
img = cv2.imread(img_path)
img_width = 700
img_height = 700
img = cv2.resize(img, (img_width, img_height), interpolation=cv2.INTER_AREA)
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img_blur = cv2.GaussianBlur(img_gray, (5,5), 0) # blurred image
img_canny = cv2.Canny(img_blur, 20, 110)  # Edge detection on the processed image using Canny; binary thresholding could have been an alternative (i.e. if the pixel value is smaller than the threshold it is set to 0, otherwise to a maximum value)
contours, hierarchy = cv2.findContours(img_canny, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)  # Find contours.
# Parameters are (input_image, retrieval_mode, approximation_method)
img_contours = img.copy()
cv2.drawContours(img_contours, contours, -1, (0, 255, 0), 1)  # Parameters are (image, contours, contour_idx, contour_color, contour_thickness). contour_idx is -1 for all contours
cv2.imshow('Contours', img_contours)
rect_contours = extract_rect(contours)
cv2.drawContours(img, rect_contours[1], -1, (0,255,0), 1)
rect_2 = rect_points(rect_contours[1])
cv2.drawContours(img, rect_2, -1, (0,0,255), 12)
warp_img_width = int(img_width/1.2)
warp_img_height = int(img_height/1.2)
warp_from = np.float32(rect_2)
warp_to = np.float32([[0,0], [warp_img_width, 0], [0, warp_img_height], [warp_img_width, warp_img_height]])
transformation_matrix = cv2.getPerspectiveTransform(warp_from, warp_to)
img_warp = cv2.warpPerspective(img, transformation_matrix, (warp_img_width, warp_img_height))
cv2.imshow('Warped Perspective', img_warp)
cv2.imshow('Original', img)
cv2.waitKey(0)
Output for cv2.imshow('Original', img): OMRsheet_contours.jpg
Output for cv2.imshow('Warped Perspective', img_warp): Bird's Eye perspective.jpg
EXPECTED output for cv2.imshow('Warped Perspective', img_warp): Expected Bird's eye.jpg
Instead of getting a warped perspective of the section containing only the bubbles, I am getting a warped perspective of the whole paper, which means that either the points returned by the rect_points function or the contour passed to it (rect_contours[1]) must be wrong. The latter seems fine, as drawing the contour lines for the contour passed to rect_points suggests. I suspect x, y, w, h = cv2.boundingRect(rect_contour) is returning incorrect points.
Any idea how I could solve this problem and get the Expected Bird's eye.jpg?
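For reference, a minimal debugging sketch (hypothetical, reusing img and rect_2 from the code above) that draws and labels the four returned points, so it is obvious which physical corner each index actually maps to:
points = rect_2.reshape(4, 2)  # rect_2 as returned by rect_points above
for idx, (px, py) in enumerate(points):
    cv2.circle(img, (int(px), int(py)), 8, (0, 0, 255), -1)  # mark the corner
    cv2.putText(img, str(idx), (int(px) + 10, int(py)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)  # label it with its index
cv2.imshow('Corner check', img)
cv2.waitKey(0)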
I'm deskewing an image containing a number using a projection-profile-based skew estimation algorithm, and then extracting the number through OCR.
To calculate the correct skew angle, we compare the maximum difference between peaks and, using this skew angle, rotate the image to correct the skew.
Each image (which contains a single number) has a predefined background and foreground (number) color:
If the first pixel of the background is black, the foreground is dark gray.
If the first pixel of the background is white, the foreground is dark white.
Here are some sample images:
all of which get successfully deskewed to these:
After deskewing, I've tried, with no luck, to improve the image quality so that OCR (PyTesseract) can recognize the numbers.
import cv2
import numpy as np
import scipy.ndimage
import pytesseract
from PIL import Image, ImageEnhance, ImageFilter
from scipy.ndimage import interpolation as inter
def correct_skew(image, delta=6, limit=150):
    def determine_score(arr, angle):
        data = inter.rotate(arr, angle, reshape=False, order=0)
        histogram = np.sum(data, axis=1)
        score = np.sum((histogram[1:] - histogram[:-1]) ** 2)
        return histogram, score

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.medianBlur(gray, 21)
    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

    scores = []
    angles = np.arange(-limit, limit + delta, delta)
    for angle in angles:
        histogram, score = determine_score(thresh, angle)
        scores.append(score)

    best_angle = angles[scores.index(max(scores))] + 90

    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, best_angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC)

    ## Image processing to improve OCR accuracy
    """
    #rotated = cv2.medianBlur(rotated, 20)
    rotated = rotated.astype(np.float) / 255.

    # Calculate channel K:
    rotated = 1 - np.max(rotated, axis=2)

    # Convert back to uint8:
    rotated = (255 * rotated).astype(np.uint8)

    binaryThresh = 190
    _, binaryImage = cv2.threshold(rotated, binaryThresh, 255, cv2.THRESH_BINARY)

    # Use a little bit of morphology to clean the mask:
    # Set kernel (structuring element) size:
    kernelSize = 3
    # Set morph operation iterations:
    opIterations = 2
    # Get the structuring element:
    morphKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernelSize, kernelSize))
    # Perform closing:
    rotated = cv2.morphologyEx(binaryImage, cv2.MORPH_CLOSE, morphKernel, None, None, opIterations, cv2.BORDER_REFLECT101)
    """

    return best_angle, rotated
if __name__ == '__main__':
    image = cv2.imread('number5.jpg')
    if image[0][0][0] > 128:
        image = cv2.bitwise_not(image)
    angle, rotated = correct_skew(image)
    print(angle)
    cv2.imshow('rotated', rotated)
    cv2.imwrite('rotated.png', rotated)

    pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
    text = pytesseract.image_to_string(rotated, config="-c tessedit_char_whitelist=0123456789")
    print("number:", text)
    cv2.waitKey()
This code (with PyTesseract) recognizes the first and the second number, but not the others. Why?
I've already tried some tweaks concerning the following (a sketch of the kind of preprocessing I attempted follows the list):
Rescaling
Binarisation
Noise Removal
Dilation / Erosion
Rotation / Deskewing
Borders
Transparency / Alpha channel
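A minimal sketch of that kind of preprocessing (the parameter values here are illustrative, not the exact ones I used):
import cv2

ocr_in = cv2.imread('rotated.png', cv2.IMREAD_GRAYSCALE)
# Rescaling: upscale so the glyphs have more pixels to work with
ocr_in = cv2.resize(ocr_in, None, fx=2.0, fy=2.0, interpolation=cv2.INTER_CUBIC)
# Binarisation via Otsu
_, ocr_in = cv2.threshold(ocr_in, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# Noise removal with a small median blur
ocr_in = cv2.medianBlur(ocr_in, 3)
# Dilation / erosion (a morphological closing with a small kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
ocr_in = cv2.morphologyEx(ocr_in, cv2.MORPH_CLOSE, kernel)
# Borders: pad so the digits do not touch the image edge
ocr_in = cv2.copyMakeBorder(ocr_in, 10, 10, 10, 10, cv2.BORDER_CONSTANT, value=255)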
For this image, I tried to use Hough circles (cv2.HoughCircles) to find the center of the "black hole".
After playing with the parameters of cv2.HoughCircles for a long time, the following is the best I can get.
raw image:
# reproducible code for stackoverflow
import cv2
import os
import sys
from matplotlib import pyplot as plt
import numpy as np
# read the image and convert it to gray
img = cv2.imread(FILE)
cimg = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
plt.figure(figsize=(18, 18))
plt.imshow(cimg, cmap="gray")

# removing noise
element = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
closing = cv2.morphologyEx(img_gray, cv2.MORPH_CLOSE, element, iterations=7)
plt.figure(figsize=(12, 12))
plt.imshow(closing, cmap="gray")

# try to find the circles
circles = cv2.HoughCircles(closing, cv2.HOUGH_GRADIENT, 3, 50,
                           param1=50, param2=30, minRadius=20, maxRadius=50)
circles = np.uint16(np.around(circles))
for i in circles[0, :]:
    # draw the outer circle
    cv2.circle(cimg, (i[0], i[1]), i[2], (0, 255, 0), 2)
    # draw the center of the circle
    cv2.circle(cimg, (i[0], i[1]), 2, (0, 0, 255), 3)
plt.figure(figsize=(12, 12))
plt.imshow(cimg)
Update:
The one with Canny:
edges = cv2.Canny(closing, 100, 300)
plt.figure(figsize=(12, 12))
plt.imshow(edges, cmap="gray")

circles = cv2.HoughCircles(edges, cv2.HOUGH_GRADIENT, 2, 50,
                           param1=50, param2=30, minRadius=20, maxRadius=60)
circles = np.uint16(np.around(circles))
cimg = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
for i in circles[0, :]:
    # draw the outer circle
    cv2.circle(cimg, (i[0], i[1]), i[2], (0, 255, 0), 2)
    # draw the center of the circle
    cv2.circle(cimg, (i[0], i[1]), 2, (0, 0, 255), 3)
plt.figure(figsize=(12, 12))
plt.imshow(cimg)
Still not the circle I want.
Update:
@crackanddie
Sometimes there is a 6 or 9 in the identity number, and the circle in the 6 or 9 is not very round. Is there any way to filter that out?
This is an alternative method if you do not want to implement or fiddle with the Hough transform's parameters. You must be sure there is at least one circle visible in your picture. The idea is to create a segmentation mask based on the CMYK color space and filter the blobs of interest by circularity and area. These are the steps:
Convert the image from BGR to CMYK
Threshold the K channel to get a binary mask
Filter blobs by circularity and area
Approximate the filtered blobs as circles
I'm choosing the CMYK color space because the circle is mostly black. The K (key) channel (in this case, black) should do a good job of representing the blob of interest, albeit with some noise, as usual. Let's see the code:
# Imports:
import cv2
import numpy as np
# image path
path = "D://opencvImages//"
fileName = "dyj3O.jpg"
# load image
bgr = cv2.imread(path + fileName)
Alright, we need to convert the image from BGR to CMYK. OpenCV does not offer this conversion, so we need to do it manually. The formula is very straightforward, and since I'm only interested in the K channel, I calculate just that:
# Make float and divide by 255:
bgrFloat = bgr.astype(np.float64) / 255.
# Calculate K as 1 minus the maximum of the BGR channels:
kChannel = 1 - np.max(bgrFloat, axis=2)
# Convert back to uint8:
kChannel = 255 * kChannel
kChannel = kChannel.astype(np.uint8)
Gotta keep an eye on the data types, because there are float operations going on. This is the result:
As you can see, the hole is almost 100% white. That's cool; we can threshold this image via Otsu like this:
# Compute binary mask of the hole via Otsu:
_, binaryImage = cv2.threshold(kChannel, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
Which gives you this nice binary mask:
Now comes the laborious part. Let's find contours in this image. For every contour/blob, compute circularity and area, and use this information to filter out noise and get the contour of interest; keep in mind that a perfect circle has circularity close to 1.0. Once you get a contour of interest, approximate a circle to it. This is the process:
# Find the big contours/blobs on the filtered image:
contours, hierarchy = cv2.findContours(binaryImage, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
# Store the detected circles here:
detectedCircles = []
# Look for the potential contours of interest:
for c in contours:
    # Get the blob's area and perimeter:
    contourArea = cv2.contourArea(c)
    contourPerimeter = cv2.arcLength(c, True)

    # Compute circularity:
    if contourPerimeter > 0:
        circularity = (4 * 3.1416 * contourArea) / (pow(contourPerimeter, 2))
    else:
        circularity = 0.0

    # Set the min threshold values to identify the
    # blob of interest:
    minCircularity = 0.7
    minArea = 2000

    if circularity >= minCircularity and contourArea >= minArea:
        # Approximate the contour to a circle:
        (x, y), radius = cv2.minEnclosingCircle(c)
        # Compute the center and radius:
        center = (int(x), int(y))
        # Cast radius to int:
        radius = int(radius)

        # Store the center and radius:
        detectedCircles.append([center, radius])

        # Draw the circles:
        cv2.circle(bgr, center, radius, (0, 255, 0), 2)

cv2.imshow("Detected Circles", bgr)
print("Circles Found: " + str(len(detectedCircles)))
Additionally, I have stored the circle (center and radius) in the detectedCircles list. This is the final result:
Circles Found: 1
Here it is:
import numpy as np
import cv2
def threshold_gray_const(image_, rang: tuple):
    return cv2.inRange(image_, rang[0], rang[1])


def binary_or(image_1, image_2):
    return cv2.bitwise_or(image_1, image_2)


def negate_image(image_):
    return cv2.bitwise_not(image_)


def particle_filter(image_, power):
    # Abdrakov's particle filter
    nb_components, output, stats, centroids = cv2.connectedComponentsWithStats(image_, connectivity=8)
    sizes = stats[1:, -1]
    nb_components = nb_components - 1
    min_size = power
    img2 = np.zeros(output.shape, dtype=np.uint8)
    for i in range(0, nb_components):
        if sizes[i] >= min_size:
            img_to_compare = threshold_gray_const(output, (i + 1, i + 1))
            img2 = binary_or(img2, img_to_compare)
    img2 = img2.astype(np.uint8)
    return img2


def reject_borders(image_):
    # Abdrakov's border rejecter
    out_image = image_.copy()
    h, w = image_.shape[:2]
    for row in range(h):
        if out_image[row, 0] == 255:
            cv2.floodFill(out_image, None, (0, row), 0)
        if out_image[row, w - 1] == 255:
            cv2.floodFill(out_image, None, (w - 1, row), 0)
    for col in range(w):
        if out_image[0, col] == 255:
            cv2.floodFill(out_image, None, (col, 0), 0)
        if out_image[h - 1, col] == 255:
            cv2.floodFill(out_image, None, (col, h - 1), 0)
    return out_image
src = cv2.imread("your_image")
img_gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
element = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
closing = cv2.morphologyEx(img_gray, cv2.MORPH_CLOSE, element, iterations=2)
tv, thresh = cv2.threshold(closing, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
neg = negate_image(thresh)
rej = reject_borders(neg)
filtered = particle_filter(rej, 300)
edges = cv2.Canny(filtered, 100, 200)
circles = cv2.HoughCircles(edges, cv2.HOUGH_GRADIENT, 3, 50, param1=50, param2=30, minRadius=20, maxRadius=50)
circles = np.uint16(np.around(circles))
for i in circles[0, :]:
    # draw the outer circle
    cv2.circle(src, (i[0], i[1]), i[2], (0, 255, 0), 2)
    # draw the center of the circle
    cv2.circle(src, (i[0], i[1]), 2, (0, 0, 255), 3)
cv2.imshow("closing", closing)
cv2.imshow("edges", edges)
cv2.imshow("out", src)
cv2.waitKey(0)
I changed the cv2.morphologyEx parameters a bit, because they were too strong. After this noise removal I made a binary image using the cv2.THRESH_OTSU flag, negated it, rejected the borders, and filtered it a bit. Then I used cv2.Canny to find edges and passed this "cannied" image into cv2.HoughCircles. If you have any questions, ask me :)
If you want a "thinking out of the box" solution, then check this one out. Remember, it might produce a few false positives in some cases, and it only works when the circle contour is complete or joined.
import numpy as np
import cv2
import matplotlib.pyplot as plt
from math import pi
pi_eps = 0.1
rgb = cv2.imread('/path/to/your/image/find_circle.jpg')
gray = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)
th = cv2.adaptiveThreshold(gray,255, cv2.ADAPTIVE_THRESH_MEAN_C,cv2.THRESH_BINARY_INV,21,5)
contours, hier = cv2.findContours(th.copy(),cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)
out_img = rgb.copy()
for i in range(len(contours)):
    x, y, w, h = cv2.boundingRect(contours[i])
    ar = min(w, h) / max(w, h)
    # For a circle, the aspect ratio is close to 1.0
    # In your use case the circle diameter is between 40px and 100px
    if ar < 0.9 or w < 40 or w > 100:
        continue
    # P = 2 * PI * r
    perimeter = cv2.arcLength(contours[i], True)
    if perimeter == 0:
        continue
    # A second-level confirmation could be done using PI = P * P / (4 * A)
    # A = PI * r * r
    area = cv2.contourArea(contours[i])
    if area == 0:
        continue
    # d = (w + h) / 2 is the average diameter
    # A contour is a circle if (P / d) = PI
    ctr_pi = perimeter / ((w + h) / 2)
    if abs(ctr_pi - pi) < pi_eps * pi:
        cv2.circle(out_img, (int(x + w/2), int(y + h/2)), int(max(w, h)/2), (0, 255, 0), 1)
        print("Center of the circle: ", x + w/2, y + h/2)

plt.imshow(out_img)
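As a quick sanity check of the (P / d) ≈ PI heuristic above, here is a small sketch (a synthetic image, with illustrative values) that draws a clean circle and confirms the ratio falls within the tolerance:
import numpy as np
import cv2
from math import pi

canvas = np.zeros((200, 200), np.uint8)
cv2.circle(canvas, (100, 100), 30, 255, -1)  # synthetic filled circle, r = 30
cnts, _ = cv2.findContours(canvas, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
x, y, w, h = cv2.boundingRect(cnts[0])
perimeter = cv2.arcLength(cnts[0], True)
ratio = perimeter / ((w + h) / 2)   # should be close to pi (~3.14)
print(abs(ratio - pi) < 0.1 * pi)   # True for a clean circle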
I am trying to detect the outer boundary of the circular object in the images below:
I tried OpenCV's Hough circle transform, but the code does not work for every image. I also tried to adjust parameters such as minRadius and maxRadius in HoughCircles, but it does not work on every image.
The aim is to detect the object in the image and crop it.
Expected output:
Source code:
import imutils
import cv2
import numpy as np
from matplotlib import pyplot as plt
image = cv2.imread("path to the image i have provided")
r = 600.0 / image.shape[1]
dim = (600, int(image.shape[0] * r))
resized = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)
cv2.imwrite("path to were we want to save downscaled image", resized)
image = cv2.imread('path of downscaled image')
image1 = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
image2 = cv2.GaussianBlur(image1, (5, 5), 0)
edged = cv2.Canny(image2, 30, 150)
img = cv2.medianBlur(image2,5)
cimg = cv2.cvtColor(img,cv2.COLOR_GRAY2BGR)
circles = cv2.HoughCircles(edged, cv2.HOUGH_GRADIENT, 1, 20,
                           param1=50, param2=30, minRadius=200, maxRadius=280)
circles = np.uint16(np.around(circles))
max_circle = max(circles[0,:], key=lambda x:x[2])
# print(max_circle)
# Create mask
height,width = image1.shape
mask = np.zeros((height,width), np.uint8)
for i in [max_circle]:
    cv2.circle(mask, (i[0], i[1]), i[2], (255, 255, 255), thickness=-1)
masked_data = cv2.bitwise_and(image, image, mask=mask)
_,thresh = cv2.threshold(mask,1,255,cv2.THRESH_BINARY)
# Find Contour
contours = cv2.findContours(thresh,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE)[0]
x,y,w,h = cv2.boundingRect(contours[0])
# Crop masked_data
crop = masked_data[y:y+h,x:x+w]
#Code to close Window
cv2.imshow('OG',image)
cv2.imshow('Cropped ROI',crop)
cv2.imwrite("path to save roi image", crop)
cv2.waitKey(0)
cv2.destroyAllWindows()
Second Answer: an approach based on color segmentation.
While I was editing the question to improve its readability, and was inserting and resizing all the images from the link you shared to make it easier for everyone to visualize what you are trying to do, it occurred to me that this problem might be a better candidate for an approach based on segmentation by color:
This simpler (but clever) approach assumes that the reel appears pretty much in the same location and has more or less the same dimensions every time:
To discover the approximate color of the reel in the image, define a list of Regions of Interest (ROIs) to sample pixels from and determine the min and max color of that area in the HSV color space. The location and size of the ROIs are values derived from the size of the image. In the images below, you can see the ROIs drawn as blue-ish rectangles:
Once the min and max HSV colors have been found, a threshold operation with cv2.inRange() can be executed to segment the reel:
Then, iterate through all the contours in the binary image and assume that the largest one represents the reel. Use this contour and draw it on a separate mask to be able to extract the pixels from the original image:
At this stage, it is also possible to compute a bounding box for the contour and extract its precise location, to be able to perform a crop operation later and completely isolate the reel in the image:
This approach works for EVERY image shared in the question.
Source code:
import cv2
import numpy as np
import sys
# initialize global H, S, V values
min_global_h = 179
min_global_s = 255
min_global_v = 255
max_global_h = 0
max_global_s = 0
max_global_v = 0
# load input image from the cmd-line
filename = sys.argv[1]
img = cv2.imread(sys.argv[1])
if img is None:
    print('!!! Failed imread')
    sys.exit(-1)
# create an auxiliary image for debugging purposes
dbg_img = img.copy()
# initialize a list of Regions of Interest that need to be scanned to identify good HSV values to threshold by color
w = img.shape[1]
h = img.shape[0]
roi_w = int(w * 0.10)
roi_h = int(h * 0.10)
roi_list = []
roi_list.append( (int(w*0.25), int(h*0.15), roi_w, roi_h) )
roi_list.append( (int(w*0.25), int(h*0.60), roi_w, roi_h) )
# convert image to HSV color space
hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# iterate through the ROIs to determine the min/max HSV color of the reel
for rect in roi_list:
    x, y, w, h = rect
    x2 = x + w
    y2 = y + h
    print('ROI rect=', rect)

    cropped_hsv_img = hsv_img[y:y+h, x:x+w]
    h_ch, s_ch, v_ch = cv2.split(cropped_hsv_img)

    min_h = np.min(h_ch)
    min_s = np.min(s_ch)
    min_v = np.min(v_ch)
    if min_h < min_global_h:
        min_global_h = min_h
    if min_s < min_global_s:
        min_global_s = min_s
    if min_v < min_global_v:
        min_global_v = min_v

    max_h = np.max(h_ch)
    max_s = np.max(s_ch)
    max_v = np.max(v_ch)
    if max_h > max_global_h:
        max_global_h = max_h
    if max_s > max_global_s:
        max_global_s = max_s
    if max_v > max_global_v:
        max_global_v = max_v

    # debug: draw ROI in original image
    cv2.rectangle(dbg_img, (x, y), (x2, y2), (255, 165, 0), 4)  # blue-ish

cv2.imshow('ROIs', cv2.resize(dbg_img, dsize=(0, 0), fx=0.5, fy=0.5))
#cv2.waitKey(0)
cv2.imwrite(filename[:-4] + '_rois.png', dbg_img)
# define min/max color for threshold
low_hsv = np.array([min_global_h, min_global_s, min_global_v])
max_hsv = np.array([max_global_h, max_global_s, max_global_v])
#print('low_hsv=', low_hsv)
#print('max_hsv=', max_hsv)
# threshold image by color
img_bin = cv2.inRange(hsv_img, low_hsv, max_hsv)
cv2.imshow('binary', cv2.resize(img_bin, dsize=(0, 0), fx=0.5, fy=0.5))
cv2.imwrite(filename[:-4] + '_binary.png', img_bin)
#cv2.imshow('img_bin', cv2.resize(img_bin, dsize=(0, 0), fx=0.5, fy=0.5))
#cv2.waitKey(0)
# create a mask to store the contour of the reel (hopefully)
mask = np.zeros((img_bin.shape[0], img_bin.shape[1]), np.uint8)
crop_x, crop_y, crop_w, crop_h = (0, 0, 0, 0)
# iterate through all the contours in the binary image:
# assume that the first contour with an area larger than 100k belongs to the reel
contours, hierarchy = cv2.findContours(img_bin, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for contourIdx, cnt in enumerate(contours):
    area = cv2.contourArea(contours[contourIdx])
    print('contourIdx=', contourIdx, 'area=', area)

    # draw potential reel blob on the mask (in white)
    if area > 100000:
        crop_x, crop_y, crop_w, crop_h = cv2.boundingRect(cnt)
        centers, radius = cv2.minEnclosingCircle(cnt)
        cv2.circle(mask, (int(centers[0]), int(centers[1])), int(radius), (255), -1)  # fill with white
        break
cv2.imshow('mask', cv2.resize(mask, dsize=(0, 0), fx=0.5, fy=0.5))
cv2.imwrite(filename[:-4] + '_mask.png', mask)
# copy just the reel area into its own image
reel_img = cv2.bitwise_and(img, img, mask=mask)
cv2.imshow('reel_img', cv2.resize(reel_img, dsize=(0, 0), fx=0.5, fy=0.5))
cv2.imwrite(filename[:-4] + '_reel.png', reel_img)
# crop the reel to a smaller image
if crop_w != 0 and crop_h != 0:
    cropped_reel_img = reel_img[crop_y:crop_y+crop_h, crop_x:crop_x+crop_w]
    cv2.imshow('cropped_reel_img', cv2.resize(cropped_reel_img, dsize=(0, 0), fx=0.5, fy=0.5))
    output_filename = filename[:-4] + '_crop.png'
    cv2.imwrite(output_filename, cropped_reel_img)
cv2.waitKey(0)
First answer: an approach based on pre-processing the image and executing an adaptiveThreshold operation.
There might be other ways of solving this problem that are not based on Hough Circles. Here is the result of an approach that is not:
Preprocess the image! Decreasing the size of the image and executing a blur helps with segmentation:
The segmentation method uses cv2.adaptiveThreshold() to create a binary image that preserves the most important objects: the center of the reel and the external edge of the reel. This is an important step, since we are only interested in what exists between these two objects. However, life is not perfect and neither is this segmentation: the shadow of the reel on the table became part of the detected binary objects. Also, the outer edge is not fully connected, as you can see in the resulting image on the right (look at the top left of the circumference):
To join broken segments, a morphological operation can be executed:
Finally, the entire reel area can be exposed by iterating through the contours of the image above and discarding those whose area is larger than what is expected for a reel. The resulting binary image (on the left) can then be used as a mask to identify the reel location on the original image:
Keep in mind that I'm not trying to find a universal solution for your problem. I'm merely showing that there might be other solutions that don't depend on Hough circles.
Also, this code might need some adjustments to work on a larger number of cases.
Source code:
import cv2
import numpy as np
import sys
img = cv2.imread("test_images/reel.jpg")
if img is None:
    print('!!! Failed imread')
    sys.exit(-1)
# create output image
output_img = img.copy()
# 1. Preprocess the image: downscale to speed up processing and execute a blur
SCALE_FACTOR = 0.5
smaller_img = cv2.resize(img, dsize=(0, 0), fx=SCALE_FACTOR, fy=SCALE_FACTOR)
blur_img = cv2.medianBlur(smaller_img, 9)
cv2.imwrite('reel1_blur_img.png', blur_img)
# 2. Segment the image to identify the 2 most important contours: the center of the reel and the outer edge
gray_img = cv2.cvtColor(blur_img, cv2.COLOR_BGR2GRAY)
img_bin = cv2.adaptiveThreshold(gray_img, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 19, 4)
cv2.imwrite('reel2_img_bin.png', img_bin)
green_mask = np.zeros((img_bin.shape[0], img_bin.shape[1]), np.uint8)
#green_mask = cv2.cvtColor(img_bin, cv2.COLOR_GRAY2RGB) # debug
contours, hierarchy = cv2.findContours(img_bin, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for contourIdx, cnt in enumerate(contours):
    x, y, w, h = cv2.boundingRect(cnt)
    area = cv2.contourArea(contours[contourIdx])
    #print('contourIdx=', contourIdx, 'w=', w, 'h=', h, 'area=', area)

    # filter out tiny segments
    if area < 5000:
        #cv2.fillPoly(green_mask, pts=[cnt], color=(0, 0, 255)) # red
        continue

    # draw green contour (filled)
    #cv2.fillPoly(green_mask, pts=[cnt], color=(0, 255, 0)) # green
    cv2.fillPoly(green_mask, pts=[cnt], color=(255)) # white

    # debug:
    #cv2.imshow('green_mask', green_mask)
    #cv2.waitKey(0)
cv2.imshow('green_mask', green_mask)
cv2.imwrite('reel2_green_mask.png', green_mask)
# 3. Fix mask: join segments nearby
kernel = np.ones((3,3), np.uint8)
img_dilation = cv2.dilate(green_mask, kernel, iterations=1)
green_mask = cv2.erode(img_dilation, kernel, iterations=1)
cv2.imshow('fixed green_mask', green_mask)
cv2.imwrite('reel3_img.png', green_mask)
# 4. Extract the reel area from the green mask
reel_mask = np.zeros((green_mask.shape[0], green_mask.shape[1]), np.uint8)
#reel_mask = cv2.cvtColor(green_mask, cv2.COLOR_GRAY2RGB) # debug
contours, hierarchy = cv2.findContours(green_mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for contourIdx, cnt in enumerate(contours):
    x, y, w, h = cv2.boundingRect(cnt)
    area = cv2.contourArea(contours[contourIdx])
    print('contourIdx=', contourIdx, 'w=', w, 'h=', h, 'area=', area)

    # filter out segments that are too large to be the reel
    if area > 110000:
        #cv2.fillPoly(reel_mask, pts=[cnt], color=(0, 0, 255)) # red
        continue

    # draw green contour (filled)
    #cv2.fillPoly(reel_mask, pts=[cnt], color=(0, 255, 0)) # green
    cv2.fillPoly(reel_mask, pts=[cnt], color=(255)) # white

    # debug:
    #cv2.imshow('reel_mask', reel_mask)
    #cv2.waitKey(0)
cv2.imshow('reel_mask', reel_mask)
cv2.imwrite('reel4_reel_mask.png', reel_mask)
# 5. Draw the reel area on the original image
contours, hierarchy = cv2.findContours(reel_mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for contourIdx, cnt in enumerate(contours):
    centers, radius = cv2.minEnclosingCircle(cnt)

    # rescale these values back to the original image size
    centers_orig = (centers[0] // SCALE_FACTOR, centers[1] // SCALE_FACTOR)
    radius_orig = radius // SCALE_FACTOR
    print('centers=', centers_orig, 'radius=', radius_orig)
    cv2.circle(output_img, (int(centers_orig[0]), int(centers_orig[1])), int(radius_orig), (128, 0, 255), 5)  # magenta
cv2.imshow('output_img', output_img)
cv2.imwrite('reel5_output.png', output_img)
# display just the pixels from the original image
larger_reel_mask = cv2.resize(reel_mask, (int(img.shape[1]), int(img.shape[0])))
output_reel_img = cv2.bitwise_and(img, img, mask=larger_reel_mask)
cv2.imshow('output_reel_img', output_reel_img)
cv2.imwrite('reel5_output_reel.png', output_reel_img)
cv2.waitKey(0)
At this point, it's possible to use larger_reel_mask to compute a minimal enclosing circle, draw it over this mask to make it a little more round, and retrieve the area of the reel more accurately:
But the 4 lines of code that achieve this improvement I leave as an exercise for the reader.
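For the curious, one possible version of those lines, as a sketch (my guess, reusing larger_reel_mask and img from the code above):
cnts, _ = cv2.findContours(larger_reel_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
(cx, cy), r = cv2.minEnclosingCircle(max(cnts, key=cv2.contourArea))
cv2.circle(larger_reel_mask, (int(cx), int(cy)), int(r), 255, -1)  # round off the mask
output_reel_img = cv2.bitwise_and(img, img, mask=larger_reel_mask)  # re-extract the reel pixels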
I'm trying to use OpenCV to scale down numbers in an image. I am currently able to identify the contours, but I am having trouble figuring out how to scale down the numbers once I have identified them.
Here is an example image:
Here are the contours I have identified:
Here is the code I am using to achieve this:
import cv2
image = cv2.imread("numbers.png")
edged = cv2.Canny(image, 10, 250)
# applying closing function
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
closed = cv2.morphologyEx(edged, cv2.MORPH_CLOSE, kernel)
_, cnts, _ = cv2.findContours(closed.copy(), cv2.RETR_EXTERNAL,
                              cv2.CHAIN_APPROX_SIMPLE)
contours = []
for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    contours.append(approx)
    cv2.drawContours(image, [approx], -1, (0, 255, 0), 2)
cv2.imshow("Output", image)
cv2.waitKey(0)
I want to be able to use the contours to scale down the numbers without affecting the size of the image. Is this possible? Thanks!
Assuming you have an input image named "numbers.png".
First of all, import useful libraries and load the input image:
import cv2
import numpy as np
img = cv2.imread("./numbers.png", 1)
gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
Secondly, you need to binarize the input image and find the external contours of the numbers:
_, im_th = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
_, contours, _ = cv2.findContours(255-im_th, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
So you can see the detected contours will be around the numbers.
Thirdly, find the relative bounding boxes around the numbers and find the middle point coordinates of the boxes (I assume the numbers should be resized and put in the center of the bottom line):
number_imgs = []
number_btm_mid_pos = []
for cnt in contours:
    (x, y, w, h) = cv2.boundingRect(cnt)
    number_imgs.append(img[y:y+h, x:x+w])
    number_btm_mid_pos.append((int(x+w/2), y+h))
Finally, resize the numbers, put them back to the image, and display the result:
# resize images and put it back
output_img = np.ones_like(img) * 255
resize_ratio = 0.5
for (i, num_im) in enumerate(number_imgs):
    num_im = cv2.resize(num_im, (0, 0), fx=resize_ratio, fy=resize_ratio)
    (img_h, img_w) = num_im.shape[:2]
    # x1, y1, x2, y2
    btm_x, btm_y = number_btm_mid_pos[i]
    x1 = btm_x - int(img_w / 2)
    y1 = btm_y - img_h
    x2 = x1 + img_w
    y2 = y1 + img_h
    output_img[y1:y2, x1:x2] = num_im
cv2.imshow("Output Image", output_img)
cv2.imshow("Original Input", img)
cv2.waitKey()
You can adjust the variable resize_ratio to make sure the ratio is what you expect. The result should be something like this image:
You may notice the last number, "10", is split apart. That is because "1 0" was recognized as two separate digits. To make it perfect, one could write some code to test the gap/distance between every two digits. However, that would not be closely relevant here, and it is a bit hard to generalize the solution based on the limited test input, so I stop here.
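As a rough sketch of that idea (the threshold is illustrative), one could sort the bounding boxes left to right and merge any pair whose horizontal gap is small compared to the digit width, before cropping and resizing:
boxes = sorted(cv2.boundingRect(c) for c in contours)  # sorted by x
merged = [list(boxes[0])]
for x, y, w, h in boxes[1:]:
    px, py, pw, ph = merged[-1]
    gap = x - (px + pw)
    if gap < 0.5 * max(pw, w):  # heuristic: gap smaller than half a digit width
        # extend the previous box to cover both digits
        merged[-1] = [px, min(py, y),
                      max(px + pw, x + w) - px,
                      max(py + ph, y + h) - min(py, y)]
    else:
        merged.append([x, y, w, h])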
Anyway, good luck and have fun.