I'm trying to use OpenCV to scale down numbers in an image. I can currently identify the contours, but I'm having trouble figuring out how to scale down the numbers once I have identified them.
Here is an example image:
Here are the contours I have identified:
Here is the code I am using to achieve this:
import cv2

image = cv2.imread("numbers.png")
edged = cv2.Canny(image, 10, 250)

# applying closing function
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
closed = cv2.morphologyEx(edged, cv2.MORPH_CLOSE, kernel)

_, cnts, _ = cv2.findContours(closed.copy(), cv2.RETR_EXTERNAL,
                              cv2.CHAIN_APPROX_SIMPLE)

contours = []
for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    contours.append(approx)
    cv2.drawContours(image, [approx], -1, (0, 255, 0), 2)

cv2.imshow("Output", image)
cv2.waitKey(0)
I want to be able to use the contours to scale down the numbers without affecting the size of the image. Is this possible? Thanks!
Assuming you have an input image named "numbers.png".
First of all, import useful libraries and load the input image:
import cv2
import numpy as np
img = cv2.imread("./numbers.png", 1)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # imread loads BGR, not RGB
Secondly, you need to binarize the input image and find the external contours of the numbers:
_, im_th = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
_, contours, _ = cv2.findContours(255-im_th, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
So you can see the detected contours will be around the numbers.
Thirdly, find the relative bounding boxes around the numbers and find the middle point coordinates of the boxes (I assume the numbers should be resized and put in the center of the bottom line):
number_imgs = []
number_btm_mid_pos = []
for cnt in contours:
    (x, y, w, h) = cv2.boundingRect(cnt)
    number_imgs.append(img[y:y+h, x:x+w])
    number_btm_mid_pos.append((int(x+w/2), y+h))
Finally, resize the numbers, put them back to the image, and display the result:
# resize images and put them back
output_img = np.ones_like(img) * 255
resize_ratio = 0.5
for (i, num_im) in enumerate(number_imgs):
    num_im = cv2.resize(num_im, (0, 0), fx=resize_ratio, fy=resize_ratio)
    (img_h, img_w) = num_im.shape[:2]
    # x1, y1, x2, y2
    btm_x, btm_y = number_btm_mid_pos[i]
    x1 = btm_x - int(img_w / 2)
    y1 = btm_y - img_h
    x2 = x1 + img_w
    y2 = y1 + img_h
    output_img[y1:y2, x1:x2] = num_im
cv2.imshow("Output Image", output_img)
cv2.imshow("Original Input", img)
cv2.waitKey()
You can adjust the variable "resize_ratio" to make sure the ratio is what you expected. The result should be something like this image here:
You may notice the last number, "10", splits apart: the "1" and the "0" were detected as two separate digits. To make it perfect, you could write some code that tests the gap/distance between every two digits and merges boxes that are close together. However, that is not closely related to the question, and it would be hard to generalize the solution from the limited test input, so I stop here.
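For what it's worth, here is a minimal, untested sketch of that merging idea; the 5 px gap threshold is an assumption you would need to tune for your image:
# Merge bounding boxes whose horizontal gap is small, so multi-digit
# numbers such as "10" stay together (the gap threshold is a guess):
boxes = sorted(cv2.boundingRect(c) for c in contours)  # sorted by x
merged = [list(boxes[0])]
for x, y, w, h in boxes[1:]:
    mx, my, mw, mh = merged[-1]
    if x - (mx + mw) < 5:  # gap to the previous box is small: merge
        top = min(my, y)
        bottom = max(my + mh, y + h)
        merged[-1] = [mx, top, max(mx + mw, x + w) - mx, bottom - top]
    else:
        merged.append([x, y, w, h])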
Anyway, good luck and have fun.
I am new to CV and I just learned how to detect the edges of a paper. I want to try something more complicated, so I made a screenshot of a movie website and want to detect the poster on the page. It works well if the background color is different from the poster's, but when they are similar in color, I can't find the edge of the picture with cv2.findContours().
The original picture is:
Poster
And what I do is:
img = cv2.imread('pic5.jpg')
orig = img.copy()
image = orig

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
binary = cv2.medianBlur(gray, 3)
# blur = cv2.GaussianBlur(binary, (5, 5), 0)
# ret, binary = cv2.threshold(blur, 127, 255, cv2.THRESH_TRUNC)
edged = cv2.Canny(binary, 3, 30)
show(edged)

# detect edges
contours, hierarchy = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
cnts = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

for c in cnts:
    # approximate the contour
    peri = cv2.arcLength(c, True)
    eps = 0.02
    approx = cv2.approxPolyDP(c, eps * peri, True)
    # detect square (4 points)
    if len(approx) == 4:
        screenCnt = approx
        break

res = cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 2)
show(orig)
And the result is:
after preprocess
What I detect
I don't know if this method works. Is it possible to detect the square part based on the background color (regardless of the poster's color)?
You may continue from the edged result, using a closing morphological operation to close small gaps.
Instead of searching for a rectangle with approxPolyDP, I suggest finding the bounding rectangle of the largest connected component (or largest contour).
In my code sample, I replaced findContours with connectedComponentsWithStats because of the external boundary line.
You may use an opening morphological operation to get rid of the external line and continue using findContours; a sketch of that alternative follows the results below.
You may also use approxPolyDP to refine the result.
Here is the code sample:
import numpy as np
import cv2
img = cv2.imread('pic5.png')
orig = img.copy()
image = orig
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
binary = cv2.medianBlur(gray, 3)
edged = cv2.Canny(binary, 3, 30)
edged = cv2.morphologyEx(edged, cv2.MORPH_CLOSE, np.ones((5,5))) # Close small gaps
#contours, hierarchy = cv2.findContours(edged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
#c = max(contours, key=cv2.contourArea) # Get the largest contour
#x, y, w, h = cv2.boundingRect(c) # Find bounding rectangle.
nb_components, output, stats, centroids = cv2.connectedComponentsWithStats(edged, 8) # finding components
# https://stackoverflow.com/a/61662694/4926757
# Find the largest non background component.
# Note: range() starts from 1 since 0 is the background label.
max_label, max_size = max([(i, stats[i, cv2.CC_STAT_AREA]) for i in range(1, nb_components)], key=lambda x: x[1])
# Find bounding rectangle of largest connected component.
x = stats[max_label, cv2.CC_STAT_LEFT]
y = stats[max_label, cv2.CC_STAT_TOP]
w = stats[max_label, cv2.CC_STAT_WIDTH]
h = stats[max_label, cv2.CC_STAT_HEIGHT]
res = image.copy()
cv2.rectangle(res, (x, y), (x+w, y+h), (0, 255, 0), 2) # Draw a rectangle
cv2.imshow('edged', edged)
cv2.imshow('res', res)
cv2.waitKey()
cv2.destroyAllWindows()
Results:
edged:
res:
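For completeness, here is a minimal, untested sketch of the findContours alternative mentioned above; the 3x3 opening kernel is an assumption and may need tuning so that it removes the thin boundary line without erasing the poster's edges:
# Opening removes the thin external boundary line, then the largest
# remaining contour gives the bounding box (kernel size is a guess):
opened = cv2.morphologyEx(edged, cv2.MORPH_OPEN, np.ones((3, 3), np.uint8))
contours, _ = cv2.findContours(opened, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
c = max(contours, key=cv2.contourArea)  # largest contour
x, y, w, h = cv2.boundingRect(c)        # its bounding rectangle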
I would like to crop out a region full of small curves in an image.
The original image is like the following:
Using morphological opening, I can remove most of the noise. The result looks like:
I tried to use dilate to connect these pixels in my desired region, but the result is not satisfactory.
Is there any function in opencv that can locate this region?
You are on the right track, here's an approach using morphological transformations
Convert image to grayscale and Gaussian blur
Otsu's threshold
Perform morphological operations
Find contours and filter using maximum area
Extract ROI
The idea is to connect the desired region into a single contour then filter using maximum area. This way, we can grab the region as one piece. Here's the detected area
Afterwards, we can extract the region with Numpy slicing
import cv2
image = cv2.imread('1.jpg')
original = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (9,9), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_OTSU + cv2.THRESH_BINARY)[1]  # threshold the blurred image
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2,2))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
dilate_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9,9))
dilate = cv2.dilate(opening, dilate_kernel, iterations=5)
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
for c in cnts:
    x, y, w, h = cv2.boundingRect(c)
    cv2.rectangle(image, (x, y), (x + w, y + h), (36, 255, 12), 2)
    ROI = original[y:y+h, x:x+w]
    break
cv2.imshow('thresh', thresh)
cv2.imshow('opening', opening)
cv2.imshow('dilate', dilate)
cv2.imshow('image', image)
cv2.imshow('ROI', ROI)
cv2.waitKey(0)
Here's my approach using NumPy's sum. Just sum the pixel values along the x and y axis individually, set up some thresholds for the minimum number of pixels describing the desired area, and obtain proper column and row indices.
Let's have a look at the following code:
import cv2
import numpy as np
from matplotlib import pyplot as plt
# Read input image; get shape
img = cv2.imread('images/UKf5Z.jpg', cv2.IMREAD_GRAYSCALE)
h, w = img.shape[0:2]  # shape is (rows, cols), i.e. (height, width)
# Threshold to prevent JPG artifacts
_, img = cv2.threshold(img, 240, 255, cv2.THRESH_BINARY)
# Sum pixels along x and y axis
xSum = np.sum(img / 255, axis=0)
ySum = np.sum(img / 255, axis=1)
# Visualize curves
plt.plot(xSum)
plt.plot(ySum)
plt.show()
# Set up thresholds
xThr = 15
yThr = 15
# Find proper row indices
tmp = np.argwhere(xSum > xThr)
tmp = tmp[np.where((tmp > 20) & (tmp < w - 20))]
x1 = tmp[0]
x2 = tmp[-1]
# Find proper column indices
tmp = np.argwhere(ySum > yThr)
tmp = tmp[np.where((tmp > 20) & (tmp < h - 20))]
y1 = tmp[0]
y2 = tmp[-1]
# Visualize result
out = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
cv2.rectangle(out, (x1, y1), (x2, y2), (0, 0, 255), 4)
cv2.imshow('out', out)
cv2.waitKey(0)
The curves of the summations look like this (just for visualization purposes):
And, for visualization I just drew a red rectangle described by the found indices.
As you can see, I manually excluded some "border" area of 20 pixels, since there are some larger artifacts. Depending on the location of your desired area, this may be sufficient. Otherwise, your approach using morphological opening should be kept.
Hope that helps!
EDIT: As suggested by Mark in his answer, using mean instead of sum avoids adaptations regarding varying image dimensions. Changing the code appropriately is left to the reader. :-)
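A minimal sketch of that change, assuming the same thresholded img as above; the 2% thresholds are placeholder values:
# With mean instead of sum, the profiles stay in [0, 1] regardless of
# image size, so the thresholds become relative fractions (guesses):
xMean = np.mean(img / 255, axis=0)
yMean = np.mean(img / 255, axis=1)
xThr = 0.02  # at least 2% of a column must be white
yThr = 0.02  # at least 2% of a row must be white
tmp = np.argwhere(xMean > xThr)  # then proceed exactly as before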
Same idea as @HansHirse's excellent answer, but I had made a diagram that I couldn't share in the comments.
I'm trying to read all of the handwritten digits in the scanned image here
I tried looking through pixel-by-pixel using PIL, cropping the sub-images, then feeding them through a neural network, but the regions that were being cropped never quite lined up and led to a lot of inaccuracy.
I've also tried using OpenCV to find all the grey squares, then cropping the images and feeding them through a neural network, but I couldn't get it to find all of them; it would miss ~30% of the squares. (I'm not very experienced with OpenCV, so I could be messing something up.)
So I'm just looking for a potential idea/solution for this problem, so any suggestions would be appreciated, thanks in advance!
I assume the input image name is "squares.jpg".
First of all, import required libraries and load image in both RGB and Gray format:
import cv2
import numpy as np
image = cv2.imread("squares.jpg", 1)
image_gray = cv2.imread("squares.jpg", 0)
Then, we perform a simple operation to clean some noise from the input image using the np.where() function:
image_gray = np.where(image_gray > 240, 255, image_gray)
image_gray = np.where(image_gray <= 240, 0, image_gray)
Because we want to grab the whole square regions from the image, we need to blur it a little before applying the adaptive thresholding method:
image_gray = cv2.blur(image_gray, (5, 5))
im_th = cv2.adaptiveThreshold(image_gray, 255,
                              cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                              cv2.THRESH_BINARY, 115, 1)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
im_th = cv2.morphologyEx(im_th, cv2.MORPH_OPEN, kernel, iterations=3)
Use contour detection in OpenCV to find all possible regions:
_, contours, _ = cv2.findContours(im_th.copy(), cv2.RETR_LIST,
cv2.CHAIN_APPROX_SIMPLE)
contours = sorted(contours, key=cv2.contourArea, reverse=True)
contours = contours[1:]  # drop the biggest contour
Finally, try to find the potential square regions based on the ratio of height and width:
square_rects = []
square_areas = []
for i, cnt in enumerate(contours):
    (x, y, w, h) = cv2.boundingRect(cnt)
    ar = w / float(h)
    if 0.9 < ar < 1.1:
        square_rects.append(((x, y), (x+w, y+h)))
        square_areas.append(w*h)  # store area information
We need to remove anything that is too small from the list, as follows:
import statistics
median_size_limit= statistics.median(square_areas) * 0.8
square_rects = [rect for i, rect in enumerate(square_rects)
                if square_areas[i] > median_size_limit]
You can visually check the output by drawing all the rectangles on the original image:
for rect in square_rects:
    cv2.rectangle(image, rect[0], rect[1], (0, 255, 0), 2)
cv2.imwrite("_output_image.png", image)
cv2.imshow("image", image)
cv2.waitKey()
You can use "square_rects" to locate all the squares and crop them from the original image.
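A minimal sketch of that cropping step; it reads a clean copy first, because green rectangles were already drawn on "image", and the output file names are hypothetical:
# Crop each detected square from an untouched copy of the input:
clean = cv2.imread("squares.jpg", 1)
for i, ((x1, y1), (x2, y2)) in enumerate(square_rects):
    cv2.imwrite("square_%03d.png" % i, clean[y1:y2, x1:x2])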
The following is the preview of the final result.
Cheers.
I have an image
consisting of 5 checkboxes arranged in a parallel manner.
I need to separate checkbox button
and checkbox text
and save them separately.
There can be any number of checkboxes in an image, arranged in a parallel manner. How can I split the checkbox button and the checkbox text and save them separately?
I am new to computer vision. Please guide me how to achieve it.
I have split the images that are arranged in a parallel manner and saved them like the below images.
Here is my working code that splits the square checkbox and the text in this image.
# Import necessary libraries
from matplotlib import pyplot as plt
import cv2
# Read image
image = cv2.imread('data_2/5.png')
# Height and width of the image
height, width = image.shape[:2]
print("Dimensions of the image.")
print("Height:",height)
print("Width:",width)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (5,5), 0)
# Finding Edges
edges = cv2.Canny(gray, 60, 255)
# contours -> an outline representing or bounding the shape.
_,cnts, hierarchy = cv2.findContours(edges.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = sorted(cnts, key=cv2.contourArea, reverse=True)[:10]
count = 1
for contour in contours:
    if count <= 4:
        # print("Count:", count)
        count = count + 1
        area = cv2.contourArea(contour)
        # Skip contours that are far too large or too small
        # (the original "and" condition could never be true)
        if area > 100000 or area < 1000:
            continue
        perimeter = cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, 0.01*perimeter, True)
        if len(approx) == 4:
            cv2.circle(image, (720, 360), 5, (255, 0, 0), 5)
            cv2.drawContours(image, [approx], -1, (0, 255, 0), 2)
            M = cv2.moments(approx)
            if M["m00"] != 0:
                cX = int(M["m10"] / M["m00"])
                cY = int(M["m01"] / M["m00"])
            else:
                cX, cY = 0, 0
            # Corner points of the approximated square
            P1x, P1y = approx[0][0]
            P2x, P2y = approx[1][0]
            P3x, P3y = approx[2][0]
            P4x, P4y = approx[3][0]
            plt.imshow(image)
            plt.title('Detecting Square')
            plt.show()
            # Cropping the square image using array slices -- it's a NumPy array
            cropped_square = image[P1y:P3y, P2x:P3x]
            # Cropping the text image
            cropped_text = image[P1y:P3y, P3x+5:width]
            # Displaying the cropped square and cropped text image
            plt.imshow(cropped_square)
            plt.title('Cropped Square')
            plt.show()
            plt.imshow(cropped_text)
            plt.title('Cropped Text')
            plt.show()
            # Now saving the cropped square and cropped text image
            cv2.imwrite('results/square1.png', cropped_square)
            cv2.imwrite('results/text1.png', cropped_text)
The Output of the above Program:
I am a bit late with the answer, but maybe it will still serve someone as an alternative idea.
First you search the image for contours and create masks and final images to display the outcome.
For every contour you compute its size, which helps you separate the boxes from the letters (the boxes are larger). For the checkmarks I found that filtering by size AND by the distance between the extreme left and right points of the contour works (you could find a lot of different, and probably better, criteria to filter the checkmarks; I focused only on the image you posted in your question).
To get to the area of interest, you find the x, y, w, h values with cv2.boundingRect().
Then you simply draw it on the images created in the beginning (note that these outputs are only black and white). Maybe this approach will give you some idea. Cheers!
Example in code:
import cv2
import numpy as np
img = cv2.imread('checkbox.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, threshold = cv2.threshold(gray, 170, 255, cv2.THRESH_BINARY_INV)
im, contours, hierarchy = cv2.findContours(threshold, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

final = np.zeros(gray.shape, np.uint8)
mask = np.zeros(gray.shape, np.uint8)
final2 = np.zeros(gray.shape, np.uint8)
list1 = []

for i in range(0, len(contours)):
    cnt = contours[i]
    epsilon = 0.1 * cv2.arcLength(cnt, True)
    approx = cv2.approxPolyDP(cnt, epsilon, True)
    size = cv2.contourArea(approx)
    extLeft = tuple(cnt[cnt[:, :, 0].argmin()][0])
    extRight = tuple(cnt[cnt[:, :, 0].argmax()][0])
    distance = np.sqrt((extLeft[0] - extRight[0])**2 + (extLeft[1] - extRight[1])**2)
    x, y, w, h = cv2.boundingRect(cnt)
    mask[y:y+h, x:x+w] = 0  # note: rows are y, columns are x
    if 700 > size > 220:
        cv2.drawContours(mask, contours, i, 255, -1)
        cv2.drawContours(final, contours, i, 255, 2)
    elif 16 < distance < 17 and size > 60:
        list1.append(cnt)
    elif size < 250:
        cv2.drawContours(final2, contours, i, (255, 255, 255), 1)

for i in list1:
    cv2.drawContours(final, [i], -1, (255, 255, 255), -1)

cv2.bitwise_not(final, final)
cv2.bitwise_not(final2, final2)

cv2.imwrite('c_orig.png', img)
cv2.imwrite('c_boxes.png', final)
cv2.imwrite('c_text.png', final2)

cv2.imshow('img', img)
cv2.imshow('img2', final)
cv2.imshow('img3', final2)
cv2.waitKey(0)
cv2.destroyAllWindows()
Output:
Assuming that, irrespective of the number of checkboxes, your images follow a grid-like pattern, a good choice would be to try template matching; MSTM is an example.
You could find checked or unchecked boxes in the image using template matching and extract the region to the right of each match. Since you've mentioned that there can be up to 5 checkboxes in a row, you can take the width of the image, divide it by 5, and get an approximate width for the text region.
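A rough, untested sketch of that idea follows; the file names are hypothetical, you would need a cropped example of an empty checkbox as the template, and overlapping matches would still need non-maximum suppression:
import cv2
import numpy as np

img = cv2.imread("checkboxes.png", cv2.IMREAD_GRAYSCALE)
template = cv2.imread("box_template.png", cv2.IMREAD_GRAYSCALE)
th, tw = template.shape[:2]
text_w = img.shape[1] // 5  # ~5 checkboxes per row, per the question

res = cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED)
ys, xs = np.where(res >= 0.8)  # keep matches above a similarity threshold
for x, y in zip(xs, ys):
    box = img[y:y+th, x:x+tw]              # the checkbox itself
    text = img[y:y+th, x+tw:x+tw+text_w]   # region to its right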
I previously posted here
And I've seen this post
Despite the great information provided by the community, I have been unable to smoothly trace an image using cv2.findContours(). While in my previous post I asked about generating splines to smoothly trace curves, my focus now is to get a smooth trace of an object, regardless of how many points are generated for the contour. I consistently get results with jagged edges:
My desired output would be something similar to this, which I've manually created in Adobe Illustrator:
I have experimented extensively with blurring and thresholding, and have been unable to get a smooth outline. I am running OpenCV version 3.3.0.
import numpy as np
import cv2
import math
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
print(cv2.__version__)
im = cv2.imread('img.jpg')
# orient the image properly
# grab the dimensions of the image and calculate the center
# of the image
(h, w) = im.shape[:2]
center = (w / 2, h / 2)
# rotate the image 180 degrees
M = cv2.getRotationMatrix2D(center, 180, 1.0)
rotated = cv2.warpAffine(im, M, (w, h))
# flip the image across
flippedColor = cv2.flip(rotated, 1) #for testing
imgray = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)
flipped = cv2.flip(imgray, 1)
(thresh, binRed) = cv2.threshold(flipped, 180, 255, cv2.THRESH_BINARY)
_, Rcontours, hier_r = cv2.findContours(binRed,cv2.RETR_CCOMP,cv2.CHAIN_APPROX_SIMPLE)
r_areas = [cv2.contourArea(c) for c in Rcontours]
max_rarea = np.argmax(r_areas)
CntExternalMask = np.ones(binRed.shape[:2], dtype="uint8") * 255
contour= Rcontours[max_rarea]
cv2.drawContours(flippedColor,[contour],-1,(255,0,0),1)
I can use this code to show you that effect.
import cv2
img = cv2.imread(r'E:/test_opencv/images/0ub4h.jpg')
imgray = cv2.cvtColor( img, cv2.COLOR_BGR2GRAY )
ret, thresh = cv2.threshold( imgray, 220, 255, cv2.THRESH_BINARY )
cv2.imshow('1',cv2.resize(thresh,(600,400)))
_, contours, hierarchy = cv2.findContours( thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE )
cnt = sorted(contours, key=cv2.contourArea)[-1]
epsilon = 0.1 * cv2.arcLength( cnt, True )  # use the selected contour, not contours[0]
approx = cv2.approxPolyDP( cnt, epsilon, True )
cv2.drawContours( img, [approx],-1, (0, 255, 0), 3 )
cv2.imshow( "Contour", cv2.resize(img,(600,400)) )
cv2.imwrite(r'E:/test.jpg',img)
cv2.waitKey( 0 )
cv2.destroyAllWindows()
This is my result. The green contour is the original, while the red contour is approximated and the gray dots are the approximated dots.
# find contours without approx
cnts = cv2.findContours(threshed, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)[-2]

# get the max-area contour
cnt = sorted(cnts, key=cv2.contourArea)[-1]

# calc arclength
arclen = cv2.arcLength(cnt, True)

# approx the contour; a smaller epsilon keeps more points
# epsilon = arclen * 0.001
epsilon = arclen * 0.0001
approx = cv2.approxPolyDP(cnt, epsilon, True)

cv2.drawContours(img, [approx], -1, (0, 0, 255), 1)
cv2.imwrite("res.png", img)
For more details, refer to my other answer: Is there a function similar to OpenCV findContours that detects curves and replaces points with a spline?
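As a closing note, here is a hedged, untested sketch of the spline idea itself, using SciPy (an extra dependency, not part of OpenCV); the smoothing factor s is an assumption you would tune:
import numpy as np
import cv2
from scipy.interpolate import splprep, splev

pts = cnt.reshape(-1, 2).astype(np.float64)  # contour points as (N, 2)
tck, _ = splprep([pts[:, 0], pts[:, 1]], s=len(pts), per=True)  # periodic B-spline fit
u = np.linspace(0, 1, 1000)
xs, ys = splev(u, tck)  # resample a smooth outline
smooth = np.stack([xs, ys], axis=1).astype(np.int32).reshape(-1, 1, 2)
cv2.drawContours(img, [smooth], -1, (255, 0, 0), 1)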