How to erase endpoints of lines in OpenCV - python

Hey guys, I'm from Viet Nam so my English is not good; forgive me if my words confuse you.
I have some lines 1 pixel wide (after I skeletonize the binary image). How can I make a loop that erases the endpoints of the curves until no pixel has more than two 8-connected neighbours? (I just want to keep the longest line.) Thank you for your help :(
Here is the thinned image link:
I'm trying to determine the length of shrimp using image processing. My idea is to first threshold the image, then thin the object so that a curve represents the length of the object, and finally measure the length of that curve.
This is my code:
import cv2
import numpy as np
from skimage import morphology
from skimage import io

kernel = np.ones((3, 3), np.uint8)
kernel2 = np.ones((30, 30), np.uint8)

img_ori = cv2.imread("0523.1212.06b.png")
img = cv2.cvtColor(img_ori, cv2.COLOR_BGR2GRAY)

# Threshold the image
ret, mask = cv2.threshold(img, 100, 255, cv2.THRESH_BINARY_INV)
opening = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
closing = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, kernel2)
#closing_display = cv2.resize(closing, (605,645))
#cv2.imshow("Closing", closing_display)

# Erase small objects
cnts = cv2.findContours(closing, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    if area < 5500:
        cv2.drawContours(closing, [c], -1, (0, 0, 0), -1)
closing_display = cv2.resize(closing, (605, 645))
cv2.imshow("Shrimp Detecting", closing_display)

# Determine contours
contours, hierarchy = cv2.findContours(closing, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

# Thinning
thinned = cv2.ximgproc.thinning(closing)

# Draw contours on thinned (single-channel image, so use a scalar colour)
cv2.drawContours(thinned, contours, -1, 255, 2)
thinned_display = cv2.resize(thinned, (605, 645))
cv2.imshow("Thinned Image", thinned_display)

cv2.waitKey(0)
cv2.destroyAllWindows()
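One possible way to implement the pruning loop the question asks for (a sketch, not from the original post): assume thinned is a 0/255 skeleton image; count each pixel's 8-connected neighbours with a convolution, erase endpoints (pixels with exactly one neighbour), and repeat until no pixel has more than two neighbours.

import cv2
import numpy as np

def prune_skeleton(skel):
    # skel: 0/255 skeleton image with 1-pixel-wide lines
    img = (skel > 0).astype(np.uint8)
    kernel = np.array([[1, 1, 1],
                       [1, 0, 1],
                       [1, 1, 1]], dtype=np.float32)
    while True:
        # number of 8-connected neighbours of every skeleton pixel
        neighbours = cv2.filter2D(img, -1, kernel, borderType=cv2.BORDER_CONSTANT)
        # stop when no skeleton pixel has more than two neighbours
        if not np.any((img == 1) & (neighbours > 2)):
            break
        # endpoints have exactly one neighbour; erase them
        endpoints = (img == 1) & (neighbours == 1)
        if not np.any(endpoints):
            break  # e.g. a closed loop: nothing left to prune
        img[endpoints] = 0
    return img * 255

pruned = prune_skeleton(thinned)
print(cv2.countNonZero(pruned))  # rough curve length in pixels

Note this shortens every branch, including the longest one, by the same number of iterations, so treat it as a starting point rather than an exact longest-path extraction.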


Draw grid on a gridless (fully or partially) table image

I have an image that contains a table. The table and the image can be many sizes, and the table can be fully gridded (with only some blank spots that need to be filled), or have only vertical grid lines, or only horizontal grid lines.
I've searched the web for a long time and found no solution that worked for me.
I found the following questions that seem to be suitable for me:
Python & OpenCV: How to add lines to the gridless table
Draw a line on a gridless image Python Opencv
How to repair incomplete grid cells and fix missing sections in image
Python & OpenCV: How to crop half-formed bounding boxes
My code is taken from the answers to the above questions, and the "best" result I got from those codes is that it drew two lines: one at the rightmost part and one at the leftmost part.
I'm kind of new to OpenCV and the image processing field, so I'm not sure how I can fix the above questions' code to suit my needs or how to accomplish my needs exactly. I would appreciate any help you can provide.
Example of an image table:
Update:
To remove the horizontal lines I use exactly the code you can find here, but the result I get on the example image is this:
As you can see, it removed most of them but not all of them, and when I try to apply the same for the vertical ones (I tried the same code with rotation, or flipping the kernel) it does not work at all...
I also tried this code but it didn't work either.
Update 2:
I was able to remove the lines using this code:
import cv2
import numpy as np

def removeLines(result, axis) -> np.ndarray:
    img = result.copy()
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    if axis == "horizontal":
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 25))
    elif axis == "vertical":
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (25, 1))
    else:
        raise ValueError("Axis must be either 'horizontal' or 'vertical'")
    detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
    cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    result = img.copy()
    for c in cnts:
        cv2.drawContours(result, [c], -1, (255, 255, 255), 2)
    return result

gridless = removeLines(removeLines(cv2.imread(image_path), 'horizontal'), 'vertical')
Result:
Problem:
After I remove lines, when I try to draw the vertical lines using this code:
# read image
img = old_image.copy()  # cv2.imread(image_path1)
hh, ww = img.shape[:2]
# convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# average gray image down to one row
column = cv2.resize(gray, (ww, 1), interpolation=cv2.INTER_AREA)
# threshold on white
thresh = cv2.threshold(column, 248, 255, cv2.THRESH_BINARY)[1]
# get contours
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
# draw vertical lines
for cntr in contours:
    x, y, w, h = cv2.boundingRect(cntr)
    xcenter = x + w // 2
    cv2.line(original_image, (xcenter, 0), (xcenter, hh - 1), (0, 0, 0), 1)
I get this result:
Update 3:
When I try even thresh = cv2.threshold(column, 254, 255, cv2.THRESH_BINARY)[1] (I tried lowering it 1 by 1 down to 245, for both the max value and the threshold value; each time I get a different or similar result, but always either too many lines or too few) I get the following:
Input:
Output:
It's putting too many lines instead of just 1 line in each column
Code:
# read image
img = old_image.copy()  # cv2.imread(image_path1)
hh, ww = img.shape[:2]
# convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# average gray image down to one row
column = cv2.resize(gray, (ww, 1), interpolation=cv2.INTER_AREA)
# threshold on white
thresh = cv2.threshold(column, 254, 255, cv2.THRESH_BINARY)[1]
# get contours
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
for cntr in contours:
    x, y, w, h = cv2.boundingRect(cntr)
    xcenter = x + w // 2
    cv2.line(original_image, (xcenter, 0), (xcenter, hh - 1), (0, 0, 0), 1)
Here is one way to get the lines in Python/OpenCV. Average the image down to 1 column. Then threshold and get the contours. Then get the bounding boxes and find the vertical centers. Draw lines at those places.
If you do not want the extra lines, crop your image first to get the inside of the table.
Input:
import cv2
import numpy as np

# read image
img = cv2.imread("table4.png")
hh, ww = img.shape[:2]

# convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# average gray image to one column
column = cv2.resize(gray, (1, hh), interpolation=cv2.INTER_AREA)

# threshold on white
thresh = cv2.threshold(column, 248, 255, cv2.THRESH_BINARY)[1]

# get contours
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

# loop over contours, get bounding boxes, and draw a horizontal line at each ycenter
result = img.copy()
for cntr in contours:
    x, y, w, h = cv2.boundingRect(cntr)
    ycenter = y + h // 2
    cv2.line(result, (0, ycenter), (ww - 1, ycenter), (0, 0, 255), 1)

# write results
cv2.imwrite("table4_lines3.png", result)

# display results
cv2.imshow("RESULT", result)
cv2.waitKey(0)
Result:
You wrote that you tried to remove the lines using the code, but it did not work.
It works fine for me in Python/OpenCV.
Read the input
Convert to grayscale
Threshold to show the horizontal lines
Apply morphology open with a horizontal kernel to isolate the horizontal lines
Get their contours
Draw the contours on a copy of the input as white to cover over the black horizontal lines
Save the results
Input:
import cv2
import numpy as np

# read the input
img = cv2.imread('table4.png')

# convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# threshold
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# do morphology to detect lines
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (25, 1))
detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)

# get contours
cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]

# draw contours as white on a copy of the input
result = img.copy()
for c in cnts:
    cv2.drawContours(result, [c], -1, (255, 255, 255), 2)

# save results
cv2.imwrite('table4_horizontal_lines_threshold.png', thresh)
cv2.imwrite('table4_horizontal_lines_detected.png', detected_lines)
cv2.imwrite('table4_horizontal_lines_removed.png', result)

# show results
cv2.imshow('thresh', thresh)
cv2.imshow('morphology', detected_lines)
cv2.imshow('result', result)
cv2.waitKey(0)
Threshold Image:
Morphology Detected Lines Image:
Result:

Computer Vision: Opencv Counting small circles inside big circle

Here is the image I have been working on.
The goal is to detect the small circles inside the big one.
Currently, what I have done is convert the image to grayscale and apply a threshold (cv2.THRESH_OTSU), which resulted in this image.
After this I filtered out large objects using findContours and applied a morph open using an elliptical kernel, which I found on Stack Overflow.
The resulting image is like this.
Can someone guide me on the correct path and point out where I'm going wrong?
Below is the code I have been working on.
import cv2
import numpy as np

# Load image, grayscale, Otsu's threshold
image = cv2.imread('01.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
#cv2.imwrite('thresh.jpg', thresh)

# Filter out large non-connecting objects
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    #print(area)
    if area < 200 and area > 0:
        cv2.drawContours(thresh, [c], 0, 0, -1)

# Morph open using elliptical shaped kernel
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=3)

# Find circles
cnts = cv2.findContours(opening, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    if area > 20 and area < 50:
        ((x, y), r) = cv2.minEnclosingCircle(c)
        cv2.circle(image, (int(x), int(y)), int(r), (36, 255, 12), 2)

cv2.namedWindow('orig', cv2.WINDOW_NORMAL)
cv2.imshow('orig', thresh)
cv2.namedWindow('image', cv2.WINDOW_NORMAL)
cv2.imshow('image', image)
cv2.waitKey()
Thank you!
You throw away a lot of useful information by converting your image to grayscale.
Why not use the fact that the spots you are looking for are the only thing that is red/orange?
I multiplied the saturation channel with the red channel, which gave me this image:
Now finding the white blobs becomes trivial.
Experiment with different weights for those channels, or apply thresholds first. There are many ways. Experiment with different illumination, different backgrounds until you get the ideal input for your image processing.
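A minimal sketch of that channel product (the scaling is an assumption; the filename is taken from the question's code):

import cv2
import numpy as np

image = cv2.imread('01.jpg')
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
sat = hsv[:, :, 1].astype(np.float32) / 255.0
red = image[:, :, 2].astype(np.float32) / 255.0  # OpenCV loads BGR, so index 2 is red
product = (sat * red * 255).astype(np.uint8)

# the white blobs can now be thresholded, e.g. with Otsu
blobs = cv2.threshold(product, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
cv2.imshow('product', product)
cv2.imshow('blobs', blobs)
cv2.waitKey(0)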
The main problem in your code is the flag that you are using in cv2.findContours() function.
For such a problem, in which we have to find contours that can appear inside another contour (the big circle), we should not use the flag cv2.RETR_EXTERNAL; instead use cv2.RETR_TREE. Click here for detailed info.
Also, it is always better to use cv2.CHAIN_APPROX_NONE instead of cv2.CHAIN_APPROX_SIMPLE if memory is not an issue. Click here for detailed info.
Thus, the following simple code can be used to solve this problem.
import cv2
import numpy as np
Image = cv2.imread("Adg5.jpg")
GrayImage = cv2.cvtColor(Image, cv2.COLOR_BGR2GRAY)
# Applying Otsu's Thresholding
Retval, ThreshImage = cv2.threshold(GrayImage, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
# Finding Contours in the image
Contours, Hierarchy = cv2.findContours(ThreshImage, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
# Taking only those contours which have no child contour.
FinalContours = [Contours[i] for i in range(len(Contours)) if Hierarchy[0][i][2] == -1]
# Drawing contours
Image = cv2.drawContours(Image, FinalContours, -1, (0, 255, 0), 1)
cv2.imshow("Contours", Image)
cv2.waitKey(0)
Resulting image
In this method, a lot of noise at the boundary also comes through, but the required orange points are detected as well. Now the task is to remove the boundary noise.
Another method that removes boundary noise to a great extent is similar to Piglet's approach.
Here, I am using HSV image to segment out the orange points and then detecting them using the above approach.
import cv2
import numpy as np
Image = cv2.imread("Adg5.jpg")
HSV_Image = cv2.cvtColor(Image, cv2.COLOR_BGR2HSV)
# Extracting orange colour using HSV Image.
ThreshImage = cv2.inRange(HSV_Image, np.array([0, 81, 0]), np.array([41, 255, 255]))
# Finding Contours
Contours, Hierarchy = cv2.findContours(ThreshImage, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
# Taking only those contours which have no child contour.
FinalContours = [Contours[i] for i in range(len(Contours)) if Hierarchy[0][i][2] == -1]
# Drawing Contours
Image = cv2.drawContours(Image, FinalContours, -1, (0, 255, 0), 1)
cv2.imshow("Contours", Image)
cv2.waitKey(0)
Resultant Image
I have an idea: detect the small circles with a sliding window. When the small circle occupies more than 90% of the sliding-window area (an inscribed circle in a square) and less than 100% (to avoid the window sitting inside the bigger circle), that position is a small circle. The largest sliding-window size is the size of the largest small circle. Hope this helps; a rough sketch of the idea follows.
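A sketch of that sliding-window occupancy test (my own reading of the idea; the filename, window size, and thresholds are assumptions to tune, and a circle inscribed in a square actually fills pi/4, about 78.5%, of it):

import cv2
import numpy as np

binary = cv2.imread('binary.png', cv2.IMREAD_GRAYSCALE)  # hypothetical binarized input
win = 15  # assumed diameter of the largest small circle, in pixels

# fraction of foreground pixels inside each window position
occupancy = cv2.boxFilter((binary > 0).astype(np.float32), -1, (win, win))

# flag windows mostly, but not completely, filled by a blob
candidates = np.argwhere((occupancy > 0.7) & (occupancy < 0.95))
print(len(candidates), 'candidate window centres (these still need grouping)')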
In addition, on the result of Piglet's approach, apply k-means with k = 2 to get a binary image, and then use findContours to count the small circles; a sketch of that is below as well.
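A sketch of that k-means step (assuming the input is the grayscale channel product from Piglet's answer, saved as a hypothetical product.png):

import cv2
import numpy as np

gray = cv2.imread('product.png', cv2.IMREAD_GRAYSCALE)
samples = gray.reshape(-1, 1).astype(np.float32)
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
_, labels, centers = cv2.kmeans(samples, 2, None, criteria, 3, cv2.KMEANS_RANDOM_CENTERS)

# treat the brighter cluster as foreground
fg = int(np.argmax(centers))
binary = (labels.reshape(gray.shape) == fg).astype(np.uint8) * 255

contours = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
contours = contours[0] if len(contours) == 2 else contours[1]
print('small circles found:', len(contours))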

Remove text boxes for OCR with OpenCV

I am trying to run OCR (using Google's Tesseract) on a document with the following format:
However, Tesseract assumes the short bars in between to be letters/numbers (l or i or 1).
As a pre-processing measure I tried to remove vertical and horizontal lines using the following code:
import cv2
from pdf2image import convert_from_path

pages = convert_from_path('..\\app\\1.pdf', 500)
for page in pages:
    page.save('..\\app\\out.jpg', 'JPEG')

image = cv2.imread('out.jpg')
result = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Remove horizontal lines
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40, 1))
remove_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
cnts = cv2.findContours(remove_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(result, [c], -1, (255, 255, 255), 5)

# Remove vertical lines
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 40))
remove_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
cnts = cv2.findContours(remove_vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(result, [c], -1, (255, 255, 255), 5)

cv2.imshow('thresh', thresh)
cv2.imshow('result', result)
cv2.imwrite('result.png', result)
cv2.waitKey()
I run into an issue where this removes most of the vertical and horizontal lines in the document, even the start and finish lines on the left and right side of the image below, but not the small bars in between.
I'm wondering if I'm going about this wrong by trying to pre-process and remove lines. Is there a better way to pre-process, or another way to solve this problem?
With the observation that the form fields are separate from the characters, you can simply filter using contour area to isolate the text characters. The idea is to Gaussian blur, then Otsu's threshold to obtain a binary image. From here we find contours and filter using contour area with some predetermined threshold value. We can effectively remove the lines by drawing in the contours with cv2.drawContours.
Binary image
Removed lines
Invert ready for OCR
OCR result using Pytesseract
HELLO
Code
import cv2
import pytesseract

pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load image, grayscale, blur, Otsu's threshold
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3, 3), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Find contours and filter using contour area
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    if area > 500:
        cv2.drawContours(thresh, [c], -1, 0, -1)

# Invert image and OCR
invert = 255 - thresh
data = pytesseract.image_to_string(invert, lang='eng', config='--psm 6')
print(data)

cv2.imshow('thresh', thresh)
cv2.imshow('invert', invert)
cv2.waitKey()
Note: If you still want to go with the remove horizontal/vertical lines approach, you need to modify the vertical kernel size. For instance, change (1,40) to (1,10). This will help to remove smaller lines but it may also remove some of the vertical lines in the text such as in L.

How to get rectangular box contours when there are overlapping distractions using OpenCV

I pieced together a quick algorithm in Python to get the input boxes from a handwritten invoice.
# some preprocessing
img = np.copy(orig_img)
img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
img = cv2.GaussianBlur(img, (5, 5), 0)
_, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# get contours
contours, hierarchy = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
for i, cnt in enumerate(contours):
    approx = cv2.approxPolyDP(cnt, 0.01 * cv2.arcLength(cnt, True), True)
    if len(approx) == 4:
        cv2.drawContours(orig_img, contours, i, (0, 255, 0), 2)
It fails to get the 2nd one in this example because the handwriting crosses the box boundary.
Note that this picture could be taken with a mobile phone, so aspect ratios may be a little funny.
So, what are some neat recipes to get around my problem?
And as a bonus. These boxes are from an A4 page with a lot of other stuff going on. Would you recommend a whole different approach to getting the handwritten numbers out?
EDIT
This might be interesting. If I don't filter for 4 sided polys, I get the contours but they go all around the hand-drawn digit. Maybe there's a way to make contours have water-like cohesion so that they pinch off when they get close to themselves?
FURTHER EDIT
Here is the original image without bounding boxes drawn on
Here's a potential solution:
Obtain binary image. We load the image, convert to grayscale, apply a Gaussian blur, and then Otsu's threshold
Detect horizontal lines. We create a horizontal kernel and draw detected horizontal lines onto a mask
Detect vertical lines. We create a vertical kernel and draw detected vertical lines onto a mask
Perform morphological opening. We create a rectangular kernel and perform morph opening to smooth out noise and separate any connected contours
Find contours, draw rectangle, and extract ROI. We find contours and draw the bounding rectangle onto the image
Here's a visualization of each step:
Binary image
Detected horizontal and vertical lines drawn onto a mask
Morphological opening
Result
Individual extracted saved ROI
Note: To extract only the hand written numbers/letters out of each ROI, take a look at a previous answer in Remove borders from image but keep text written on borders (preprocessing before OCR)
Code
import cv2
import numpy as np

# Load image, grayscale, blur, Otsu's threshold
image = cv2.imread('1.png')
original = image.copy()
mask = np.zeros(image.shape, dtype=np.uint8)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Find horizontal lines
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (50, 1))
detect_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
cnts = cv2.findContours(detect_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(mask, [c], -1, (255, 255, 255), 3)

# Find vertical lines
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 50))
detect_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
cnts = cv2.findContours(detect_vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(mask, [c], -1, (255, 255, 255), 3)

# Morph open
mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
opening = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel, iterations=1)

# Draw rectangle and save each ROI
number = 0
cnts = cv2.findContours(opening, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    x, y, w, h = cv2.boundingRect(c)
    cv2.rectangle(image, (x, y), (x + w, y + h), (36, 255, 12), 2)
    ROI = original[y:y+h, x:x+w]
    cv2.imwrite('ROI_{}.png'.format(number), ROI)
    number += 1

cv2.imshow('thresh', thresh)
cv2.imshow('mask', mask)
cv2.imshow('opening', opening)
cv2.imshow('image', image)
cv2.waitKey()
Since the squares have quite straight lines, it's good to use the Hough transform:
1- Convert the image to grayscale, do an Otsu threshold on it, then invert the binary image
2- Do a Hough transform (HoughLinesP) and draw the lines on a new image
3- With findContours and drawContours, clean up the 3 ROIs
4- Erode the final image a little to make the boxes neater
I wrote the code in C++; it's easily convertible to Python:
Mat img = imread("D:/1.jpg", 0);
threshold(img, img, 0, 255, THRESH_OTSU);
imshow("Binary image", img);

img = 255 - img;
imshow("Reversed binary image", img);

Mat img_1 = Mat::zeros(img.size(), CV_8U);
Mat img_2 = Mat::zeros(img.size(), CV_8U);

vector<Vec4i> lines;
HoughLinesP(img, lines, 1, 0.1, 95, 10, 1);
for (size_t i = 0; i < lines.size(); i++)
    line(img_1, Point(lines[i][0], lines[i][1]), Point(lines[i][2], lines[i][3]),
         Scalar(255, 255, 255), 2, 8);
imshow("Hough Lines", img_1);

vector<vector<Point>> contours;
findContours(img_1, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE);
for (int i = 0; i < contours.size(); i++)
    drawContours(img_2, contours, i, Scalar(255, 255, 255), -1);

imshow("final result after drawcontours", img_2);
waitKey(0);
Thank you to those who shared solutions. I ended up taking a slightly different path in the end.
Grayscale, Gaussian Blur, Otsu threshold
Get contours
Filter contours by aspect ratio and extent
Return the minimum upright bounding box of the contour.
Remove any bounding boxes that encapsulate smaller bounding boxes (because you get two boxes, one for the inside contour, and one for the outside).
Here's the code if anyone's interested (except for step 5, which was just basic numpy manipulation; a rough sketch of it follows the code)
import cv2
import numpy as np

orig_img = cv2.imread('example0.jpg')
img = np.copy(orig_img)
img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
img = cv2.GaussianBlur(img, (5, 5), 0)
_, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

contours, hierarchy = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
boxes = list()
for i, cnt in enumerate(contours):
    x, y, w, h = cv2.boundingRect(cnt)
    aspect_ratio = float(w) / h
    area = cv2.contourArea(cnt)
    rect_area = w * h
    extent = float(area) / rect_area
    if abs(aspect_ratio - 1) < 0.1 and extent > 0.7:
        boxes.append((x, y, w, h))
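A possible sketch of step 5 (my own, not the author's numpy code): drop any box that fully contains another box, keeping only the innermost rectangles.

def contains(outer, inner):
    # True if the 'outer' (x, y, w, h) box fully encloses the 'inner' one
    ox, oy, ow, oh = outer
    ix, iy, iw, ih = inner
    return ox <= ix and oy <= iy and ox + ow >= ix + iw and oy + oh >= iy + ih

boxes = [b for b in boxes
         if not any(b != other and contains(b, other) for other in boxes)]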
And here's an example of what came out when cutting out the boundary boxes from the original image.

How to remove noise artifacts from an image for OCR with Python OpenCV?

I have subsets of images that contain digits. Each subset is read by Tesseract for OCR. Unfortunately, for some images the cropping from the original image isn't optimal.
Hence some artifacts/remains at the top and bottom of the image hamper Tesseract's ability to recognize characters on the image. I would like to get rid of these artifacts and get to a similar result:
First I considered a simple approach: I set the first row of pixels as the reference; if an artifact was found along the x-axis (i.e., a white pixel if the image is binarized), I removed it along the y-axis until the next black pixel. Code for this approach is below:
import cv2

inp = cv2.imread("testing_file.tif")
inp = cv2.cvtColor(inp, cv2.COLOR_BGR2GRAY)
_, inp = cv2.threshold(inp, 150, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

ax = inp.shape[1]
ay = inp.shape[0]
out = inp.copy()
for i in range(ax):
    j = 0
    while j < ay:
        if out[j, i] == 255:
            out[j, i] = 0
        else:
            break
        j += 1

out = cv2.bitwise_not(out)
cv2.imwrite('output.png', out)
But the result isn't good at all:
Then I stumbled across the flood_fill function from scipy (here) but found it too time-consuming and still not efficient. A similar question was asked on SO here but didn't help much. Maybe a k-nearest-neighbors approach could be considered? I also found out that methods consisting of merging neighboring pixels under some criteria are called growing methods, among which single linkage is the most common (here).
What would you recommend to remove the upper and lower artifacts?
Here's a simple approach:
Convert image to grayscale
Otsu's threshold to obtain binary image
Create special horizontal kernel and dilate
Detect horizontal lines, sort for largest contour, and draw onto mask
Bitwise-and
After converting to grayscale, we Otsu's threshold to get a binary image
# Read in image, convert to grayscale, and Otsu's threshold
image = cv2.imread('1.png')
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
Next we create a long horizontal kernel and dilate to connect the numbers together
# Create special horizontal kernel and dilate
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (70,1))
dilate = cv2.dilate(thresh, horizontal_kernel, iterations=1)
From here we detect horizontal lines and sort for the largest contour. The idea is that the largest contour will be the middle section of the numbers where the numbers are all "complete". Any smaller contours will be partial or cut off numbers so we filter them out here. We draw this largest contour onto a mask
# Detect horizontal lines, sort for largest contour, and draw on mask
mask = np.zeros(image.shape, dtype=np.uint8)
detected_lines = cv2.morphologyEx(dilate, cv2.MORPH_OPEN, horizontal_kernel, iterations=1)
cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
for c in cnts:
    cv2.drawContours(mask, [c], -1, (255, 255, 255), -1)
    break
Now that we have the outline of the desired numbers, we simply bitwise-and with our original image and color the background white to get our result
# Bitwise-and to get result and color background white
mask = cv2.cvtColor(mask,cv2.COLOR_BGR2GRAY)
result = cv2.bitwise_and(image,image,mask=mask)
result[mask==0] = (255,255,255)
Full code for completeness
import cv2
import numpy as np

# Read in image, convert to grayscale, and Otsu's threshold
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Create special horizontal kernel and dilate
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (70, 1))
dilate = cv2.dilate(thresh, horizontal_kernel, iterations=1)

# Detect horizontal lines, sort for largest contour, and draw on mask
mask = np.zeros(image.shape, dtype=np.uint8)
detected_lines = cv2.morphologyEx(dilate, cv2.MORPH_OPEN, horizontal_kernel, iterations=1)
cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
for c in cnts:
    cv2.drawContours(mask, [c], -1, (255, 255, 255), -1)
    break

# Bitwise-and to get result and color background white
mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
result = cv2.bitwise_and(image, image, mask=mask)
result[mask == 0] = (255, 255, 255)

cv2.imshow('thresh', thresh)
cv2.imshow('dilate', dilate)
cv2.imshow('result', result)
cv2.waitKey()
