How to calculate similarity between two signature images - Python

I have signature images like this one:
Both of these signatures are similar, and I need to print whether they are similar or not.
I have tried SSIM, MSE, SIFT and Cosine similarity.
SSIM gives an index of roughly 0.7 or higher for both similar and dissimilar images.
MSE gives a very large distance once the images are cleaned and compared.
SIFT gave 66 feature matches.
And cosine similarity gave 0.08.
Here is the code that cleans the images:
import cv2
import numpy as np

image = cv2.imread('test2.png')
image = cv2.resize(image, (680, 460))
image = cv2.detailEnhance(image)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (1,1), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Filter using contour area and remove small noise
cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    if area < 5500:
        cv2.drawContours(thresh, [c], -1, (0,0,0), -1)

# Morph close and invert image
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,1))
close = 255 - cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
cv2.imshow('thresh', close)  # for showing
cv2.waitKey()

kernel = np.ones((1,1), np.uint8)
# sign2 = cv2.dilate(close, kernel, iterations=1)
sign2 = close
# cv2.imshow('thresh', sign2)  # for showing
# cv2.waitKey()
I clean both images and then compare.
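The comparison step looks roughly like this (a minimal sketch using scikit-image's structural_similarity; sign1 and sign2 are the two cleaned, equally sized images from the code above, and the 0.8 cutoff is only a placeholder to tune on real pairs):

from skimage.metrics import structural_similarity

# sign1 and sign2 are the cleaned images produced by the code above,
# both resized to the same dimensions
score = structural_similarity(sign1, sign2)
print('SSIM: {:.3f}'.format(score))
print('Similar' if score > 0.8 else 'Not similar')  # placeholder threshold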
Is there any other way to compare them? All I want is a simple yes if they are similar. :p

Related

How to detect corners of a square with Python OpenCV?

In the image below, I am using OpenCV harris corner detector to detect only the corners for the squares (and the smaller squares within the outer squares). However, I am also getting corners detected for the numbers on the side of the image. How do I get this to focus only on the squares and not the numbers? I need a method to ignore the numbers when performing OpenCV corner detection. The code, input image and output image are below:
import cv2 as cv
import numpy as np

img = cv.imread(filename)
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
gray = np.float32(gray)
dst = cv.cornerHarris(gray, 2, 3, 0.04)
dst = cv.dilate(dst, None)

# Threshold for an optimal value, it may vary depending on the image.
img[dst > 0.01*dst.max()] = [0,0,255]
cv.imshow('dst', img)
cv.waitKey()
Input image
Output from Harris corner detector
Here's a potential approach using traditional image processing:
Obtain binary image. We load the image, convert to grayscale, Gaussian blur, then adaptive threshold to obtain a black/white binary image. We then remove small noise using contour area filtering. At this stage we also create two blank masks.
Detect horizontal and vertical lines. Now we isolate horizontal lines by creating a horizontal shaped kernel and perform morphological operations. To detect vertical lines, we do the same but with a vertical shaped kernel. We draw the detected lines onto separate masks.
Find intersection points. The idea is that if we combine the horizontal and vertical masks, the intersection points will be the corners. We can perform a bitwise-and operation on the two masks. Finally we find the centroid of each intersection point and highlight corners by drawing a circle.
Here's a visualization of the pipeline:
Input image -> binary image
Detected horizontal lines -> horizontal mask
Detected vertical lines -> vertical mask
Bitwise-and both masks -> detected intersection points -> corners -> cleaned up corners
The results aren't perfect, but they're pretty close. The problem comes from the noise on the vertical mask due to the slanted image. If the image were centered without an angle, the results would be ideal. You can probably fine-tune the kernel sizes or iterations to get better results.
Code
import cv2
import numpy as np

# Load image, create horizontal/vertical masks, Gaussian blur, adaptive threshold
image = cv2.imread('1.png')
original = image.copy()
horizontal_mask = np.zeros(image.shape, dtype=np.uint8)
vertical_mask = np.zeros(image.shape, dtype=np.uint8)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3,3), 0)
thresh = cv2.adaptiveThreshold(blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 23, 7)

# Remove small noise on thresholded image
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    if area < 150:
        cv2.drawContours(thresh, [c], -1, 0, -1)

# Detect horizontal lines
dilate_horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10,1))
dilate_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, dilate_horizontal_kernel, iterations=1)
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40,1))
detected_lines = cv2.morphologyEx(dilate_horizontal, cv2.MORPH_OPEN, horizontal_kernel, iterations=1)
cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(image, [c], -1, (36,255,12), 2)
    cv2.drawContours(horizontal_mask, [c], -1, (255,255,255), 2)

# Remove extra horizontal lines using contour area filtering
horizontal_mask = cv2.cvtColor(horizontal_mask, cv2.COLOR_BGR2GRAY)
cnts = cv2.findContours(horizontal_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    if area > 1000 or area < 100:
        cv2.drawContours(horizontal_mask, [c], -1, 0, -1)

# Detect vertical lines
dilate_vertical_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (1,7))
dilate_vertical = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, dilate_vertical_kernel, iterations=1)
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (1,2))
detected_lines = cv2.morphologyEx(dilate_vertical, cv2.MORPH_OPEN, vertical_kernel, iterations=4)
cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(image, [c], -1, (36,255,12), 2)
    cv2.drawContours(vertical_mask, [c], -1, (255,255,255), 2)

# Find intersection points
vertical_mask = cv2.cvtColor(vertical_mask, cv2.COLOR_BGR2GRAY)
combined = cv2.bitwise_and(horizontal_mask, vertical_mask)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2,2))
combined = cv2.morphologyEx(combined, cv2.MORPH_OPEN, kernel, iterations=1)

# Highlight corners
cnts = cv2.findContours(combined, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    # Find centroid and draw center point
    try:
        M = cv2.moments(c)
        cx = int(M['m10']/M['m00'])
        cy = int(M['m01']/M['m00'])
        cv2.circle(original, (cx, cy), 3, (36,255,12), -1)
    except ZeroDivisionError:
        pass

cv2.imshow('thresh', thresh)
cv2.imshow('horizontal_mask', horizontal_mask)
cv2.imshow('vertical_mask', vertical_mask)
cv2.imshow('combined', combined)
cv2.imshow('original', original)
cv2.imshow('image', image)
cv2.waitKey()

Remove text boxes for OCR with OpenCV

I am trying to run OCR (using Google's Tesseract) on a document with the following format:
However, Tesseract interprets the short bars in between as letters/numbers (l, i, or 1).
As a pre-processing measure I tried to remove vertical and horizontal lines using the following code:
import cv2
from pdf2image import convert_from_path

pages = convert_from_path('..\\app\\1.pdf', 500)
for page in pages:
    page.save('..\\app\\out.jpg', 'JPEG')

image = cv2.imread('out.jpg')
result = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Remove horizontal lines
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40,1))
remove_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
cnts = cv2.findContours(remove_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(result, [c], -1, (255,255,255), 5)

# Remove vertical lines
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,40))
remove_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
cnts = cv2.findContours(remove_vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(result, [c], -1, (255,255,255), 5)

cv2.imshow('thresh', thresh)
cv2.imshow('result', result)
cv2.imwrite('result.png', result)
cv2.waitKey()
The issue is that the output removes most of the vertical and horizontal lines in the document, even the start and finish lines on the left and right sides of the image below, but not the small bars in between.
I'm wondering if I am going about this wrong by trying to pre-process and remove lines. Is there a better way to pre-process or another way to solve this problem?
With the observation that the form fields are separate from the characters, you can simply filter using contour area to isolate the text characters. The idea is to apply a Gaussian blur, then Otsu's threshold to obtain a binary image. From here we find contours and filter using contour area with some predetermined threshold value. We can effectively remove the lines by drawing in the contours with cv2.drawContours.
Binary image
Removed lines
Invert ready for OCR
OCR result using Pytesseract
HELLO
Code
import cv2
import pytesseract

pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load image, grayscale, blur, Otsu's threshold
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3,3), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Find contours and filter using contour area
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    if area > 500:
        cv2.drawContours(thresh, [c], -1, 0, -1)

# Invert image and OCR
invert = 255 - thresh
data = pytesseract.image_to_string(invert, lang='eng', config='--psm 6')
print(data)

cv2.imshow('thresh', thresh)
cv2.imshow('invert', invert)
cv2.waitKey()
Note: If you still want to go with the remove horizontal/vertical lines approach, you need to modify the vertical kernel size. For instance, change (1,40) to (1,10), as in the snippet below. This will help remove the smaller lines, but it may also remove some of the vertical strokes in the text, such as in the letter L.
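A drop-in replacement for the vertical-line section of the question's code (only the kernel size changes):

# Shorter vertical kernel: (1,10) instead of (1,40) catches the short bars,
# but may also remove vertical strokes inside letters
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,10))
remove_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)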

How to get rectangular box contours when there are overlapping distractions using OpenCV

I pieced together a quick algorithm in python to get the input boxes from a handwritten invoice.
import cv2
import numpy as np

# some preprocessing (orig_img is the loaded input image)
img = np.copy(orig_img)
img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
img = cv2.GaussianBlur(img, (5,5), 0)
_, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# get contours
contours, hierarchy = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
for i, cnt in enumerate(contours):
    approx = cv2.approxPolyDP(cnt, 0.01*cv2.arcLength(cnt, True), True)
    if len(approx) == 4:
        cv2.drawContours(orig_img, contours, i, (0, 255, 0), 2)
It fails to get the 2nd one in this example because the handwriting crosses the box boundary.
Note that this picture could be taken with a mobile phone, so aspect ratios may be a little funny.
So, what are some neat recipes to get around my problem?
And as a bonus. These boxes are from an A4 page with a lot of other stuff going on. Would you recommend a whole different approach to getting the handwritten numbers out?
EDIT
This might be interesting. If I don't filter for 4 sided polys, I get the contours but they go all around the hand-drawn digit. Maybe there's a way to make contours have water-like cohesion so that they pinch off when they get close to themselves?
FURTHER EDIT
Here is the original image without bounding boxes drawn on it.
Here's a potential solution:
Obtain binary image. We load the image, convert to grayscale, apply a Gaussian blur, and then Otsu's threshold
Detect horizontal lines. We create a horizontal kernel and draw detected horizontal lines onto a mask
Detect vertical lines. We create a vertical kernel and draw detected vertical lines onto a mask
Perform morphological opening. We create a rectangular kernel and perform morph opening to smooth out noise and separate any connected contours
Find contours, draw rectangle, and extract ROI. We find contours and draw the bounding rectangle onto the image
Here's a visualization of each step:
Binary image
Detected horizontal and vertical lines drawn onto a mask
Morphological opening
Result
Individual extracted saved ROI
Note: To extract only the hand written numbers/letters out of each ROI, take a look at a previous answer in Remove borders from image but keep text written on borders (preprocessing before OCR)
Code
import cv2
import numpy as np

# Load image, grayscale, blur, Otsu's threshold
image = cv2.imread('1.png')
original = image.copy()
mask = np.zeros(image.shape, dtype=np.uint8)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Find horizontal lines
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (50,1))
detect_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
cnts = cv2.findContours(detect_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(mask, [c], -1, (255,255,255), 3)

# Find vertical lines
vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,50))
detect_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
cnts = cv2.findContours(detect_vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(mask, [c], -1, (255,255,255), 3)

# Morph open
mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7,7))
opening = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel, iterations=1)

# Draw rectangle and save each ROI
number = 0
cnts = cv2.findContours(opening, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    x,y,w,h = cv2.boundingRect(c)
    cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 2)
    ROI = original[y:y+h, x:x+w]
    cv2.imwrite('ROI_{}.png'.format(number), ROI)
    number += 1

cv2.imshow('thresh', thresh)
cv2.imshow('mask', mask)
cv2.imshow('opening', opening)
cv2.imshow('image', image)
cv2.waitKey()
Since the squares have quite straight lines, the Hough transform is a good fit:
1- Make the image grayscale, then do an Otsu threshold on it, then reverse the binary image
2- Do Hough transform (HoughLinesP) and draw the lines on a new image
3- With findContours and drawContours, clean up the 3 ROIs
4- Erode the final image a little to make the boxes neater
I wrote the code in C++; it's easily convertible to Python (a sketch of the conversion follows the C++ code):
#include <opencv2/opencv.hpp>
using namespace cv;
using namespace std;

Mat img = imread("D:/1.jpg", 0);
threshold(img, img, 0, 255, THRESH_OTSU);
imshow("Binary image", img);
img = 255 - img;
imshow("Reversed binary image", img);

Mat img_1 = Mat::zeros(img.size(), CV_8U);
Mat img_2 = Mat::zeros(img.size(), CV_8U);

vector<Vec4i> lines;
HoughLinesP(img, lines, 1, 0.1, 95, 10, 1);
for (size_t i = 0; i < lines.size(); i++)
    line(img_1, Point(lines[i][0], lines[i][1]), Point(lines[i][2], lines[i][3]),
         Scalar(255, 255, 255), 2, 8);
imshow("Hough Lines", img_1);

vector<vector<Point>> contours;
findContours(img_1, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE);
for (int i = 0; i < contours.size(); i++)
    drawContours(img_2, contours, i, Scalar(255, 255, 255), -1);
imshow("final result after drawcontours", img_2);
waitKey(0);
Thank you to those who shared solutions. I ended up taking a slightly different path.
Grayscale, Gaussian Blur, Otsu threshold
Get contours
Filter contours by aspect ratio and extent
Return the minimum upright bounding box of the contour.
Remove any bounding boxes that encapsulate smaller bounding boxes (because you get two boxes, one for the inside contour, and one for the outside).
Here's the code if anyone's interested (except for step 5, which was just basic numpy manipulation; a possible sketch of it follows the code)
import cv2
import numpy as np

orig_img = cv2.imread('example0.jpg')
img = np.copy(orig_img)
img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
img = cv2.GaussianBlur(img, (5,5), 0)
_, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

contours, hierarchy = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
boxes = list()
for i, cnt in enumerate(contours):
    x, y, w, h = cv2.boundingRect(cnt)
    aspect_ratio = float(w)/h
    area = cv2.contourArea(cnt)
    rect_area = w*h
    extent = float(area)/rect_area
    if abs(aspect_ratio - 1) < 0.1 and extent > 0.7:
        boxes.append((x, y, w, h))
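For step 5, something like the following containment filter would do (an assumption of how it might look, not the author's actual code):

# Drop any box that fully contains another, smaller box (hypothetical
# implementation of the "remove encapsulating boxes" step)
def contains(outer, inner):
    ox, oy, ow, oh = outer
    ix, iy, iw, ih = inner
    return ox <= ix and oy <= iy and ox + ow >= ix + iw and oy + oh >= iy + ih

boxes = [b for b in boxes
         if not any(b != other and contains(b, other) for other in boxes)]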
And here's an example of what came out when cutting out the boundary boxes from the original image.

How to remove noise artifacts from an image for OCR with Python OpenCV?

I have subsets of images that contain digits. Each subset is read by Tesseract for OCR. Unfortunately, for some images the cropping from the original image isn't optimal.
Hence some artifacts remain at the top and bottom of the image and hamper Tesseract's character recognition. I would like to get rid of these artifacts and reach a similar result:
First I considered a simple approach: treating the first row of pixels as the reference, if an artifact (i.e., a white pixel in the binarized image) was found along the x-axis, I removed it along the y-axis until the next black pixel. The code for this approach is below:
import cv2

inp = cv2.imread("testing_file.tif")
inp = cv2.cvtColor(inp, cv2.COLOR_BGR2GRAY)
_, inp = cv2.threshold(inp, 150, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

ax = inp.shape[1]
ay = inp.shape[0]
out = inp.copy()
for i in range(ax):
    j = 0
    while j in range(ay):
        if out[j,i] == 255:
            out[j,i] = 0
        else:
            break
        j += 1

out = cv2.bitwise_not(out)
cv2.imwrite('output.png', out)
But the result isn't good at all:
Then I stumbled across the flood_fill function from scipy (here), but found it too time-consuming and still not efficient. A similar question was asked on SO here but didn't help much. Maybe a k-nearest-neighbor approach could be considered? I also found out that methods that consist of merging neighboring pixels under some criteria are called growing methods, among which single linkage is the most common (here).
What would you recommend to remove the upper and lower artifacts?
Here's a simple approach:
Convert image to grayscale
Otsu's threshold to obtain binary image
Create special horizontal kernel and dilate
Detect horizontal lines, sort for largest contour, and draw onto mask
Bitwise-and
After converting to grayscale, we apply Otsu's threshold to get a binary image
# Read in image, convert to grayscale, and Otsu's threshold
image = cv2.imread('1.png')
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
Next we create a long horizontal kernel and dilate to connect the numbers together
# Create special horizontal kernel and dilate
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (70,1))
dilate = cv2.dilate(thresh, horizontal_kernel, iterations=1)
From here we detect horizontal lines and sort for the largest contour. The idea is that the largest contour will be the middle section of the numbers, where the numbers are all "complete". Any smaller contours will be partial or cut-off numbers, so we filter them out here. We draw this largest contour onto a mask
# Detect horizontal lines, sort for largest contour, and draw on mask
mask = np.zeros(image.shape, dtype=np.uint8)
detected_lines = cv2.morphologyEx(dilate, cv2.MORPH_OPEN, horizontal_kernel, iterations=1)
cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
for c in cnts:
    cv2.drawContours(mask, [c], -1, (255,255,255), -1)
    break
Now that we have the outline of the desired numbers, we simply bitwise-and with our original image and color the background white to get our result
# Bitwise-and to get result and color background white
mask = cv2.cvtColor(mask,cv2.COLOR_BGR2GRAY)
result = cv2.bitwise_and(image,image,mask=mask)
result[mask==0] = (255,255,255)
Full code for completeness
import cv2
import numpy as np

# Read in image, convert to grayscale, and Otsu's threshold
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Create special horizontal kernel and dilate
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (70,1))
dilate = cv2.dilate(thresh, horizontal_kernel, iterations=1)

# Detect horizontal lines, sort for largest contour, and draw on mask
mask = np.zeros(image.shape, dtype=np.uint8)
detected_lines = cv2.morphologyEx(dilate, cv2.MORPH_OPEN, horizontal_kernel, iterations=1)
cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
for c in cnts:
    cv2.drawContours(mask, [c], -1, (255,255,255), -1)
    break

# Bitwise-and to get result and color background white
mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
result = cv2.bitwise_and(image, image, mask=mask)
result[mask==0] = (255,255,255)

cv2.imshow('thresh', thresh)
cv2.imshow('dilate', dilate)
cv2.imshow('result', result)
cv2.waitKey()

How to preprocess an image to remove noise and extract text Python?

I have a really noisy image that I have to perform OCR on. The snippet attached is part of a bigger image. How would I go about pre-processing this image optimally?
I have already tried pre-processing the image using Otsu binarization, smoothing the image with various filters, and erosion-dilation. I've also used connectedComponentsWithStats to remove the noise in the image. But none of this helps with the smudged text.
Edit - This text needs to be pre-processed in order to perform OCR
import cv2
import numpy as np

img = cv2.imread(file, 0)
gaus = cv2.GaussianBlur(img, (5,5), 0)
_, blackAndWhite = cv2.threshold(gaus, 127, 255, cv2.THRESH_BINARY_INV)

# Remove small connected components
nlabels, labels, stats, centroids = cv2.connectedComponentsWithStats(blackAndWhite, None, None, None, 8, cv2.CV_32S)
sizes = stats[1:, -1]
img2 = np.zeros((labels.shape), np.uint8)
for i in range(0, nlabels - 1):
    if sizes[i] >= 50:
        img2[labels == i + 1] = 255
res = cv2.bitwise_not(img2)

# Detect vertical and horizontal lines
(thresh, img_bin) = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
img_bin = 255 - img_bin
kernel_length = np.array(img).shape[1]//80
verticle_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_length))
hori_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_length, 1))
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
img_temp1 = cv2.erode(img_bin, verticle_kernel, iterations=3)
verticle_lines_img = cv2.dilate(img_temp1, verticle_kernel, iterations=3)
img_temp2 = cv2.erode(img_bin, hori_kernel, iterations=3)
horizontal_lines_img = cv2.dilate(img_temp2, hori_kernel, iterations=3)

# Combine the line images and clean up
alpha = 0.5
beta = 1.0 - alpha
img_final_bin = cv2.addWeighted(verticle_lines_img, alpha, horizontal_lines_img, beta, 0.0)
img_final_bin = cv2.erode(~img_final_bin, kernel, iterations=2)
(thresh, img_final_bin) = cv2.threshold(img_final_bin, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
Here's an approach to remove the noise:
Convert image to grayscale and Otsu's threshold
Perform morphological transformations to smooth image
Find contours and filter using contour area
Invert image
After converting to grayscale, we apply Otsu's threshold to obtain a binary image
From here we create a kernel and perform morphological opening to smooth the image. You could try different kernel sizes here to remove more noise, but increasing the kernel size will also remove text detail (see the variant after the code below)
Next we find contours and filter using contour area with a maximum threshold area to remove the small particles. We fill in the contours to effectively remove the noise
Finally we invert the image to get our result
import cv2
import numpy as np

image = cv2.imread('1.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3,3))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)

cnts = cv2.findContours(opening, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    if area < 150:
        cv2.drawContours(opening, [c], -1, (0,0,0), -1)

result = 255 - opening
cv2.imshow('thresh', thresh)
cv2.imshow('opening', opening)
cv2.imshow('result', result)
cv2.waitKey()
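For instance, increasing the opening kernel from (3,3) to (5,5) removes more noise at the cost of thin strokes (a hedged variant, not part of the original answer):

# Larger elliptical kernel: stronger noise removal, but thin text strokes may vanish
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5,5))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)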
