So, I have been trying to enhance images so I can use text recognition, but since the images are extremely low quality and I am a beginner I haven't been able to perform a great job.
Below is the original image:
Original Image:
First I resized the image
img = cv2.imread('test.jpg')
cv2.imshow('Original',img)
cv2.waitKey(0)
img = cv2.resize(img,(500,500),interpolation = cv2.INTER_AREA)
cv2.imshow('Resized',img)
cv2.waitKey(0)
then I changed the background color to gray
img_gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
cv2.imshow('GRAY',img_gray)
cv2.waitKey(0)
I did some thresholding
ret, img_threshold = cv2.threshold(img_gray, 70, 255, cv2.THRESH_BINARY_INV)
cv2.imshow('THRESHOLD', img_threshold)
cv2.waitKey(0)
and I used morphology to get a better image
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS,(6,6))
opening = cv2.morphologyEx(img_threshold, cv2.MORPH_OPEN, kernel, iterations = 2)
kernel = np.ones((9,9),np.uint8)
open_img = cv2.morphologyEx(opening, cv2.MORPH_OPEN, kernel, iterations = 3)
cv2.imshow('OPENING',open_img)
cv2.waitKey(0)
My final product is below:
Final Image 2
My question is how can I remove the white chunks and the line crossing the numbers
I'm having a hard time detecting the thin black line in the following image. I tried all sort of opencv processing and applied findContours, but wasn't able to catch this. Is anyone able to help plz?
For example I tried this:
import cv2
img = cv2.imread('SampleLine.png')
# convert img to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, thresh_type, block, constant)
# apply morphology open then close
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (1,1))
blob = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (10,10))
blob = cv2.morphologyEx(blob, cv2.MORPH_CLOSE, kernel)
# invert blob
blob = (255 - blob)
cnts,hierarchy = cv2.findContours(blob, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cv2.drawContours(img, cnts, -1, (0, 0, 255), 3)
cv2.imwrite("output_Line.png", img)
I'm looking for an output like this:
I'm using a KNN to detect characters, however, it is sensitive to background noise. the image is basically what I'm using and I have developed a mini script to try get the best threshold image. would anyone have any suggestions/ changed to get better results? make the X more viewable. ( attached version of current output and what the input is)
import cv2
import numpy as np
image = cv2.imread("resize.png")
img = image
img = cv2.blur(img, (5, 5))
imgGray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # get grayscale image
imgBlurred = cv2.GaussianBlur(imgGray, (11, 11), 5) # blur
# cv2.imshow("test",imgBlurred)
imgThresh = cv2.adaptiveThreshold(imgBlurred, # input image
255, # make pixels that pass the threshold full white
cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
# use gaussian rather than mean, seems to give better results
cv2.THRESH_BINARY_INV,
# invert so foreground will be white, background will be black
13, # size of a pixel neighborhood used to calculate threshold value
2) # constant subtracted from the mean or weighted mean
imgThreshCopy = imgThresh.copy() # make a copy of the thresh image, this in necessary b/c findContours modifies the image
kernel = np.ones((3, 3), np.uint8)
kernel2 = np.ones((7, 7), np.uint8)
kernel3 = np.ones((1, 1), np.uint8)
imgThreshCopy = cv2.morphologyEx(imgThreshCopy, cv2.MORPH_OPEN, kernel)
imgThreshCopy = cv2.morphologyEx(imgThreshCopy, cv2.MORPH_CLOSE, kernel2)
imgThreshCopy = cv2.dilate(imgThreshCopy, kernel3, iterations=150)
res = imgThreshCopy
cv2.imwrite("test.jpg", res)
cv2.imshow("image", res)
cv2.waitKey(0)
output
input
I have an image which contains a table in it. I need to extract the text from it. I tried to remove horizontal and vertical lines first, but it doesn't seems to work. Below is the code which I used.
import cv2
import numpy as np
img = cv2.imread(r'A13205.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 175, 255, cv2.THRESH_BINARY)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 4))
morph_img = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
#inverse the image, so that lines are black for masking
morph_img_inv = cv2.bitwise_not(morph_img)
#perform bitwise_and to mask the lines with provided mask
masked_img = cv2.bitwise_xor(thresh, thresh, mask = morph_img)
Input from which text must be extracted
I am trying to get the corners of the box in image. Following are example images, their threshold results and on the right after the arrow are the results that I need. You might have seen these images before too on slack because I am using these images for my example questions on slack.
Following is the code that allows me reach till the middle image.
import cv2
import numpy as np
img_file = 'C:/Users/box.jpg'
img = cv2.imread(img_file, cv2.IMREAD_COLOR)
img = cv2.blur(img, (5, 5))
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(hsv)
thresh0 = cv2.adaptiveThreshold(s, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
thresh1 = cv2.adaptiveThreshold(v, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
thresh2 = cv2.adaptiveThreshold(v, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
thresh = cv2.bitwise_or(thresh0, thresh1)
cv2.imshow('Image-thresh0', thresh0)
cv2.waitKey(0)
cv2.imshow('Image-thresh1', thresh1)
cv2.waitKey(0)
cv2.imshow('Image-thresh2', thresh2)
cv2.waitKey(0)
Is there any method in opencv that can do it for me. I tried dilation cv2.dilate() and erosion cv2.erode() but it doesn't work in my cases.Or if not then what could be alternative ways of doing it ?
Thanks
Canny version of the image ... On the left with low threshold and on the right with high threshold
Below is a python implementation of #dhanushka's approach
import cv2
import numpy as np
# load color image
im = cv2.imread('input.jpg')
# smooth the image with alternative closing and opening
# with an enlarging kernel
morph = im.copy()
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)
morph = cv2.morphologyEx(morph, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
# take morphological gradient
gradient_image = cv2.morphologyEx(morph, cv2.MORPH_GRADIENT, kernel)
# split the gradient image into channels
image_channels = np.split(np.asarray(gradient_image), 3, axis=2)
channel_height, channel_width, _ = image_channels[0].shape
# apply Otsu threshold to each channel
for i in range(0, 3):
_, image_channels[i] = cv2.threshold(~image_channels[i], 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY)
image_channels[i] = np.reshape(image_channels[i], newshape=(channel_height, channel_width, 1))
# merge the channels
image_channels = np.concatenate((image_channels[0], image_channels[1], image_channels[2]), axis=2)
# save the denoised image
cv2.imwrite('output.jpg', image_channels)
The above code doesn't give good results if the image you are dealing are invoices(or has large amount of text on a white background).
In order to get good results on such images, remove
gradient_image = cv2.morphologyEx(morph, cv2.MORPH_GRADIENT, kernel)
and pass morph obj to the split function and remove the ~ symbol inside for loop
You can smooth the image to some degree by applying alternative morphological closing and opening operations with an enlarging structuring element.Here are the original and smoothed versions.
Then take the morphological gradient of the image.
Then apply Otsu threshold to each of the channels, and merge those channels.
If your image sizes are different (larger), you might want to either change some of the parameters of the code or resize the images roughly to the sizes used here. The code is in c++ but it won't be difficult to port it to python.
/* load color image */
Mat im = imread(INPUT_FOLDER_PATH + string("2.jpg"));
/*
smooth the image with alternative closing and opening
with an enlarging kernel
*/
Mat morph = im.clone();
for (int r = 1; r < 4; r++)
{
Mat kernel = getStructuringElement(MORPH_ELLIPSE, Size(2*r+1, 2*r+1));
morphologyEx(morph, morph, CV_MOP_CLOSE, kernel);
morphologyEx(morph, morph, CV_MOP_OPEN, kernel);
}
/* take morphological gradient */
Mat mgrad;
Mat kernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
morphologyEx(morph, mgrad, CV_MOP_GRADIENT, kernel);
Mat ch[3], merged;
/* split the gradient image into channels */
split(mgrad, ch);
/* apply Otsu threshold to each channel */
threshold(ch[0], ch[0], 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);
threshold(ch[1], ch[1], 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);
threshold(ch[2], ch[2], 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);
/* merge the channels */
merge(ch, 3, merged);
Not sure about how robust that solution will be but the idea is pretty simple. The edges of the box should be more pronounced than all the other high frequencies on those images. Thus using some basic preprocessing should allow to emphasize them.
I used your code to make a prototype but the contour finding doesn't have to be the right path. Also sorry for the iterative unsharp masking - didn't have time to adjust the parameters.
import cv2
import numpy as np
def unsharp_mask(img, blur_size = (9,9), imgWeight = 1.5, gaussianWeight = -0.5):
gaussian = cv2.GaussianBlur(img, (5,5), 0)
return cv2.addWeighted(img, imgWeight, gaussian, gaussianWeight, 0)
img_file = 'box.png'
img = cv2.imread(img_file, cv2.IMREAD_COLOR)
img = cv2.blur(img, (5, 5))
img = unsharp_mask(img)
img = unsharp_mask(img)
img = unsharp_mask(img)
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(hsv)
thresh = cv2.adaptiveThreshold(s, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
_, contours, heirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = sorted(contours, key = cv2.contourArea, reverse = True)
#for cnt in cnts:
canvas_for_contours = thresh.copy()
cv2.drawContours(thresh, cnts[:-1], 0, (0,255,0), 3)
cv2.drawContours(canvas_for_contours, contours, 0, (0,255,0), 3)
cv2.imshow('Result', canvas_for_contours - thresh)
cv2.imwrite("result.jpg", canvas_for_contours - thresh)
cv2.waitKey(0)
method 1: using AI models
always try image segmentation models if feasible to your project, robust models will work better on a wider domain than any thresholding technique.
for example Rembg , try online on a Huggingface space
here are the results:
method 2:
almost similar to other answers but with another approach.
instead of closing and opening to blur the "noise", we use cv2.bilateralFilter which is similar to photoshop's surface blur, read more
im = cv2.imread('1.png')
blur = cv2.bilateralFilter(im,21,75,75)
use sobel filter to find edges
from skimage.filters import sobel
gray = cv2.cvtColor(blur,cv2.COLOR_BGR2GRAY)
mm = sobel(gray)
mm = ((mm/mm.max())*255).astype('uint8')
apply thresholding, I use Sauvola Thresholding here:
from skimage.filters import threshold_sauvola
mm2 = np.invert(mm)
thresh_sauvola = threshold_sauvola(mm2, window_size=51)
th = mm2 < thresh_sauvola
dilate and fill holes:
def fill_hole(input_mask):
h, w = input_mask.shape
canvas = np.zeros((h + 2, w + 2), np.uint8)
canvas[1:h + 1, 1:w + 1] = input_mask.copy()
mask = np.zeros((h + 4, w + 4), np.uint8)
cv2.floodFill(canvas, mask, (0, 0), 1)
canvas = canvas[1:h + 1, 1:w + 1].astype(np.bool)
return ~canvas | input_mask
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2,2))
th2 =cv2.morphologyEx((th*255).astype('uint8'), cv2.MORPH_DILATE, kernel)
filled = fill_hole(th2==255)