noise reduction for character recognition improvements? - python

I'm using a KNN to detect characters, however, it is sensitive to background noise. the image is basically what I'm using and I have developed a mini script to try get the best threshold image. would anyone have any suggestions/ changed to get better results? make the X more viewable. ( attached version of current output and what the input is)
import cv2
import numpy as np
image = cv2.imread("resize.png")
img = image
img = cv2.blur(img, (5, 5))
imgGray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # get grayscale image
imgBlurred = cv2.GaussianBlur(imgGray, (11, 11), 5) # blur
# cv2.imshow("test",imgBlurred)
imgThresh = cv2.adaptiveThreshold(imgBlurred, # input image
255, # make pixels that pass the threshold full white
cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
# use gaussian rather than mean, seems to give better results
cv2.THRESH_BINARY_INV,
# invert so foreground will be white, background will be black
13, # size of a pixel neighborhood used to calculate threshold value
2) # constant subtracted from the mean or weighted mean
imgThreshCopy = imgThresh.copy() # make a copy of the thresh image, this in necessary b/c findContours modifies the image
kernel = np.ones((3, 3), np.uint8)
kernel2 = np.ones((7, 7), np.uint8)
kernel3 = np.ones((1, 1), np.uint8)
imgThreshCopy = cv2.morphologyEx(imgThreshCopy, cv2.MORPH_OPEN, kernel)
imgThreshCopy = cv2.morphologyEx(imgThreshCopy, cv2.MORPH_CLOSE, kernel2)
imgThreshCopy = cv2.dilate(imgThreshCopy, kernel3, iterations=150)
res = imgThreshCopy
cv2.imwrite("test.jpg", res)
cv2.imshow("image", res)
cv2.waitKey(0)
output
input

Related

Unable to read image text with python tesseract and OpenCV

I am trying read text from this
using Python with OpenCV. However, it is not able to read it.
import cv2 as cv
import numpy as np
from matplotlib import pyplot as plt
img=cv.imread(file_path,0)
img = cv.medianBlur(img,5)
ret,th1 = cv.threshold(img,127,255,cv.THRESH_BINARY)
th2 =cv.adaptiveThreshold(img,255,cv.ADAPTIVE_THRESH_MEAN_C,\
cv.THRESH_BINARY,11,2)
th3 = cv.adaptiveThreshold(img,255,cv.ADAPTIVE_THRESH_GAUSSIAN_C,\
cv.THRESH_BINARY,11,2)
titles = ['Original Image', 'Global Thresholding (v = 127)',
'Adaptive Mean Thresholding', 'Adaptive Gaussian Thresholding']
images = [img, th1, th2, th3]
for i in range(4):
plt.subplot(2,2,i+1),plt.imshow(images[i],'gray')
plt.title(titles[i])
plt.xticks([]),plt.yticks([])
plt.show()
anyway to do this?
Instead of working on the grayscale image, working on saturation channel of the HSV color space makes the subsequent steps easier.
img = cv2.imread(image_path_to_captcha)
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
s_component = hsv[:,:,1]
s_component
Next, apply a Gaussian blur of appropriate kernel size and sigma value, and later threshold.
blur = cv2.GaussianBlur(s_component,(7,7), 7)
ret,th3 = cv2.threshold(blur,127,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
th3
Next, finding contours in the image above and preserving those above a certain area threshold in the black image variable which will be used as mask later on.
contours, hierarchy = cv2.findContours(th3, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
black = np.zeros((img.shape[0], img.shape[1]), np.uint8)
for contour in contours:
if cv2.contourArea(contour) >600 :
cv2.drawContours(black, [contour], 0, 255, -1)
black
Using the black image variable as mask over the threshold image
res = cv2.bitwise_and(th3, th3, mask = black)
res
Finally, applying morphological thinning to the above result
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5,5))
erode = cv2.erode(res, kernel, iterations=1)
erode
The end result is not what you expect. You can try experimenting different morphology operations prior to drawing contours as well.
EDIT
You can perform distance transform on the above image and use the result:
dist = cv2.distanceTransform(res, cv2.DIST_L2, 3)
dst = cv2.normalize(dist, dst=None, alpha=0, beta=255,norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)
dst

Flood fill function not producing good results

I applied the floodfill function in opencv to extract the foreground from the background but some of the objects in the image were not recognized by the algorithm so I would like to know how I can improve my detections and what modifications are necessary.
image = cv2.imread(args["image"])
image = cv2.resize(image, (800, 800))
h,w,chn = image.shape
ratio = image.shape[0] / 800.0
orig = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (5, 5), 0)
edged = cv2.Canny(gray, 75, 200)
# show the original image and the edge detected image
print("STEP 1: Edge Detection")
cv2.imshow("Image", image)
cv2.imshow("Edged", edged)
warped1 = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
T = threshold_local(warped1, 11, offset = 10, method = "gaussian")
warped1 = (warped1 > T).astype("uint8") * 255
print("STEP 3: Apply perspective transform")
seed = (10, 10)
foreground, birdEye = floodFillCustom(image, seed)
cv2.circle(birdEye, seed, 50, (0, 255, 0), -1)
cv2.imshow("originalImg", birdEye)
cv2.circle(birdEye, seed, 100, (0, 255, 0), -1)
cv2.imshow("foreground", foreground)
cv2.imshow("birdEye", birdEye)
gray = cv2.cvtColor(foreground, cv2.COLOR_BGR2GRAY)
cv2.imshow("gray", gray)
cv2.imwrite("gray.jpg", gray)
threshImg = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)[1]
h_threshold,w_threshold = threshImg.shape
area = h_threshold*w_threshold
cv2.imshow("threshImg", threshImg)[![enter image description here][1]][1]
The floodFillCustom function is as follows -
def floodFillCustom(originalImage, seed):
originalImage = np.maximum(originalImage, 10)
foreground = originalImage.copy()
cv2.floodFill(foreground, None, seed, (0, 0, 0),
loDiff=(10, 10, 10), upDiff=(10, 10, 10))
return [foreground, originalImage]
A little bit late, but here's an alternative solution for segmenting the tools. It involves converting the image to the CMYK color space and extracting the K (Key) component. This component can be thresholded to get a nice binary mask of the tools, the procedure is very straightforward:
Convert the image to the CMYK color space
Extract the K (Key) component
Threshold the image via Otsu's thresholding
Apply some morphology (a closing) to clean up the mask
(Optional) Get bounding rectangles of all the tools
Let's see the code:
# Imports
import cv2
import numpy as np
# Read image
imagePath = "C://opencvImages//"
inputImage = cv2.imread(imagePath+"DAxhk.jpg")
# Create deep copy for results:
inputImageCopy = inputImage.copy()
# Convert to float and divide by 255:
imgFloat = inputImage.astype(np.float) / 255.
# Calculate channel K:
kChannel = 1 - np.max(imgFloat, axis=2)
# Convert back to uint 8:
kChannel = (255*kChannel).astype(np.uint8)
The first step is to convert the BGR image to CMYK. There's no direct conversion in OpenCV for this, so I applied directly the conversion formula. We can get every color space component from that formula, but we are only interested on the K channel. The conversion is easy, but we need to be careful with the data types. We need to operate on float arrays. After getting the K channel, we convert back the image to an unsigned 8-bit array, this is the resulting image:
Let's threshold this image using Otsu's thresholding method:
# Threshold via Otsu:
_, binaryImage = cv2.threshold(kChannel, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
This yields the following binary image:
Looks very nice! Additionally, we can clean it up a little bit (joining the little gaps) using a morphological closing. Let's apply a rectangular structuring element of size 5 x 5 and use 2 iterations:
# Use a little bit of morphology to clean the mask:
# Set kernel (structuring element) size:
kernelSize = 5
# Set morph operation iterations:
opIterations = 2
# Get the structuring element:
morphKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernelSize, kernelSize))
# Perform closing:
binaryImage = cv2.morphologyEx(binaryImage, cv2.MORPH_CLOSE, morphKernel, None, None, opIterations, cv2.BORDER_REFLECT101)
Which results in this:
Very cool. What follows is optional. We can get the bounding rectangles for every tool by looking for the outer (external) contours:
# Find the contours on the binary image:
contours, hierarchy = cv2.findContours(binaryImage, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Look for the outer bounding boxes (no children):
for _, c in enumerate(contours):
# Get the contours bounding rectangle:
boundRect = cv2.boundingRect(c)
# Get the dimensions of the bounding rectangle:
rectX = boundRect[0]
rectY = boundRect[1]
rectWidth = boundRect[2]
rectHeight = boundRect[3]
# Set bounding rectangle:
color = (0, 0, 255)
cv2.rectangle( inputImageCopy, (int(rectX), int(rectY)),
(int(rectX + rectWidth), int(rectY + rectHeight)), color, 5 )
cv2.imshow("Bounding Rectangles", inputImageCopy)
cv2.waitKey(0)
Which produces the final image:

Preprocess images using OpenCV for pytesseract OCR

I want to use OCR (pytesseract) to recognize the text located in images like these:
I have thousands of these arrows. Until now the procedure is as follows: I first resize the image (for another process). Then I crop the image to get rid of the most part of the arrow. Next I draw a white rectangle as a frame to remove further noise but still have distance between text and image borders for better text recognition. I resize the image again to ensure a height of capital letters to ~30 px (https://groups.google.com/forum/#!msg/tesseract-ocr/Wdh_JJwnw94/24JHDYQbBQAJ). Finally I binarize the image with a threshold of 150.
Full code:
import cv2
image_file = '001.jpg'
# load the input image and grab the image dimensions
image = cv2.imread(image_file, cv2.IMREAD_GRAYSCALE)
(h_1, w_1) = image.shape[:2]
# resize the image and grab the new image dimensions
image = cv2.resize(image, (int(w_1*320/h_1), 320))
(h_1, w_1) = image.shape
# crop image
image_2 = image[70:h_1-70, 20:w_1-20]
# get image_2 height, width
(h_2, w_2) = image_2.shape
# draw white rectangle as a frame around the number -> remove noise
cv2.rectangle(image_2, (0, 0), (w_2, h_2), (255, 255, 255), 40)
# resize image, that capital letters are ~ 30 px in height
image_2 = cv2.resize(image_2, (int(w_2*50/h_2), 50))
# image binarization
ret, image_2 = cv2.threshold(image_2, 150, 255, cv2.THRESH_BINARY)
# save image to file
cv2.imwrite('processed_' + image_file, image_2)
# tesseract part can be commented out
import pytesseract
config_7 = ("-c tessedit_char_whitelist=0123456789AB --oem 1 --psm 7")
text = pytesseract.image_to_string(image_2, config=config_7)
print("OCR TEXT: " + "{}\n".format(text))
The problem is that the text located in the arrow is never centered. Sometimes I remove part of the text with the method described above (e.g. in image 50A).
Is there a method in image processing to get rid of the arrow in a more elegant way? For instance using contour detection and deletion? I am more interested in the OpenCV part than the tesseract part to recognize the text.
Any help is appreciated.
If you look at the pictures you will see that there is a white arrow in the image which is also the biggest contour (especially if you draw a black border on the image). If you make a blank mask and draw the arrow (biggest contour on the image) then erode it a little bit you can perform a per element bitwise conjunction of the actual image and eroded mask. If it is not clear look at the bottom code and comments and you will see that it is actually pretty simple.
# imports
import cv2
import numpy as np
img = cv2.imread("number.png") # read image
# you can resize the image here if you like - it should still work for both sizes
h, w = img.shape[:2] # get the actual images height and width
img = cv2.resize(img, (int(w*320/h), 320))
h, w = img.shape[:2]
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # transform to grayscale
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1] # perform OTSU threhold
cv2.rectangle(thresh, (0, 0), (w, h), (0, 0, 0), 2)
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[0] # search for contours
max_cnt = max(contours, key=cv2.contourArea) # select biggest one
mask = np.zeros((h, w), dtype=np.uint8) # create a black mask
cv2.drawContours(mask, [max_cnt], -1, (255, 255, 255), -1) # draw biggest contour on the mask
kernel = np.ones((15, 15), dtype=np.uint8) # make a kernel with appropriate values - in both cases (resized and original) 15 is ok
erosion = cv2.erode(mask, kernel, iterations=1) # erode the mask with given kernel
reverse = cv2.bitwise_not(img.copy()) # reversed image of the actual image 0 becomes 255 and 255 becomes 0
img = cv2.bitwise_and(reverse, reverse, mask=erosion) # per-element bit-wise conjunction of the actual image and eroded mask (erosion)
img = cv2.bitwise_not(img) # revers the image again
# save image to file and display
cv2.imwrite("res.png", img)
cv2.imshow("img", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
You can try simple Python script:
import cv2
import numpy as np
img = cv2.imread('mmubS.png', cv2.IMREAD_GRAYSCALE)
thresh = cv2.threshold(img, 200, 255, cv2.THRESH_BINARY_INV )[1]
im_flood_fill = thresh.copy()
h, w = thresh.shape[:2]
im_flood_fill=cv2.rectangle(im_flood_fill, (0,0), (w-1,h-1), 255, 2)
mask = np.zeros((h + 2, w + 2), np.uint8)
cv2.floodFill(im_flood_fill, mask, (0, 0), 0)
im_flood_fill = cv2.bitwise_not(im_flood_fill)
cv2.imshow('clear text', im_flood_fill)
cv2.imwrite('text.png', im_flood_fill)
Result:

How could crop the image along with the edge using python-opencv

I want to use opencv Canny edge detection to crop the image along with the edge. But I have wrote down the edge detection, but still don't know how to
crop the image along with the edge.
import cv2
import numpy as np
image = cv2.imread("test.png")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) # convert image to gray
blur_image = cv2.GaussianBlur(gray, (5, 5), 0)
edged = cv2.Canny(blur_image, 180, 900)
# cv2.imshow("Image", edged)
# cv2.waitKey(0)
# applying close function
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 10))
# opened = cv2.morphologyEx(edged, cv2.MORPH_OPEN, kernel)
close = cv2.morphologyEx(edged, cv2.MORPH_CLOSE, kernel)
# cv2.imshow("Closed", close)
# cv2.waitKey(0)
_,contours, hierarchy = cv2.findContours( close.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# work files
# cv2.drawContours(image,contours,-1,(0,0,255),3)
#
# cv2.imshow("result", image)
# mask = np.zeros(image.shape,dtype=np.uint8)
#mask = np.ones((image.height,image.width),dtype=np.uint8)
#m#ask[:,:]= 0
# croped = np.zeros()
cv2.drawContours(image,contours,-1,(0,0,255),3)
cv2.imshow("result", image)
cv2.waitKey(0)
I just want to crop the image along with the red color of the edge.

Remove noise from threshold image opencv python

I am trying to get the corners of the box in image. Following are example images, their threshold results and on the right after the arrow are the results that I need. You might have seen these images before too on slack because I am using these images for my example questions on slack.
Following is the code that allows me reach till the middle image.
import cv2
import numpy as np
img_file = 'C:/Users/box.jpg'
img = cv2.imread(img_file, cv2.IMREAD_COLOR)
img = cv2.blur(img, (5, 5))
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(hsv)
thresh0 = cv2.adaptiveThreshold(s, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
thresh1 = cv2.adaptiveThreshold(v, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
thresh2 = cv2.adaptiveThreshold(v, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
thresh = cv2.bitwise_or(thresh0, thresh1)
cv2.imshow('Image-thresh0', thresh0)
cv2.waitKey(0)
cv2.imshow('Image-thresh1', thresh1)
cv2.waitKey(0)
cv2.imshow('Image-thresh2', thresh2)
cv2.waitKey(0)
Is there any method in opencv that can do it for me. I tried dilation cv2.dilate() and erosion cv2.erode() but it doesn't work in my cases.Or if not then what could be alternative ways of doing it ?
Thanks
Canny version of the image ... On the left with low threshold and on the right with high threshold
Below is a python implementation of #dhanushka's approach
import cv2
import numpy as np
# load color image
im = cv2.imread('input.jpg')
# smooth the image with alternative closing and opening
# with an enlarging kernel
morph = im.copy()
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)
morph = cv2.morphologyEx(morph, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
# take morphological gradient
gradient_image = cv2.morphologyEx(morph, cv2.MORPH_GRADIENT, kernel)
# split the gradient image into channels
image_channels = np.split(np.asarray(gradient_image), 3, axis=2)
channel_height, channel_width, _ = image_channels[0].shape
# apply Otsu threshold to each channel
for i in range(0, 3):
_, image_channels[i] = cv2.threshold(~image_channels[i], 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY)
image_channels[i] = np.reshape(image_channels[i], newshape=(channel_height, channel_width, 1))
# merge the channels
image_channels = np.concatenate((image_channels[0], image_channels[1], image_channels[2]), axis=2)
# save the denoised image
cv2.imwrite('output.jpg', image_channels)
The above code doesn't give good results if the image you are dealing are invoices(or has large amount of text on a white background).
In order to get good results on such images, remove
gradient_image = cv2.morphologyEx(morph, cv2.MORPH_GRADIENT, kernel)
and pass morph obj to the split function and remove the ~ symbol inside for loop
You can smooth the image to some degree by applying alternative morphological closing and opening operations with an enlarging structuring element.Here are the original and smoothed versions.
Then take the morphological gradient of the image.
Then apply Otsu threshold to each of the channels, and merge those channels.
If your image sizes are different (larger), you might want to either change some of the parameters of the code or resize the images roughly to the sizes used here. The code is in c++ but it won't be difficult to port it to python.
/* load color image */
Mat im = imread(INPUT_FOLDER_PATH + string("2.jpg"));
/*
smooth the image with alternative closing and opening
with an enlarging kernel
*/
Mat morph = im.clone();
for (int r = 1; r < 4; r++)
{
Mat kernel = getStructuringElement(MORPH_ELLIPSE, Size(2*r+1, 2*r+1));
morphologyEx(morph, morph, CV_MOP_CLOSE, kernel);
morphologyEx(morph, morph, CV_MOP_OPEN, kernel);
}
/* take morphological gradient */
Mat mgrad;
Mat kernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
morphologyEx(morph, mgrad, CV_MOP_GRADIENT, kernel);
Mat ch[3], merged;
/* split the gradient image into channels */
split(mgrad, ch);
/* apply Otsu threshold to each channel */
threshold(ch[0], ch[0], 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);
threshold(ch[1], ch[1], 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);
threshold(ch[2], ch[2], 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);
/* merge the channels */
merge(ch, 3, merged);
Not sure about how robust that solution will be but the idea is pretty simple. The edges of the box should be more pronounced than all the other high frequencies on those images. Thus using some basic preprocessing should allow to emphasize them.
I used your code to make a prototype but the contour finding doesn't have to be the right path. Also sorry for the iterative unsharp masking - didn't have time to adjust the parameters.
import cv2
import numpy as np
def unsharp_mask(img, blur_size = (9,9), imgWeight = 1.5, gaussianWeight = -0.5):
gaussian = cv2.GaussianBlur(img, (5,5), 0)
return cv2.addWeighted(img, imgWeight, gaussian, gaussianWeight, 0)
img_file = 'box.png'
img = cv2.imread(img_file, cv2.IMREAD_COLOR)
img = cv2.blur(img, (5, 5))
img = unsharp_mask(img)
img = unsharp_mask(img)
img = unsharp_mask(img)
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(hsv)
thresh = cv2.adaptiveThreshold(s, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
_, contours, heirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = sorted(contours, key = cv2.contourArea, reverse = True)
#for cnt in cnts:
canvas_for_contours = thresh.copy()
cv2.drawContours(thresh, cnts[:-1], 0, (0,255,0), 3)
cv2.drawContours(canvas_for_contours, contours, 0, (0,255,0), 3)
cv2.imshow('Result', canvas_for_contours - thresh)
cv2.imwrite("result.jpg", canvas_for_contours - thresh)
cv2.waitKey(0)
method 1: using AI models
always try image segmentation models if feasible to your project, robust models will work better on a wider domain than any thresholding technique.
for example Rembg , try online on a Huggingface space
here are the results:
method 2:
almost similar to other answers but with another approach.
instead of closing and opening to blur the "noise", we use cv2.bilateralFilter which is similar to photoshop's surface blur, read more
im = cv2.imread('1.png')
blur = cv2.bilateralFilter(im,21,75,75)
use sobel filter to find edges
from skimage.filters import sobel
gray = cv2.cvtColor(blur,cv2.COLOR_BGR2GRAY)
mm = sobel(gray)
mm = ((mm/mm.max())*255).astype('uint8')
apply thresholding, I use Sauvola Thresholding here:
from skimage.filters import threshold_sauvola
mm2 = np.invert(mm)
thresh_sauvola = threshold_sauvola(mm2, window_size=51)
th = mm2 < thresh_sauvola
dilate and fill holes:
def fill_hole(input_mask):
h, w = input_mask.shape
canvas = np.zeros((h + 2, w + 2), np.uint8)
canvas[1:h + 1, 1:w + 1] = input_mask.copy()
mask = np.zeros((h + 4, w + 4), np.uint8)
cv2.floodFill(canvas, mask, (0, 0), 1)
canvas = canvas[1:h + 1, 1:w + 1].astype(np.bool)
return ~canvas | input_mask
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2,2))
th2 =cv2.morphologyEx((th*255).astype('uint8'), cv2.MORPH_DILATE, kernel)
filled = fill_hole(th2==255)

Categories

Resources