Python: Letter repair using OpenCV for text recognition - python

I am trying to repair image contains letters each letter split into two halves from the middle.
original Image
After Applying the following code:
gray = cv2.cvtColor(cropped_bot, cv2.COLOR_BGR2GRAY)
new_img = ((gray >= 230)*255).astype('uint8')
bottom_image = 255-new_img
I get this Image
My Problem is to fix the line that split the letters into two part.
I have tried adaptiveThreshold
cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 15, 2)
But it doesn't help. How I can solve this

Here are a few steps you can follow:
Perform Otsu threshold on the gray scale image
Apply morphological close operation with a kernel
Code :
ret, thresh1 = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
cv2.imshow('thresh1', thresh1)
k = np.array([[1, 1, 1], [1, 1, 1], [1, 1, 1]], np.uint8)
closing = cv2.morphologyEx(thresh1, cv2.MORPH_CLOSE, k)
cv2.imshow(closing, closing)
UPDATE:
k1 = np.ones((3, 3), np.uint8)
erosion = cv2.erode(closing, k1, iterations = 1)
cv2.imshow(erosion, erosion)

Related

How to put the contours in a image numbers? (OCR)

I've been studying CV for a few months now but I ran into a problem on my second project, I needed to remove the noise from a sequence of numbers, in order to apply ocr. I managed to clean it up, but the numbers lost some internal pixels.
See the initial and current final image.
Initial
Final
Code used:
blur = cv2.GaussianBlur(img, (15, 15), 2)
hsv = cv2.cvtColor(blur, cv2.COLOR_BGR2HSV)
lower_gray = np.array([1, 1, 1])
upper_gray = np.array([102, 102, 102])
mask = cv2.inRange(hsv, lower_gray, upper_gray)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
opened_mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
masked_img = cv2.bitwise_and(img, img, mask=opened_mask)
coloured = masked_img.copy()
coloured[mask == 0] = (255, 255, 255)
gray = cv2.cvtColor(coloured, cv2.COLOR_BGR2GRAY)
des = cv2.bitwise_not(gray)
contour, hier = cv2.findContours(des, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
for cnt in contour:
cv2.drawContours(des, [cnt], 0, 255, -1)
#des is the final image
Is there a better way to clean the background for OCR, or maybe close the lost pixels in the characters?
I managed to solve it, I didn't use the method you mentioned, but it was a good way, I was apprehensive that it would cause an expansion in the characters and wouldn't be good for OCR reading.
This is my final result:
for mrz in mrz_list:
try:
thresh = cv2.threshold(mrz, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
dist = cv2.distanceTransform(thresh, cv2.DIST_L2, 5)
dist = cv2.normalize(dist, dist, 0, 1.0, cv2.NORM_MINMAX)
dist = (dist * 255).astype("uint8")
thresh = cv2.threshold(dist, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
cnts = cv2.findContours(opening.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
chars = []
for c in cnts:
(x, y, w, h) = cv2.boundingRect(c)
if w >= 20 and h >= 25:
chars.append(c)
chars = np.vstack([chars[i] for i in range(0, len(chars))])
hull = cv2.convexHull(chars)
mask = np.zeros(mrz.shape[:2], dtype="uint8")
cv2.drawContours(mask, [hull], -1, 255, -1)
mask = cv2.dilate(mask, None, iterations=2)
final = cv2.bitwise_and(opening, opening, mask=mask)`
Thanks everyone.
It is easy to get a clean background by morphological dilation or closing, a smooth outline by Gaussian filtering, and then binarization. But I found no way to separate the characters.

Unable to OCR alphanumerical image with Tesseract

I'm trying to read some alphanumerical strings in python with pytesseract.
I pre-process the images to reduce noise and make them black and white, but I consistently have issues reading the digits inside the string.
original:
after cleanup:
Extracted text: WISOMW
Code used:
def convert(path):
image = cv2.imread(path)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3, 3), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
invert = 255 - thresh
cv2.imwrite("processed.jpg", invert)
# Perform text extraction
return pytesseract.image_to_string(invert, config="--psm 7")
I've tried different configuration options for tesseract:
oem: tried 1, 3
psm: tried different modes
tessedit_char_whitelist: limited to alphanumerical characters
I feel I'm missing something obvious given that it reliably reads the alpha characters. Any ideas of what can it be?
You were so close. A dilate helps increase white/decrease black. The resolution is low, so a small kernel is used for dilate. If you remove the _INV from your threshold step, you don't need to do another inversion.
import cv2
import numpy as np
import pytesseract
img = cv2.imread('wis9mw.jpg', cv2.IMREAD_GRAYSCALE )
img = cv2.GaussianBlur(img, (3, 3), 0)
img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
kernel = np.ones((1,1), np.uint8)
img = cv2.dilate(img, kernel, iterations=1)
cv2.imwrite('processed.jpg', img)
text = pytesseract.image_to_string(img, config="--psm 6")
print(text)
gives
WIS9MW

How to use openCV to change white to black?

I have a bean on a white background damper, the issue is that the damper is not perfectly white (as in 255,255,255). I have tried using cv2.threshold() method but I kept getting a deformed image with spots. What is the best way to achieve this?
My Image
My Code
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY)
img[thresh == 255] = 0
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
erosion = cv2.erode(img, kernel, iterations = 1)
cv2.namedWindow('image', cv2.WINDOW_NORMAL)
cv2.imshow("image", erosion)
In the end, the beans should only be surrounded by black images!
It could be the solution:
import cv2
import numpy as np
img = cv2.imread("data/bob.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img_blured = cv2.blur(gray, (5, 5))
ret, thres = cv2.threshold(img_blured, 130, 255, cv2.THRESH_BINARY)
neg = cv2.bitwise_not(thres)
erosion = cv2.erode(neg, np.ones((6, 6), np.uint8), iterations=1)
cv2.imshow("erosion", erosion)
# ret, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY)
img[erosion == 0] = 0
cv2.imshow("image", img)
cv2.waitKey(0)
Here I am using cv2.threshold but for bigger range than you used, but before it I blured the image. Then I negate and erode it.
But this cuts off the bean itself a little, if this is critical, then you should use a completely different algorithm. For example, the cv2.Canny method to find the contours of a bean and somehow further process it.

Count gear (Python, OpenCV)

For a prototype I need to build a 3d model of a gear. This have a "many" number of teeth.
So I am trying to count them using OpenCV and Python. I found this (only?) post which explain how to do it in C++.
I am following the steps and, for now this is the code I made.
import numpy as np
import cv2
img = cv2.imread('C:\\Users\\Link\\Desktop\\gear.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
kernel = np.ones((3, 3), np.uint8)
img_erosion = cv2.erode(thresh, kernel, iterations=1)
edges = cv2.Canny(img_erosion, 50, 150)
img_dilate = cv2.dilate(edges, kernel, iterations=1)
cv2.imshow('i', thresh)
cv2.waitKey(0)
cv2.imshow('i', img_erosion)
cv2.waitKey(0)
cv2.imshow('i', edges)
cv2.waitKey(0)
cv2.imshow('i', img_dilate)
cv2.waitKey(0)
What stopped me from go ahead is this: the image at some point became really a mess.
This is the original on which I am working:
And this is the output of image_dilate
As you can see, the teeth at the bottom is not displayed properly, maybe because of the shaddow in the original image. How can I get rid of this ?
Because your source image is cleaner than the link your post, so you can do approx on the max-area-contour, then get half number of points, the result is 84.
Sample code:
#!/usr/bin/python3
# 2018.01.22 11:53:24 CST
import cv2
import myutils
## Read
img = cv2.imread("img13_2.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
## threshold and find contours
ret, threshed = cv2.threshold(gray, 50, 255, cv2.THRESH_BINARY_INV)
cnts= cv2.findContours(threshed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
## Find the max-area-contour
cnt = max(contours, key=cv2.contourArea)
## Approx the contour
arclen = cv2.arcLength(cnt, True)
approx = cv2.approxPolyDP(cnt, 0.002*arclen, True)
## Draw and output the result
for pt in approx:
cv2.circle(img, (pt[0][0],pt[0][1]), 3, (0,255,0), -1, cv2.LINE_AA)
msg = "Total: {}".format(len(approx)//2)
cv2.putText(img, msg, (20,40),cv2.FONT_HERSHEY_PLAIN, 2, (0,0,255), 2, cv2.LINE_AA)
## Display
cv2.imshow("res", img);cv2.waitKey()
Result:
Solved it..
This is the code. The count is wrong by one because one teeth, on the right is lower than the others and because it found two points by itself. Don't know why this happens.
Also, it has been made with another image. It's not the source I posted above as long as it is in low definition.
import numpy as np
import cv2
img = cv2.imread('C:\\Users\\Link\\Desktop\\gear.png')
img2 = cv2.imread('C:\\Users\\Link\\Desktop\\gear.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
kernel = np.ones((3, 3), np.uint8)
img_dilate = cv2.dilate(thresh, kernel, iterations=1)
im2, contours, hierarchy = cv2.findContours(img_dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cv2.drawContours(img, contours, -1, (0, 255, 0), -1)
edges = cv2.Canny(cnts, 350, 350)
cnt = contours[0]
hull = cv2.convexHull(cnt, returnPoints=False)
defects = cv2.convexityDefects(cnt, hull)
for i in range(defects.shape[0]):
s, e, f, d = defects[i, 0]
start = tuple(cnt[s][0])
end = tuple(cnt[e][0])
far = tuple(cnt[f][0])
cv2.line(edges, start, end, [0, 255, 255], 1)
circles = cv2.circle(img2, end, 5, [0, 255, 0], -1)
# print(len(defects)) - number of points
cv2.imshow('thresh', thresh)
cv2.waitKey(0)
cv2.imshow('dilate', img_dilate)
cv2.waitKey(0)
cv2.imshow('edges', edges)
cv2.waitKey(0)
cv2.imshow('cnts', cnts)
cv2.waitKey(0)
cv2.imshow('points', circles)
cv2.waitKey(0)

Remove noise from threshold image opencv python

I am trying to get the corners of the box in image. Following are example images, their threshold results and on the right after the arrow are the results that I need. You might have seen these images before too on slack because I am using these images for my example questions on slack.
Following is the code that allows me reach till the middle image.
import cv2
import numpy as np
img_file = 'C:/Users/box.jpg'
img = cv2.imread(img_file, cv2.IMREAD_COLOR)
img = cv2.blur(img, (5, 5))
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(hsv)
thresh0 = cv2.adaptiveThreshold(s, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
thresh1 = cv2.adaptiveThreshold(v, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
thresh2 = cv2.adaptiveThreshold(v, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
thresh = cv2.bitwise_or(thresh0, thresh1)
cv2.imshow('Image-thresh0', thresh0)
cv2.waitKey(0)
cv2.imshow('Image-thresh1', thresh1)
cv2.waitKey(0)
cv2.imshow('Image-thresh2', thresh2)
cv2.waitKey(0)
Is there any method in opencv that can do it for me. I tried dilation cv2.dilate() and erosion cv2.erode() but it doesn't work in my cases.Or if not then what could be alternative ways of doing it ?
Thanks
Canny version of the image ... On the left with low threshold and on the right with high threshold
Below is a python implementation of #dhanushka's approach
import cv2
import numpy as np
# load color image
im = cv2.imread('input.jpg')
# smooth the image with alternative closing and opening
# with an enlarging kernel
morph = im.copy()
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)
morph = cv2.morphologyEx(morph, cv2.MORPH_OPEN, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
# take morphological gradient
gradient_image = cv2.morphologyEx(morph, cv2.MORPH_GRADIENT, kernel)
# split the gradient image into channels
image_channels = np.split(np.asarray(gradient_image), 3, axis=2)
channel_height, channel_width, _ = image_channels[0].shape
# apply Otsu threshold to each channel
for i in range(0, 3):
_, image_channels[i] = cv2.threshold(~image_channels[i], 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY)
image_channels[i] = np.reshape(image_channels[i], newshape=(channel_height, channel_width, 1))
# merge the channels
image_channels = np.concatenate((image_channels[0], image_channels[1], image_channels[2]), axis=2)
# save the denoised image
cv2.imwrite('output.jpg', image_channels)
The above code doesn't give good results if the image you are dealing are invoices(or has large amount of text on a white background).
In order to get good results on such images, remove
gradient_image = cv2.morphologyEx(morph, cv2.MORPH_GRADIENT, kernel)
and pass morph obj to the split function and remove the ~ symbol inside for loop
You can smooth the image to some degree by applying alternative morphological closing and opening operations with an enlarging structuring element.Here are the original and smoothed versions.
Then take the morphological gradient of the image.
Then apply Otsu threshold to each of the channels, and merge those channels.
If your image sizes are different (larger), you might want to either change some of the parameters of the code or resize the images roughly to the sizes used here. The code is in c++ but it won't be difficult to port it to python.
/* load color image */
Mat im = imread(INPUT_FOLDER_PATH + string("2.jpg"));
/*
smooth the image with alternative closing and opening
with an enlarging kernel
*/
Mat morph = im.clone();
for (int r = 1; r < 4; r++)
{
Mat kernel = getStructuringElement(MORPH_ELLIPSE, Size(2*r+1, 2*r+1));
morphologyEx(morph, morph, CV_MOP_CLOSE, kernel);
morphologyEx(morph, morph, CV_MOP_OPEN, kernel);
}
/* take morphological gradient */
Mat mgrad;
Mat kernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
morphologyEx(morph, mgrad, CV_MOP_GRADIENT, kernel);
Mat ch[3], merged;
/* split the gradient image into channels */
split(mgrad, ch);
/* apply Otsu threshold to each channel */
threshold(ch[0], ch[0], 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);
threshold(ch[1], ch[1], 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);
threshold(ch[2], ch[2], 0, 255, CV_THRESH_BINARY | CV_THRESH_OTSU);
/* merge the channels */
merge(ch, 3, merged);
Not sure about how robust that solution will be but the idea is pretty simple. The edges of the box should be more pronounced than all the other high frequencies on those images. Thus using some basic preprocessing should allow to emphasize them.
I used your code to make a prototype but the contour finding doesn't have to be the right path. Also sorry for the iterative unsharp masking - didn't have time to adjust the parameters.
import cv2
import numpy as np
def unsharp_mask(img, blur_size = (9,9), imgWeight = 1.5, gaussianWeight = -0.5):
gaussian = cv2.GaussianBlur(img, (5,5), 0)
return cv2.addWeighted(img, imgWeight, gaussian, gaussianWeight, 0)
img_file = 'box.png'
img = cv2.imread(img_file, cv2.IMREAD_COLOR)
img = cv2.blur(img, (5, 5))
img = unsharp_mask(img)
img = unsharp_mask(img)
img = unsharp_mask(img)
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(hsv)
thresh = cv2.adaptiveThreshold(s, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
_, contours, heirarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = sorted(contours, key = cv2.contourArea, reverse = True)
#for cnt in cnts:
canvas_for_contours = thresh.copy()
cv2.drawContours(thresh, cnts[:-1], 0, (0,255,0), 3)
cv2.drawContours(canvas_for_contours, contours, 0, (0,255,0), 3)
cv2.imshow('Result', canvas_for_contours - thresh)
cv2.imwrite("result.jpg", canvas_for_contours - thresh)
cv2.waitKey(0)
method 1: using AI models
always try image segmentation models if feasible to your project, robust models will work better on a wider domain than any thresholding technique.
for example Rembg , try online on a Huggingface space
here are the results:
method 2:
almost similar to other answers but with another approach.
instead of closing and opening to blur the "noise", we use cv2.bilateralFilter which is similar to photoshop's surface blur, read more
im = cv2.imread('1.png')
blur = cv2.bilateralFilter(im,21,75,75)
use sobel filter to find edges
from skimage.filters import sobel
gray = cv2.cvtColor(blur,cv2.COLOR_BGR2GRAY)
mm = sobel(gray)
mm = ((mm/mm.max())*255).astype('uint8')
apply thresholding, I use Sauvola Thresholding here:
from skimage.filters import threshold_sauvola
mm2 = np.invert(mm)
thresh_sauvola = threshold_sauvola(mm2, window_size=51)
th = mm2 < thresh_sauvola
dilate and fill holes:
def fill_hole(input_mask):
h, w = input_mask.shape
canvas = np.zeros((h + 2, w + 2), np.uint8)
canvas[1:h + 1, 1:w + 1] = input_mask.copy()
mask = np.zeros((h + 4, w + 4), np.uint8)
cv2.floodFill(canvas, mask, (0, 0), 1)
canvas = canvas[1:h + 1, 1:w + 1].astype(np.bool)
return ~canvas | input_mask
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2,2))
th2 =cv2.morphologyEx((th*255).astype('uint8'), cv2.MORPH_DILATE, kernel)
filled = fill_hole(th2==255)

Categories

Resources