Input image:
I want to extract the data from the image (OCR).
Code which I tried:
import cv2
import textract
import numpy as np
# Read the scan as grayscale and binarize it so that dark pixels become white.
img = cv2.imread('/home/ajay/Desktop/name.jpg', cv2.IMREAD_GRAYSCALE)
_, blackAndWhite = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)

# Label the 8-connected components; the first stats row is skipped, exactly as
# the per-label loop did, so index k in `sizes` belongs to label k + 1.
nlabels, labels, stats, centroids = cv2.connectedComponentsWithStats(
    blackAndWhite, None, None, None, 8, cv2.CV_32S)
sizes = stats[1:, cv2.CC_STAT_AREA]

# Keep only components of at least 50 pixels (filters the small dotted regions).
kept_labels = np.flatnonzero(sizes >= 50) + 1
img2 = np.zeros(labels.shape, np.uint8)
img2[np.isin(labels, kept_labels)] = 255

# Invert back to dark-on-light, save, and run OCR on the saved file.
res = cv2.bitwise_not(img2)
cv2.imwrite('ress.png', res)
a = textract.process('ress.png', method='tesseract').decode()
print(a)
A simple method is:
Apply a sharpening kernel
Otsu's threshold
Apply slight Gaussian blur
Invert image
OCR
Here's a visualization of the steps:
Input image
Sharpen
Otsu's threshold
Slight Gaussian blur
Invert image
Here's the OCR results using Pytesseract
DST INTERNATIONAL D-307# 3266 01 Dec 2007. HowellJerde Jan!
2007" 125802AM RafaelaBoyer Keon3#gmnil.com Fhvio Abernathy Sr.
Code
import cv2
import numpy as np
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load the image and reduce it to a single gray channel.
image = cv2.imread('1.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# 3x3 sharpening kernel: -1 everywhere with 9 in the centre.
kernel = np.full((3, 3), -1)
kernel[1, 1] = 9
sharpen = cv2.filter2D(gray, -1, kernel)

# Otsu picks the threshold automatically; the inverted mode makes text white.
_, thresh = cv2.threshold(sharpen, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# A light blur softens jagged edges, then the image is flipped back to
# dark-on-light before it is handed to Tesseract.
blur = cv2.GaussianBlur(thresh, (3, 3), 0)
invert = cv2.bitwise_not(blur)

data = pytesseract.image_to_string(invert, lang='eng', config='--psm 6')
print(data)

# Show every intermediate stage in its own window.
for title, stage in (('sharpen', sharpen), ('thresh', thresh),
                     ('blur', blur), ('invert', invert)):
    cv2.imshow(title, stage)
cv2.waitKey()
Related
My code is not binarizing the image well!
LpImg = cv2.imread('/content/drive/My Drive/TESTING/Placas_detectadas/CPVL92.png')
# cv2.imread returns None when the file cannot be read and len(None) raises
# TypeError, so rule out None before checking that the image has content.
if LpImg is not None and len(LpImg):  # check if there is at least one license image
    # The convertScaleAbs() result used to be overwritten on the very next
    # line, so the cropped plate image is used directly.
    plate_image = LpImg  # image_cropped

    # convert to grayscale and blur the image
    gray = cv2.cvtColor(plate_image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (7, 7), 0)

    # Applied inversed thresh_binary
    # NOTE: the threshold reads `gray`; `blur` is only displayed in the figure.
    thresh_inv = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 39, 1)
    #binary = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Dilate with a 3x3 rectangle to thicken the white strokes.
    kernel3 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    thre_mor = cv2.morphologyEx(thresh_inv, cv2.MORPH_DILATE, kernel3)

    # visualize results
    fig = plt.figure(figsize=(12, 7))
    plt.rcParams.update({"font.size": 18})
    grid = gridspec.GridSpec(ncols=2, nrows=3, figure=fig)
    plot_image = [plate_image, gray, blur, thresh_inv, thre_mor]
    plot_name = ["plate_image", "gray", "blur", "binary", "dilation"]
    for i, (picture, title) in enumerate(zip(plot_image, plot_name)):
        fig.add_subplot(grid[i])
        plt.axis(False)
        plt.title(title)
        if i == 0:
            # The first panel is the colour input; the rest are single-channel.
            plt.imshow(picture)
        else:
            plt.imshow(picture, cmap="gray")
This is the image:
With these results:
If I use adaptive threshold
binary = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
to this line
thresh_inv = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 39, 1)
I have got this result:
Why is this happening? How can I solve it?
I was thinking of using this:
# NOTE(review): Ioutlow, Iouthigh, rows and cols are not defined in this
# excerpt; they must come from an earlier filtering step.
LpImg = cv2.imread('/content/image.png')

# Set scaling factors and add
gamma1 = 0.3
gamma2 = 1.5
Iout = gamma1*Ioutlow[0:rows,0:cols] + gamma2*Iouthigh[0:rows,0:cols]

# Anti-log then rescale to [0,1]
Ihmf = np.expm1(Iout)
Ihmf = (Ihmf - np.min(Ihmf)) / (np.max(Ihmf) - np.min(Ihmf))

# A first threshold pass computed from 255*LpImg used to sit here; both of its
# results were overwritten by the lines below before being read, so it is gone.
Ihmf2 = np.array(255*Ihmf, dtype="uint8")

# Threshold the image - Anything below intensity 65 gets set to white
Ithresh = Ihmf2 < 65 #65
Ithresh = 255*Ithresh.astype("uint8")
That gives this result:
But I still want to use these filters:
Grayscale
Blur
Binarization
Segmentation
Another approach is to use division normalization in Python/OpenCV.
Read the input
Convert to gray
Apply morphology dilation
Divide the input by the dilated image
Threshold
Save the results
Input:
import cv2
import numpy as np
# Load the plate and reduce it to one gray channel.
img = cv2.imread('license_chile.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Dilate with a large 75x75 rectangle to build the image used as the divisor.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (75, 75))
smooth = cv2.morphologyEx(gray, cv2.MORPH_DILATE, kernel)

# Divide the gray image by the dilated one, scaled back to the 0..255 range.
division = cv2.divide(gray, smooth, scale=255)

# Otsu chooses the cut-off automatically.
_, result = cv2.threshold(division, 0, 255, cv2.THRESH_OTSU)

# Persist the binarized image.
cv2.imwrite('license_chile_thresh.jpg', result)

# Display each stage and wait for a key press before closing the windows.
for title, stage in (('smooth', smooth), ('division', division), ('result', result)):
    cv2.imshow(title, stage)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
I am trying to remove the background of the image (the background can be any other color or contain noise, dust, etc)
This is the image:
And this is my code:
import cv2
import numpy as np  # np.zeros / np.ones are used below; this import was missing

# Load as grayscale and stretch the intensities to the full 0..255 range.
img = cv2.imread('image.jpg', 0)
norm_img = np.zeros(img.shape)
normim = cv2.normalize(img, norm_img, 0, 255, cv2.NORM_MINMAX)

# Binarize the (un-normalized) input at a fixed level of 127.
_, thresh1 = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)

# Open with a 5x5 kernel to remove small white specks, then invert the mask.
kernel = np.ones((5, 5), np.uint8)
opening = cv2.morphologyEx(thresh1, cv2.MORPH_OPEN, kernel)
mask_inv = cv2.bitwise_not(opening)

# Saturating add: wherever the inverted mask is 255 the result is white.
seg = cv2.add(mask_inv, normim)
Output:
The code normalizes the original image and then adds it to the binary image produced by the morphological operation.
Result of normalizing the original image and applying the morphological operation to the original image:
So what happens with my code, how can I remove the background?
You can do that using Numpy and Python/OpenCV as follows:
Input:
Mask:
import cv2
import numpy as np
# Load the colour image and the mask (the mask as a single gray channel).
img = cv2.imread('fingerprint.jpg')
mask = cv2.imread('mask.jpg', cv2.IMREAD_GRAYSCALE)

# Binarize the mask with Otsu's automatically chosen threshold.
_, thresh = cv2.threshold(mask, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# Paint every pixel where the thresholded mask is zero white, on a copy.
outside = thresh == 0
result = img.copy()
result[outside] = (255, 255, 255)

# Write the masked image to disk and show it until a key is pressed.
cv2.imwrite('fingerprint_masked.jpg', result)
cv2.imshow('masked image', result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
You can try using percentiles for normalization.
import cv2
from numpy import percentile
# Load as grayscale and stretch to the full 0..255 range, in place.
img = cv2.imread('mSEsr.jpg', cv2.IMREAD_GRAYSCALE)
cv2.normalize(img, img, 0, 255, cv2.NORM_MINMAX)

# 5th and 50th percentiles of the stretched image drive the second pass.
lower, upper = percentile(img, [5, 50])

# Second min-max normalization, again in place, with percentile-derived bounds.
cv2.normalize(img, img, -lower, 255 + 255 - upper, cv2.NORM_MINMAX)  # tune parameters
cv2.imwrite('finger_norm.png', img)
Result:
I'm using pytesseract (0.3.2) with openCV (4.1.2) to identify digits in images. While image_to_string is working, image_to_data and image_to_boxes are not. I need to be able to draw the bounding boxes on the images and this has stumped me. I've tried different images, older versions of pytesseract, etc. I'm using Windows and Jupyter Notebooks.
import cv2
import pytesseract
#erosion
def erode(image):
    """Erode *image* once with a 5x5 all-ones kernel and return the result."""
    # The surrounding script never imports numpy, so bring it in here to avoid
    # a NameError on `np`.
    import numpy as np

    kernel = np.ones((5, 5), np.uint8)
    return cv2.erode(image, kernel, iterations=1)
#grayscale
def get_grayscale(image):
    """Return a single-channel grayscale version of a BGR image."""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return gray
#thresholding
def thresholding(image):
    """Binarize *image* with cv2.threshold using the BINARY + OTSU flags."""
    # Alternative that was tried:
    # cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 2)
    _, binary = cv2.threshold(image, 200, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return binary
from PIL import Image  # Image.open() is used below but was never imported

img = cv2.imread('my_image.jpg')
pytesseract.pytesseract.tesseract_cmd = r'C:\mypath\tesseract.exe'

# Preprocess: grayscale -> threshold -> erosion.
gray = get_grayscale(img)
thresh = thresholding(gray)
# `remove_noise` is not defined in this script; the helper that is defined is
# erode(). The result gets its own name so it no longer shadows that helper.
eroded = erode(thresh)

# Restrict recognition to digits; --psm 6 is passed through to Tesseract.
custom_config = r'-c tessedit_char_whitelist=0123456789 --psm 6'
print(pytesseract.image_to_string(eroded, config=custom_config))
cv2.imwrite("test.jpg", eroded)

#these return nothing
print(pytesseract.image_to_boxes(Image.open('test.jpg')))
print(pytesseract.image_to_data(Image.open('test.jpg')))
Instead of using image_to_boxes, an alternative approach is to simply find contours with cv2.findContours, obtain the bounding rectangle coordinates with cv2.boundingRect, and draw the bounding box with cv2.rectangle
Using this sample input image
Drawn boxes
Result from OCR
1234567890
Code
import cv2
import pytesseract
import numpy as np
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Read the image, convert to gray and binarize (inverted) with Otsu.
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# findContours returns 2 or 3 values depending on the OpenCV version; pick the
# contour list in either case.
found = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = found[0] if len(found) == 2 else found[1]

# Outline each external contour's bounding rectangle on the colour image.
for x, y, w, h in map(cv2.boundingRect, contours):
    cv2.rectangle(image, (x, y), (x + w, y + h), (36, 255, 12), 2)

# OCR runs on the re-inverted threshold image.
data = pytesseract.image_to_string(cv2.bitwise_not(thresh), lang='eng', config='--psm 6')
print(data)

for title, stage in (('thresh', thresh), ('image', image)):
    cv2.imshow(title, stage)
cv2.waitKey()
Please try the following code:
from pytesseract import Output
import pytesseract
import cv2
image = cv2.imread("my_image.jpg")
# OpenCV stores images as BGR while pytesseract assumes RGB, so swap the
# channel order before running OCR.
rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image_height = image.shape[0]

pytesseract.pytesseract.tesseract_cmd = r'C:\mypath\tesseract.exe'
custom_config = r'-c tessedit_char_whitelist=0123456789 --psm 6'
results = pytesseract.image_to_data(rgb, output_type=Output.DICT, lang='eng', config=custom_config)
boxresults = pytesseract.image_to_boxes(rgb, output_type=Output.DICT, lang='eng', config=custom_config)
print(results)
print(boxresults)

# image_to_data: (left, top, width, height) with the origin at the top-left.
# Only level-5 entries are labelled and outlined, in red.
for level, text, tl_x, tl_y, width, height in zip(
        results["level"], results["text"], results["left"],
        results["top"], results["width"], results["height"]):
    if level != 5:
        continue
    br_x = tl_x + width
    br_y = tl_y + height
    cv2.putText(image, text, (tl_x, tl_y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
    cv2.rectangle(image, (tl_x, tl_y), (br_x, br_y), (0, 0, 255), 1)

# image_to_boxes uses Tesseract's box-file convention, where y is measured
# from the BOTTOM edge of the image. OpenCV measures y from the top, so flip
# both y values; without the flip the blue boxes are mirrored vertically.
for left, bottom, right, top in zip(
        boxresults["left"], boxresults["bottom"],
        boxresults["right"], boxresults["top"]):
    cv2.rectangle(image, (left, image_height - top), (right, image_height - bottom), (255, 0, 0), 1)

cv2.imshow("image", image)
cv2.waitKey(0)
I'm trying to remove the blue background color in the image below.
The blue color can be light or deep.
I tried to use cv2.inRange() function but failed.
How can I do that?
import sys
import cv2
import numpy as np
# The image path is the first command-line argument.
image = cv2.imread(sys.argv[1])
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

# HSV bounds used to select the blue background.
lower_blue, upper_blue = np.array([85, 50, 40]), np.array([135, 255, 255])
mask = cv2.inRange(hsv, lower_blue, upper_blue)

# Overwrite every pixel selected by the mask with white.
is_blue = mask > 0
image[is_blue] = (255, 255, 255)

cv2.imshow('image', image)
cv2.waitKey(0)
I removed the background and also did OCR on the image. Here is the result:
And the code I used:
import pytesseract
import cv2
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe'

# Load the ID image and convert it to gray.
img = cv2.imread('idText.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Mean adaptive threshold: block size 35, constant 90.
block_size, offset = 35, 90
adaptiveThresh = cv2.adaptiveThreshold(
    gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, block_size, offset)

# OCR the binarized image; the option string is passed through to Tesseract.
config = '-l eng --oem 1 --psm 3'
text = pytesseract.image_to_string(adaptiveThresh, config=config)
print("Result: " + text)

for title, shown in (('original', img), ('adaptiveThresh', adaptiveThresh)):
    cv2.imshow(title, shown)
cv2.waitKey(0)
Hope I helped you.
You can try thresholding to obtain a binary image and morphological transformations to smooth the text
import cv2
# Load the image and convert it to gray.
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Pixels above 105 become white and the rest black. This is the same image as
# an inverted threshold followed by `255 - thresh`.
_, thresh = cv2.threshold(gray, 105, 255, cv2.THRESH_BINARY)

# One pass of morphological opening with a 3x3 rectangle smooths the text.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
result = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)

for title, stage in (('thresh', thresh), ('result', result)):
    cv2.imshow(title, stage)
cv2.imwrite('result.png', result)
cv2.waitKey()
I have an image that is blurred and contains some noise. I have tried Image Denoising from the following example.
The code to remove the Gaussian noise from a color image using the Non-local Means Denoising algorithm:
import numpy as np
import cv2
from matplotlib import pyplot as plt
img = cv2.imread("data_5/1.png")

# Denoising: Non-local Means on the colour image
# (h=10, hColor=10, templateWindowSize=7, searchWindowSize=21).
dst = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21)

# cv2.imshow expects BGR channel order, which is what imread and the denoiser
# produce. The former split/merge to RGB is only needed for matplotlib and
# swapped red and blue in this window, so the result is shown as-is.
cv2.imshow('denoising black and white', dst)
cv2.waitKey(0)
The output of the above code:
The above code removes some noise. But here some numbers are blurred and the table lines are blurred.
Can anyone suggest me a better solution to remove blurriness and Noise from the above image?
import numpy as np
import cv2
from PIL import Image
from tesserocr import PyTessBaseAPI, RIL
def _find_contours(mask):
    """Return the contour list from cv2.findContours on OpenCV 3.x and 4.x."""
    # OpenCV 3 returns (image, contours, hierarchy); OpenCV 4 returns
    # (contours, hierarchy). A fixed 3-value unpack fails on OpenCV 4.
    found = cv2.findContours(mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    return found[0] if len(found) == 2 else found[1]


def _erase_small_contours(edges, max_w, max_h):
    """Fill with 0, in place, each contour whose bounding box is under both limits."""
    for cnt in _find_contours(edges):
        _, _, w, h = cv2.boundingRect(cnt)
        if w < max_w and h < max_h:
            cv2.drawContours(edges, [cnt], -1, 0, -1)


if __name__ == '__main__':
    # Load at half size, then binarize (inverted Otsu) and median-filter.
    image = cv2.imread('image.png', cv2.IMREAD_UNCHANGED)
    image = cv2.resize(image, (0, 0), fx=0.5, fy=0.5)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    ret, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
    binary = cv2.medianBlur(binary, 3)
    (rows, cols) = image.shape[:2]

    # Sobel derivatives along x (H) and y (V) of the binary image.
    H = cv2.Sobel(binary, cv2.CV_8U, 1, 0, ksize=5)
    V = cv2.Sobel(binary, cv2.CV_8U, 0, 1, ksize=5)

    # Drop responses smaller than a third of the image in both dimensions, so
    # only long structures (the table lines) survive in V and H.
    _erase_small_contours(V, cols / 3, rows / 3)
    _erase_small_contours(H, cols / 3, rows / 3)

    # Thicken the surviving lines and erase them from the binary image.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    V = cv2.morphologyEx(V, cv2.MORPH_DILATE, kernel, iterations=3)
    H = cv2.morphologyEx(H, cv2.MORPH_DILATE, kernel, iterations=3)
    binary[V == 255] = 0
    binary[H == 255] = 0
    binary = cv2.bitwise_not(binary)

    # The context manager releases the Tesseract handle when the block exits.
    with PyTessBaseAPI() as api:
        api.SetImage(Image.fromarray(binary))
        text = api.GetUTF8Text().split()
        boxes = api.GetComponentImages(RIL.TEXTLINE, True)

    for i, (_, box, _, _) in enumerate(boxes):
        (x, y, w, h) = box['x'], box['y'], box['w'], box['h']
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255))
        # `text` holds whitespace-separated words while `boxes` holds text
        # lines, so there may be fewer words than boxes; skip the label then.
        if i < len(text):
            cv2.putText(image, text[i], (x, y), cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0))

    cv2.imshow('image', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
I have tried applying a Gaussian blur and then adaptive thresholding; the result removed the noise and blurriness in the image.
import cv2 as cv
# input: read directly as grayscale (flag 0)
img = cv.imread('data_5/1.png', 0)

# Gaussian blur with a 15x15 kernel to suppress noise before thresholding
img = cv.GaussianBlur(img, (15, 15), 0)

# adaptive threshold: Gaussian-weighted neighbourhood, block size 11, constant 2
th3 = cv.adaptiveThreshold(img, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C,
                           cv.THRESH_BINARY, 11, 2)

# OpenCV is imported as `cv` in this snippet, so calling cv2.imshow /
# cv2.waitKey raised NameError; use the `cv` alias throughout.
cv.imshow('Noise Filtered Image', th3)
cv.waitKey(0)
cv.imwrite('data_5/result.png', th3)
The output of the above code:
Can anyone help me to smoothen this image? I want an output quality similar to this table below. Removal of table lines is ok.
My goal is to have an image with clear text.