Drawing bounding boxes with Pytesseract / OpenCV - python

I'm using pytesseract (0.3.2) with openCV (4.1.2) to identify digits in images. While image_to_string is working, image_to_data and image_to_boxes are not. I need to be able to draw the bounding boxes on the images and this has stumped me. I've tried different images, older versions of pytesseract, etc. I'm using Windows and Jupyter Notebooks.
import cv2
import numpy as np
import pytesseract
from PIL import Image

# erosion
def erode(image):
    kernel = np.ones((5, 5), np.uint8)
    return cv2.erode(image, kernel, iterations=1)

# grayscale
def get_grayscale(image):
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# thresholding
def thresholding(image):
    #return cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 2)
    return cv2.threshold(image, 200, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

img = cv2.imread('my_image.jpg')
pytesseract.pytesseract.tesseract_cmd = r'C:\mypath\tesseract.exe'

gray = get_grayscale(img)
thresh = thresholding(gray)
eroded = erode(thresh)  # was remove_noise(thresh); erode() is the function defined above

custom_config = r'-c tessedit_char_whitelist=0123456789 --psm 6'
print(pytesseract.image_to_string(eroded, config=custom_config))
cv2.imwrite("test.jpg", eroded)

# these return nothing
print(pytesseract.image_to_boxes(Image.open('test.jpg')))
print(pytesseract.image_to_data(Image.open('test.jpg')))

Instead of using image_to_boxes, an alternative approach is to simply find contours with cv2.findContours, obtain the bounding rectangle coordinates with cv2.boundingRect, and draw each bounding box with cv2.rectangle.
Using this sample input image:
Drawn boxes
Result from OCR
1234567890
Code
import cv2
import pytesseract
import numpy as np

pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load image, grayscale, Otsu's threshold
image = cv2.imread('1.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Draw bounding boxes
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    x,y,w,h = cv2.boundingRect(c)
    cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 2)

# OCR
data = pytesseract.image_to_string(255 - thresh, lang='eng', config='--psm 6')
print(data)

cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.waitKey()
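Note the inversion in the OCR step: thresh is white text on a black background, while 255 - thresh flips it back to dark text on a light background, which Tesseract generally handles better.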

Please try the following code:
from pytesseract import Output
import pytesseract
import cv2

image = cv2.imread("my_image.jpg")

# Swap channel ordering from BGR (OpenCV's default) to RGB.
# By default OpenCV stores images in BGR format, and since pytesseract assumes RGB,
# we need to convert from BGR to RGB:
rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

pytesseract.pytesseract.tesseract_cmd = r'C:\mypath\tesseract.exe'
custom_config = r'-c tessedit_char_whitelist=0123456789 --psm 6'
results = pytesseract.image_to_data(rgb, output_type=Output.DICT, lang='eng', config=custom_config)
boxresults = pytesseract.image_to_boxes(rgb, output_type=Output.DICT, lang='eng', config=custom_config)
print(results)
print(boxresults)

for i in range(0, len(results["text"])):
    # extract the bounding box coordinates of the text region from the current result
    tmp_tl_x = results["left"][i]
    tmp_tl_y = results["top"][i]
    tmp_br_x = tmp_tl_x + results["width"][i]
    tmp_br_y = tmp_tl_y + results["height"][i]
    tmp_level = results["level"][i]
    conf = results["conf"][i]
    text = results["text"][i]
    if tmp_level == 5:  # level 5 = word
        cv2.putText(image, text, (tmp_tl_x, tmp_tl_y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
        cv2.rectangle(image, (tmp_tl_x, tmp_tl_y), (tmp_br_x, tmp_br_y), (0, 0, 255), 1)

# image_to_boxes reports coordinates with the origin at the bottom-left corner,
# so flip the y values before drawing with OpenCV (origin at the top-left)
h_img = image.shape[0]
for j in range(0, len(boxresults["left"])):
    left = boxresults["left"][j]
    bottom = boxresults["bottom"][j]
    right = boxresults["right"][j]
    top = boxresults["top"][j]
    cv2.rectangle(image, (left, h_img - top), (right, h_img - bottom), (255, 0, 0), 1)

cv2.imshow("image", image)
cv2.waitKey(0)
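If you only need the character-level boxes, the plain string output of image_to_boxes can be parsed directly as well. Here is a minimal sketch, continuing from the code above (the same bottom-left-origin caveat applies):
# each line of image_to_boxes() is: char x1 y1 x2 y2 page,
# with (0, 0) at the bottom-left corner of the image
h_img = image.shape[0]
for line in pytesseract.image_to_boxes(rgb, lang='eng', config=custom_config).splitlines():
    ch, x1, y1, x2, y2, _page = line.split(' ')
    cv2.rectangle(image, (int(x1), h_img - int(y1)), (int(x2), h_img - int(y2)), (255, 0, 0), 1)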

How to detect multiple license plates from a single image

Here is my code to detect a single license plate in an image; it is mostly copied from a website and works well:
import cv2
import imutils
import pytesseract

pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'

image = cv2.imread('test.jpg')
image = imutils.resize(image, width=300)
cv2.imshow("original image", image)
cv2.waitKey(0)

gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
cv2.imshow("greyed image", gray_image)
cv2.waitKey(0)

gray_image = cv2.bilateralFilter(gray_image, 11, 17, 17)
cv2.imshow("smoothened image", gray_image)
cv2.waitKey(0)

edged = cv2.Canny(gray_image, 30, 200)
cv2.imshow("edged image", edged)
cv2.waitKey(0)

cnts, new = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
image1 = image.copy()
cv2.drawContours(image1, cnts, -1, (0, 255, 0), 3)
cv2.imshow("contours", image1)
cv2.waitKey(0)

cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:30]
screenCnt = None
image2 = image.copy()
cv2.drawContours(image2, cnts, -1, (0, 255, 0), 3)
cv2.imshow("Top 30 contours", image2)
cv2.waitKey(0)

i = 7
for c in cnts:
    perimeter = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.018 * perimeter, True)
    if len(approx) == 4:
        screenCnt = approx
        x, y, w, h = cv2.boundingRect(c)
        new_img = image[y:y+h, x:x+w]
        cv2.imwrite('./' + str(i) + '.png', new_img)
        i += 1
        break

cv2.drawContours(image, [screenCnt], -1, (0, 255, 0), 3)
cv2.imshow("image with detected license plate", image)
cv2.waitKey(0)

Cropped_loc = './7.png'
cv2.imshow("cropped", cv2.imread(Cropped_loc))
plate = pytesseract.image_to_string(Cropped_loc, lang='eng')
print("Number plate is:", plate)
cv2.waitKey(0)
cv2.destroyAllWindows()
Here is my code, developed by modifying the code above, to detect multiple license plates in a single image. I tested it on an image containing two cars, but it detected only one license plate.
import cv2
import pytesseract

pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'

# Read the image
img = cv2.imread("double.jpg")

# Convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Apply Gaussian blur
gray = cv2.GaussianBlur(gray, (5, 5), 0)

# Apply adaptive thresholding
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)

# Find contours
contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

# Initialize a list to store the license plate numbers
license_plates = []

# Loop over the contours
for contour in contours:
    # Get the rectangle bounding the contour
    x, y, w, h = cv2.boundingRect(contour)
    # Check if the aspect ratio of the rectangle is approximately 3:1
    if w/h > 2.5 and w/h < 4:
        # Crop the license plate from the image
        crop_img = img[y:y+h, x:x+w]
        # Recognize the text on the license plate using pytesseract
        text = pytesseract.image_to_string(crop_img, lang='eng')
        # Add the recognized text to the list of license plates
        license_plates.append(text)

# Strip the first and last characters from overly long results and print
for i in license_plates:
    if len(i) > 10:
        i = list(i)
        i.pop(0)
        i.pop(len(i)-1)
        i = ''.join(map(str, i))
        print(i)
How can I solve this issue? Do I have to use a machine learning model? (I am not an expert in machine learning, so please provide detailed code or instructions if training a model is necessary.)
Any modification to the code is highly appreciated.

Segmenting image files with text (and pictures) into blocks

I'm trying to create bounding boxes for the text in an image I have. An example is the one below.
I would like to add a bounding box around each "This is a test" line. Unfortunately, I'm not sure why this method is not automatically identifying the bounding boxes.
import re
import cv2
import numpy as np
import pytesseract
from pytesseract import Output
from matplotlib import pyplot as plt

# Plot word-level boxes on the image using the pytesseract.image_to_data() function
image = cv2.imread('Image.jpg')
b, g, r = cv2.split(image)
image = cv2.merge([r, g, b])

d = pytesseract.image_to_data(image, output_type=Output.DICT)
print('DATA KEYS: \n', d.keys())

n_boxes = len(d['text'])
for i in range(n_boxes):
    # condition to only pick boxes with a confidence > 60%
    if int(d['conf'][i]) > 60:
        (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
        image = cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

b, g, r = cv2.split(image)
rgb_img = cv2.merge([r, g, b])
plt.figure(figsize=(16, 12))
plt.imshow(rgb_img)
plt.title('SAMPLE IMAGE WITH WORD LEVEL BOXES')
plt.show()
Here is a different way to do that with Python/OpenCV.
Read the input
Convert to gray
(OTSU) Threshold (white text on black background)
Apply morphology dilate with a horizontal kernel longer than the letter spacing, then apply morphology open with a smaller vertical kernel to remove the thin horizontal lines remaining from the ruled line on the page.
Find contours
Draw bounding boxes of contours on input
Save result
Input:
import cv2
import numpy as np

# load image
img = cv2.imread("test_text.jpg")

# convert to gray
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# threshold the grayscale image
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# use morphology dilate to blur horizontally, then open with a vertical
# kernel to remove thin horizontal lines
#kernel = np.ones((500,3), np.uint8)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (250, 3))
morph = cv2.morphologyEx(thresh, cv2.MORPH_DILATE, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 17))
morph = cv2.morphologyEx(morph, cv2.MORPH_OPEN, kernel)

# find contours
cntrs = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cntrs = cntrs[0] if len(cntrs) == 2 else cntrs[1]

# draw bounding boxes of contours on a copy of the input
result = img.copy()
for c in cntrs:
    x,y,w,h = cv2.boundingRect(c)
    cv2.rectangle(result, (x, y), (x+w, y+h), (0, 0, 255), 2)

# write results to disk
cv2.imwrite("test_text_threshold.png", thresh)
cv2.imwrite("test_text_morph.png", morph)
cv2.imwrite("test_text_lines.jpg", result)

cv2.imshow("GRAY", gray)
cv2.imshow("THRESH", thresh)
cv2.imshow("MORPH", morph)
cv2.imshow("RESULT", result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Thresholded image:
Dilated image:
Result:
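Note that cv2.findContours does not return the contours in reading order. If the lines are needed top-to-bottom, here is a small sketch of one way to sort the bounding boxes before drawing, continuing from the code above:
# sort the line bounding boxes top-to-bottom by their y coordinate
boxes = [cv2.boundingRect(c) for c in cntrs]
for x, y, w, h in sorted(boxes, key=lambda b: b[1]):
    cv2.rectangle(result, (x, y), (x+w, y+h), (0, 0, 255), 2)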

Remove image background and extract the object in image

I know that there are many threads here about this issue, but I'm not able to solve my problem with those answers. I have tried many different pieces of code to remove the background of an image like this:
to this:
with the following code:
img2 = Image.open("foo.jpg")
c_red, c_green, c_blue = cv2.split(img2)
img2 = cv2.merge((c_red, c_green, c_blue, mask.astype('float32') / 255.0))
img.paste(img2,(0,0))
or with this code:
img2 = Image.open("foo.jpg")
img2 = img2.convert("RGBA")
datas = img2.getdata()
newData = []
for item in datas:
    if item[0] == 255 and item[1] == 255 and item[2] == 255:
        newData.append((255, 255, 255, 0))
    else:
        newData.append(item)
img2.putdata(newData)
or:
threshold = 100
dist = 5
img2 = Image.open("foo.jpg")
img2 = img2.convert("RGBA")
arr = np.array(np.asarray(img2))
r, g, b, a = np.rollaxis(arr, axis=-1)
mask = ((r > threshold)
        & (g > threshold)
        & (b > threshold)
        & (np.abs(r - g) < dist)
        & (np.abs(r - b) < dist)
        & (np.abs(g - b) < dist))
arr[mask, 3] = 0
img2 = Image.fromarray(arr, mode='RGBA')
But none of them work. What I want to do is make the background transparent, whatever its color, and crop the image to the border of the object, as I showed above. Any help is appreciated.
Here's one way to do it using OpenCV. The idea is to obtain a binary image, then use cv2.boundingRect to obtain the bounding rectangle coordinates. We can crop the image using NumPy slicing and then add an alpha channel. Here are the results:
Input image
Binary image and region to extract
Extracted ROI
Code
import cv2
import numpy as np
# Load image, convert to grayscale, Gaussian blur, Otsu's threshold
image = cv2.imread('1.jpg')
original = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3,3), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Obtain bounding rectangle and extract ROI
x,y,w,h = cv2.boundingRect(thresh)
cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 2)
ROI = original[y:y+h, x:x+w]
# Add alpha channel
b,g,r = cv2.split(ROI)
alpha = np.ones(b.shape, dtype=b.dtype) * 50
ROI = cv2.merge([b,g,r,alpha])
cv2.imshow('thresh', thresh)
cv2.imshow('image', image)
cv2.imshow('ROI', ROI)
cv2.waitKey()
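One caveat: JPEG has no alpha channel, so the transparency only survives if the ROI is written to a format such as PNG. Also note that the alpha value of 50 above makes the ROI mostly transparent; use 255 for fully opaque pixels.
# PNG keeps the alpha channel; JPEG would silently drop it
cv2.imwrite('ROI.png', ROI)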

Recognize single characters on a page with Tesseract

This image returns an empty string.
Basically, I am trying to make a bot for the game WoW, but I am really new to this OCR thing. I cannot get Tesseract to read this image; I want an unordered list of the characters and, if possible, the coordinates of each square containing them. Is there any way to do this?
Thank you for your time!
Here is my code:
from PIL import Image
import cv2
from pytesseract import image_to_string
column = Image.open('photo.png')
gray = column.convert('L')
blackwhite = gray.point(lambda x: 255 if x < 200 else 0, '1')
blackwhite.save("code_bw.jpg")
print(image_to_string(cv2.imread("code_bw.jpg")))
You need to do some preprocessing to isolate the text characters. A simple approach is to apply Otsu's threshold to obtain a binary image, then find contours and filter using aspect ratio and contour area. This gives us the bounding box coordinates of the text, which we can draw onto a mask. We bitwise-and the mask with the input image to get a cleaned image, then throw it into OCR. Here's the result:
Detected text characters
Result
Result from OCR
A
A R
P
Code
import cv2
import pytesseract
import numpy as np

pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load image, grayscale, Otsu's threshold
image = cv2.imread('1.jpg')
original = image.copy()
mask = np.zeros(image.shape, dtype=np.uint8)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Find contours and filter using aspect ratio and area
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    x,y,w,h = cv2.boundingRect(c)
    ar = w / float(h)
    if area > 1000 and ar > .85 and ar < 1.2:
        cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 2)
        cv2.rectangle(mask, (x, y), (x + w, y + h), (255,255,255), -1)
        ROI = original[y:y+h, x:x+w]

# Bitwise-and to isolate characters
result = cv2.bitwise_and(original, mask)
result[mask==0] = 255

# OCR
data = pytesseract.image_to_string(result, lang='eng', config='--psm 6')
print(data)

cv2.imshow('image', image)
cv2.imshow('thresh', thresh)
cv2.imshow('result', result)
cv2.waitKey()
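A note on the config: --psm 6 tells Tesseract to assume a single uniform block of text. Since the characters here sit in isolated squares rather than a paragraph, a sparse-text mode such as --psm 11 may also be worth trying.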

How to remove blurriness from an image that contains a table?

I have an image that is blurred and contains some noise. I have tried image denoising as in the following example.
The code to remove the Gaussian noise from a color image using the Non-local Means Denoising algorithm:
import numpy as np
import cv2
from matplotlib import pyplot as plt
img = cv2.imread("data_5/1.png")
b,g,r = cv2.split(img) # get b,g,r
rgb_img = cv2.merge([r,g,b]) # switch it to rgb
# Denoising
dst = cv2.fastNlMeansDenoisingColored(img,None,10,10,7,21)
b,g,r = cv2.split(dst) # get b,g,r
rgb_dst = cv2.merge([r,g,b]) # switch it to rgb
cv2.imshow('denoising black and white', rgb_dst)
cv2.waitKey(0)
The output of the above code:
The above code removes some noise, but some of the numbers and the table lines remain blurred.
Can anyone suggest a better solution to remove the blurriness and noise from the above image?
import numpy as np
import cv2
from PIL import Image
from tesserocr import PyTessBaseAPI, RIL

if __name__ == '__main__':
    image = cv2.imread('image.png', cv2.IMREAD_UNCHANGED)
    image = cv2.resize(image, (0,0), fx=0.5, fy=0.5)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    ret, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
    binary = cv2.medianBlur(binary, 3)

    (rows, cols) = image.shape[:2]
    H = cv2.Sobel(binary, cv2.CV_8U, 1, 0, ksize=5)
    V = cv2.Sobel(binary, cv2.CV_8U, 0, 1, ksize=5)

    # erase small contours from the gradient images so only the long
    # table lines remain (cv2.findContours returns three values in
    # OpenCV 3.x and two in 4.x, hence the unpacking idiom)
    cntrs = cv2.findContours(V, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contours = cntrs[0] if len(cntrs) == 2 else cntrs[1]
    for cnt in contours:
        (x, y, w, h) = cv2.boundingRect(cnt)
        if w < cols/3 and h < rows/3:
            cv2.drawContours(V, [cnt], -1, 0, -1)

    cntrs = cv2.findContours(H, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contours = cntrs[0] if len(cntrs) == 2 else cntrs[1]
    for cnt in contours:
        (x, y, w, h) = cv2.boundingRect(cnt)
        if w < cols/3 and h < rows/3:
            cv2.drawContours(H, [cnt], -1, 0, -1)

    # dilate the line masks and remove the table lines from the binary image
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
    V = cv2.morphologyEx(V, cv2.MORPH_DILATE, kernel, iterations=3)
    H = cv2.morphologyEx(H, cv2.MORPH_DILATE, kernel, iterations=3)
    binary[V == 255] = 0
    binary[H == 255] = 0
    binary = cv2.bitwise_not(binary)

    # OCR with tesserocr and draw the text-line boxes
    api = PyTessBaseAPI()
    api.SetImage(Image.fromarray(binary))
    text = api.GetUTF8Text()
    text = text.split()  # note: assumes one token per text line
    boxes = api.GetComponentImages(RIL.TEXTLINE, True)
    for i, (_, box, _, _) in enumerate(boxes):
        (x, y, w, h) = box['x'], box['y'], box['w'], box['h']
        cv2.rectangle(image, (x, y), (x+w, y+h), (0, 0, 255))
        cv2.putText(image, text[i], (x, y), cv2.FONT_HERSHEY_PLAIN, 1, (255, 0, 0))

    cv2.imshow('image', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
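A small aside on tesserocr: PyTessBaseAPI holds native Tesseract resources, so it can also be used as a context manager, which releases them automatically:
with PyTessBaseAPI() as api:
    api.SetImage(Image.fromarray(binary))
    text = api.GetUTF8Text()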
I have tried applying a Gaussian blur and then processing the image with adaptive thresholding, which removed the noise and blurriness.
import cv2 as cv

# input
img = cv.imread('data_5/1.png', 0)

# gaussian blur
img = cv.GaussianBlur(img, (15,15), 0)

# adaptive threshold
th3 = cv.adaptiveThreshold(img, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C,
                           cv.THRESH_BINARY, 11, 2)

cv.imshow('Noise Filtered Image', th3)
cv.waitKey(0)
cv.imwrite('data_5/result.png', th3)
The output of the above code:
Can anyone help me smooth this image? I want an output quality similar to the table below; removing the table lines is fine.
My goal is to have an image with clear text.
