Recognize single characters on a page with Tesseract - python

This image returns an empty string.
Basically, I am trying to make a bot for the game WoW, but I am really new to this OCR thing. I cannot get Tesseract to read this image; I want an unordered list of the characters and, if possible, the coordinates of each square containing them. Is there any way to do this?
Thank you for your time!
Here is my code:
from PIL import Image
import cv2
from pytesseract import image_to_string
column = Image.open('photo.png')
gray = column.convert('L')
blackwhite = gray.point(lambda x: 255 if x < 200 else 0, '1')
blackwhite.save("code_bw.jpg")
print(image_to_string(cv2.imread("code_bw.jpg")))
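(Side note, not the cause of the empty output: pytesseract's image_to_string accepts a PIL image directly, so the save-and-reload through cv2 should be unnecessary. A minimal simplification of the last two lines:)
# Hedged sketch: pass the PIL image straight to pytesseract instead of
# saving it as a JPEG and reading it back with cv2.
print(image_to_string(blackwhite))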

You need to do some preprocessing to isolate the text characters. A simple approach is to apply Otsu's threshold to obtain a binary image, then find contours and filter using aspect ratio + contour area. This gives us the bounding box coordinates of the text, which we draw onto a mask. We bitwise-and the mask with the input image to get our cleaned image, then throw it into OCR. Here's the result:
Detected text characters
Result
Result from OCR
A
A R
P
Code
import cv2
import pytesseract
import numpy as np

pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load image, grayscale, Otsu's threshold
image = cv2.imread('1.jpg')
original = image.copy()
mask = np.zeros(image.shape, dtype=np.uint8)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Find contours and filter using aspect ratio and area
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    area = cv2.contourArea(c)
    x,y,w,h = cv2.boundingRect(c)
    ar = w / float(h)
    if area > 1000 and ar > .85 and ar < 1.2:
        cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 2)
        cv2.rectangle(mask, (x, y), (x + w, y + h), (255,255,255), -1)
        ROI = original[y:y+h, x:x+w]

# Bitwise-and to isolate characters
result = cv2.bitwise_and(original, mask)
result[mask==0] = 255

# OCR
data = pytesseract.image_to_string(result, lang='eng', config='--psm 6')
print(data)

cv2.imshow('image', image)
cv2.imshow('thresh', thresh)
cv2.imshow('result', result)
cv2.waitKey()
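Since you also wanted the coordinates of each square, pytesseract can return per-character bounding boxes with image_to_boxes (a sketch building on the cleaned result image above; note that Tesseract reports box coordinates with the origin at the bottom-left of the image, not the top-left as in OpenCV):
# Hedged sketch: per-character boxes from the cleaned `result` image above.
# Each line of image_to_boxes() is "char x1 y1 x2 y2 page", with y measured
# from the bottom of the image.
img_h = result.shape[0]
for line in pytesseract.image_to_boxes(result, lang='eng', config='--psm 6').splitlines():
    char, x1, y1, x2, y2, _ = line.split(' ')
    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
    # Convert to OpenCV's top-left origin
    print(char, 'top-left:', (x1, img_h - y2), 'bottom-right:', (x2, img_h - y1))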

Related

What kind of parameters should I use to find and crop objects in an image?

I am new to deep learning and am trying to implement an ML algorithm for image clustering. The problem is that I can't crop the objects in an image in Python using OpenCV.
Here is the code I have implemented. It works for some objects if the color of the object is very different (in RGB values) from the background, but it doesn't work for the image I need for the ML algorithm. What kind of parameters should I have/change? Any suggestions?
import cv2

myFile = 'Path' + '/crop.png'
nr_of_im = 1
q = 0
r = 0
x_list = []
y_list = []

img = cv2.imread(myFile, cv2.IMREAD_UNCHANGED)
ret, thresh = cv2.threshold(cv2.cvtColor(img.copy(), cv2.COLOR_BGR2GRAY), 30, 255, cv2.THRESH_BINARY)
contours, hier = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
for contour in contours:
    print("len", len(contours))
    if cv2.contourArea(contour) > 80:
        x, y, w, h = cv2.boundingRect(contour)
        q = w
        r = h
        x_list.append(x)
        y_list.append(y)
        font = cv2.FONT_HERSHEY_SIMPLEX
        ROI = img[y-10:y+10+h, x-10:x+10+w]
        ROI = cv2.resize(ROI, (300, 300))
        file_all = "/images/%d.jpg" % nr_of_im
        nr_of_im += 1
        cv2.imwrite(file_all, ROI)
There are 21 objects in the image, but len(contours) returns 1. The image looks like this
crop.png:
Your threshold is too low and produces a totally white image for me. You need to increase your threshold. Always view your thresholding to be sure it is working the way you expect. You can always remove the viewing later.
The following works for me using Otsu thresholding, which selects a threshold value of 97 for this image. I get 21 contours.
Input:
import cv2
import numpy as np

# read image
img = cv2.imread('blocks.jpg')

# convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# threshold (Otsu selects the threshold value automatically)
ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
print(ret)

# apply morphology: close to fill and unite large regions, open to remove small ones
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9,9))
morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15,15))
morph = cv2.morphologyEx(morph, cv2.MORPH_OPEN, kernel)

# get contours
result = img.copy()
contours = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]

# get count of contours
print(len(contours))

# draw bounding boxes on contours
for cntr in contours:
    x,y,w,h = cv2.boundingRect(cntr)
    cv2.rectangle(result, (x, y), (x+w, y+h), (0, 0, 255), 2)
    #print("x,y,w,h:",x,y,w,h)

# save results
cv2.imwrite("blocks_thresh.jpg", thresh)
cv2.imwrite("blocks_morphology.jpg", morph)
cv2.imwrite("blocks_bboxes.jpg", result)

# show thresh and result
cv2.imshow("thresh", thresh)
cv2.imshow("morph", morph)
cv2.imshow("result", result)
cv2.waitKey(0)
cv2.destroyAllWindows()
Threshold image:
Morphology cleaned image:
Resulting bounding boxes from contours:
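Since your goal was to crop each object to its own file, a minimal extension of the loop above (a sketch; it slices from the unmarked img rather than result so the red boxes do not end up in the saved crops):
# Hedged sketch: save each detected block as its own image file.
for i, cntr in enumerate(contours):
    x, y, w, h = cv2.boundingRect(cntr)
    ROI = img[y:y+h, x:x+w]
    cv2.imwrite("block_{}.jpg".format(i), ROI)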

opencv, python, how to read grouped text in boxes

I would like to read the text from the image in the groups (boxes) in which it appears on the image.
I have managed to remove the first contour (as shown below), but the issue is that when I try to read the text, some of it is missing. I expect this is because of other contours that have stayed on the image, but when I try to remove them, I lose the grouping or part of the text...
for i in range(len(contours)):
    if 800 < cv2.contourArea(contours[i]) < 2000:
        x, y, width, height = cv2.boundingRect(contours[i])
        roi = img[y:y + height, x:x + width]
        roi_h = roi.shape[0]
        roi_w = roi.shape[1]
        resize_roi = cv2.resize(roi, (int(roi_w*6), int(roi_h*6)), interpolation=cv2.INTER_LINEAR)
        afterd = cv2.cvtColor(resize_roi, cv2.COLOR_BGR2GRAY)
        retim, threshm = cv2.threshold(afterd, 210, 225, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        contoursm, hierarchym = cv2.findContours(threshm, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        mask = np.ones(resize_roi.shape[:2], dtype="uint8") * 255
        for m in range(len(contoursm)):
            if 10000 < cv2.contourArea(contoursm[m]) < 33000:
                cv2.drawContours(mask, contoursm, m, 0, 7)
        afterd = cv2.bitwise_not(afterd)
        afterd = cv2.bitwise_and(afterd, afterd, mask=mask)
        afterd = cv2.bitwise_not(afterd)
        print(pytesseract.image_to_string(afterd, lang='eng', config='--psm 3'))
Instead of dealing with all the boxes, I suggest deleting them by finding connected components, and filling the large clusters with background color.
You may use the following stages:
Convert image to Grayscale, apply threshold, and invert polarity.
Delete all clusters having more than 100 pixels (assume letters are smaller).
Dilate thresh for uniting text areas to single "blocks".
Find contours on the dilated thresh image.
Find bounding rectangles and apply OCR to each rectangle.
Here is the complete code sample:
import numpy as np
import cv2
import pytesseract

pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'  # I am using Windows

img = cv2.imread('img.png')  # Read input image
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)  # Convert to binary and invert polarity

nlabel, labels, stats, centroids = cv2.connectedComponentsWithStats(thresh, connectivity=8)

thresh_size = 100

# Delete all lines by filling large clusters with zeros.
for i in range(1, nlabel):
    if stats[i, cv2.CC_STAT_AREA] > thresh_size:
        thresh[labels == i] = 0

# Dilate thresh for uniting text areas to single blocks.
dilated_thresh = cv2.dilate(thresh, np.ones((5,5)))

# Find contours on dilated thresh
contours, hierarchy = cv2.findContours(dilated_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

# Iterate contours, find bounding rectangles
for c in contours:
    # Get bounding rectangle
    x, y, w, h = cv2.boundingRect(c)

    # Draw green rectangle for testing
    cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), thickness=1)

    # Get the slice with the text (slice with margins).
    afterd = thresh[y-3:y+h+3, x-3:x+w+3]

    # Show afterd as image for testing
    # cv2.imshow('afterd', afterd)
    # cv2.waitKey(100)

    # The OCR works only when the image is enlarged and the text is black
    resized_afterd = cv2.resize(afterd, (afterd.shape[1]*5, afterd.shape[0]*5), interpolation=cv2.INTER_LANCZOS4)
    print(pytesseract.image_to_string(255 - resized_afterd, lang='eng', config='--psm 3'))

cv2.imshow('thresh', thresh)
cv2.imshow('img', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result strings after OCR:
DF6DF645
RFFTW
2345
2277
AABBA
DF1267
ABCET5456
Input image with green boxes around the text:
Update:
Grouping contours:
For grouping contours you may use the hierarchy result of cv2.findContours with cv2.RETR_TREE.
See Contours Hierarchy documentation.
You may use the parent-child relationship for grouping contours.
Here is an incomplete sample code for using the hierarchy:
img = cv2.imread('img.png')  # Read input image
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)  # Convert to binary and invert polarity

nlabel, labels, stats, centroids = cv2.connectedComponentsWithStats(thresh, connectivity=8)

thresh_boxes = np.zeros_like(thresh)
thresh_size = 100

# Delete all lines by filling large clusters with zeros.
# Make a new image that contains only the boxes - without text.
for i in range(1, nlabel):
    if stats[i, cv2.CC_STAT_AREA] > thresh_size:
        thresh[labels == i] = 0
        thresh_boxes[labels == i] = 255

# Find contours on thresh_boxes, use cv2.RETR_TREE to build a tree with hierarchy
contours, hierarchy = cv2.findContours(thresh_boxes, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

# Iterate contours and hierarchy
for i, c in enumerate(contours):
    h = hierarchy[0, i, :]
    h_child = h[2]
    # If the contour has no child, it is at the last level
    if h_child == -1:
        h_parent = h[3]
        x, y, w, h = cv2.boundingRect(c)
        cv2.putText(img, str(h_parent), (x+w//2-4, y+h//2+8), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=1, color=(0, 0, 255), thickness=2)

cv2.imshow('thresh', thresh)
cv2.imshow('img', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
Result:
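To turn those parent indices into actual groups, one option (a sketch, reusing contours and hierarchy from the code above) is to collect each last-level bounding rectangle under its parent index:
# Hedged sketch: group last-level boxes by their parent contour index.
from collections import defaultdict

groups = defaultdict(list)
for i, c in enumerate(contours):
    if hierarchy[0, i, 2] == -1:      # contour has no child (last level)
        parent = hierarchy[0, i, 3]   # index of the enclosing contour
        groups[parent].append(cv2.boundingRect(c))

for parent, rects in groups.items():
    print('group', parent, 'contains', len(rects), 'boxes')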

How to separate images using watershed algorithm in Python

How can I separate individual images among multiple images after image segmentation using the watershed algorithm in Python?
The attached image consists of 4 images, to which we need to apply image segmentation and then separate each individual image.
We will flood-fill it first:
import cv2
import numpy as np

# Read image
im_in = cv2.imread("2SNAT.jpg", cv2.IMREAD_GRAYSCALE)

# Threshold.
# Set values equal to or above 220 to 0.
# Set values below 220 to 255.
th, im_th = cv2.threshold(im_in, 220, 255, cv2.THRESH_BINARY_INV)

# Copy the thresholded image.
im_floodfill = im_th.copy()

# Mask used for flood filling.
# Notice the size needs to be 2 pixels larger than the image.
h, w = im_th.shape[:2]
mask = np.zeros((h+2, w+2), np.uint8)

# Floodfill from point (0, 0)
cv2.floodFill(im_floodfill, mask, (0,0), 255)

# Invert floodfilled image
im_floodfill_inv = cv2.bitwise_not(im_floodfill)

# Combine the two images to get the foreground.
im_out = im_th | im_floodfill_inv
Then find contours and filter by area:
# In OpenCV 4.x, findContours returns (contours, hierarchy);
# in OpenCV 3.x it returned (image, contours, hierarchy).
contours, hierarchy = cv2.findContours(im_out.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
final_contours = []
for contour in contours:
    area = cv2.contourArea(contour)
    if area > 1000:
        final_contours.append(contour)
Crop-out step, also drawing rectangles on the original image. Note the crop is saved before the rectangle is drawn, so the green border does not end up in the saved pieces:
counter = 0
# for c in [final_contours[0]]:
for c in final_contours:
    counter = counter + 1
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.01 * peri, True)
    x, y, w, h = cv2.boundingRect(approx)
    print(x, y, w, h)
    aspect_ratio = w / float(h)
    if (aspect_ratio >= 0.8 and aspect_ratio <= 4):
        # Save the crop first, then draw the rectangle, so the border
        # does not appear inside the saved image.
        cv2.imwrite('splitted_{}.jpg'.format(counter), im_in[y:y+h, x:x+w])
        cv2.rectangle(im_in, (x,y), (x+w,y+h), (0,255,0), 2)
cv2.imwrite('rectangled_split.jpg', im_in)
Instead of using watershed, here's a simple approach using thresholding + morphological operations. The idea is to obtain a binary image then perform morph close to combine each object as a single contour. We then find contours and extract/save each ROI using Numpy slicing.
Here's each individual object highlighted in green
Individual saved object
Code
import cv2

# Load image, grayscale, Otsu's threshold
image = cv2.imread('1.jpg')
original = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Morph close
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7,7))
close = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=3)

# Find contours and extract ROI
cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
num = 0
for c in cnts:
    x,y,w,h = cv2.boundingRect(c)
    cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 2)
    ROI = original[y:y+h, x:x+w]
    cv2.imwrite('ROI_{}.png'.format(num), ROI)
    num += 1

cv2.imshow('image', image)
cv2.waitKey()

How to extract multiple objects from an image using Python OpenCV?

I am trying to extract objects from an image by color using OpenCV. I have tried inverse thresholding and grayscale combined with cv2.findContours(), but I am unable to apply it repeatedly. Furthermore, I can't figure out how to "cut out" each match from the original image and save it to its own file.
EDIT
import cv2
import numpy as np

# load the images
empty = cv2.imread("empty.jpg")
full = cv2.imread("test.jpg")

# save a color copy for visualization
full_c = full.copy()

# convert to grayscale
empty_g = cv2.cvtColor(empty, cv2.COLOR_BGR2GRAY)
full_g = cv2.cvtColor(full, cv2.COLOR_BGR2GRAY)

empty_g = cv2.GaussianBlur(empty_g, (51, 51), 0)
full_g = cv2.GaussianBlur(full_g, (51, 51), 0)

diff = full_g - empty_g

# thresholding
diff_th = cv2.adaptiveThreshold(full_g, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                cv2.THRESH_BINARY, 11, 2)

# combine the difference image and the inverse threshold
zone = cv2.bitwise_and(diff, diff_th, None)

# threshold to get the mask instead of gray pixels
_, bag = cv2.threshold(zone, 100, 255, 0)

# dilate to account for the blurring in the beginning
kernel = np.ones((15, 15), np.uint8)
bag = cv2.dilate(bag, kernel, iterations=1)

# find contours, sort and keep the biggest ones
contours, _ = cv2.findContours(bag, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours = sorted(contours, key=cv2.contourArea, reverse=True)[:3]

i = 0
while i < len(contours):
    x, y, width, height = cv2.boundingRect(contours[i])
    roi = full_c[y:y+height, x:x+width]
    cv2.imwrite("piece" + str(i) + ".png", roi)
    i += 1
Here, empty is just a white image of size 1500 x 1000, like the one above, and test is the image above.
This is what I came up with; the only downside is that I now get a third image, showing a shadow zone, instead of only the two expected...
Here's a simple approach:
Obtain binary image. Load the image, grayscale, Gaussian blur, Otsu's threshold, then dilate to obtain a binary black/white image.
Extract ROI. Find contours, obtain bounding boxes, extract ROI using Numpy slicing, and save each ROI
Binary image (Otsu's thresholding + dilation)
Detected ROIs highlighted in green
To extract each ROI, you can find the bounding box coordinates using cv2.boundingRect(), crop the desired region, then save the image
x,y,w,h = cv2.boundingRect(c)
ROI = original[y:y+h, x:x+w]
First object
Second object
import cv2

# Load image, grayscale, Gaussian blur, Otsu's threshold, dilate
image = cv2.imread('1.jpg')
original = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5,5), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7,7))
dilate = cv2.dilate(thresh, kernel, iterations=1)

# Find contours, obtain bounding box coordinates, and extract ROI
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
image_number = 0
for c in cnts:
    x,y,w,h = cv2.boundingRect(c)
    cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 2)
    ROI = original[y:y+h, x:x+w]
    cv2.imwrite("ROI_{}.png".format(image_number), ROI)
    image_number += 1

cv2.imshow('image', image)
cv2.imshow('thresh', thresh)
cv2.imshow('dilate', dilate)
cv2.waitKey()
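Regarding the extra "shadow zone" crop from the question: one option (a sketch replacing the extraction loop above; the 1000-pixel minimum area is an assumption to tune for your images) is to skip small contours by area:
# Hedged sketch: ignore small contours (e.g. shadow artifacts) before cropping.
image_number = 0
for c in cnts:
    if cv2.contourArea(c) < 1000:  # assumed minimum area; tune for your images
        continue
    x,y,w,h = cv2.boundingRect(c)
    ROI = original[y:y+h, x:x+w]
    cv2.imwrite("ROI_{}.png".format(image_number), ROI)
    image_number += 1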

Extract all bounding boxes using OpenCV Python

I have an image that contains more than one bounding box.
I need to extract everything that has bounding boxes in them. So far, from this site I've gotten this answer:
y = img[by:by+bh, bx:bx+bw]
cv2.imwrite(string + '.png', y)
It works, however, it only gets one. How should I modify the code? I tried putting it in the loop for contours but it still spews out one image instead of multiple ones.
Thank you so much in advance.
There you go:
import cv2

im = cv2.imread('c:/data/ph.jpg')
gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
contours, hierarchy = cv2.findContours(gray, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2:]
idx = 0
for cnt in contours:
    idx += 1
    x,y,w,h = cv2.boundingRect(cnt)
    roi = im[y:y+h, x:x+w]
    cv2.imwrite(str(idx) + '.jpg', roi)
    #cv2.rectangle(im, (x,y), (x+w,y+h), (200,0,0), 2)
cv2.imshow('img', im)
cv2.waitKey(0)
A simple approach is to find contours, obtain the bounding rectangle coordinates using cv2.boundingRect(), then extract the ROI using Numpy slicing. We can keep a counter to number each ROI and save it with cv2.imwrite(). Here's a working example:
Input image:
Detected ROIs to extract highlighted in green
Saved ROIs
Code
import cv2

# Load image, grayscale, Otsu's threshold
image = cv2.imread('1.png')
original = image.copy()
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

# Find contours, obtain bounding box, extract and save ROI
ROI_number = 0
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    x,y,w,h = cv2.boundingRect(c)
    cv2.rectangle(image, (x, y), (x + w, y + h), (36,255,12), 2)
    ROI = original[y:y+h, x:x+w]
    cv2.imwrite('ROI_{}.png'.format(ROI_number), ROI)
    ROI_number += 1

cv2.imshow('image', image)
cv2.waitKey()
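One caveat: cv2.findContours does not return contours in any guaranteed reading order, so the saved ROI numbers can look scrambled. If you want the crops numbered left-to-right, sort the contours by their bounding-box x coordinate before the loop (a one-line sketch; use index 1 of boundingRect for top-to-bottom instead):
# Hedged sketch: sort contours left-to-right before extracting ROIs.
cnts = sorted(cnts, key=lambda c: cv2.boundingRect(c)[0])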
