I'm working on parsing coupon codes from receipts, and unfortunately the letters are not drawn with solid lines: they are composed of small individual dots. I managed to do some image manipulation and find the dots, but this is where I'm stuck. Is there a way to connect or merge the dots that are close to each other? Is there a simple solution to this?
Here is the original image and also images after finding the dots.
Here is the code I came up with.
import cv2
import numpy as np
def load_local_image(image):
    """Load an image file as 800x800 colour and grayscale arrays.

    Args:
        image: Path of the image file to read.

    Returns:
        A ``(colour, gray)`` tuple; both images are resized to 800x800.

    Raises:
        IOError: If OpenCV could not read the file.
    """
    c_img = cv2.imread(image, cv2.IMREAD_COLOR)
    g_img = cv2.imread(image, cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports failure by returning None rather than raising; check
    # here so the problem is not hidden behind an opaque cv2.resize error.
    if c_img is None or g_img is None:
        raise IOError("Could not read image: {!r}".format(image))
    return (cv2.resize(c_img, (800, 800)), cv2.resize(g_img, (800, 800)))
def find_letters(binary_image, rgb_image, settings):
    """Find character bounding boxes in a binary image and outline them.

    Every external contour whose area is greater than
    ``settings['contour_area_threshold']`` gets a rectangle drawn on
    ``rgb_image`` (modified in place) using ``settings['outline_color']``
    and ``settings['outline_thickness']``.

    Returns:
        List of ``(x, y, w, h)`` boxes ordered by their ``x`` coordinate.
    """
    outlines, _hierarchy = cv2.findContours(
        binary_image.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    boxes = []
    for outline in outlines:
        if not cv2.contourArea(outline) > settings['contour_area_threshold']:
            continue
        # Axis-aligned bounding box of this contour.
        left, top, width, height = cv2.boundingRect(outline)
        cv2.rectangle(
            rgb_image,
            (left, top),
            (left + width, top + height),
            settings['outline_color'],
            settings['outline_thickness'],
        )
        boxes.append((left, top, width, height))

    # Left-to-right order.
    boxes.sort(key=lambda box: box[0])
    return boxes
def alter_image(img):
    """Convert a grayscale image into an inverted, dilated binary image.

    Args:
        img: Grayscale image to process.

    Returns:
        Binary image in which pixels darker than the threshold in ``img``
        are white, dilated with a 3x3 kernel.
    """
    # Fixed: blur the argument; the original read the module-level ``g``.
    blur = cv2.GaussianBlur(img, (3, 3), 0)
    _, thresh1 = cv2.threshold(blur, 50, 255, cv2.THRESH_BINARY)
    bitwise = cv2.bitwise_not(thresh1)
    # A 1x1 kernel leaves the image unchanged; enlarge it to actually erode.
    erosion = cv2.erode(bitwise, np.ones((1, 1), np.uint8), iterations=1)
    dilation = cv2.dilate(erosion, np.ones((3, 3), np.uint8), iterations=1)
    return dilation
# Load the image as a colour array (c) and a grayscale array (g).
c, g = load_local_image('img.jpg')
# Build the binary image that contours are searched in.
altered_img = alter_image(g)
# Settings read by find_letters: minimum contour area and outline style.
contour_settings = {
'contour_area_threshold': 1,
'outline_thickness': 1,
'outline_color': (66, 116, 244)
}
# find_letters also draws the boxes onto c in place.
letters_crop = find_letters(altered_img, c, contour_settings)
cv2.imshow('color', c)
cv2.imshow('gray', altered_img)
# Wait for a key press, then close the windows.
cv2.waitKey()
cv2.destroyAllWindows()
Related
I am trying to implement one of the stages of the OCR system. The character segmentation stage. The code is shown below. The code is quite simple:
the image is being read
grayscale image translation
image binarization
application of dilation operation
selection of contours
It is assumed that each selected contour is a symbol.
The results of the algorithm are not satisfactory. Sometimes the characters are segmented well; sometimes only parts of characters are highlighted, and sometimes several characters are highlighted together. Please help with the code; I really want it to highlight the characters correctly.
UPDATE 1. I am trying to implement a character segmentation system for different fonts. It turned out that there are no universal parameters of erosion and dilation operations for different fonts
Test image:
Result of character selection 1 (Small parts of characters):
Result of character selection 2 (Big parts of characters):
Full result (All parts of characters):
import cv2
import numpy as np
def letters_extract(image_file):
    """Crop candidate characters from an image and save each crop as a PNG.

    The image is adaptively thresholded, and every contour whose parent in
    the contour hierarchy is contour 0 is treated as one character.

    Returns:
        List of grayscale crops, one per selected contour.
    """
    source = cv2.imread(image_file)
    gray = cv2.cvtColor(source, cv2.COLOR_BGR2GRAY)
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )
    # With a 1x1 kernel the dilation leaves the image unchanged.
    dilated = cv2.dilate(binary, np.ones((1, 1), np.uint8), iterations=1)

    contours, hierarchy = cv2.findContours(dilated, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

    letters = []
    for idx, contour in enumerate(contours):
        # Entry 3 of a hierarchy row is the index of the parent contour.
        if hierarchy[0][idx][3] != 0:
            continue
        x, y, w, h = cv2.boundingRect(contour)
        crop = gray[y:y + h, x:x + w]
        letters.append(crop)
        cv2.imwrite(r'D:\projects\proj\test\tnr\{}.png'.format(idx), crop)
    return letters


letters_extract(r'D:\projects\proj\test\test_tnr.png')
I ran your code (slightly modified for debugging) and the result looks pretty good (I only changed the dilation mask):
import cv2
import numpy as np
import matplotlib.pyplot as plt
def letters_extract(image_file):
    """Outline candidate characters, plotting the intermediate images.

    Returns:
        Copy of the input image with a box drawn around every contour whose
        parent in the contour hierarchy is contour 0.
    """
    def show(picture):
        # One large matplotlib figure per intermediate image.
        plt.figure(figsize=(20, 20))
        plt.imshow(picture)
        plt.show()

    source = cv2.imread(image_file)
    gray = cv2.cvtColor(source, cv2.COLOR_BGR2GRAY)
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )
    show(binary)

    # Erosion with a 2x1 kernel (two rows, one column).
    eroded = cv2.erode(binary, np.ones((2, 1), np.uint8))
    show(eroded)

    contours, hierarchy = cv2.findContours(eroded, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)

    annotated = source.copy()
    box_color = (255, 0, 0)
    box_thickness = 1
    for idx, contour in enumerate(contours):
        if hierarchy[0][idx][3] != 0:
            continue
        x, y, w, h = cv2.boundingRect(contour)
        annotated = cv2.rectangle(
            annotated, (x, y), (x + w, y + h), box_color, box_thickness
        )
    return annotated
# Run the segmentation on the test image and plot the annotated copy.
im_with_aabb = letters_extract('test.png')
plt.figure(figsize=(20, 20))
plt.imshow(im_with_aabb)
plt.show()
But there are still problems with several characters. If your input images look this good (no high variability between instances of the same character in different places), I would suggest template matching, using each character as a template.
If the data is with high variability maybe you should use a pretrained NN like tesseract.
If your data is always as clear as the image you have shared, you do not have to do dilation or erosion. I set the threshold to 190 and invert the gray image with the cv2.THRESH_BINARY_INV parameter so that contours are found around the letters. Finally, I change the contour retrieval mode to cv2.RETR_EXTERNAL so that only external contours are found.
import cv2
import numpy as np
def letters_extract(image_file):
    """Crop every external contour of an inverse-thresholded image.

    Each contour's bounding box is also drawn on the colour image, which is
    then shown in a window until a key is pressed.

    Returns:
        List of grayscale crops, one per external contour.
    """
    picture = cv2.imread(image_file)
    gray = cv2.cvtColor(picture, cv2.COLOR_BGR2GRAY)
    # Inverse binary threshold at 190: pixels at or below 190 become white.
    _, binary = cv2.threshold(gray, 190, 255, cv2.THRESH_BINARY_INV)
    outlines, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

    letters = []
    for outline in outlines:
        x, y, w, h = cv2.boundingRect(outline)
        letters.append(gray[y:y + h, x:x + w])
        cv2.rectangle(picture, (x, y), (x + w, y + h), (0, 0, 255))

    cv2.namedWindow("win", cv2.WINDOW_FREERATIO)
    cv2.imshow("win", picture)
    cv2.waitKey()
    return letters


letters_extract('text.png')
Final image is as follows:
One question: is it possible to detect a rectangle in an image when it touches noise lines and other shapes?
This is my function to detect contoures on image:
def findContours(img_in):
    """Outline the largest contour found in a quarter-size copy of ``img_in``.

    Args:
        img_in: Input image as an array with at least two dimensions.

    Returns:
        The edge image (Canny, then a 5x5 dilate and erode) in which the
        contours were searched.
    """
    # ndarray.shape is (rows, cols, ...): height first, then width.
    height, width = img_in.shape[:2]
    resize_coeff = 0.25
    # cv2.resize takes the target size as (width, height).
    img_in = cv2.resize(
        img_in, (int(resize_coeff * width), int(resize_coeff * height))
    )
    # NOTE(review): ip.findObjects is defined outside this snippet; its
    # effect on the image cannot be verified from here.
    img_in = ip.findObjects(img_in)
    blr = cv2.GaussianBlur(img_in, (9, 9), 0)
    img = cv2.Canny(blr, 50, 250, L2gradient=False)

    # Dilate then erode with the same kernel to bridge small gaps in edges.
    kernel = np.ones((5, 5), np.uint8)
    img = cv2.erode(cv2.dilate(img, kernel, iterations=1), kernel, iterations=1)

    contours, hierarchy = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    # max() raises on an empty sequence, so only proceed when something was found.
    if len(contours) > 0:
        max_contour = max(contours, key=cv2.contourArea)
        cv2.drawContours(img_in, [max_contour], 0, (0, 0, 255), 2)
        # NOTE(review): ``re`` here is the project module that provides
        # rectangle(), not the standard-library regex module.
        re.rectangle(img, [max_contour])

    cv2.imshow("test", img_in)
    cv2.imshow("test1", img)
    cv2.waitKey()
    return img
I got this result:
The result I want:
When I use shape detection, the result is that the contour has 15 vertices instead of four. Function:
def rectangle(img, contours, epsilon_ratio=0.01):
    """Label every contour whose polygon approximation has four vertices.

    Args:
        img: Image on which the "rectangle" label is drawn (modified in place).
        contours: Iterable of contours, e.g. as returned by cv2.findContours.
        epsilon_ratio: Approximation tolerance as a fraction of the contour
            perimeter. The default reproduces the original hard-coded value;
            a larger value makes cv2.approxPolyDP return fewer vertices for
            a noisy outline.
    """
    for contour in contours:
        perimeter = cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon_ratio * perimeter, True)
        print(len(approx))
        if len(approx) != 4:
            continue
        print("Rect")
        x, y, w, h = cv2.boundingRect(approx)
        aspectRatio = float(w) / h
        print(aspectRatio)
        cv2.putText(img, "rectangle", (x, y), cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 0))
EDIT:
Original image:
What if you can remove noise around that shape? I think your mask is good for more processing:
import numpy as np
import sys
import cv2
# Load the mask from the directory of the running script
base_dir = sys.path[0]
im = cv2.imread(base_dir + '/img.png')
H, W = im.shape[:2]

# Make gray scale image
gry = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)

# Make binary image and invert it
bw = cv2.threshold(gry, 127, 255, cv2.THRESH_BINARY)[1]
bw = ~bw

# Focus on edges
bw = cv2.erode(bw, np.ones((5, 5)))

# Use flood fill to remove noise (floodFill needs a mask 2 pixels larger
# than the image in each dimension)
cv2.floodFill(bw, np.zeros((H+2, W+2), np.uint8), (0, 0), 0)
bw = cv2.medianBlur(bw, 7)

# Remove remaining noise with another flood fill, seeded at the image centre
nonRectArea = bw.copy()
cv2.floodFill(nonRectArea, np.zeros((H+2, W+2), np.uint8), (W//2, H//2), 0)
bw[np.where(nonRectArea == 255)] = 0

# Find contours and sort them by width, widest first. sorted() is used
# instead of .sort() because recent OpenCV versions return the contours
# as a tuple, which has no .sort() method.
cnts, _ = cv2.findContours(bw, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
cnts = sorted(cnts, key=lambda p: cv2.boundingRect(p)[2], reverse=True)

# Outline the widest blob, if any contour was found
if cnts:
    x, y, w, h = cv2.boundingRect(cnts[0])
    cv2.rectangle(im, (x, y), (x+w, y+h), 127, 1)

# Save output
cv2.imwrite(base_dir + '/img_1.png', im)
cv2.imwrite(base_dir + '/img_2.png', bw)
cv2.imwrite(base_dir + '/img_3.png', nonRectArea)
I am new and I wonder how can I find the contours of the image like the below with Python OpenCV (cv2 library):
I am going to fill each square with a number and then convert the grid into a numpy array, so I think I first need to figure out how to get the contour of each square in the matrix (perhaps the coordinates of each square in the picture).
I try to use some code snippet:
# Read the image in colour and derive an inverted grayscale copy.
img = cv2.imread(img_path, cv2.IMREAD_COLOR)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
binary = cv2.bitwise_not(gray)

# Outer contours only, keeping every boundary point.
contours, hierarchy = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

# Draw a green box around each contour's bounding rectangle.
for left, top, width, height in map(cv2.boundingRect, contours):
    cv2.rectangle(img, (left, top), (left + width, top + height), (0, 255, 0), 2)
But it doesn't work
Try this:
img = cv2.imread(img_path, 1)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Adaptive Gaussian threshold over 3x3 neighbourhoods, then Otsu on the result.
gauss = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 3, 0)
ret, thresh = cv2.threshold(gauss, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
# findContours returns (image, contours, hierarchy) in OpenCV 3 but
# (contours, hierarchy) in OpenCV 4; the last two items are the same in both.
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2:]
print(contours)

# Only boxes taller and wider than this many pixels are drawn.
min_rect_len = 15
for contour in contours:
    (x, y, w, h) = cv2.boundingRect(contour)
    if h > min_rect_len and w > min_rect_len:
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 1)

# Write the annotated image next to the input (extension assumed to be 4 chars).
cv2.imwrite(img_path[:-4] + "_with_contours.jpg", img)
It produces the following image for the given image :
My task:
My task is to extract bounding box coordinates of following image:
I have following code. I am trying to get these coordinates using roi, but I am not sure how to get them.
import cv2
import numpy as np
large = cv2.imread('1.jpg')
small = cv2.cvtColor(large, cv2.COLOR_BGR2GRAY)

# Morphological gradient with a 3x3 elliptical kernel, then Otsu binarisation.
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
grad = cv2.morphologyEx(small, cv2.MORPH_GRADIENT, kernel)
_, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

# Close with a 9x1 (wide, flat) kernel to join horizontally adjacent regions.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 1))
connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)

contours, hierarchy = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
mask = np.zeros(bw.shape, dtype=np.uint8)

for idx, contour in enumerate(contours):
    x, y, w, h = cv2.boundingRect(contour)
    mask[y:y+h, x:x+w] = 0
    cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
    # Fraction of the bounding box covered by the filled contour.
    r = float(cv2.countNonZero(mask[y:y+h, x:x+w])) / (w * h)
    if not (r > 0.45 and w > 8 and h > 8):
        continue
    cv2.rectangle(large, (x, y), (x+w-1, y+h-1), (0, 255, 0), 1)
    roi = large[y:y+h, x:x+w]
    print(roi)
Result should be something like this:
1675,1335,2338,1338,2337,1455,1674,1452. :Box1
3067,519,3604,521,3603,651,3066,648 :Box2
1017,721,1729,726,1728,857,1016,852 :Box3
I have referred:
Extract all bounding boxes using OpenCV Python . On this link they are extracting images inside bounding boxes when they already have annotated image with rectangular GUI as a input. I want to extract the detected regions into a text file. How do I do it?
x, y, w, h = cv2.boundingRect(contours[idx]) is the coordinates you want, then write it to a txt file:
...
with open("coords.txt", "w+") as file:
    for idx in range(len(contours)):
        x, y, w, h = cv2.boundingRect(contours[idx])
        mask[y:y+h, x:x+w] = 0
        # Corners are written clockwise from the top-left. The trailing
        # newline puts each box on its own line; without it all boxes end
        # up on a single line.
        file.write("Box {0}: ({1},{2}), ({3},{4}), ({5},{6}), ({7},{8})\n".format(idx, x, y, x+w, y, x+w, y+h, x, y+h))
        cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
        r = float(cv2.countNonZero(mask[y:y+h, x:x+w])) / (w * h)
...
The result will contain 4 points for each box, like this.
Box 0: (360,259), (364,259), (364,261), (360,261)
Box 1: (380,258), (385,258), (385,262), (380,262)
Box 2: (365,258), (370,258), (370,262), (365,262)
Box 3: (386,256), (393,256), (393,260), (386,260)
Box 4: (358,256), (361,256), (361,258), (358,258)
import cv2
import numpy as np
# Load an image in grayscale
img = cv2.imread('image.jpg', cv2.IMREAD_GRAYSCALE)

# Perform OTSU thresholding (with THRESH_OTSU the threshold is computed
# automatically and the 128 passed here is not used)
thresh, img_bin = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

# Find contours
contours, _ = cv2.findContours(img_bin, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# A single-channel image cannot show a green box (only the first colour
# component would be used), so draw on a 3-channel copy instead
annotated = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

# Iterate through all contours
for cnt in contours:
    # Get bounding box coordinates
    x, y, w, h = cv2.boundingRect(cnt)

    # Draw bounding box on the colour copy
    cv2.rectangle(annotated, (x, y), (x + w, y + h), (0, 255, 0), 2)

# Show the image with bounding boxes
cv2.imshow("Bounding Boxes", annotated)
cv2.waitKey(0)
cv2.destroyAllWindows()
I have this type of image from that I only want to extract the characters.
After binarization, I am getting this image
# Read the photo and convert it to grayscale.
img = cv2.imread('the_image.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Adaptive mean threshold: block size 11, constant 9 subtracted from the mean.
thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 9)
Then find contours on this image.
# findContours returns (image, contours, hierarchy) in OpenCV 3 but
# (contours, hierarchy) in OpenCV 4; index -2 is the contour list in both.
cnts = cv2.findContours(thresh.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
# Largest contours first.
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)
# Draw at most the 2000 largest contours in green.
for contour in cnts[:2000]:
    cv2.drawContours(img, [contour], -1, (0, 255, 0), 2)
I am getting
I need a way to filter the contours so that it selects only the characters. So I can find the bounding boxes and extract roi.
I can find contours and filter them based on the size of areas, but the resolution of the source images are not consistent. These images are taken from mobile cameras.
Also, because the borders of the boxes are disconnected, I can't detect the boxes accurately.
Edit:
If I discard boxes that have an aspect ratio of less than 0.4, it works to some extent. But I don't know whether it will work for images of different resolutions.
# Draw only contours whose bounding box has height/width of at least 0.4.
for contour in cnts[:2000]:
    x, y, w, h = cv2.boundingRect(contour)
    aspect_ratio = h/w
    area = cv2.contourArea(contour)
    if aspect_ratio >= 0.4:
        print(aspect_ratio)
        cv2.drawContours(img, [contour], -1, (0, 255, 0), 2)
Not so difficult...
import cv2
img = cv2.imread('img.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
cv2.imshow('gray', gray)

# Otsu picks the threshold automatically.
ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)
cv2.imshow('thresh', thresh)

# findContours returns (image, contours, hierarchy) in OpenCV 3 but
# (contours, hierarchy) in OpenCV 4; the last two items are the same in both.
ctrs, hier = cv2.findContours(thresh.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# Process contours left to right.
sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])

for ctr in sorted_ctrs:
    x, y, w, h = cv2.boundingRect(ctr)
    # Crop from the original image, available if the region should be saved.
    roi = img[y:y + h, x:x + w]
    area = w*h
    # Keep only boxes whose area falls in the hand-tuned range.
    if 250 < area < 900:
        # cv2.rectangle draws on img in place, so show img directly rather
        # than relying on the call's return value.
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.imshow('rect', img)
cv2.waitKey(0)
Result
You can tweak the code like you want (here it can save ROI using original image; for eventually OCR recognition you have to save them in binary format - better methods than sorting by area are available)
Source: Extract ROI from image with Python and OpenCV and some of my knowledge.
Just kidding, take a look at my questions/answers.