I need your help. I am writing a Python script to recognize text inside a shape. The shape can be captured from an RTSP stream (IP camera) at any angle.
For an example, see the attached file. My code is below, but the coordinates used to crop the rotated shape are currently set manually.
import cv2
import numpy as np
def main():
    """Grab one frame from the RTSP camera, crop the rotated shape and display it.

    The four corner points of the shape are hard-coded in ``cnt``. They are
    fed to ``cv2.minAreaRect`` and the resulting rotated rectangle is handed
    to ``crop_rect``. The original frame (with the rectangle drawn on it),
    the rotated frame and the cropped patch are shown in three windows.

    Raises:
        RuntimeError: if no frame could be read from the stream.
    """
    fn = cv2.VideoCapture("rtsp://admin:Admin123-#")
    try:
        # NOTE(review): the right-hand side was missing in the pasted code;
        # VideoCapture.read() is the call that yields (success flag, frame).
        flag, img = fn.read()
    finally:
        # Always give the stream back, even if read() raises.
        fn.release()
    if not flag:
        raise RuntimeError("could not read a frame from the RTSP stream")

    # Manually chosen corners of the shape, in contour layout (N, 1, 2).
    cnt = np.array([
        [[64, 49]],
        [[122, 11]],
        [[391, 326]],
        [[308, 373]],
    ])
    print("shape of cnt: {}".format(cnt.shape))
    rect = cv2.minAreaRect(cnt)
    print("rect: {}".format(rect))

    box = cv2.boxPoints(rect)
    # np.int0 was removed in NumPy 2.0; np.intp is the type it aliased.
    box = box.astype(np.intp)
    print("bounding box: {}".format(box))
    cv2.drawContours(img, [box], 0, (0, 255, 0), 2)

    img_crop, img_rot = crop_rect(img, rect)
    print("size of original img: {}".format(img.shape))
    print("size of rotated img: {}".format(img_rot.shape))
    print("size of cropped img: {}".format(img_crop.shape))

    # Show the two full-size images at half resolution; cv2.resize expects
    # the target size as (width, height).
    new_size = (int(img_rot.shape[1]/2), int(img_rot.shape[0]/2))
    img_rot_resized = cv2.resize(img_rot, new_size)
    new_size = (int(img.shape[1]/2), int(img.shape[0]/2))
    img_resized = cv2.resize(img, new_size)

    cv2.imshow("original contour", img_resized)
    cv2.imshow("rotated image", img_rot_resized)
    cv2.imshow("cropped_box", img_crop)
    # cv2.imwrite("crop_img1.jpg", img_crop)

    # imshow only paints once the GUI event loop runs, so wait for a key.
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def crop_rect(img, rect):
    """Rotate ``img`` about the rectangle centre and cut the rectangle out.

    Args:
        img: image array; ``img.shape[0]`` and ``img.shape[1]`` are used as
            its height and width.
        rect: ``(center, size, angle)`` triple, as produced by
            ``cv2.minAreaRect``.

    Returns:
        A tuple ``(img_crop, img_rot)``: the patch of ``size`` pixels taken
        around ``center`` from the rotated image, and the rotated image
        itself (same width and height as ``img``).
    """
    # get the parameter of the small rectangle
    center = rect[0]
    size = rect[1]
    angle = rect[2]
    # Centre and size are truncated to whole pixels before use.
    center, size = tuple(map(int, center)), tuple(map(int, size))

    # get row and col num in img
    height, width = img.shape[0], img.shape[1]
    print("width: {}, height: {}".format(width, height))

    # Rotate the whole frame by the rectangle's angle around its centre, so
    # that the rectangle becomes axis-aligned and can be cut out directly.
    M = cv2.getRotationMatrix2D(center, angle, 1)
    img_rot = cv2.warpAffine(img, M, (width, height))
    img_crop = cv2.getRectSubPix(img_rot, size, center)
    return img_crop, img_rot
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
example pic
You may start with the example in the following post.
The code sample detects the license plate, and it also detects your "shape" with text.
After detecting the "shape" with the text, you may use the following stages:
Apply a threshold to the cropped area.
Find contours, and find the contour with maximum area.
Build a mask, and mask area outside the contour (like in the license plate example).
Use minAreaRect (as fmw42 commented), and get the angle of the rectangle.
Rotate the cropped area (by angle+90 degrees).
Apply OCR using pytesseract.image_to_string.
Here is the complete code:
import cv2
import numpy as np
import imutils
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe' # I am using Windows
# Read the input image
img = cv2.imread('Admin123.jpg')
# Reused code:
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) #convert to grey scale
gray = cv2.bilateralFilter(gray, 11, 17, 17)
edged = cv2.Canny(gray, 30, 200) #Perform Edge detection
cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:10]
screenCnt = None
# loop over our contours
for c in cnts:
    # approximate the contour
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.018 * peri, True)
    # if our approximated contour has four points, then
    # we can assume that we have found our screen
    if len(approx) == 4:
        screenCnt = approx
        # cnts is sorted by descending area, so the first quadrilateral is
        # the largest one; without this break a smaller one would win.
        break
if screenCnt is None:
    # Fail with a readable message instead of passing None to drawContours.
    raise RuntimeError("no four-point contour found in the image")
# Masking the part other than the "shape"
mask = np.zeros(gray.shape,np.uint8)
new_image = cv2.drawContours(mask,[screenCnt],0,255,-1,)
new_image = cv2.bitwise_and(img,img,mask=mask)
# Now crop
(x, y) = np.where(mask == 255)
(topx, topy) = (np.min(x), np.min(y))
(bottomx, bottomy) = (np.max(x), np.max(y))
cropped = gray[topx:bottomx+1, topy:bottomy+1]
# Apply threshold the cropped area
_, thresh = cv2.threshold(cropped, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
# Find contours
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
cnts = imutils.grab_contours(cnts)
# Get contour with maximum area
c = max(cnts, key=cv2.contourArea)
# Build a mask (same as the code above)
mask = np.zeros(cropped.shape, np.uint8)
new_cropped = cv2.drawContours(mask, [c], 0, 255, -1)
new_cropped = cv2.bitwise_and(cropped, cropped, mask=mask)
# Draw green rectangle for testing
test = cv2.cvtColor(new_cropped, cv2.COLOR_GRAY2BGR)
cv2.drawContours(test, [c], -1, (0, 255, 0), thickness=2)
# Use minAreaRect as fmw42 commented
rect = cv2.minAreaRect(c)
angle = rect[2] # Get angle of the rectangle
# Rotate the cropped rectangle.
rotated_cropped = imutils.rotate(new_cropped, angle + 90)
# Read the text in the "shape"
text = pytesseract.image_to_string(rotated_cropped, config='--psm 3')
print("Extracted text is:\n\n", text)
# Show images for testing:
cv2.imshow('cropped', cropped)
cv2.imshow('thresh', thresh)
cv2.imshow('test', test)
cv2.imshow('rotated_cropped', rotated_cropped)
OCR output result:
I want to crop images according to their right frame. I have about 10000 of hand X-ray images to preprocess, and what I have done so far:
Apply Gaussian Blur and Threshold (Binary + Otsu) on the image.
Apply dilation to get a single object (in this case a hand).
Used cv2.findContours() to draw outline along the edges around the hand.
Used cv2.boundingRect() to find the right frame, and then cv2.minAreaRect() and cv2.boxPoints to get the right points for the bounding box.
Used cv2.warpPerspective to adjust image according to height and width.
The code below describes the above:
import os
import cv2
import numpy as np
from matplotlib import pyplot as plt
# Load image, create mask, grayscale, Gaussian blur, Otsu's threshold
img_path = "sample_image.png"
# The original read `image_path`, a name that is never defined (NameError).
image = cv2.imread(img_path)
if image is None:
    # cv2.imread signals an unreadable file by returning None.
    raise FileNotFoundError(img_path)
original = image.copy()
blank = np.zeros(image.shape[:2], dtype = np.uint8)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (33,33), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
# Merge text into a single contour
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
dilate = cv2.dilate(thresh, kernel, iterations = 3)
# Find contours
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key = lambda x: cv2.boundingRect(x)[0])
for c in cnts:
# Filter using contour area and aspect ratio (x1 = width, y1 = height)
x, y, x1, y1 = cv2.boundingRect(c)
if (x1 > 500) and (y1 > 700):
rect = cv2.minAreaRect(c)
box = cv2.boxPoints(rect)
box = np.int0(box)
width = int(rect[1][0])
height = int(rect[1][1])
src_pts = box.astype("float32")
dst_pts = np.array([[0, height-1], [0, 0],
[width-1, 0], [width-1, height-1]], dtype="float32")
M = cv2.getPerspectiveTransform(src_pts, dst_pts)
warped = cv2.warpPerspective(image, M, (width, height))
If you have a look at some of the images in the folder, those are the inputs. When I run these images through the code above, I get an output like this. Some of them are cropped nicely (straightened), however, some of them are cropped with 90 degree rotations. Is there a code to counter the 'rotating 90 degrees output' problem?
Here are some images:
Image Inputs: Four X-ray examples
Image Outputs: Returns images that are 90 degrees rotated
Image Outputs wanted: Straightened image (Just used Photoshop to straighten them. Dont want to do this for 10000 images...)
I edited the code according to the suggestions below. After running some samples, it now returns images that are slanted 90 degrees to the right.
Input images:
Output images:
I doubt it's because of the quality of the images. Maybe it's got to do with OpenCV's minAreaRect()? or boxPoints?
Following @Prashant Maurya's suggestion, the code was updated with a function that detects whether the hand is tilted to the left or to the right, and then maps src_pts to the correct dst_pts. The full code is shown below.
Hi, there are two changes which will correct the output:
The width and height taken in the code are in the wrong order, i.e. width: 1470 and height: 1118; just switch the values.
Map src_pts to the right dst_pts: the current code maps the top-left
corner to the bottom-left, therefore the image is rotated.
I also added a function that detects whether the image is tilted to the right or to the left and rotates it accordingly.
Full code with changes is:
import os
import cv2
import numpy as np
from matplotlib import pyplot as plt
# Load image, create mask, grayscale, Gaussian blur, Otsu's threshold
img_path = "xray1.png"
image = cv2.imread(img_path)
cv2.imshow("image original", image)
original = image.copy()
blank = np.zeros(image.shape[:2], dtype = np.uint8)
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (33,33), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
# Merge text into a single contour
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
dilate = cv2.dilate(thresh, kernel, iterations = 3)
# Find contours
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key = lambda x: cv2.boundingRect(x)[0])
def get_tilt(box):
    """Classify which way a rotated rectangle leans, from its corner points.

    The corner with the smallest x value is located, and the rank of its y
    value among all corner y values (sorted ascending) is looked up. A rank
    of 1 yields ``"Left"``, any other rank yields ``"Right"``. If several
    corners share the smallest x, the last one in ``box`` decides.

    Args:
        box: iterable of ``(x, y)`` corner points (for example the integer
            output of ``cv2.boxPoints``).

    Returns:
        ``"Left"`` or ``"Right"``; ``"Left"`` for an empty ``box``.
    """
    tilt = "Left"
    x_min = min((coord[0] for coord in box), default=None)
    y_list = sorted(coord[1] for coord in box)
    for coord in box:
        if coord[0] == x_min:
            index = y_list.index(coord[1])
            print("Index: ", index)
            if index == 1:
                tilt = "Left"
            else:
                # This branch had lost its `else:`, so "Right" was assigned
                # unconditionally and "Left" could never be returned here.
                tilt = "Right"
    return tilt
# Straighten every sufficiently large contour with a perspective warp.
for c in cnts:
    # Filter using contour area and aspect ratio (x1 = width, y1 = height)
    x, y, x1, y1 = cv2.boundingRect(c)
    if (x1 > 500) and (y1 > 700):
        rect = cv2.minAreaRect(c)
        box = cv2.boxPoints(rect)
        # np.int0 was removed in NumPy 2.0; np.intp is the type it aliased.
        box = box.astype(np.intp)
        # print("rect:", box)
        tilt = get_tilt(box)
        src_pts = box.astype("float32")

        # The corner order of `box` depends on the tilt, so both the output
        # size and the destination corner order must be chosen per case.
        if tilt == "Left":
            width = int(rect[1][1])
            height = int(rect[1][0])
            dst_pts = np.array([[0, 0],
                                [width-1, 0], [width-1, height-1], [0, height-1]], dtype="float32")
        else:
            # This branch had lost its `else:` and used to overwrite the
            # "Left" settings unconditionally.
            width = int(rect[1][0])
            height = int(rect[1][1])
            dst_pts = np.array([[0, height-1], [0, 0],
                                [width-1, 0], [width-1, height-1]], dtype="float32")

        print("Src pts:", src_pts)
        print("Dst pts:", dst_pts)
        M = cv2.getPerspectiveTransform(src_pts, dst_pts)
        warped = cv2.warpPerspective(image, M, (width, height))
        print("Showing image ..")
        # plt.imshow(warped)
        cv2.imshow("image crop", warped)
        # imshow needs the GUI event loop to run before anything is drawn.
        cv2.waitKey(0)
I need to extract the bounding boxes of the text and save them as sub-images of the main image. I have not found suitable code or documentation for this task.
Could anyone provide code, documentation, help links, or Python modules that can help to crop text from scanned images?
Below I have attached a scanned image and the expected output.
The image below is the scanned copy from which the text needs to be cropped.
import cv2
import pytesseract
pytesseract.pytesseract.tesseract_cmd ='C:\\Program Files (x86)\\Tesseract-OCR\\tesseract'
img = cv2.imread("test.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh1 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (18, 18))
dilation = cv2.dilate(thresh1, rect_kernel, iterations = 1)
contours, hierarchy = cv2.findContours(dilation, cv2.RETR_EXTERNAL,
im2 = img.copy()
file = open("recognized.txt", "w+")
for cnt in contours:
x, y, w, h = cv2.boundingRect(cnt)
rect = cv2.rectangle(im2, (x, y), (x + w, y + h), (0, 255, 0), 2)
cropped = im2[y:y + h, x:x + w]
file = open("recognized.txt", "a")
text = pytesseract.image_to_string(cropped)
crop_img = img[y:y+h, x:x+w] # just the region you are interested
second image expected croped image:
Here is one approach in Python/OpenCV.
Read the input
Get the Canny edges
Get the outer contours of the edges
Filter the contours to remove small extraneous spots
Get the convex hull of the main cluster of edges
Draw the convex hull as white filled on a black background as a mask
Mask to black the outside region of the input
Get the rotated rectangle from the convex hull
From the negative angle and center of the rotated rectangle rectify the orientation using perspective warping
Save the results
import cv2
import numpy as np
# Read image
img = cv2.imread('receipt.jpg')
hh, ww = img.shape[:2]
# get edges
canny = cv2.Canny(img, 50, 200)
# get contours
contours = cv2.findContours(canny, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
# filter out small regions
cimg = np.zeros_like(canny)
for cntr in contours:
area = cv2.contourArea(cntr)
if area > 20:
cv2.drawContours(cimg, [cntr], 0, 255, 1)
# get convex hull and draw on input
points = np.column_stack(np.where(cimg.transpose() > 0))
hull = cv2.convexHull(points)
himg = img.copy()
cv2.polylines(himg, [hull], True, (0,0,255), 1)
# draw convex hull as filled mask
mask = np.zeros_like(cimg, dtype=np.uint8)
cv2.fillPoly(mask, [hull], 255)
# blacken out input using mask
mimg = img.copy()
mimg = cv2.bitwise_and(mimg, mimg, mask=mask)
# get rotate rectangle
rotrect = cv2.minAreaRect(hull)
(center), (width,height), angle = rotrect
box = cv2.boxPoints(rotrect)
boxpts = np.int0(box)
# draw rotated rectangle on copy of input
rimg = img.copy()
cv2.drawContours(rimg, [boxpts], 0, (0,0,255), 1)
# (source link missing from the original snippet)
# the `cv2.minAreaRect` function returns values in the
# range [-90, 0); as the rectangle rotates clockwise the
# returned angle tends to 0 -- in this special case we
# need to add 90 degrees to the angle
# NOTE(review): the reported angle range differs between OpenCV releases --
# confirm against the installed version.
if angle < -45:
    angle = -(90 + angle)
# otherwise, check width vs height
# (the three assignments are mutually exclusive; the `else` keywords had
# been lost, which made all of them run one after another)
elif width > height:
    angle = -(90 + angle)
else:
    angle = -angle
# negate the angle to unrotate
neg_angle = -angle
print('unrotation angle:', neg_angle)
# Get rotation matrix
# center = (width // 2, height // 2)
M = cv2.getRotationMatrix2D(center, neg_angle, scale=1.0)
# unrotate to rectify
result = cv2.warpAffine(mimg, M, (ww, hh), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT, borderValue=(0,0,0))
# save results
cv2.imwrite('receipt_mask.jpg', mask)
cv2.imwrite('receipt_edges.jpg', canny)
cv2.imwrite('receipt_filtered_edges.jpg', cimg)
cv2.imwrite('receipt_hull.jpg', himg)
cv2.imwrite('receipt_rotrect.jpg', rimg)
cv2.imwrite('receipt_masked_result.jpg', result)
cv2.imshow('canny', canny)
cv2.imshow('cimg', cimg)
cv2.imshow('himg', himg)
cv2.imshow('mask', mask)
cv2.imshow('rimg', rimg)
cv2.imshow('result', result)
Canny Edges:
Filtered Edges from Contours:
Convex Hull:
Rotated Rectangle:
Rectified Result:
In OpenCV you can use cv2.findContours to draw the bounding boxes. See this article which explains how to do that:
Then after you have your bounding box locations (your region of interest where text is located, and you want to crop) you can use use slicing to crop the image:
import cv2
img = cv2.imread("lenna.png")
crop_img = img[y:y+h, x:x+w] # just the region you are interested
cv2.imshow("cropped", crop_img)
If you want to extract the text directly, I think you can use Tesseract OCR through a Python package (how to get started: link missing). You can also make use of OpenCV's built-in OCR functions. Read more: (link missing)
# The PIL module is named `Image` (capital I); `image` does not exist.
from PIL import Image

original_image = Image.open(".nameofimage.jpg")
# Same variable name as above; the original used a different capitalisation.
rotate_image = original_image.rotate(330)
x = 100
y = 80
h = 200
w = 200
# PIL images cannot be sliced like NumPy arrays; crop() takes a
# (left, upper, right, lower) box instead.
cropped_image = rotate_image.crop((x, y, x + w, y + h))
I have been working with OpenCV in order to detect a square obstacle. So far, this is the image I get after applying filters and Canny.
The obstacle I am trying to identify is the horizontal one; the three vertical rectangles are guide lines on the floor. My goal is to keep only the horizontal rectangle, separating it from the others, but after applying findContours I only get a single object that includes all the shapes. This is the code I have been using in order to find only the biggest rectangle by area:
# find the biggest countour (c) by the area
if contours != 0:
if not contours:
bigone = max(contours, key=cv2.contourArea) if max else None
area = cv2.contourArea(bigone)
if area > 10000:
x, y, w, h = cv2.boundingRect(bigone)
cv2.rectangle(img, (x, y), (x+w, y+h), (0, 0, 255), 2)
cv2.putText(img, "Obstacle", (x+w/2, y-20),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
pts = np.array(
[[[x, y], [x+w, y], [x+w, y+h], [x, y+h]]], dtype=np.int32)
cv2.fillPoly(mask, pts, (255, 255, 255))
#values = img[np.where((mask == (255, 255, 255)).all(axis=2))]
res = cv2.bitwise_and(img, mask) # View only the obstacle
obs_area = w*h
if obs_area <= 168000:
img, "GO", (380, 400), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 255), 1)
cv2.putText(img, "STOP", (380, 400),
cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 255), 1)
# show the output image
cv2.imshow("Image", img)
And this is the result I am getting:
Is there a way of separating my obstacle from the lines on the floor with some kind of filter or algorithm?
Here is an example image to work with:
Here is one way to do that using Python/OpenCV.
- Read the input
- Convert to HSV and extract only the saturation channel (black/white/gray have zero saturation)
- Threshold
- Apply morphology open and close to remove the extraneous white regions
- Get the contour and approximate to simple polygon
- Draw the polygon on the input
- Save the results
import cv2
import numpy as np
# read image
img = cv2.imread('board.png')
# convert to HSV and extract saturation channel
# (cv2.imread loads pixels in BGR order, so use the BGR conversion code)
sat = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)[:,:,1]
# threshold
thresh = cv2.threshold(sat, 90, 255, 0)[1]
# apply morphology open to drop small white specks, then close to fill
# interior regions in mask
kernel = np.ones((7,7), np.uint8)
morph = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
kernel = np.ones((13,13), np.uint8)
morph = cv2.morphologyEx(morph, cv2.MORPH_CLOSE, kernel)
# get contours and fit the largest one to a simple polygon (quadrilateral)
cntrs = cv2.findContours(morph, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 or 3 values depending on the OpenCV version
cntrs = cntrs[0] if len(cntrs) == 2 else cntrs[1]
if len(cntrs) == 0:
    raise RuntimeError("no contour survived thresholding and morphology")
# Do not rely on there being exactly one contour: take the biggest.
c = max(cntrs, key=cv2.contourArea)
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.05 * peri, True)
# draw polygon on input
result = img.copy()
cv2.polylines(result, [np.int32(approx)], True, (0,0,255), 1, cv2.LINE_AA)
# write result to disk
cv2.imwrite("board_saturation.png", sat)
cv2.imwrite("board_thresh.png", thresh)
cv2.imwrite("board_morph.png", morph)
cv2.imwrite("board_contour.png", result)
# display it
cv2.imshow("IMAGE", img)
cv2.imshow("SAT", sat)
cv2.imshow("THRESH", thresh)
cv2.imshow("MORPH", morph)
cv2.imshow("RESULT", result)
# the windows are only painted while the GUI event loop runs
cv2.waitKey(0)
cv2.destroyAllWindows()
Saturation channel image:
Thresholded image:
Morphology cleaned image:
Contour on input:
In your image, the problem seems to be the white rectangles. My approach is to check each row, and if a row contains many pixels that are close to white (255, 255, 255), make the whole row black.
Here is my code:
import cv2
import numpy as np
import random as rng
height, width, channels = img.shape
# Check each line and eliminate white rectangles(if line consist white pixels more than limit)
for x in range(0,height):
white_counter = 0
for y in range(0,width):
if img[x,y,0] >= 180 and img[x,y,1] >= 180 and img[x,y,2] >= 180:
white_counter = white_counter + 1
if white_counter>10:
for y in range(0,width):
img[x,y,0] = 0
img[x,y,1] = 0
img[x,y,2] = 0
cv2.imshow('Elimination White Rectangles', img)
# Find contours and draw rectangle for each
src_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
threshold = 300
canny_output = cv2.Canny(src_gray, threshold, threshold * 2)
contours, _ = cv2.findContours(canny_output, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours_poly = [None]*len(contours)
boundRect = [None]*len(contours)
for i, c in enumerate(contours):
contours_poly[i] = cv2.approxPolyDP(c, 3, True)
boundRect[i] = cv2.boundingRect(contours_poly[i])
drawing = np.zeros((canny_output.shape[0], canny_output.shape[1], 3), dtype=np.uint8)
for i in range(len(contours)):
color = (rng.randint(0,256), rng.randint(0,256), rng.randint(0,256))
cv2.rectangle(drawing, (int(boundRect[i][0]), int(boundRect[i][1])), \
(int(boundRect[i][0]+boundRect[i][2]), int(boundRect[i][1]+boundRect[i][3])), color, 2)
cv2.imshow('Output', drawing)
Eliminate White Rectangles:
I have lots of scanned images of handwritten digit inside a rectangle(small one).
Please help me to crop each image containing digits and save them by giving the same name to each row.
import cv2
img = cv2.imread('Data\Scan_20170612_4.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 17, 17)
edged = cv2.Canny(gray, 30, 200)
_, contours, hierarchy = cv2.findContours(edged, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
i = 0
for c in contours:
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.09 * peri, True)
if len(approx) == 4:
screenCnt = approx
cv2.drawContours(img, [screenCnt], -1, (0, 255, 0), 3)
cv2.imwrite('cropped\\' + str(i) + '_img.jpg', img)
i += 1
Here is My Version
import cv2
import numpy as np
fileName = ['9','8','7','6','5','4','3','2','1','0']
img = cv2.imread('Data\Scan_20170612_17.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 17, 17)
kernel = np.ones((5,5),np.uint8)
erosion = cv2.erode(gray,kernel,iterations = 2)
kernel = np.ones((4,4),np.uint8)
dilation = cv2.dilate(erosion,kernel,iterations = 2)
edged = cv2.Canny(dilation, 30, 200)
_, contours, hierarchy = cv2.findContours(edged, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
rects = [cv2.boundingRect(cnt) for cnt in contours]
rects = sorted(rects,key=lambda x:x[1],reverse=True)
i = -1
j = 1
y_old = 5000
x_old = 5000
for rect in rects:
x,y,w,h = rect
area = w * h
if area > 47000 and area < 70000:
if (y_old - y) > 200:
i += 1
y_old = y
if abs(x_old - x) > 300:
x_old = x
x,y,w,h = rect
out = img[y+10:y+h-10,x+10:x+w-10]
cv2.imwrite('cropped\\' + fileName[i] + '_' + str(j) + '.jpg', out)
That's an easy thing if u try. Here's my output- (The image and its one small bit)
What i did?
Resized the image first because it was too big in my screen
Erode, Dilate to remove small dots and thicken the lines
Threshold the image
Flood fill, beginning at the right point
Invert the flood fill
Find contours and draw one at a time which are in range of approximately the
area on the rectangle. For my resized (500x500) image i put Area of
contour in range 500 to 2500 (trial and error anyway).
Find bounding rectangle and crop that mask from main image.
Then save that piece with proper name- which i didn't do.
Maybe, there's a simpler way, but i liked this. Not putting the code because
i made it all clumsy. Will put if u still need it.
Here's how the mask looks when you find contours each at a time
import cv2;
import numpy as np;
# Run the code with the image name, keep pressing space bar
# Change the kernel, iterations, Contour Area, position accordingly
# These values work for your present image
img = cv2.imread("your_image.jpg", 0);
h, w = img.shape[:2]
kernel = np.ones((15,15),np.uint8)
e = cv2.erode(img,kernel,iterations = 2)
d = cv2.dilate(e,kernel,iterations = 1)
ret, th = cv2.threshold(d, 150, 255, cv2.THRESH_BINARY_INV)
mask = np.zeros((h+2, w+2), np.uint8)
cv2.floodFill(th, mask, (200,200), 255); # position = (200,200)
out = cv2.bitwise_not(th)
out= cv2.dilate(out,kernel,iterations = 3)
cnt, h = cv2.findContours(out,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
for i in range(len(cnt)):
area = cv2.contourArea(cnt[i])
if(area>10000 and area<100000):
mask = np.zeros_like(img)
cv2.drawContours(mask, cnt, i, 255, -1)
x,y,w,h = cv2.boundingRect(cnt[i])
crop= img[ y:h+y,x:w+x]
cv2.imshow("snip",crop )
_, contours, hierarchy = cv2.findContours(edged, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
you are using cv2.RETR_LIST to find contours in the image. For your image to get better output use cv2.RETR_EXTERNAL. Before using that first remove black border line from the image.
cv2.RETR_LIST gives you list of all contours for image
cv2.RETR_EXTERNAL gives you only external or outer contours, not internal contours
change line to
_, contours, hierarchy = cv2.findContours(edged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
Contours Hierarchy
I would like to detect the labels in images like this one for the purpose of extracting the text using tesseract. I have tried various combinations of thresholding and using edge detection. However I can only detect about half of the labels at a time at max. These are a few of the images I've been trying to read the labels from:
enter image description here
enter image description here
All of the labels have the same aspect ratio (the width is 3.5 times larger than the height), so I'm trying to find contours that have a minAreaRect with that same aspect ratio. The hard part is handling the labels on the lighter background. This is the code I have so far:
from PIL import Image
import pytesseract
import numpy as np
import argparse
import cv2
import os
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
help="path to input image to be OCR'd")
args = vars(ap.parse_args())
#function to crop an image to a minAreaRect
def crop_minAreaRect(img, rect):
    """Rotate ``img`` by the rectangle's angle and slice the rectangle out.

    Args:
        img: image array; ``img.shape[0]`` and ``img.shape[1]`` are used as
            row and column counts.
        rect: ``(center, size, angle)`` triple, as produced by
            ``cv2.minAreaRect``.

    Returns:
        The slice of the rotated image spanned by the transformed corner
        points of ``rect``.
    """
    # rotate img
    angle = rect[2]
    rows, cols = img.shape[0], img.shape[1]
    M = cv2.getRotationMatrix2D((cols/2, rows/2), angle, 1)
    img_rot = cv2.warpAffine(img, M, (cols, rows))

    # rotate bounding box with the same matrix so it lines up with img_rot
    box = cv2.boxPoints(rect)
    # np.int0 was removed in NumPy 2.0; np.intp is the type it aliased.
    pts = cv2.transform(np.array([box]), M).astype(np.intp)[0]
    # negative coordinates would wrap around when used as slice bounds
    pts[pts < 0] = 0

    # crop
    # NOTE(review): the column range was cut off in the pasted code and is
    # reconstructed here; it assumes boxPoints yields the corners in the
    # order bottom-left, top-left, top-right, bottom-right -- confirm
    # against the installed OpenCV version.
    img_crop = img_rot[pts[1][1]:pts[0][1],
                       pts[1][0]:pts[2][0]]
    return img_crop
# load image and apply threshold
image = cv2.imread(args["image"])
bw = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
#bw = cv2.threshold(bw, 210, 255, cv2.THRESH_BINARY)[1]
bw = cv2.adaptiveThreshold(bw, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 27, 20)
#do edge detection
v = np.median(bw)
sigma = 0.5
lower = int(max(0, (1.0 - sigma) * v))
upper = int(min(255, (1.0 + sigma) * v))
bw = cv2.Canny(bw, lower, upper)
kernel = np.ones((5,5), np.uint8)
bw = cv2.dilate(bw,kernel,iterations=1)
#find contours
image2, contours, hierarchy = cv2.findContours(bw,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
bw = cv2.drawContours(bw,contours,0,(0,0,255),2)
cv2.imwrite("edge.png", bw)
#test which contours have the correct aspect ratio
largestarea = 0.0
passes = []
for contour in contours:
(x,y),(w,h),a = cv2.minAreaRect(contour)
if h > 20 and w > 20:
if h > w:
maxdim = h
mindim = w
maxdim = w
mindim = h
ratio = maxdim/mindim
print("ratio: {}".format(ratio))
if (ratio > 3.4 and ratio < 3.6):
if not passes:
print "no passes"
passboxes = []
i = 1
#crop out each label and attemp to extract text
for ps in passes:
rect = cv2.minAreaRect(ps)
bw = crop_minAreaRect(image, rect)
cv2.imwrite("{}.png".format(i), bw)
i += 1
h, w = bw.shape[:2]
print str(h) + "x" + str(w)
if w and h:
bw = cv2.cvtColor(bw, cv2.COLOR_BGR2GRAY)
bw = cv2.threshold(bw, 50, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
cv2.imwrite("output.png", bw)
im ="output.png")
w, h = im.size
print "W:{} H:{}".format(w,h)
if h > w:
print ("rotating")
print pytesseract.image_to_string("output.png"))
print pytesseract.image_to_string("output.png"))
box = cv2.boxPoints(cv2.minAreaRect(ps))
cnts = cv2.drawContours(image,passboxes,0,(0,0,255),2)
cnts = cv2.drawContours(cnts,contours,-1,(255,255,0),2)
cnts = cv2.drawContours(cnts, passes, -1, (0,255,0), 3)
cv2.imwrite("output2.png", image)
I believe the problem I have could be the parameters for the thresholding. Or I could be over complicating this.
Only the white labels with "A-08337" and such? The following detects all of them on both images:
import numpy as np
import cv2
img = cv2.imread('labels.jpg')
#downscale the image because Canny tends to work better on smaller images
# img.shape is (rows, cols, channels), i.e. (height, width, ...)
h, w = img.shape[:2]
resize_coeff = 0.25
# cv2.resize expects the target size as (width, height)
img = cv2.resize(img, (int(resize_coeff*w), int(resize_coeff*h)))
#find edges, then contours
canny = cv2.Canny(img, 100, 200)
contours = cv2.findContours(canny, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 or 3 values depending on the OpenCV version;
# unpacking a fixed number of values breaks on the other versions
contours = contours[0] if len(contours) == 2 else contours[1]
#draw the contours, do morphological close operation
#to close possible small gaps, then find contours again on the result
h, w = img.shape[:2]
blank = np.zeros((h, w), np.uint8)
cv2.drawContours(blank, contours, -1, 1, 1)
blank = cv2.morphologyEx(blank, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8))
contours = cv2.findContours(blank, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
contours = contours[0] if len(contours) == 2 else contours[1]
#keep only contours of more or less correct area and perimeter
contours = [
    c for c in contours
    if 800 < cv2.contourArea(c) < 1600 and cv2.arcLength(c, True) < 200
]
cv2.drawContours(img, contours, -1, (0, 0, 255), 1)
cv2.imwrite("contours.png", img)
Probably with some additional convexity check you can get rid of the "Verbatim" contours and such (for example, only keep contours with near zero difference between their area and their convex hull's area).