I have several types of invoice files, and I want to find the table in each of them. The table position is not constant, so I turned to image processing: first I convert the invoice into an image, then I find contours based on the table borders, and finally I can capture the table position.
For this task I used the code below:
import cv2
import numpy as np
from wand.image import Image

with Image(page) as page_image:
    page_image.alpha_channel = False  # eliminates transparency
    img_buffer = np.asarray(bytearray(page_image.make_blob()), dtype=np.uint8)
    # decode as grayscale so threshold/findContours get a single channel
    img = cv2.imdecode(img_buffer, cv2.IMREAD_GRAYSCALE)

ret, thresh = cv2.threshold(img, 127, 255, 0)
im2, contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

margin = []
for contour in contours:
    # get rectangle bounding contour
    [x, y, w, h] = cv2.boundingRect(contour)
    # don't keep small false positives that aren't the table
    # (thresh1 and thresh2 are tuned per file, see below)
    if w > thresh1 and h > thresh2:
        margin.append([x, y, x + w, y + h])
# data cleanup on margin to extract the required position values
In this code I update thresh1 and thresh2 based on the file.
Using this code I can successfully read the positions of tables in images, and with those positions I then work on my invoice PDF file. For example:
Sample 1:
Sample 2:
Sample 3:
Output:
Sample 1:
Sample 2:
Sample 3:
But now I have a new format that has no borders, yet it is still a table. My entire approach depends on the borders of the tables, so without them I am stuck. Is there any way to find the table position based on the table structure alone?
For example, my problem input looks like this:
and I would like to find its position like this:
How can I solve this? Any idea to solve the problem would be really appreciated.
Thanks in advance.
Vaibhav is right. You can experiment with different morphological transforms to extract or group pixels into different shapes, lines, etc. For example, the approach can be the following:

- Start with dilation to convert the text into solid spots.
- Then apply the findContours function as a next step to find the text bounding boxes.
- After having the text bounding boxes, it is possible to apply some heuristic algorithm to cluster the text boxes into groups by their coordinates. This way you can find groups of text areas aligned into rows and columns.
- Then you can apply sorting by x and y coordinates and/or some analysis to the groups to try to find whether the grouped text boxes can form a table.
I wrote a small sample illustrating the idea. I hope the code is self-explanatory; I've put some comments there too.
import os
import cv2

# This only works if there's only one table on a page
# Important parameters:
#  - morph_size
#  - min_text_height_limit
#  - max_text_height_limit
#  - cell_threshold
#  - min_columns


def pre_process_image(img, save_in_file, morph_size=(8, 8)):
    # get rid of the color
    pre = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Otsu threshold
    pre = cv2.threshold(pre, 250, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    # dilate the text to make it a solid spot
    cpy = pre.copy()
    struct = cv2.getStructuringElement(cv2.MORPH_RECT, morph_size)
    cpy = cv2.dilate(~cpy, struct, anchor=(-1, -1), iterations=1)
    pre = ~cpy

    if save_in_file is not None:
        cv2.imwrite(save_in_file, pre)
    return pre


def find_text_boxes(pre, min_text_height_limit=6, max_text_height_limit=40):
    # Looking for the text spot contours
    # OpenCV 3
    # img, contours, hierarchy = cv2.findContours(pre, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    # OpenCV 4
    contours, hierarchy = cv2.findContours(pre, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    # Getting the text bounding boxes based on the text size assumptions
    boxes = []
    for contour in contours:
        box = cv2.boundingRect(contour)
        h = box[3]
        if min_text_height_limit < h < max_text_height_limit:
            boxes.append(box)
    return boxes


def find_table_in_boxes(boxes, cell_threshold=10, min_columns=2):
    rows = {}
    cols = {}

    # Clustering the bounding boxes by their positions
    for box in boxes:
        (x, y, w, h) = box
        col_key = x // cell_threshold
        row_key = y // cell_threshold
        cols[col_key] = [box] if col_key not in cols else cols[col_key] + [box]
        rows[row_key] = [box] if row_key not in rows else rows[row_key] + [box]

    # Filtering out the clusters having fewer than min_columns cols
    table_cells = list(filter(lambda r: len(r) >= min_columns, rows.values()))
    # Sorting the row cells by x coord
    table_cells = [list(sorted(tb)) for tb in table_cells]
    # Sorting rows by the y coord
    table_cells = list(sorted(table_cells, key=lambda r: r[0][1]))

    return table_cells


def build_lines(table_cells):
    if table_cells is None or len(table_cells) <= 0:
        return [], []

    max_last_col_width_row = max(table_cells, key=lambda b: b[-1][2])
    max_x = max_last_col_width_row[-1][0] + max_last_col_width_row[-1][2]

    max_last_row_height_box = max(table_cells[-1], key=lambda b: b[3])
    max_y = max_last_row_height_box[1] + max_last_row_height_box[3]

    hor_lines = []
    ver_lines = []

    for box in table_cells:
        x = box[0][0]
        y = box[0][1]
        hor_lines.append((x, y, max_x, y))

    for box in table_cells[0]:
        x = box[0]
        y = box[1]
        ver_lines.append((x, y, x, max_y))

    (x, y, w, h) = table_cells[0][-1]
    ver_lines.append((max_x, y, max_x, max_y))
    (x, y, w, h) = table_cells[0][0]
    hor_lines.append((x, max_y, max_x, max_y))

    return hor_lines, ver_lines


if __name__ == "__main__":
    in_file = os.path.join("data", "page.jpg")
    pre_file = os.path.join("data", "pre.png")
    out_file = os.path.join("data", "out.png")

    img = cv2.imread(in_file)

    pre_processed = pre_process_image(img, pre_file)
    text_boxes = find_text_boxes(pre_processed)
    cells = find_table_in_boxes(text_boxes)
    hor_lines, ver_lines = build_lines(cells)

    # Visualize the result
    vis = img.copy()

    # for box in text_boxes:
    #     (x, y, w, h) = box
    #     cv2.rectangle(vis, (x, y), (x + w - 2, y + h - 2), (0, 255, 0), 1)

    for line in hor_lines:
        [x1, y1, x2, y2] = line
        cv2.line(vis, (x1, y1), (x2, y2), (0, 0, 255), 1)

    for line in ver_lines:
        [x1, y1, x2, y2] = line
        cv2.line(vis, (x1, y1), (x2, y2), (0, 0, 255), 1)

    cv2.imwrite(out_file, vis)
I've got the following output:
Of course, to make the algorithm more robust and applicable to a variety of different input images, it has to be adjusted accordingly.
Update: Updated the code with respect to the OpenCV API changes for findContours. If you have an older version of OpenCV installed, use the corresponding call. Related post.
You can try applying some morphological transforms (such as dilation or erosion) or a Gaussian blur as a pre-processing step before your findContours call. For example:
import cv2
import numpy as np

img = cv2.imread('invoice.png')            # path is a placeholder
g = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # work on the grayscale image

blur = cv2.GaussianBlur(g, (3, 3), 0)
ret, thresh1 = cv2.threshold(blur, 150, 255, cv2.THRESH_BINARY)
bitwise = cv2.bitwise_not(thresh1)
erosion = cv2.erode(bitwise, np.ones((1, 1), np.uint8), iterations=5)
dilation = cv2.dilate(erosion, np.ones((3, 3), np.uint8), iterations=5)
The last argument, iterations, controls the degree of dilation/erosion that will take place (in your case, on the text). A small value will result in small independent contours even within a single letter, while large values will merge many nearby elements. You need to find the ideal value so that only the block of your image you want gets extracted.
Please note that I've taken 150 as the threshold parameter because I've been working on extracting text from images with varying backgrounds, and this worked out better. You can continue with the value you've chosen, since it's a black-and-white image.
There are many types of tables in document images, with a great deal of variation in layout. No matter how many rules you write, there will always be a table for which your rules fail. These types of problems are generally solved with ML (Machine Learning) based solutions. You can find many pre-implemented codes on GitHub for detecting tables in images using ML or DL (Deep Learning).
Here is my code along with the deep learning models; the model can detect various types of tables as well as the structure cells of the tables: https://github.com/DevashishPrasad/CascadeTabNet
The approach achieves state of the art on various public datasets as of now (10th May 2020) as far as accuracy is concerned.
More details: https://arxiv.org/abs/2004.12629
This should be helpful for you.
I've drawn a bounding box for each word in my invoice, and then I choose only the fields that I want. You can use an ROI (Region Of Interest) for that:
import pytesseract
import cv2

img = cv2.imread(r'path\Invoice2.png')
d = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT)
n_boxes = len(d['level'])
for i in range(n_boxes):
    (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
    img = cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 1)
cv2.imshow('img', img)
cv2.waitKey(0)
You will get this output:
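From there, to pick out only the fields you care about, you can keep just the word boxes that fall inside a chosen region. A minimal sketch, continuing from the dictionary d in the snippet above; the ROI coordinates are placeholders you would replace with your own field location:

# hypothetical ROI for one invoice field: (x, y, width, height)
roi_x, roi_y, roi_w, roi_h = 50, 200, 400, 150

selected = []
for i in range(n_boxes):
    x, y, w, h = d['left'][i], d['top'][i], d['width'][i], d['height'][i]
    # keep the word only if its box lies entirely inside the ROI
    if roi_x <= x and roi_y <= y and x + w <= roi_x + roi_w and y + h <= roi_y + roi_h:
        selected.append(d['text'][i])
print(' '.join(selected))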
Related
I have a number of blueprints on which I would like to detect the numbers, so that I can turn them into proper models.
For example, I have the following image and would like all the numbers on it, so I ran the following code:
import pytesseract
from pytesseract import Output
import cv2
import numpy as np

img = cv2.imread('vdb7C.jpg')
custom_config = r'--oem 2 --psm 10'
d = pytesseract.image_to_data(img, config=custom_config, lang='eng', output_type=Output.DICT)
n_boxes = len(d['level'])
for i in range(n_boxes):
    text = d["text"][i]
    print(text + str(str.isdigit(text)))
    if str.isdigit(text):
        (x, y, w, h) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i])
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv2.imwrite("output.jpg", img)
This gave me the following result. As you can see, it correctly identifies a number of the digits on the blueprint, but it misses quite a few others and falsely detects a few that aren't really there. I care more about getting all the numbers than about a few false positives, but I would still like to keep those to a minimum, so any suggestions there?
I have already tried thinning operations, re-scaling the images, rotating the images, and smoothing the images, but none of those appear to make much difference; extreme rescaling (x0.1 or x10) does change a few things, but any gains made in one part of the image are undone by faults appearing in other parts.
Especially difficult are situations such as on the left building, where lines and numbers are close to or even overlapping parts of the design.
Here we see two examples of such situations.
Also note that font usage is not consistent between images.
It's worth noting that the lines are almost always noticeably thinner than the font used for the numbers, so perhaps something could be done with that?
I have also tried using the EAST OCR system with the following code:
import cv2
import numpy as np
from imutils.object_detection import non_max_suppression

img = cv2.imread('vdb7C.jpg')
W = 5664
H = 4000
dim = (W, H)
img = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
confidence = 0.5  # minimum score threshold (value assumed; not defined in the original post)
net = cv2.dnn.readNet("frozen_east_text_detection.pb")
blob = cv2.dnn.blobFromImage(img, 1.0, (W, H),
                             (123.68, 116.78, 103.94), swapRB=True, crop=False)
net.setInput(blob)
(scores, geometry) = net.forward(["feature_fusion/Conv_7/Sigmoid",
                                  "feature_fusion/concat_3"])
(numRows, numCols) = scores.shape[2:4]
rects = []
confidences = []

# loop over the number of rows
for y in range(0, numRows):
    # extract the scores (probabilities), followed by the geometrical
    # data used to derive potential bounding box coordinates that
    # surround text
    scoresData = scores[0, 0, y]
    xData0 = geometry[0, 0, y]
    xData1 = geometry[0, 1, y]
    xData2 = geometry[0, 2, y]
    xData3 = geometry[0, 3, y]
    anglesData = geometry[0, 4, y]
    for x in range(0, numCols):
        if scoresData[x] < confidence:
            continue
        (offsetX, offsetY) = (x * 4.0, y * 4.0)
        angle = anglesData[x]
        cos = np.cos(angle)
        sin = np.sin(angle)
        h = xData0[x] + xData2[x]
        w = xData1[x] + xData3[x]
        endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
        endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
        startX = int(endX - w)
        startY = int(endY - h)
        rects.append((startX, startY, endX, endY))
        confidences.append(scoresData[x])

boxes = non_max_suppression(np.array(rects), probs=confidences)
for box in boxes:
    (y, h, x, w) = box
    print(box)
    print(np.shape(img))
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv2.imwrite("output.jpg", img)
However, this causes quite a number of bounding boxes to be outside of the image, and in general the bounding boxes seem unrelated to the content, so does anyone know what's up there?
Any suggestions? I have 8,000 images right now and eventually need to process a total of about 400k images.
I suggest using a solution that applies neural networks, like keras-ocr, which applies CRAFT and CRNN. It does a better job of detecting text that overlaps with the design. This is what I got using it out of the box:
import matplotlib.pyplot as plt
import keras_ocr
detector = keras_ocr.detection.Detector()
image = keras_ocr.tools.read('vdb7C.jpg')
boxes = detector.detect(images=[image])[0]
canvas = keras_ocr.tools.drawBoxes(image, boxes)
plt.imshow(canvas)
Result:
Run your tesseract piece of code, but only use results with 3 or more digits. This should provide you with enough good examples of digits. Extract each digit to a separate file and save their positions. Now you can go two ways.
The simple way: if you see that the fonts of the digits are quite similar, you can create a set of templates for the digits (say 15-30). Remember that you can get the size of the digits for a specific image, so resize your digit templates to the right size and run the most trivial template matching. This will definitely create some false detections (especially for "1"s), and you will have to find a way to reduce their number to an acceptable level.
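A rough sketch of that template-matching step, assuming a digit crop saved as digit_template.png and a per-image digit size of 20 x 32 pixels (both placeholders):

import cv2
import numpy as np

img = cv2.imread('vdb7C.jpg', cv2.IMREAD_GRAYSCALE)
template = cv2.imread('digit_template.png', cv2.IMREAD_GRAYSCALE)
# resize the template to the digit size measured for this image
template = cv2.resize(template, (20, 32))

res = cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED)
ys, xs = np.where(res >= 0.8)  # score threshold to tune against false positives
for x, y in zip(xs, ys):
    cv2.rectangle(img, (x, y), (x + 20, y + 32), 0, 1)
cv2.imwrite('matches.jpg', img)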
The more complex way is to build a custom CNN detector and train it on your data. From the first stage you will get several hundred examples of the digits (and their positions) that you want to detect. You can look at this project or this one as references. Also, this article can provide you some guidance.
One more thing that can be useful: your images have lots of long perpendicular lines. If you align them to the axes, you can remove the lines very easily by binarizing the original, shifting the result (right or down) by several pixels, and ANDing the two. This will leave only the long lines. Find their length, and you will be able to remove lines above a certain length from the original image.
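A small sketch of that shift-and-AND trick, assuming binary.png is the binarized blueprint (white strokes on black); the shift distance is a placeholder to tune:

import cv2
import numpy as np

binary = cv2.imread('binary.png', cv2.IMREAD_GRAYSCALE)
shift = 15  # larger than a digit stroke, smaller than a line

# shift right by `shift` pixels and AND with the original:
# only horizontal runs longer than `shift` survive
shifted = np.zeros_like(binary)
shifted[:, shift:] = binary[:, :-shift]
long_horizontal = cv2.bitwise_and(binary, shifted)

# remove the detected long lines from the original
cleaned = cv2.subtract(binary, long_horizontal)
cv2.imwrite('cleaned.png', cleaned)

The same idea with a downward shift isolates the long vertical lines.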
I am very new to OpenCV and Python. I have followed a tutorial to use YOLO with yolov3-tiny. It detects vehicles fine. But what I need to complete my project is to count the number of vehicles that pass a particular lane. If I count each time a vehicle is detected (the bounding box appears), the count becomes very inaccurate, since the bounding box keeps blinking (it keeps re-detecting the same vehicle, sometimes up to 5 times), so this is not a good way to count.
So I figured: what if I just count a vehicle when it reaches a certain point? I have seen a lot of code that seems to do this but, since I am a beginner, it is really hard for me to understand, let alone run on my system. Their samples need so many things installed that I can't manage, because it throws errors.
See my sample code below:
import cv2
import numpy as np

cap = cv2.VideoCapture('rtsp://username:password#xxx.xxx.xxx.xxx:xxx/cam/realmonitor?channel=1')
whT = 320
confThreshold = 0.75
nmsThreshold = 0.3
list_of_vehicles = ["bicycle", "car", "motorbike", "bus", "truck"]
classesFile = 'coco.names'
classNames = []
with open(classesFile, 'r') as f:
    classNames = f.read().rstrip('\n').split('\n')

modelConfiguration = 'yolov3-tiny.cfg'
modelWeights = 'yolov3-tiny.weights'

net = cv2.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

total_vehicle_count = 0

def getVehicleCount(boxes, class_name):
    global total_vehicle_count
    dict_vehicle_count = {}
    if class_name in list_of_vehicles:
        total_vehicle_count += 1
        # print(total_vehicle_count)
    return total_vehicle_count, dict_vehicle_count

def findObjects(outputs, img):
    hT, wT, cT = img.shape
    bbox = []
    classIds = []
    confs = []
    for output in outputs:
        for det in output:
            scores = det[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            if confidence > confThreshold:
                w, h = int(det[2] * wT), int(det[3] * hT)
                x, y = int((det[0] * wT) - w/2), int((det[1] * hT) - h/2)
                bbox.append([x, y, w, h])
                classIds.append(classId)
                confs.append(float(confidence))
    indices = cv2.dnn.NMSBoxes(bbox, confs, confThreshold, nmsThreshold)
    for i in indices:
        i = i[0]
        box = bbox[i]
        getVehicleCount(bbox, classNames[classIds[i]])
        x, y, w, h = box[0], box[1], box[2], box[3]
        cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 255), 1)
        cv2.putText(img, f'{classNames[classIds[i]].upper()} {int(confs[i]*100)}%', (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 2)

while True:
    success, img = cap.read()
    blob = cv2.dnn.blobFromImage(img, 1/255, (whT, whT), [0, 0, 0], 1, crop=False)
    net.setInput(blob)
    layerNames = net.getLayerNames()
    outputnames = [layerNames[i[0]-1] for i in net.getUnconnectedOutLayers()]
    # print(outputnames)
    outputs = net.forward(outputnames)
    findObjects(outputs, img)
    cv2.imshow('Image', img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
With this code, a single vehicle is sometimes counted up to 50 times, depending on its size, which is highly inaccurate. How can I create an ROI so that a detected vehicle is counted only once, at the moment it passes that point?
First, I would recommend that you consider using a visual tracker to track each detected rectangle. This is important even if you have an ROI to crop the image close to your counting zone/line, because even if the ROI is localized, the detection might still blink a couple of times, causing a miscount. That is especially likely if another vehicle can enter the ROI while the first one is still passing it.
I recommend the easy-to-use tracker provided by the widely used dlib library. Please refer to this example on how to use it.
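A minimal sketch of driving that tracker, with a placeholder video path and a placeholder initial detection box:

import cv2
import dlib

cap = cv2.VideoCapture('traffic.mp4')  # assumed video source
ok, frame = cap.read()
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

# (x, y, w, h) would come from your YOLO detection
x, y, w, h = 100, 150, 80, 60
tracker = dlib.correlation_tracker()
tracker.start_track(rgb, dlib.rectangle(x, y, x + w, y + h))

while True:
    ok, frame = cap.read()
    if not ok:
        break
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    tracker.update(rgb)
    pos = tracker.get_position()
    center = (int((pos.left() + pos.right()) / 2),
              int((pos.top() + pos.bottom()) / 2))
    # feed `center` into the line-crossing test described below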
Instead of counting detections within an ROI, you should define an ROI line (within your ROI). Then track the detection rectangles' centers in each frame, and increase your counter once a rectangle center crosses the ROI line.
Regarding how to count a rectangle passing the ROI line:

- Select two points to define your ROI line.
- Use your points to find the parameters of the general line formula ax + by + c = 0.
- For each frame, plug the rectangle center coordinates into the formula and keep track of the sign of the result.
- If the sign of the result changes, the rectangle center has crossed the line.
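A short sketch of that sign test, with placeholder line endpoints and centers:

# two points defining the ROI line (placeholder values)
p1, p2 = (100, 400), (500, 380)

# general line formula ax + by + c = 0 derived from the two points
a = p2[1] - p1[1]
b = p1[0] - p2[0]
c = -(a * p1[0] + b * p1[1])

def side_of_line(center):
    # the sign of ax + by + c tells which side of the line the point is on
    v = a * center[0] + b * center[1] + c
    return 1 if v > 0 else (-1 if v < 0 else 0)

vehicle_count = 0
prev_sign = side_of_line((320, 300))  # tracked center, previous frame
curr_sign = side_of_line((330, 420))  # tracked center, current frame
if prev_sign != 0 and curr_sign != 0 and prev_sign != curr_sign:
    vehicle_count += 1  # the center crossed the ROI line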
I cleaned up a license plate image so I can read the characters. Now I am stuck at the part where I must segment the characters.
For the plate-cleaning phase I go from this:
to this:
Now the idea is to segment the characters and then read them with a neural network that I developed. For the segmentation I use the code below, but I still don't understand why it doesn't work:
import cv2
import matplotlib.pyplot as plt

# Create sort_contours() function to grab the contour of each digit from left to right
def sort_contours(cnts, reverse=False):
    i = 0
    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    (cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes),
                                        key=lambda b: b[1][i], reverse=reverse))
    return cnts

# `binary`, `plate_image` and `thre_mor` come from the earlier cleaning stage
cont, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# create a copy "test_roi" of plate_image to draw bounding boxes on
test_roi = plate_image.copy()

# Initialize a list which will be used to append character images
crop_characters = []

# define standard width and height of a character
digit_w, digit_h = 40, 80

for c in sort_contours(cont):
    (x, y, w, h) = cv2.boundingRect(c)
    ratio = h / w
    if 1 <= ratio <= 3.5:  # Only select contours with the defined ratio
        if h / plate_image.shape[0] >= 0.1:  # Select contours taller than 10% of the plate
            # Draw a bounding box around the digit
            cv2.rectangle(test_roi, (x, y), (x + w, y + h), (0, 255, 0), 2)

            # Separate the number and give a prediction
            curr_num = thre_mor[y:y+h, x:x+w]
            curr_num = cv2.resize(curr_num, dsize=(digit_w, digit_h))
            _, curr_num = cv2.threshold(curr_num, 220, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            crop_characters.append(curr_num)

print("Detect {} letters...".format(len(crop_characters)))

fig = plt.figure(figsize=(10, 6))
plt.axis(False)
plt.imshow(test_roi)
How could I make the implementation correct? Any help is welcome!
If you want to label them with a neural network, the first clean-up step is not needed. If you look into region proposal neural networks (R-CNN, Fast R-CNN, Faster R-CNN, YOLO, Mask R-CNN, etc.), they will do the segmentation and classification in one go.
If you do want to go for segmentation first, look into connected components. It will select all the positive pixels that are connected together.
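A minimal sketch of the connected-components route, assuming binary is your cleaned, binarized plate (white characters on black):

import cv2

binary = cv2.imread('plate_binary.png', cv2.IMREAD_GRAYSCALE)  # assumed path
num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(binary, connectivity=8)

characters = []
for i in range(1, num_labels):  # label 0 is the background
    x, y, w, h, area = stats[i]
    if area > 50:  # drop small specks; tune for your image scale
        characters.append((x, y, w, h))

# sort left to right so the characters read in order
characters.sort(key=lambda b: b[0])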
While building an OCR pipeline for automatic data extraction from a fairly messy scanned document, I got stuck at the image preprocessing stage, where I need to crop all cells from a table before running Tesseract on them. One major complication is the need to somehow label rows and columns, since the extracted data needs structure to hold any meaning at all. Additionally, the ability to extract only specific rows and/or columns would make my pipeline much faster when working with several documents of similar composition, which is its expected use.
Here is the code I currently use, almost directly taken from an open-source box detection algorithm by Kanan Vyas found earlier on GitHub:
import cv2
import numpy as np
import os
import glob

def sort_contours(cnts, method="left-to-right"):
    reverse = False
    i = 0
    if method == "right-to-left" or method == "bottom-to-top":
        reverse = True
    if method == "top-to-bottom" or method == "bottom-to-top":
        i = 1
    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    (cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes),
                                        key=lambda b: b[1][i], reverse=reverse))
    return (cnts, boundingBoxes)

def box_extraction(img_for_box_extraction_path, cropped_dir_path):
    img = cv2.imread(img_for_box_extraction_path, 0)
    img[0:img.shape[0], 0:5] = 255  # white out the left margin (grayscale, so a scalar)
    (thresh, img_bin) = cv2.threshold(img, 128, 255,
                                      cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    img_bin = 255 - img_bin
    cv2.imwrite("Image_bin.jpg", img_bin)

    kernel_length = np.array(img).shape[1] // 40

    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_length))
    hori_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_length, 1))
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))

    img_temp1 = cv2.erode(img_bin, vertical_kernel, iterations=3)
    vertical_lines_img = cv2.dilate(img_temp1, vertical_kernel, iterations=3)
    cv2.imwrite("vertical_lines.jpg", vertical_lines_img)

    img_temp2 = cv2.erode(img_bin, hori_kernel, iterations=3)
    horizontal_lines_img = cv2.dilate(img_temp2, hori_kernel, iterations=3)
    cv2.imwrite("horizontal_lines.jpg", horizontal_lines_img)

    alpha = 0.5
    beta = 1.0 - alpha
    img_final_bin = cv2.addWeighted(vertical_lines_img, alpha, horizontal_lines_img, beta, 0.0)
    img_final_bin = cv2.erode(~img_final_bin, kernel, iterations=2)
    (thresh, img_final_bin) = cv2.threshold(img_final_bin, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    cv2.imwrite("img_final_bin.png", img_final_bin)

    # OpenCV 3 signature; drop `im2` for OpenCV 4
    im2, contours, hierarchy = cv2.findContours(
        img_final_bin, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    (contours, boundingBoxes) = sort_contours(contours, method="top-to-bottom")

    idx = 0
    for c in contours:
        x, y, w, h = cv2.boundingRect(c)
        if (w > 30 and h > 20) and w > h and h < 150:
            idx += 1
            new_img = img[y:y+h, x:x+w]
            cv2.imwrite(cropped_dir_path + str(idx) + '.png', new_img)

    cv2.drawContours(img, contours, -1, (0, 0, 255), 3)
    cv2.imwrite("./Temp/img_contour.jpg", img)

output_dir = "./Cropped/"
files = glob.glob(output_dir + "*")
for f in files:
    os.remove(f)

box_extraction("./prototype/page_cropped.png", output_dir)
The problem with this approach is in the sorting: the simple "directional" directives used here fail to preserve the table structure, especially if the table is somewhat skewed in the source image because of the trade-off between dewarping table borders and preserving the table contents for OCR. If it matters, I do not currently expect to deal with table cells that occupy more than one row or column.
My actual image contains sensitive business data, so I've made this dummy picture with GIMP. It should be enough for demonstration purposes; you only need to point the box_extraction() function at it.
Given that the incomplete cells to the right should be ignored by the box extraction algorithm, I expected to get 9 x 4 = 36 images named "1.png" (with cell 0,0), "2.png" (0,1), etc., each set of 4 corresponding to the cells of a single row (it should then be possible to get all cells of a single column by selecting its header cell in the first row and every fourth image thereafter).
However, right now the output images end up arranged in a very weird order, with "1.png" to "4.png" holding the cells of the first row in reverse order, "5.png" the first cell of the second row, "6.png" the last cell of the second row, and the pattern collapsing after that.
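One way to restore row order is to cluster the detected boxes by their y coordinate and then sort each cluster by x. The snippet below (apparently working on a detector output with one [x, y, w, h, score] entry per box) groups boxes whose y coordinates fall within a plus/minus 6 px tolerance into the same row: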
contours = [[1727.0220947265625, 151.22364807128906, 761.0980224609375, 91.216552734375, 0.9920176267623901], [1340.4588623046875, 1518.04248046875, 270.90380859375, 71.301025390625, 0.9890508651733398], [2110.046630859375, 1843.77099609375, 278.02294921875, 74.4261474609375, 0.9886908531188965], [2055.577392578125, 1226.511474609375, 311.0478515625, 82.458740234375, 0.984758734703064], [186.29859924316406, 1517.8702392578125, 569.6539764404297, 76.9754638671875, 0.9835792183876038], [1354.387451171875, 1321.7530517578125, 240.595947265625, 72.9495849609375, 0.9832095503807068], [1325.5714111328125, 1420.0745849609375, 285.83837890625, 73.9566650390625, 0.9797852635383606], [2059.18212890625, 1319.1517333984375, 308.333984375, 82.51513671875, 0.9795728325843811], [1232.687744140625, 631.7418823242188, 312.6070556640625, 77.5128173828125, 0.9789395928382874], [2079.22216796875, 1416.3896484375, 283.681640625, 81.77978515625, 0.9754523038864136], [321.4118347167969, 636.4778442382812, 329.8473205566406, 81.48944091796875, 0.9666035771369934], [1353.6107177734375, 1227.5003662109375, 238.04345703125, 75.3751220703125, 0.9659966826438904], [1228.7125244140625, 722.1092529296875, 316.5693359375, 69.8770751953125, 0.960330069065094], [2066.78076171875, 1511.6800537109375, 296.3779296875, 80.0389404296875, 0.9578678607940674], [163.09405517578125, 1417.7535400390625, 610.5952758789062, 83.1680908203125, 0.930923342704773], [1923.8992919921875, 640.7562255859375, 259.6851806640625, 74.0850830078125, 0.9276629090309143], [156.9827880859375, 1224.2708740234375, 676.9793701171875, 87.8233642578125, 0.9211469292640686], [168.77809143066406, 1322.410888671875, 620.0236663818359, 80.5582275390625, 0.8804788589477539], [1716.60595703125, 1414.467041015625, 283.1617431640625, 81.5452880859375, 0.8343544602394104], [1718.3673095703125, 1510.7855224609375, 304.5853271484375, 81.029541015625, 0.7616285681724548], [1718.100830078125, 1226.4912109375, 287.2305908203125, 78.2430419921875, 0.745871365070343], [1722.0452880859375, 1319.096923828125, 277.3297119140625, 81.481201171875, 0.7207437753677368]]
sorted_contours = []
sorted_by_height = sorted(contours, key=lambda x: x[1])
for index, i in enumerate(sorted_by_height):
    result = list(filter(lambda x: x[1] > i[1] - 6 and x[1] < i[1] + 6 and i not in sorted_contours,
                         sorted_by_height[index:]))
    sorted_contours = sorted_contours + sorted(result, key=lambda x: x[0])
print(sorted_contours)
I'm trying to split an image into several sub-images with OpenCV by identifying templates of the original image and then copying the regions where I matched those templates. I'm a TOTAL newbie to OpenCV! I've identified the sub-images using:
result = cv2.matchTemplate(img, template, cv2.TM_CCORR_NORMED)
After some cleanup I get a list of tuples called points, over which I iterate to show the rectangles. tw and th are the template width and height, respectively.
count = 0
for pt in points:
    re = cv2.rectangle(img, pt, (pt[0] + tw, pt[1] + th), 0, 2)
    print('%s, %s' % (str(pt[0]), str(pt[1])))
    count += 1
What I would like to accomplish is to save the octagons (https://dl.dropbox.com/u/239592/region01.png) into separate files.
How can I do this? I've read something about contours, but I'm not sure how to use them. Ideally I would like to contour the octagon.
Thanks a lot for your help!
If template matching is working for you, stick to it. For instance, I considered the following template:
Then, we can pre-process the input in order to make it binary and discard small components. After this step, the template matching is performed. Then it is a matter of filtering the matches to discard ones that are too close together (I've used a dummy method for that, so if there are too many matches you could see it taking some time). After we decide which points are far apart (and thus identify different hexagons), we can make minor adjustments to them in the following manner:
Sort by y-coordinate;
If two adjacent items start at a y-coordinate that is too close, then set them both to the same y-coordinate.
Now you can sort this point list in an appropriate order such that the crops are done in raster order. The cropping part is easily achieved using slicing provided by numpy.
import sys
import cv2
import numpy

outbasename = 'hexagon_%02d.png'

img = cv2.imread(sys.argv[1])
template = cv2.cvtColor(cv2.imread(sys.argv[2]), cv2.COLOR_BGR2GRAY)
theight, twidth = template.shape[:2]

# Binarize the input based on the saturation and value.
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
saturation = hsv[:, :, 1]
value = hsv[:, :, 2]
value[saturation > 35] = 255
value = cv2.threshold(value, 0, 255, cv2.THRESH_OTSU)[1]
# Pad the image.
value = cv2.copyMakeBorder(255 - value, 3, 3, 3, 3, cv2.BORDER_CONSTANT, value=0)

# Discard small components.
img_clean = numpy.zeros(value.shape, dtype=numpy.uint8)
contours, _ = cv2.findContours(value, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for i, c in enumerate(contours):
    area = cv2.contourArea(c)
    if area > 500:
        cv2.drawContours(img_clean, contours, i, 255, 2)

def closest_pt(a, pt):
    if not len(a):
        return (float('inf'), float('inf'))
    d = a - pt
    return a[numpy.argmin((d * d).sum(1))]

match = cv2.matchTemplate(img_clean, template, cv2.TM_CCORR_NORMED)

# Filter matches.
threshold = 0.8
dist_threshold = twidth / 1.5
loc = numpy.where(match > threshold)
ptlist = numpy.zeros((len(loc[0]), 2), dtype=int)
count = 0
print("%d matches" % len(loc[0]))
for pt in zip(*loc[::-1]):
    cpt = closest_pt(ptlist[:count], pt)
    dist = ((cpt[0] - pt[0]) ** 2 + (cpt[1] - pt[1]) ** 2) ** 0.5
    if dist > dist_threshold:
        ptlist[count] = pt
        count += 1

# Adjust points (could do for the x coords too).
ptlist = ptlist[:count]
view = ptlist.ravel().view([('x', int), ('y', int)])
view.sort(order=['y', 'x'])
for i in range(1, ptlist.shape[0]):
    prev, curr = ptlist[i - 1], ptlist[i]
    if abs(curr[1] - prev[1]) < 5:
        y = min(curr[1], prev[1])
        curr[1], prev[1] = y, y

# Crop in raster order.
view.sort(order=['y', 'x'])
for i, pt in enumerate(ptlist, start=1):
    cv2.imwrite(outbasename % i,
                img[pt[1]-2:pt[1]+theight-2, pt[0]-2:pt[0]+twidth-2])
    print('Wrote %s' % (outbasename % i))
If you want only the contours of the hexagons, then crop on img_clean instead of img (but then it is pointless to sort the hexagons in raster order).
Here is a representation of the different regions that would be cut for your two examples without modifying the code above:
I am sorry, I didn't understand from your question how you relate matchTemplate and contours.
Anyway, below is a small technique using contours. It assumes that your other images are like the one you provided. I am not sure if it works with those other images, but I think it will help you get started. Try this yourself and make the necessary adjustments and modifications.
What I did:
1 - I needed the edges of the octagons, so I thresholded the image using Otsu and applied dilation and erosion (or use any method you like that works well for all your images; beware of the edges at the left edge of the image).
2 - Then found contours (more about contours: http://goo.gl/r0ID0).
3 - For each contour, found its convex hull, its area (A) and its perimeter (P).
4 - For a perfect octagon, P*P/A is approximately 13.25 (for a regular octagon with side s, P = 8s and A = 2(1+sqrt(2))*s*s, so P*P/A = 64/(2(1+sqrt(2))) which is about 13.25). I used that here to select the octagons, cut them out and save them.
5 - You can see that cropping also removes some edges of the octagon. If you want to keep them, adjust the cropping dimensions.
Code:
import cv2
import numpy as np

img = cv2.imread('region01.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
thresh = cv2.dilate(thresh, None, iterations=2)
thresh = cv2.erode(thresh, None)

contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

number = 0

for cnt in contours:
    hull = cv2.convexHull(cnt)
    area = cv2.contourArea(hull)
    P = cv2.arcLength(hull, True)

    if (area != 0) and (13 <= P**2/area <= 14):
        # cv2.drawContours(img, [hull], 0, 255, 3)
        x, y, w, h = cv2.boundingRect(hull)
        number = number + 1
        roi = img[y:y+h, x:x+w]
        cv2.imshow(str(number), roi)
        cv2.imwrite("1" + str(number) + ".jpg", roi)

cv2.imshow('img', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
Those 6 octagons will be stored as separate files.
Hope it helps!