I've done quite a bit of searching on image stitching in Python, and most of what I found is about panoramic images, warping and rotating the images to combine them into one.
What I'm working with are computer-generated images, so they are pixel-exact and can be template matched without a problem; it will always be a flat 2D translation with no need for warping.
Basically, I have pieces of a zoomed-in map and I want to build one massive image out of these small pictures. All the images used are here: https://imgur.com/a/HZIeT3z
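By "template matched" I mean the tiles line up pixel for pixel, so a plain template match can already find the translation between two overlapping tiles. A minimal sketch of that idea, for illustration only (my actual attempt, using ORB, follows below; the paths and patch size are placeholders):

import cv2

# Find where a corner patch of one tile appears inside another tile
img1 = cv2.imread("images/tile1.png")   # placeholder paths
img2 = cv2.imread("images/tile2.png")

patch = img2[0:100, 0:100]              # assumes img2 is at least 100x100
res = cv2.matchTemplate(img1, patch, cv2.TM_CCOEFF_NORMED)
_, max_val, _, max_loc = cv2.minMaxLoc(res)

# max_loc is where img2's top-left corner sits inside img1 (if max_val is high)
print(max_loc, max_val)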
import os
import numpy as np
import cv2
def stitchImagesWithoutWarp(img1, img2):
orb = cv2.ORB_create()
kp1, des1 = orb.detectAndCompute(img1,None)
kp2, des2 = orb.detectAndCompute(img2,None)
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
matches = bf.match(des1,des2)
matches = sorted(matches, key = lambda x:x.distance)
good_matches = matches[:10]
src_pts = np.float32([kp1[m.queryIdx].pt for m in good_matches]).reshape(-1,1,2)
dst_pts = np.float32([kp2[m.trainIdx].pt for m in good_matches]).reshape(-1,1,2)
start = (abs(int(dst_pts[0][0][0]-src_pts[0][0][0])), abs(int(dst_pts[0][0][1]-src_pts[0][0][1])))
h1, w1 = img1.shape[:2]
h2, w2 = img2.shape[:2]
vis = np.zeros((start[1]+h1,start[0]+w1,3), np.uint8)
vis[start[1]:start[1]+h1, start[0]:start[0]+w1, :3] = img1
vis[:h2, :w2, :3] = img2
return vis
imgList = []
for it in os.scandir("images"):
imgList.append(cv2.imread(it.path))
vis = stitchImagesWithoutWarp(imgList[0],imgList[1])
for index in range(2,len(imgList)):
cv2.imshow("result", vis)
cv2.waitKey()
vis = stitchImagesWithoutWarp(vis,imgList[index])
By running this code I can successfully stitch the first four images together, like this:
But once I stitch the fifth image, the match seems wrong and the placement is incorrect, even though I always take the best match by distance with NORM_HAMMING. This is the result:
The thing is, this is the first image, in this order, whose best match point (the start variable) is negative on the x axis. Here are the matching points, in the imgur order:
(7, 422)
(786, 54)
(394, 462)
(-350, 383)
I tried switching which image goes on top and writing special-case code for negative matches, but I believe I was only making things worse.
Also, per the docs, the first image should be the query and the second the train (target) image, but I couldn't get it to work by swapping the vis variable in the function parameters.
The main issue was that when the matched points fell off the canvas (negative values), offsets were needed to compensate. I also extended the code a little and verified that the matches were legitimate, by checking that all the calculated displacements were, on average, close to the first pick from the brute-force matcher.
With an average of 2 MB per image and no preprocessing (no downscaling or compressing), stitching 9 images together took about 1050 ms on average on my PC, whereas the other algorithms I tested (which warped the images) took around 2-3 seconds just to stitch 2 of them.
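If the timing ever became a problem, one untested idea would be to run the same ORB matching on downscaled copies and scale the resulting offset back up, roughly like this sketch:

scale = 0.5
small1 = cv2.resize(img1, None, fx=scale, fy=scale)
small2 = cv2.resize(img2, None, fx=scale, fy=scale)
# ...run the same ORB matching on small1 and small2 as above, then rescale:
# start = (int(start_small[0] / scale), int(start_small[1] / scale))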
Here is the final code:
import os
import numpy as np
import cv2
def averageTuple(tupleList):
avgX, avgY = 0,0
for tuple in tupleList:
avgX += tuple[0]
avgY += tuple[1]
return (int(avgX/len(tupleList)),int(avgY/len(tupleList)))
def tupleInRange(t1, t2, dif=3):
if t1[0] + dif > t2[0] and t1[0] - dif < t2[0]:
if t1[1] + dif > t2[1] and t1[1] - dif < t2[1]:
return True
return False
def rgbToRGBA(img):
b_channel, g_channel, r_channel = cv2.split(img)
alpha_channel = np.ones(b_channel.shape, dtype=b_channel.dtype) * 255
return cv2.merge((b_channel, g_channel, r_channel, alpha_channel))
def cropAlpha(img,extraRange=0.05):
y, x = img[:, :, 3].nonzero() # get the nonzero alpha coordinates
minx = int(np.min(x)*(1-extraRange))
miny = int(np.min(y)*(1-extraRange))
maxx = int(np.max(x)*(1+extraRange))
maxy = int(np.max(y)*(1+extraRange))
return img[miny:maxy, minx:maxx]
def stitchImagesWithoutWarp(img1, img2):
if len(cv2.split(img1)) != 4:
img1 = rgbToRGBA(img1)
if len(cv2.split(img2)) != 4:
img2 = rgbToRGBA(img2)
orb = cv2.ORB_create()
kp1, des1 = orb.detectAndCompute(img1,None)
kp2, des2 = orb.detectAndCompute(img2,None)
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
matches = bf.match(des1,des2)
matches = sorted(matches, key = lambda x:x.distance)
good_matches = matches[:10]
src_pts = np.float32([kp1[m.queryIdx].pt for m in good_matches]).reshape(-1,1,2)
dst_pts = np.float32([kp2[m.trainIdx].pt for m in good_matches]).reshape(-1,1,2)
pointsList = []
for index in range(0,len(src_pts)):
curPoint = (int(dst_pts[index][0][0]-src_pts[index][0][0])), (int(dst_pts[index][0][1]-src_pts[index][0][1]))
pointsList.append(curPoint)
start = pointsList[0]
avgTuple = averageTuple(pointsList)
if not tupleInRange(start, avgTuple): return img1
h1, w1 = img1.shape[:2]
h2, w2 = img2.shape[:2]
ax = abs(start[0])
ay = abs(start[1])
vis = np.zeros((ay+h1,ax+w1,4), np.uint8)
ofst2 = (ax if start[0]<0 else 0, ay if start[1]<0 else 0)
ofst1 = (0 if start[0]<0 else ax, 0 if start[1]<0 else ay)
vis[ofst1[1]:ofst1[1]+h1, ofst1[0]:ofst1[0]+w1, :4] = img1
vis[ofst2[1]:ofst2[1]+h2, ofst2[0]:ofst2[0]+w2, :4] = img2
return cropAlpha(vis)
imgList = []
for it in os.scandir("images"):
imgList.append(cv2.imread(it.path))
vis = stitchImagesWithoutWarp(imgList[0],imgList[1])
for index in range(2,len(imgList)):
vis = stitchImagesWithoutWarp(vis,imgList[index])
cv2.imwrite("output.png", cropAlpha(vis,0))
Here is the output image (compressed to JPEG for Stack Overflow):
I have created an algorithm that detects the edges of an extruded collagen casing and draws a centerline between these edges on an image. Casing with a centerline.
Here is my code:
import numpy as np
import cv2
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
img = cv2.imread("C:/Users/5.jpg", cv2.IMREAD_GRAYSCALE)
img = cv2.resize(img, (1500, 1200))
#ROI
fromCenter = False
r = cv2.selectROI(img, fromCenter)
imCrop = img[int(r[1]):int(r[1]+r[3]), int(r[0]):int(r[0]+r[2])]
#Operations on an image
_,thresh = cv2.threshold(imCrop,100,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
kernel = np.ones((5,5),np.uint8)
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
blur = cv2.GaussianBlur(opening,(7,7),0)
edges = cv2.Canny(blur, 0,20)
#Edges localization, packing coords into a list
indices = np.where(edges != [0])
coordinates = list(zip(indices[1], indices[0]))
num = len(coordinates)
#Separating into top and bot edge
bot_cor = coordinates[:int(num/2)]
top_cor = coordinates[-int(num/2):]
#Converting to arrays, sorting
a, b = np.array(top_cor), np.array(bot_cor)
a, b = a[a[:,0].argsort()], b[b[:,0].argsort()]
#Edges approximation by a 5th degree polynomial
min_a_x, max_a_x = np.min(a[:,0]), np.max(a[:,0])
new_a_x = np.linspace(min_a_x, max_a_x, imCrop.shape[1])
a_coefs = np.polyfit(a[:,0],a[:,1], 5)
new_a_y = np.polyval(a_coefs, new_a_x)
min_b_x, max_b_x = np.min(b[:,0]), np.max(b[:,0])
new_b_x = np.linspace(min_b_x, max_b_x, imCrop.shape[1])
b_coefs = np.polyfit(b[:,0],b[:,1], 5)
new_b_y = np.polyval(b_coefs, new_b_x)
#Defining a centerline
midx = [np.average([new_a_x[i], new_b_x[i]], axis = 0) for i in range(imCrop.shape[1])]
midy = [np.average([new_a_y[i], new_b_y[i]], axis = 0) for i in range(imCrop.shape[1])]
plt.figure(figsize=(16,8))
plt.title('Cross section')
plt.xlabel('Length of the casing', fontsize=18)
plt.ylabel('Width of the casing', fontsize=18)
plt.plot(new_a_x, new_a_y,c='black')
plt.plot(new_b_x, new_b_y,c='black')
plt.plot(midx, midy, '-', c='blue')
plt.show()
#Converting coords type to a list (plotting purposes)
coords = list(zip(midx, midy))
points = list(np.int_(coords))
mask = np.zeros((imCrop.shape[:2]), np.uint8)
mask = edges
#Plotting
for point in points:
cv2.circle(mask, tuple(point), 1, (255,255,255), -1)
for point in points:
cv2.circle(imCrop, tuple(point), 1, (255,255,255), -1)
cv2.imshow('imCrop', imCrop)
cv2.imshow('mask', mask)
cv2.waitKey(0)
cv2.destroyAllWindows()
Now I would like to sum up the intensities of the pixels in the region between the top edge and the centerline (and do the same for the region between the centerline and the bottom edge).
Is there any way to limit the ROI to the region between the detected edges and split it into two regions based on the calculated centerline?
Or is there any way to access the pixels contained between an edge and the centerline based on their coordinates?
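For instance, would something like the following work? The idea (only a rough, untested sketch built on the arrays computed above) is to close the fitted top edge and the centerline into one polygon, fill it into a mask with cv2.fillPoly, and sum the masked pixels:

# Rough, untested sketch using new_a_x/new_a_y, midx/midy and imCrop from above
top_curve = np.column_stack((new_a_x, new_a_y))      # fitted top edge
center_curve = np.column_stack((midx, midy))          # centerline
polygon = np.int32([np.vstack((top_curve, center_curve[::-1]))])  # closed loop

region_mask = np.zeros(imCrop.shape[:2], np.uint8)
cv2.fillPoly(region_mask, polygon, 255)

# Sum of intensities between the top edge and the centerline
upper_sum = cv2.sumElems(cv2.bitwise_and(imCrop, imCrop, mask=region_mask))[0]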
(It's my very first post here, sorry in advance for all the mistakes)
I wrote somewhat naïve code to get masks for the upper and lower parts. My code assumes that the source image will always look like yours: with horizontal stripes.
After applying Canny I get this:
Then I run some loops over the image array to fill the unwanted areas of the image. This is done separately for the upper and lower parts, creating the masks. The results are:
Then you can use these masks to sum only the elements you're interested in, using cv.sumElems.
import cv2 as cv
#open as grayscale image
src = cv.imread("colagen.png",cv.IMREAD_GRAYSCALE)
# apply canny and find contours
threshold = 100
canny_output = cv.Canny(src, threshold, threshold * 2)
# find mask for upper part
mask1 = canny_output.copy()
x, y = canny_output.shape
area = 0
for j in range(y):
area = 0
for i in range(x):
if area == 0:
if mask1[i][j] > 0:
area = 1
continue
else:
mask1[i][j] = 255
elif area == 1:
if mask1[i][j] > 0:
area = 2
else:
continue
else:
mask1[i][j] = 255
mask1 = cv.bitwise_not(mask1)
# find mask for lower part
mask2 = canny_output.copy()
x, y = canny_output.shape
area = 0
for j in range(y):
area = 0
for i in range(x):
if area == 0:
if mask2[-i][j] > 0:
area = 1
continue
else:
mask2[-i][j] = 255
elif area == 1:
if mask2[-i][j] > 0:
area = 2
else:
continue
else:
mask2[-i][j] = 255
mask2 = cv.bitwise_not(mask2)
# apply masks and calculate sum of elements in upper and lower part
sums = [0,0]
(sums[0],_,_,_) = cv.sumElems(cv.bitwise_and(src,mask1))
(sums[1],_,_,_) = cv.sumElems(cv.bitwise_and(src,mask2))
cv.imshow('src',src)
cv.imshow('canny',canny_output)
cv.imshow('mask1',mask1)
cv.imshow('mask2',mask2)
cv.imshow('masked1',cv.bitwise_and(src,mask1))
cv.imshow('masked2',cv.bitwise_and(src,mask2))
cv.waitKey()
Alternatives...
There probably exists some function that fills the areas of the Canny result. I tried cv.fillPoly and cv.floodFill, but didn't manage to make them work easily... but maybe someone else can help you with that.
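For what it's worth, the floodFill route would look roughly like this (a sketch only, assuming the upper edge spans the full image width so it actually bounds the fill, and that the top-left corner is not an edge pixel):

import numpy as np

filled = canny_output.copy()
h, w = filled.shape
ff_mask = np.zeros((h + 2, w + 2), np.uint8)  # floodFill requires a mask 2 px larger
cv.floodFill(filled, ff_mask, (0, 0), 255)    # paints the area connected to the corner

# 'filled' is now white above the upper edge; combining and inverting fills from
# different seed points could reproduce mask1 and mask2 without the loops.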
Edit
I found another way to get the masks with cleaner code, using numpy's np.add.accumulate, then np.clip, and then a modulo operation:
import numpy as np

# first divide canny_output by 255 to get 0's and 1's, then perform
# an accumulate addition for each column. Thus you'll get +1 for every
# line, "painting" areas with 1, 2, 3...
a = np.add.accumulate(canny_output/255,0)
# clip values: anything greater than 2 becomes 2
a = np.clip(a, 0, 2)
# perform a modulo to get areas alternating between 0 and 1; then multiply by 255
a = a%2 * 255
# convert to uint8
mask1 = cv.convertScaleAbs(a)
# to get mask2 (the lower mask) flip the array then do the same as above
a = np.add.accumulate(np.flip(canny_output,0)/255,0)
a = np.clip(a, 0, 2)
a = a%2 * 255
mask2 = cv.convertScaleAbs(np.flip(a,0))
This returns almost the same result. The border of the mask is a little bit different...
I'm a dental student and currently trying to write a script for analyzing and extracting handwritten digits from dental records. I already have a rough version of the script finished but my recognition rate is pretty low. A big problem with analyzing the data is a grid that proves difficult to remove.
Scanned form that I want to analyse (white fields are for anonymity):
Empty form:
I've tried different solutions for this problem (erosion/dilation, the Hough line transform and subtraction of the lines).
Using feature matching and subtracting an empty template currently gives me the best results.
Results:
Eroding and dilating this image gives even better results.
Results:
But this needs new calibration nearly every time I try it.
Do you know of a more elegant solution to my problem?
Could SURF matching give better results?
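For comparison, one common grid-removal direction not shown in my code below is to isolate the grid itself with long, thin morphological kernels and subtract it from the binarized scan. This is only a rough, untested sketch with placeholder kernel sizes and file names:

import cv2

img = cv2.imread("scanned_form.png", cv2.IMREAD_GRAYSCALE)  # placeholder file
_, binary = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# Long, thin kernels pick out horizontal and vertical grid lines
h_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40, 1))
v_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 40))
h_lines = cv2.morphologyEx(binary, cv2.MORPH_OPEN, h_kernel)
v_lines = cv2.morphologyEx(binary, cv2.MORPH_OPEN, v_kernel)

# Remove the detected grid from the binarized scan
grid = cv2.bitwise_or(h_lines, v_lines)
no_grid = cv2.bitwise_and(binary, cv2.bitwise_not(grid))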
Thank you very much!
Here's my code so far:
import cv2
import numpy as np

GOOD_MATCH_PERCENT = 0.15
def match_img_to_template(input_img, template_img, MAX_FEATURES, GOOD_MATCH_PERCENT):
# blurring of the input image
template_img = cv2.GaussianBlur(template_img, (3, 3), cv2.BORDER_DEFAULT)
# equalizing the histogramm of the input image
img_preprocessed = cv2.equalizeHist(input_img)
# ORB Detector
orb = cv2.ORB_create(MAX_FEATURES)
kp1, des1 = orb.detectAndCompute(img_preprocessed, None)
kp2, des2 = orb.detectAndCompute(template_img, None)
# Brute Force Matching
matcher= cv2.DescriptorMatcher_create(cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING)
matches = matcher.match(des1, des2, None)
matches.sort(key=lambda x:x.distance, reverse=False)
# Remove not so good matches
numGoodMatches = int(len(matches) * GOOD_MATCH_PERCENT)
matches = matches[:numGoodMatches]
# Extract location of good matches
points1 = np.zeros((len(matches), 2), dtype=np.float32)
points2 = np.zeros((len(matches), 2), dtype=np.float32)
for i, match in enumerate(matches):
points1[i, :] = kp1[match.queryIdx].pt
points2[i, :] = kp2[match.trainIdx].pt
# Find homography
h, mask = cv2.findHomography(points1, points2, cv2.RANSAC)
# Use homography
height, width = template_img.shape
input_warped = cv2.warpPerspective(input_img, h, (width, height))
ret1, input_warped_thresh = cv2.threshold(input_warped,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
diff = cv2.absdiff(template_img, input_warped_thresh)
ret, diff = cv2.threshold(diff, 20, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C + cv2.THRESH_BINARY)
diff = cv2.equalizeHist(diff)
# Create kernels
kernel1 = np.ones((3,3),np.uint8)
kernel2 = np.ones((6,6), np.uint8)
# erode dilate to remove the grid
diff_erode = cv2.erode(diff,kernel1)
diff_dilated = cv2.dilate(diff_erode,kernel2)
# invert diff_dilate
diff_dilated_inv = cv2.bitwise_not(diff_dilated)
return diff_dilated_inv
I am working on comparing images that are 99% the same, with a 1% difference.
I am capturing an image of the print using a vision camera (mounted on a fixed stand).
I tried all the image comparison approaches I could find: OpenCV, ImageMagick, skimage (the results were 80 to 90 percent accuracy).
Link: “Diff” an image using ImageMagick
Link: How can I quantify difference between two images?
I implemented all the solutions from the questions above to find the difference, but the problem with those algorithms is that they work pixel to pixel. None of them provides a smarter approach to image comparison.
After capturing images of two different prints of the same type, I do the following steps for image comparison.
My code for aligning the misplaced images for maximum similarity is below.
Code for image alignment:
import cv2
import numpy as np
# load image
img = cv2.imread('./photo/image.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # convert to grayscale
retval, thresh_gray = cv2.threshold(gray, 100, maxval=255, type=cv2.THRESH_BINARY_INV)
contours, hierarchy = cv2.findContours(thresh_gray,cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
mx_rect = (0,0,0,0)
mx_area = 0
for cnt in contours:
arect = cv2.minAreaRect(cnt)
area = arect[1][0]*arect[1][1]
if area > mx_area:
mx_rect, mx_area = arect, area
x,y,w,h = cv2.boundingRect(cnt)
# cv2.rectangle(img,(x,y),(x+w,y+h),(0,255,0),8)
roi_1 = img[y:y+h, x:x+w]
cv2.imwrite('./test/Image_rec.jpg', roi_1)
print("shape of cnt: {}".format(cnt.shape))
rect = cv2.minAreaRect(cnt)
print("rect: {}".format(rect))
box = cv2.boxPoints(rect)
box = np.int0(box)
width = int(rect[1][0])
height = int(rect[1][1])
src_pts = box.astype("float32")
dst_pts = np.array([[0, height-1],
[0, 0],
[width-1, 0],
[width-1, height-1]], dtype="float32")
M = cv2.getPerspectiveTransform(src_pts, dst_pts)
warped = cv2.warpPerspective(img, M, (width, height))
cv2.imwrite('./crop_Image_rortate.jpg', warped)
The above code gives the required image, i.e. it tries to align the image and crop the required region, but sometimes it fails as well (2 out of 10 times).
Once the image is cropped, I compare it to find the difference using clustering techniques. My code for the comparison is as follows:
from PIL import Image
import numpy as np
import cv2
import scipy.misc as smp
f1= './Image_1.png'
f2= './Image_2.png'
im1 = Image.open(f1)
im2 = Image.open(f2)
img1= cv2.imread(f1)
img2= cv2.imread(f2)
# print (img1.shape)
# print (img2.shape)
w_1= img1.shape[0]
h_1= img1.shape[1]
W_1 = w_1-1
H_1 = h_1-1
c = 0
X=[]
Y=[]
R=[]
G=[]
B=[]
rgb = im1.convert('RGB')
rgb2 = im2.convert('RGB')
for x in range(H_1):
for y in range(W_1):
r1, g1, b1, = rgb.getpixel((x,y))
t1= r1+g1+b1
i = x
j = y
r2, g2, b2, = rgb2.getpixel((i,j))
t2=r2+g2+b2
d= t1-t2
if d in range (-150,150):
# print (d)
pass
else:
c = c + 1
if (c == 1):
z=y
elif (y == z+1 ):
# print (x,y)
i = x+1
j = y+1
r2, g2, b2, = rgb2.getpixel((i,j))
t2=r2+g2+b2
d= t1-t2
if d in range (-150,150):
# print (d)
pass
else:
X.append(x)
Y.append(y)
R.append(r1)
G.append(g1)
B.append(b1)
z=y
z1=y # to make group of 2.
try:
data = np.zeros( (h_1,w_1,3), dtype=np.uint8 )
length = len(X)
print ("total pixel difference : ",length)
for i in range(length):
data[X[i],Y[i]] = [R[i],G[i],B[i]]
img = Image.fromarray( data, 'RGB' )
img.save('./test/new.png')
img.show()
except:
print ("Error during image creation. ")
The above code tries to implement clustering-based image comparison, and it is also slow.
The comparison skips the first differing pixel in every row even when it is a real difference; it only looks for major differences.
But the problem still remains the same: pixel-to-pixel comparison.
Is there a proper clustering technique that will target the real differences?
I don't want to do pixel-to-pixel image comparison, as it gives incorrect results for me.
I am also open to other techniques for image comparison, if available and not listed above.
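For instance, would something region-based like the structural similarity index from skimage fit better? This is only a rough, untested sketch, assuming the two crops are already aligned and the same size:

import cv2
from skimage.metrics import structural_similarity

img1 = cv2.imread('./Image_1.png', cv2.IMREAD_GRAYSCALE)
img2 = cv2.imread('./Image_2.png', cv2.IMREAD_GRAYSCALE)

score, diff = structural_similarity(img1, img2, full=True)
diff = (diff * 255).astype("uint8")

# Low-similarity areas become blobs; their contours give difference regions
_, thresh = cv2.threshold(diff, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
print("difference regions:", len(contours))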
Image samples:
Image 1:
Image 1
Image 2:
Image 2
Accepted output:
Accepted output
Output after difference:
Output
Thanks.
I am doing a college class project on image processing. This is my original image:
I want to join nearby/overlapping bounding boxes on individual text line images, but I don't know how. My code looks like this so far (thanks to @HansHirse for the help):
import os
import cv2
import numpy as np
from scipy import stats
image = cv2.imread('example.png')
gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
ret,thresh = cv2.threshold(gray,127,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)
#dilation
kernel = np.ones((5,5), np.uint8)
img_dilation = cv2.dilate(thresh, kernel, iterations=1)
#find contours
ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# https://www.pyimagesearch.com/2015/04/20/sorting-contours-using-python-and-opencv/
def sort_contours(cnts, method="left-to-right"):
# initialize the reverse flag and sort index
reverse = False
i = 0
# handle if we need to sort in reverse
if method == "right-to-left" or method == "bottom-to-top":
reverse = True
# handle if we are sorting against the y-coordinate rather than
# the x-coordinate of the bounding box
if method == "top-to-bottom" or method == "bottom-to-top":
i = 1
# construct the list of bounding boxes and sort them from top to
# bottom
boundingBoxes = [cv2.boundingRect(c) for c in cnts]
(cnts, boundingBoxes) = zip(*sorted(zip(cnts, boundingBoxes),
key=lambda b: b[1][i], reverse=reverse))
# return the list of sorted contours and bounding boxes
return (cnts, boundingBoxes)
sortedctrs,sortedbbs=sort_contours(ctrs)
xyminmax=[]
for cnt in sortedctrs:
x, y, w, h = cv2.boundingRect(cnt)
xyminmax.append([x,y,x+w,y+h])
distances=[]
for i in range(len(xyminmax)):
try:
first_xmax = xyminmax[i][2]
second_xmin = xyminmax[i + 1][0]
distance=abs(second_xmin-first_xmax)
distances.append(distance)
except IndexError:
pass
THRESHOLD=stats.mode(distances, axis=None)[0][0]
new_rects=[]
for i in range(len(xyminmax)):
try:
# [xmin,ymin,xmax,ymax]
first_ymin=xyminmax[i][1]
first_ymax=xyminmax[i][3]
second_ymin=xyminmax[i+1][1]
second_ymax=xyminmax[i+1][3]
first_xmax = xyminmax[i][2]
second_xmin = xyminmax[i+1][0]
firstheight=abs(first_ymax-first_ymin)
secondheight=abs(second_ymax-second_ymin)
distance=abs(second_xmin-first_xmax)
if distance<THRESHOLD:
new_xmin=xyminmax[i][0]
new_xmax=xyminmax[i+1][2]
if first_ymin>second_ymin:
new_ymin=second_ymin
else:
new_ymin = first_ymin
if firstheight>secondheight:
new_ymax = first_ymax
else:
new_ymax = second_ymax
new_rects.append([new_xmin,new_ymin,new_xmax,new_ymax])
else:
new_rects.append(xyminmax[i])
except IndexError:
pass
for rect in new_rects:
cv2.rectangle(image, (rect[0], rect[1]), (rect[2], rect[3]), (121, 11, 189), 2)
cv2.imwrite("result.png",image)
which produces this image as a result:
I want to join very close or overlapping bounding boxes such as these
into a single bounding box so the formula doesn't get split into single characters. I have tried using cv2.groupRectangles, but the printed result was just NULL.
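As far as I can tell, cv2.groupRectangles drops every rectangle that has no similar partner when groupThreshold=1, which would explain the empty output for a list of unique boxes. A commonly suggested workaround is to duplicate the list so each box has a partner, e.g.:

import cv2

boxes = [[10, 10, 50, 20], [70, 12, 40, 18]]  # [x, y, w, h], placeholder values
grouped, weights = cv2.groupRectangles(boxes + boxes, groupThreshold=1, eps=0.2)
print(grouped)

Even then it only merges rectangles that are close in position and size, so it may still not join boxes that are merely near each other.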
So, here comes my solution. I partially modified your (initial) code to my preferred naming, etc. Also, I commented all the stuff I added.
import cv2
import numpy as np
image = cv2.imread('images/example.png')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
kernel = np.ones((5, 5), np.uint8)
img_dilated = cv2.dilate(thresh, kernel, iterations = 1)
cnts, _ = cv2.findContours(img_dilated.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# Array of initial bounding rects
rects = []
# Bool array indicating which initial bounding rect has
# already been used
rectsUsed = []
# Just initialize bounding rects and set all bools to false
for cnt in cnts:
rects.append(cv2.boundingRect(cnt))
rectsUsed.append(False)
# Sort bounding rects by x coordinate
def getXFromRect(item):
return item[0]
rects.sort(key = getXFromRect)
# Array of accepted rects
acceptedRects = []
# Merge threshold for x coordinate distance
xThr = 5
# Iterate all initial bounding rects
for supIdx, supVal in enumerate(rects):
if (rectsUsed[supIdx] == False):
# Initialize current rect
currxMin = supVal[0]
currxMax = supVal[0] + supVal[2]
curryMin = supVal[1]
curryMax = supVal[1] + supVal[3]
# This bounding rect is used
rectsUsed[supIdx] = True
# Iterate all initial bounding rects
# starting from the next
for subIdx, subVal in enumerate(rects[(supIdx+1):], start = (supIdx+1)):
# Initialize merge candidate
candxMin = subVal[0]
candxMax = subVal[0] + subVal[2]
candyMin = subVal[1]
candyMax = subVal[1] + subVal[3]
# Check if x distance between current rect
# and merge candidate is small enough
if (candxMin <= currxMax + xThr):
# Reset coordinates of current rect
currxMax = candxMax
curryMin = min(curryMin, candyMin)
curryMax = max(curryMax, candyMax)
# Merge candidate (bounding rect) is used
rectsUsed[subIdx] = True
else:
break
# No more merge candidates possible, accept current rect
acceptedRects.append([currxMin, curryMin, currxMax - currxMin, curryMax - curryMin])
for rect in acceptedRects:
img = cv2.rectangle(image, (rect[0], rect[1]), (rect[0] + rect[2], rect[1] + rect[3]), (121, 11, 189), 2)
cv2.imwrite("images/result.png", image)
For your example
I get the following output
Now, you have to find a proper threshold to meet your expectations. Maybe there is even some more work to do, especially to get the whole formula, since the distances don't vary that much.
Disclaimer: I'm new to Python in general, and especially to the Python API of OpenCV (C++ for the win). Comments, improvements, and highlighting Python no-gos are highly welcome!
Here is a slightly different approach, using the OpenCV Wrapper library.
import cv2
import opencv_wrapper as cvw
image = cv2.imread("example.png")
gray = cvw.bgr2gray(image)
thresh = cvw.threshold_otsu(gray, inverse=True)
# dilation
img_dilation = cvw.dilate(thresh, 5)
# Find contours
contours = cvw.find_external_contours(img_dilation)
# Map contours to bounding rectangles, using bounding_rect property
rects = map(lambda c: c.bounding_rect, contours)
# Sort rects by top-left x (rect.x == rect.tl.x)
sorted_rects = sorted(rects, key=lambda r: r.x)
# Distance threshold
dt = 5
# List of final, joined rectangles
final_rects = [sorted_rects[0]]
for rect in sorted_rects[1:]:
prev_rect = final_rects[-1]
# Shift rectangle `dt` back, to find out if they overlap
shifted_rect = cvw.Rect(rect.tl.x - dt, rect.tl.y, rect.width, rect.height)
intersection = cvw.rect_intersection(prev_rect, shifted_rect)
if intersection is not None:
# Join the two rectangles
min_y = min((prev_rect.tl.y, rect.tl.y))
max_y = max((prev_rect.bl.y, rect.bl.y))
max_x = max((prev_rect.br.x, rect.br.x))
width = max_x - prev_rect.tl.x
height = max_y - min_y
new_rect = cvw.Rect(prev_rect.tl.x, min_y, width, height)
# Add new rectangle to final list, making it the new prev_rect
# in the next iteration
final_rects[-1] = new_rect
else:
# If no intersection, add the box
final_rects.append(rect)
for rect in sorted_rects:
cvw.rectangle(image, rect, cvw.Color.MAGENTA, line_style=cvw.LineStyle.DASHED)
for rect in final_rects:
cvw.rectangle(image, rect, cvw.Color.GREEN, thickness=2)
cv2.imwrite("result.png", image)
And the result
The green boxes are the final result, while the magenta boxes are the original ones.
I used the same threshold as @HansHirse.
The equals sign still needs some work. Either a higher dilation kernel size or use the same technique vertically.
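In plain OpenCV terms (not the OpenCV Wrapper API), the vertical variant could be as simple as dilating with a kernel that is taller than it is wide, so both bars of the equals sign end up in one contour. Untested, with a placeholder size:

import numpy as np

tall_kernel = np.ones((11, 5), np.uint8)
img_dilation = cv2.dilate(thresh, tall_kernel, iterations=1)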
Disclosure: I am the author of OpenCV Wrapper.
Easy-to-read solution:
import cv2
import imutils

contours = get_contours(frame)
boxes = [cv2.boundingRect(c) for c in contours]
boxes = merge_boxes(boxes, x_val=40, y_val=20) # Where x_val and y_val are axis thresholds
def get_contours(frame): # Returns a list of contours
contours = cv2.findContours(frame, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = imutils.grab_contours(contours)
return contours
def merge_boxes(boxes, x_val, y_val):
size = len(boxes)
if size < 2:
return boxes
if size == 2:
if boxes_mergeable(boxes[0], boxes[1], x_val, y_val):
boxes[0] = union(boxes[0], boxes[1])
del boxes[1]
return boxes
boxes = sorted(boxes, key=lambda r: r[0])
i = size - 2
while i >= 0:
if boxes_mergeable(boxes[i], boxes[i + 1], x_val, y_val):
boxes[i] = union(boxes[i], boxes[i + 1])
del boxes[i + 1]
i -= 1
return boxes
def boxes_mergeable(box1, box2, x_val, y_val):
(x1, y1, w1, h1) = box1
(x2, y2, w2, h2) = box2
return max(x1, x2) - min(x1, x2) - minx_w(x1, w1, x2, w2) < x_val \
and max(y1, y2) - min(y1, y2) - miny_h(y1, h1, y2, h2) < y_val
def minx_w(x1, w1, x2, w2):
return w1 if x1 <= x2 else w2
def miny_h(y1, h1, y2, h2):
return h1 if y1 <= y2 else h2
def union(a, b):
x = min(a[0], b[0])
y = min(a[1], b[1])
w = max(a[0] + a[2], b[0] + b[2]) - x
h = max(a[1] + a[3], b[1] + b[3]) - y
return x, y, w, h
If you have bounding boxes and want to merge them along both the X and Y directions, use this snippet. Adjust x_pixel_value and y_pixel_value to your preferences; note that you need to have the bounding boxes already.
import cv2
img = cv2.imread(your image path)
x_pixel_value = 5
y_pixel_value = 6
bboxes_list = [] # your bounding boxes list
rects_used = []
for i in bboxes_list:
rects_used.append(False)
end_bboxes_list = []
for enum,i in enumerate(bboxes_list):
if rects_used[enum] == True:
continue
xmin = i[0]
xmax = i[2]
ymin = i[1]
ymax = i[3]
for enum1,j in enumerate(bboxes_list[(enum+1):], start = (enum+1)):
i_xmin = j[0]
i_xmax = j[2]
i_ymin = j[1]
i_ymax = j[3]
if rects_used[enum1] == False:
if abs(ymin - i_ymin) < x_pixel_value:
if abs(xmin-i_xmax) < y_pixel_value or abs(xmax-i_xmin) < y_pixel_value:
rects_used[enum1] = True
xmin = min(xmin,i_xmin)
xmax = max(xmax,i_xmax)
ymin = min(ymin,i_ymin)
ymax = max(ymax,i_ymax)
final_box = [xmin,ymin,xmax,ymax]
end_bboxes_list.append(final_box)
for i in end_bboxes_list:
cv2.rectangle(img,(i[0],i[1]),(i[2],i[3]), color = [0,255,0], thickness = 2)
cv2.imshow("Image",img)
cv2.waitKey(10000)
cv2.destroyAllWindows()
I am trying to read 8 different .png images into a NumPy array on Python 3.5 with OpenCV 2.
import numpy as np
import cv2
import os
Using os module functions, I am able to pick all the .png files from a directory and read them using cv2.imread.
imgTrainGray = list ()
mypath = os.path.dirname('image/directory')
for item in os.listdir(mypath):
if '.png' in item:
image = cv2.imread((os.path.join(mypath, item)),0)
if image is not None:
imgTrainGray.append(image)
itg = np.asarray(imgTrainGray)
cv2.imshow ('compiled',itg)
However, all these images are stored as a list. When I convert the list to a numpy array using
itg = np.asarray(imgTrainGray)
I do get an array of images, which unfortunately I am not able to display using
cv2.imshow ('compiled',itg)
The error I get is: 'mat data type = 17 is not supported' at the function cv2.imshow.
I am aware that if all the images were of uniform shape, I could have used numpy.concatenate. Anyhow, the aim is to combine the 8 different images into one and to extract keypoints and feature descriptors from that image, to use as a query image for detecting an object in a webcam stream.
I am not able to figure out how to solve the issue so that I have a usable composite image built from many individual images.
Or, is there an entirely different way to accomplish what I mean to do?
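A rough sketch of the "combine into one image" idea I have in mind (padding the grayscale images to a common width and stacking them vertically; stack_images_vertically is just a name I made up, untested):

import cv2

def stack_images_vertically(images):
    # Pad each grayscale image on the right to the widest image, then stack
    max_w = max(img.shape[1] for img in images)
    padded = [cv2.copyMakeBorder(img, 0, 0, 0, max_w - img.shape[1],
                                 cv2.BORDER_CONSTANT, value=0)
              for img in images]
    return cv2.vconcat(padded)

# composite = stack_images_vertically(imgTrainGray)
# kpTrain, desTrain = det.detectAndCompute(composite, None)

That said, it might be simpler to run detectAndCompute on each image separately and stack the descriptor arrays with np.vstack, rather than building a mosaic at all.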
Full code for reference is here:
import numpy as np
import cv2
import os
from matplotlib import pyplot as plt
ESC=27
camera = cv2.VideoCapture(0)
cv2.ocl.setUseOpenCL(False)# workaround
det = cv2.xfeatures2d.SURF_create(80)
bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=False)
thresfac = 1.5
imgTrainGray = list ()
mypath = os.path.dirname ('directoy/to/images')
for item in os.listdir(mypath):
if '.png' in item: # this could be more correctly done with os.path.splitext
image = cv2.imread((os.path.join(mypath, item)),0)
if image is not None:
imgTrainGray.append(image)
itg = np.asarray(imgTrainGray)
#itg = np.array(cv2.imread(item) for item in os.listdir(mypath) if '.png' in item )
cv2.imshow ('compiled',itg)
kpTrain, desTrain = det.detectAndCompute (imgTrainGray,None)
while True:
ret, imgCamColor = camera.read()
imgCamGray = cv2.cvtColor(imgCamColor, cv2.COLOR_BGR2GRAY)
kpCam, desCam = det.detectAndCompute (imgCamGray,None)
matches = bf.match(desTrain,desCam)
dist = [m.distance for m in matches]
#print ('distances = ',dist)
thres_dist = (sum(dist) / len(dist)) * thresfac
print('sum of distance = ',sum(dist))
print('length of distance = ',len(dist))
print('threshold distance = ',thres_dist)
good = [m for m in matches if m.distance < thres_dist]
MIN_MATCH_COUNT = 8
if len(good)>MIN_MATCH_COUNT:
src_pts = np.float32([ kpTrain[m.queryIdx].pt for m in good ]).reshape(-1,1,2)
dst_pts = np.float32([ kpCam[m.trainIdx].pt for m in good ]).reshape(-1,1,2)
M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC,5.0)
matchesMask = mask.ravel().tolist()
h,w = imgTrainGray.shape[:2]
#pts = np.float32([ [0,0],[0,h-1],[w-1,h-1],[w-1,0] ]).reshape(-1,1,2)
pts = np.float32([ [0,0],[0,h],[w,h],[w,0] ]).reshape(-1,1,2)
dst = cv2.perspectiveTransform(pts,M)
imgCamGray = cv2.polylines(imgCamColor,[np.int32(dst)],True,(200,150,120),2, cv2.LINE_8)
else:
print ('Not enough matches are found - %d/%d' % (len(good),MIN_MATCH_COUNT))
matchesMask = None
draw_params = dict(matchColor = (0,255,0), # draw matches in green color
singlePointColor = None,
matchesMask = matchesMask, # draw only inliers
flags = 2)
img3 = cv2.drawMatches(imgTrainGray,kpTrain,imgCamColor,kpCam,good,None,**draw_params)
cv2.imshow('matches',img3)
key = cv2.waitKey(20)
if key == ESC:
break
cv2.destroyAllWindows()
camera.release()