I am writing code in a Jupyter notebook, using Python, to recognize the number shown on a device with a 7-segment display (FND).
I used OpenCV and obtained the edges of the image.
import cv2
import matplotlib.pyplot as plt

def detect_edge(image):
    ''' function Detecting Edges '''
    image_with_edges = cv2.Canny(image, 100, 200)

    images = [image, image_with_edges]
    location = [121, 122]
    for loc, img in zip(location, images):
        plt.subplot(loc)
        plt.imshow(img, cmap='gray')

    plt.savefig('edge.png')
    plt.show()

image = cv2.imread('/Users/USER/Desktop/test/test2.png', 0)
detect_edge(image)
This is a screenshot of the sample input and output I got from the code above:
I am not sure how to proceed from here. I want to recognize the number, 51.12 in this case.
Should I crop the FND part that the numbers are on first before I run deep learning?
And how should I proceed from here?
I feel like using a CNN is overkill for a problem like this. Especially since this is a 7-segment display, we should be able to solve it without resorting to that kind of complexity.
You've marked out the corners so I'll assume that you can reliably crop out and un-rotate (make it flat) the display.
We want to grab just the numbers. In this case I first converted to LAB and thresholded on the b-channel.
Then I used opencv's findContours to mark out the perimeters:
After that I cropped out each individual number:
and then I looked for each segment individually and determined the number based on which segments were active (I used a special case for 1 where I checked the ratio of the width and height).
Here's the code I used (two files)
segments.py
import numpy as np

class Segments:
    def __init__(self):
        # create a 7seg model
        self.flags = [];
        self.segments = [];
        h1 = [[0, 1.0],[0, 0.1]]; # 0 # top
        h2 = [[0, 1.0],[0.45, 0.55]]; # 1 # middle
        h3 = [[0, 1.0],[0.9, 1.0]]; # 2 # bottom
        vl1 = [[0, 0.2],[0, 0.5]]; # 3 # upper-left
        vl2 = [[0, 0.2],[0.5, 1.0]]; # 4 # lower-left
        vr1 = [[0.8, 1.0],[0, 0.5]]; # 5 # upper-right
        vr2 = [[0.8, 1.0], [0.5, 1.0]]; # 6 # lower-right
        self.segments.append(h1);
        self.segments.append(h2);
        self.segments.append(h3);
        self.segments.append(vl1);
        self.segments.append(vl2);
        self.segments.append(vr1);
        self.segments.append(vr2);

    # process an image and set flags
    def digest(self, number):
        # reset flags
        self.flags = [];

        # check res to see if it's a one
        h, w = number.shape[:2];
        if w < 0.5 * h:
            self.flags.append(5);
            self.flags.append(6);
            return;

        # check for segments
        for a in range(len(self.segments)):
            seg = self.segments[a];

            # get bounds
            xl, xh = seg[0];
            yl, yh = seg[1];

            # convert to pix coords
            xl = int(xl * w);
            xh = int(xh * w);
            yl = int(yl * h);
            yh = int(yh * h);
            sw = xh - xl;
            sh = yh - yl;

            # check
            count = np.count_nonzero(number[yl:yh, xl:xh] == 255);
            if count / (sh * sw) > 0.5: # 0.5 is a sensitivity measure
                self.flags.append(a);

    # returns the stored number (stored in self.flags)
    def getNum(self):
        # hardcoding outputs
        if self.flags == [0,2,3,4,5,6]:
            return 0;
        if self.flags == [5,6]:
            return 1;
        if self.flags == [0,1,2,4,5]:
            return 2;
        if self.flags == [0,1,2,5,6]:
            return 3;
        if self.flags == [1,3,5,6]:
            return 4;
        if self.flags == [0,1,2,3,6]:
            return 5;
        if self.flags == [0,1,2,3,4,6]:
            return 6;
        if self.flags == [0,5,6]:
            return 7;
        if self.flags == [0,1,2,3,4,5,6]:
            return 8;
        if self.flags == [0,1,2,3,5,6]:
            return 9;

        # ERROR
        return -1;
main.py
import cv2
import numpy as np
from segments import Segments

# load image
img = cv2.imread("seg7.jpg");

# crop
img = img[300:800,100:800,:];

# lab
lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB);
l,a,b = cv2.split(lab);

# show
cv2.imshow("orig", img);

# closing operation
kernel = np.ones((5,5), np.uint8);

# threshold params
low = 165;
high = 200;
iters = 3;

# make copy
copy = b.copy();

# threshold
thresh = cv2.inRange(copy, low, high);

# dilate
for a in range(iters):
    thresh = cv2.dilate(thresh, kernel);

# erode
for a in range(iters):
    thresh = cv2.erode(thresh, kernel);

# show image
cv2.imshow("thresh", thresh);
cv2.imwrite("threshold.jpg", thresh);

# start processing
# note: OpenCV 3.x returns three values here; on OpenCV 4.x use: contours, _ = cv2.findContours(...)
_, contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE);

# draw
for contour in contours:
    cv2.drawContours(img, [contour], 0, (0,255,0), 3);

# get res of each number
bounds = [];
h, w = img.shape[:2];
for contour in contours:
    left = w;
    right = 0;
    top = h;
    bottom = 0;
    for point in contour:
        point = point[0];
        x, y = point;
        if x < left:
            left = x;
        if x > right:
            right = x;
        if y < top:
            top = y;
        if y > bottom:
            bottom = y;
    tl = [left, top];
    br = [right, bottom];
    bounds.append([tl, br]);

# crop out each number
cuts = [];
number = 0;
for bound in bounds:
    tl, br = bound;
    cut_img = thresh[tl[1]:br[1], tl[0]:br[0]];
    cuts.append(cut_img);
    number += 1;
    cv2.imshow(str(number), cut_img);

# font
font = cv2.FONT_HERSHEY_SIMPLEX;

# create a segment model
model = Segments();
index = 0;
for cut in cuts:
    # save image
    cv2.imwrite(str(index) + "_" + str(number) + ".jpg", cut);

    # process
    model.digest(cut);
    number = model.getNum();
    print(number);
    cv2.imshow(str(index), cut);

    # draw and save again
    h, w = cut.shape[:2];
    drawn = np.zeros((h, w, 3), np.uint8);
    drawn[:, :, 0] = cut;
    drawn = cv2.putText(drawn, str(number), (10,30), font, 1, (0,0,255), 2, cv2.LINE_AA);
    cv2.imwrite("drawn" + str(index) + "_" + str(number) + ".jpg", drawn);
    index += 1;
    # cv2.waitKey(0);

# show
cv2.imshow("contours", img);
cv2.imwrite("contours.jpg", img);
cv2.waitKey(0);
I can't guarantee that this always works, but it should be usable given a little tweaking. Remember to un-rotate the image if it isn't flat. The segment model assumes the numbers are mostly upright.
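If the display isn't flat, a minimal perspective-warp sketch for un-rotating it could look like the following; the four corner coordinates and the output size are assumptions for illustration, not values from the code above:

import cv2
import numpy as np

src = cv2.imread("seg7.jpg")
# assumed: the four display corners, ordered top-left, top-right, bottom-right, bottom-left
corners = np.float32([[110, 320], [780, 300], [790, 780], [100, 800]])
out_w, out_h = 700, 500  # assumed output size
dst = np.float32([[0, 0], [out_w, 0], [out_w, out_h], [0, out_h]])
M = cv2.getPerspectiveTransform(corners, dst)
flat = cv2.warpPerspective(src, M, (out_w, out_h))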
If you want to use deep learning, one way to approach this would be to use a convolutional neural network (CNN). Whether you first want to crop the images depends on your application. Do you want to recognize the display from a picture like the one you attached? Then you should not crop the image manually. Furthermore you would need a lot of data to train your own CNN.
An alternative would be to use an off-the-shelf Optical Character Recognition engine such as Tesseract (e.g. via pytesseract). These are already trained and can achieve good results. I have no experience with detecting 7-segment displays though, so it could be that they do not work for them. They have tried OCR with tesseract for 7 segment displays here: ocr + 7 segment display.
Last thing you could try is first detect the display from a large picture and then feed the cropped region that was detected to an OCR engine.
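For reference, a minimal pytesseract sketch of that last idea; the crop file name and the whitelist config are assumptions, and stock Tesseract models are not trained on 7-segment fonts, so results may be poor without a specialised model:

import cv2
import pytesseract

display = cv2.imread("display_crop.png", 0)  # assumed: an already-cropped display region
_, display = cv2.threshold(display, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
text = pytesseract.image_to_string(display, config="--psm 7 -c tessedit_char_whitelist=0123456789.")
print(text)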
Decimal point issue: the dot at the bottom right of each digit seems to hurt the recognition rate, especially the check of the lower-right segment (vr2, flag 6) done with np.count_nonzero in the sample code, when the dot is lit in the thresholded/contoured image.
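One possible workaround (not from the answer above) is to discard the small dot blobs before digesting, since the decimal point is much shorter than the digits; a hedged sketch using the bounds list from main.py:

# assumed: "bounds" as computed in main.py; the decimal point is much shorter than a digit
max_h = max(br[1] - tl[1] for tl, br in bounds)
digit_bounds = [[tl, br] for tl, br in bounds if (br[1] - tl[1]) > 0.5 * max_h]
# then crop and digest digit_bounds instead of bounds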
Related
I have created an algorithm that detects the edges of an extruded collagen casing and draws a centerline between these edges on an image. Casing with a centerline.
Here is my code:
import cv2
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
img = cv2.imread("C:/Users/5.jpg", cv2.IMREAD_GRAYSCALE)
img = cv2.resize(img, (1500, 1200))
#ROI
fromCenter = False
r = cv2.selectROI(img, fromCenter)
imCrop = img[int(r[1]):int(r[1]+r[3]), int(r[0]):int(r[0]+r[2])]
#Operations on an image
_,thresh = cv2.threshold(imCrop,100,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
kernel = np.ones((5,5),np.uint8)
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
blur = cv2.GaussianBlur(opening,(7,7),0)
edges = cv2.Canny(blur, 0,20)
#Edges localization, packing coords into a list
indices = np.where(edges != [0])
coordinates = list(zip(indices[1], indices[0]))
num = len(coordinates)
#Separating into top and bot edge
bot_cor = coordinates[:int(num/2)]
top_cor = coordinates[-int(num/2):]
#Converting to arrays, sorting
a, b = np.array(top_cor), np.array(bot_cor)
a, b = a[a[:,0].argsort()], b[b[:,0].argsort()]
#Edges approximation by a 5th degree polynomial
min_a_x, max_a_x = np.min(a[:,0]), np.max(a[:,0])
new_a_x = np.linspace(min_a_x, max_a_x, imCrop.shape[1])
a_coefs = np.polyfit(a[:,0],a[:,1], 5)
new_a_y = np.polyval(a_coefs, new_a_x)
min_b_x, max_b_x = np.min(b[:,0]), np.max(b[:,0])
new_b_x = np.linspace(min_b_x, max_b_x, imCrop.shape[1])
b_coefs = np.polyfit(b[:,0],b[:,1], 5)
new_b_y = np.polyval(b_coefs, new_b_x)
#Defining a centerline
midx = [np.average([new_a_x[i], new_b_x[i]], axis = 0) for i in range(imCrop.shape[1])]
midy = [np.average([new_a_y[i], new_b_y[i]], axis = 0) for i in range(imCrop.shape[1])]
plt.figure(figsize=(16,8))
plt.title('Cross section')
plt.xlabel('Length of the casing', fontsize=18)
plt.ylabel('Width of the casing', fontsize=18)
plt.plot(new_a_x, new_a_y,c='black')
plt.plot(new_b_x, new_b_y,c='black')
plt.plot(midx, midy, '-', c='blue')
plt.show()
#Converting coords type to a list (plotting purposes)
coords = list(zip(midx, midy))
points = list(np.int_(coords))
mask = np.zeros((imCrop.shape[:2]), np.uint8)
mask = edges
#Plotting
for point in points:
    cv2.circle(mask, tuple(point), 1, (255,255,255), -1)

for point in points:
    cv2.circle(imCrop, tuple(point), 1, (255,255,255), -1)
cv2.imshow('imCrop', imCrop)
cv2.imshow('mask', mask)
cv2.waitKey(0)
cv2.destroyAllWindows()
Now I would like to sum up the intensities of each pixel in a region between top edge and a centerline (same thing for a region between centerline and a bottom edge).
Is there any way to limit the ROI to the region between the detected edges and split it into two regions based on the calculated centerline?
Or is there any way to access the pixels which are contained between the edge and a centerline based on their coordinates?
(It's my very first post here, sorry in advance for all the mistakes)
I wrote some somewhat naive code to get masks for the upper and lower part. My code assumes that the source image will always be like yours: with horizontal stripes.
After applying Canny I get this:
Then I run some loops through the image array to fill the unwanted areas, doing this separately for the upper and lower part to create the masks. The results are:
Then you can use these masks to sum only the elements you're interested in, using cv.sumElems.
import cv2 as cv
#open as grayscale image
src = cv.imread("colagen.png",cv.IMREAD_GRAYSCALE)
# apply canny and find contours
threshold = 100
canny_output = cv.Canny(src, threshold, threshold * 2)
# find mask for upper part
mask1 = canny_output.copy()
x, y = canny_output.shape
area = 0
for j in range(y):
    area = 0
    for i in range(x):
        if area == 0:
            if mask1[i][j] > 0:
                area = 1
                continue
            else:
                mask1[i][j] = 255
        elif area == 1:
            if mask1[i][j] > 0:
                area = 2
            else:
                continue
        else:
            mask1[i][j] = 255
mask1 = cv.bitwise_not(mask1)
# find mask for lower part
mask2 = canny_output.copy()
x, y = canny_output.shape
area = 0
for j in range(y):
    area = 0
    for i in range(x):
        if area == 0:
            if mask2[-i][j] > 0:
                area = 1
                continue
            else:
                mask2[-i][j] = 255
        elif area == 1:
            if mask2[-i][j] > 0:
                area = 2
            else:
                continue
        else:
            mask2[-i][j] = 255
mask2 = cv.bitwise_not(mask2)
# apply masks and calculate sum of elements in upper and lower part
sums = [0,0]
(sums[0],_,_,_) = cv.sumElems(cv.bitwise_and(src,mask1))
(sums[1],_,_,_) = cv.sumElems(cv.bitwise_and(src,mask2))
cv.imshow('src',src)
cv.imshow('canny',canny_output)
cv.imshow('mask1',mask1)
cv.imshow('mask2',mask2)
cv.imshow('masked1',cv.bitwise_and(src,mask1))
cv.imshow('masked2',cv.bitwise_and(src,mask2))
cv.waitKey()
Alternatives...
Probably there exists some function that fills the areas of the Canny result. I tried cv.fillPoly and cv.floodFill, but didn't manage to make them work easily... But maybe someone else can help you with that...
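For reference, a minimal floodFill sketch, reusing canny_output from the code above; the seed point is an assumption (it has to lie inside the region you want filled), and this is only how the call works, not a tested replacement for the loops:

import cv2 as cv
import numpy as np

flood = canny_output.copy()
h, w = flood.shape
flood_mask = np.zeros((h + 2, w + 2), np.uint8)  # floodFill needs a mask 2 px larger than the image
seed = (w // 2, 2)                               # assumed: a point in the area above the top edge
cv.floodFill(flood, flood_mask, seed, 255)       # fills everything reachable from the seed without crossing an edge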
Edit
Found another way to get the masks with a cleaner code. Using numpy np.add.accumulate then np.clip, and then a modulo operation:
# first divide canny_output by 255 to get 0's and 1's, then perform
# an accumulate addition for each column. Thus you'll get +1 for every
# line, "painting" areas with 1, 2, 3...
a = np.add.accumulate(canny_output/255,0)
# clip values: anything greater than 2 becomes 2
a = np.clip(a, 0, 2)
# perform a modulo, to get areas alternating with 0 or 1; then multiply by 255
a = a%2 * 255
# convert to uint8
mask1 = cv.convertScaleAbs(a)
# to get mask2 (the lower mask) flip the array then do the same as above
a = np.add.accumulate(np.flip(canny_output,0)/255,0)
a = np.clip(a, 0, 2)
a = a%2 * 255
mask2 = cv.convertScaleAbs(np.flip(a,0))
This returns almost the same result. The border of the mask is a little bit different...
I am new to image processing, and what I'm trying to do is clear the noise from captchas.
I have different types of captchas:
For the first one what I did is :
Firstly, I converted every pixel that is not black to black. Then, I found a pattern that is noise in the image and deleted it. For the first captcha, it was easy to clear and I found the text with tesseract.
But I am looking for a solution for the second and the third.
How should this go? I mean, what are the possible methods to clear it?
This is how I delete patterns:
def delete(searcher, h2, w2):
    h = h2
    w = w2
    search = searcher
    search = search.convert("RGBA")
    herear = np.asarray(search)
    bigar = np.asarray(imgCropped)

    hereary, herearx = herear.shape[:2]
    bigary, bigarx = bigar.shape[:2]

    stopx = bigarx - herearx + 1
    stopy = bigary - hereary + 1

    pix = imgCropped.load()

    for x in range(0, stopx):
        for y in range(0, stopy):
            x2 = x + herearx
            y2 = y + hereary
            pic = bigar[y:y2, x:x2]
            test = (pic == herear)
            if test.all():
                for q in range(h):
                    for k in range(w):
                        pix[x+k, y+q] = (255, 255, 255, 255)
Sorry for the variable names, I was just testing function.
Thanks..
This is as far as I can get:
You probably know about the medianBlur function, which finds the median value in every kernel and substitutes that value for the kernel's center. We can do something similar, but instead of the median, use the max value and then the min value. With a median blurring too, I got some results. I know they are not perfect, but I hope it gives you some ideas (you can play with the sizes of the input image and the kernels, it may make the results a little better).
I don't have python installed right now, so I share the exact C++ code that I have used:
Mat im1 = imread("E:/1/3.jpg", 0);
Mat im2, im3;
im2 = Mat::zeros(im1.size(), CV_8U);
for (size_t i = 1; i < im1.rows-1; i++)
{
    for (size_t j = 1; j < im1.cols-1; j++)
    {
        double minVal, maxVal = 0;
        minMaxIdx(im1(Rect(j - 1, i - 1, 3, 3)), &minVal, &maxVal);
        im2.at<uchar>(i, j) = maxVal;
    }
}
imshow("(1) max bluring", im2);
medianBlur(im2, im2, 3);
imshow("(2) median bluring", im2);
im2.copyTo(im1);
im2 = Mat::zeros(im1.size(), CV_8U);
for (size_t i = 1; i < im1.rows - 1; i++)
{
    for (size_t j = 1; j < im1.cols - 1; j++)
    {
        double minVal, maxVal = 0;
        minMaxIdx(im1(Rect(j - 1, i - 1, 3, 3)), &minVal, &maxVal);
        im2.at<uchar>(i, j) = minVal;
    }
}
imshow("(3) min bluring", im2);
Mat tmp;
double st = threshold(im2, tmp, 10, 255, THRESH_OTSU);
threshold(im2, im2, st + 14, 255, THRESH_BINARY_INV);
//dilate(im2, im2, Mat::ones(3, 3, CV_8U));
imshow("(4) final", im2);
waitKey(0);
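Since the question is in Python, here is a rough, untested Python sketch of the same max, then median, then min idea, using dilate/erode with a 3x3 kernel as max/min filters; the +14 offset and the file name simply mirror the C++ code above:

import cv2
import numpy as np

im1 = cv2.imread("3.jpg", 0)
kernel = np.ones((3, 3), np.uint8)

im2 = cv2.dilate(im1, kernel)   # (1) max blurring: each pixel becomes the local 3x3 maximum
im2 = cv2.medianBlur(im2, 3)    # (2) median blurring
im2 = cv2.erode(im2, kernel)    # (3) min blurring: each pixel becomes the local 3x3 minimum

st, _ = cv2.threshold(im2, 10, 255, cv2.THRESH_OTSU)                # Otsu gives a base threshold
_, im2 = cv2.threshold(im2, st + 14, 255, cv2.THRESH_BINARY_INV)    # (4) final
cv2.imshow("final", im2)
cv2.waitKey(0)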
By the way, in such cases deep learning methods like YOLO and R-CNN are the best methods. Try them too.
Here is my solution.
Firstly, I got the background pattern (edited by hand in Paint). From:
After that, I created a blank image to fill it with differences between the pattern and image.
from PIL import Image

img = Image.open("x.png").convert("RGBA")
pattern = Image.open("y.png").convert("RGBA")

pixels = img.load()
pixelsPattern = pattern.load()

new = Image.new("RGBA", (150, 50))
pixelNew = new.load()

for i in range(img.size[0]):
    for j in range(img.size[1]):
        if pixels[i, j] != pixelsPattern[i, j]:
            pixelNew[i, j] = pixels[i, j]

new.save("differences.png")
Here are the differences..
and finally, I added blur and cleared the bits which are not black.
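That blur-and-clean step isn't shown; a possible minimal sketch of it with PIL, where the threshold and file names are assumptions:

from PIL import Image, ImageFilter

diff = Image.open("differences.png").convert("L")
diff = diff.filter(ImageFilter.GaussianBlur(1))          # light blur to close small gaps
cleaned = diff.point(lambda p: 0 if p < 100 else 255)    # keep near-black pixels as text, clear the rest
cleaned.save("cleaned.png")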
Result :
With pytesseract the result is 2041; it is wrong for this image, but the general success rate is around 60%.
You can use the OpenCV library for image processing. This OpenCV documentation page could be very useful. Then try to extract your number through the findContours method, like:
import cv2
import numpy as np
image = cv2.imread('C:\\E0snN.png')
cv2.waitKey(0)
# Grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bitwise_not(gray)
# Threshold
ret,thresh = cv2.threshold(gray,150,255,1)
# Get countours
contours,h = cv2.findContours(thresh,1,2)
# Draw
cv2.drawContours(image, contours, -1, (0, 255, 0), 3)
cv2.imshow('Contours', image)
cv2.waitKey(0)
cv2.destroyAllWindows()
After that, there is the following result:
It's far from perfect, but if you try different threshold values, e.g.:
ret,thresh = cv2.threshold(gray,127,255,1)
you can get better results.
Extracting table data from digital PDFs has been simple using camelot and tabula. However, the solution doesn't work with scanned images of the document pages, specifically when the table doesn't have borders and inner grids. I have been trying to generate vertical and horizontal lines using OpenCV. However, since the scanned images have slight rotation angles, it is difficult to proceed with that approach.
How can we utilize OpenCV to generate grids (horizontal and vertical lines) and borders for a scanned document page that contains table data (along with paragraphs of text)? If this is feasible, how do we nullify the rotation angle of the scanned image?
I wrote some code to estimate the horizontal lines from the printed letters in the page. The same could be done for vertical ones, I guess. The code below follows some general assumptions; here are some basic steps in pseudo-code style:
prepare picture for contour detection
do contour detection
we assume most contours are letters
calc mean width of all contours
calc mean area of contours
filter all contours with two conditions:
a) contour (letter) heights < meanHeight * 2
b) contour area > 4/5 meanArea
calc center point of all remaining contours
assume we have line regions (bins)
list all center point which are inside the region
do linear regression of region points
save slope and intercept
calc mean slope and intercept
Here is the full code:
import cv2
import numpy as np
from scipy import stats

def resizeImageByPercentage(img, scalePercent=60):
    width = int(img.shape[1] * scalePercent / 100)
    height = int(img.shape[0] * scalePercent / 100)
    dim = (width, height)
    # resize image
    return cv2.resize(img, dim, interpolation=cv2.INTER_AREA)

def calcAverageContourWithAndHeigh(contourList):
    hs = list()
    ws = list()
    for cnt in contourList:
        (x, y, w, h) = cv2.boundingRect(cnt)
        ws.append(w)
        hs.append(h)
    return np.mean(ws), np.mean(hs)

def calcAverageContourArea(contourList):
    areaList = list()
    for cnt in contourList:
        a = cv2.minAreaRect(cnt)
        areaList.append(a[2])
    return np.mean(areaList)

def calcCentroid(contour):
    houghMoments = cv2.moments(contour)
    # calculate x,y coordinate of centroid
    if houghMoments["m00"] != 0: #case no contour could be calculated
        cX = int(houghMoments["m10"] / houghMoments["m00"])
        cY = int(houghMoments["m01"] / houghMoments["m00"])
    else:
        # set values as what you need in the situation
        cX, cY = -1, -1
    return cX, cY

def getCentroidWhenSizeInRange(contourList, letterSizeWidth, letterSizeHigh, deltaOffset, minLetterArea=10.0):
    centroidList = list()
    for cnt in contourList:
        (x, y, w, h) = cv2.boundingRect(cnt)
        area = cv2.minAreaRect(cnt)
        #calc diff
        diffW = abs(w - letterSizeWidth)
        diffH = abs(h - letterSizeHigh)
        #threshold A: almost smaller than mean letter size +- offset
        #when almost letterSize
        if diffW < deltaOffset and diffH < deltaOffset:
            #threshold B > min area
            if area[2] > minLetterArea:
                cX, cY = calcCentroid(cnt)
                if cX != -1 and cY != -1:
                    centroidList.append((cX, cY))
    return centroidList

DEBUGMODE = True
#read image, do git clone https://github.com/WZBSocialScienceCenter/pdftabextract.git for the example
img = cv2.imread('pdftabextract/examples/catalogue_30s/data/ALA1934_RR-excerpt.pdf-2_1.png')
#get some basic infos
imgHeigh, imgWidth, imgChannelAmount = img.shape

if DEBUGMODE:
    cv2.imwrite("img00original.jpg", resizeImageByPercentage(img, 30))
    cv2.imshow("original", img)

# prepare img
imgGrey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# apply Gaussian filter
imgGaussianBlur = cv2.GaussianBlur(imgGrey, (5, 5), 0)
#make binary img, black or white
_, imgBinThres = cv2.threshold(imgGaussianBlur, 130, 255, cv2.THRESH_BINARY)

## detect contours
contours, _ = cv2.findContours(imgBinThres, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

#we get some letter parameter
averageLetterWidth, averageLetterHigh = calcAverageContourWithAndHeigh(contours)
threshold1AllowedLetterSizeOffset = averageLetterHigh * 2 # double size
averageContourAreaSizeOfMinRect = calcAverageContourArea(contours)
threshHold2MinArea = 4 * averageContourAreaSizeOfMinRect / 5 # 4/5 * mean

print("mean letter Width: ", averageLetterWidth)
print("mean letter High: ", averageLetterHigh)
print("threshold 1 tolerance: ", threshold1AllowedLetterSizeOffset)
print("mean letter area ", averageContourAreaSizeOfMinRect)
print("threshold 2 min letter area ", threshHold2MinArea)

#we get all centroids of letter-sized contours, the others we ignore
centroidList = getCentroidWhenSizeInRange(contours, averageLetterWidth, averageLetterHigh, threshold1AllowedLetterSizeOffset, threshHold2MinArea)

if DEBUGMODE:
    #debug print all centers:
    imgFilteredCenter = img.copy()
    for cX, cY in centroidList:
        #draw in red color as BGR
        cv2.circle(imgFilteredCenter, (cX, cY), 5, (0, 0, 255), -1)
    cv2.imwrite("img01letterCenters.jpg", resizeImageByPercentage(imgFilteredCenter, 30))
    cv2.imshow("letterCenters", imgFilteredCenter)

#we estimate a bin width
amountPixelFreeSpace = averageLetterHigh #TODO get better estimate out of histogram
estimatedBinWidth = round(averageLetterHigh + amountPixelFreeSpace) #TODO round better ?
binCollection = dict() #range(0,imgHeigh,estimatedBinWidth)

#we separate the center points into bins by y coordinate
for i in range(0, imgHeigh, estimatedBinWidth):
    listCenterPointsInBin = list()
    yMin = i
    yMax = i + estimatedBinWidth
    for cX, cY in centroidList:
        if yMin < cY < yMax: #if it fits in the bin
            listCenterPointsInBin.append((cX, cY))
    binCollection[i] = listCenterPointsInBin

#we assume all points are in one line ?
#model = slope (x) + intercept
#model = m (x) + n
mList = list() #slope abs in img
nList = list() #intercept abs in img
nListRelative = list() #intercept relative to bin start
minAmountRegressionElements = 12 #is also alias for letter amount we expect
#we do regression for every point in the bin
for startYOfBin, values in binCollection.items():
    #we reform values
    xValues = [] #TODO use more short transform
    yValues = []
    for x, y in values:
        xValues.append(x)
        yValues.append(y)
    #we assume a min limit of points in a bin
    if len(xValues) >= minAmountRegressionElements:
        slope, intercept, r, p, std_err = stats.linregress(xValues, yValues)
        mList.append(slope)
        nList.append(intercept)
        #we calc the relative intercept
        nRelativeToBinStart = intercept - startYOfBin
        nListRelative.append(nRelativeToBinStart)

if DEBUGMODE:
    #we debug print all lines in one picture
    imgLines = img.copy()
    colorOfLine = (0, 255, 0) #green
    for i in range(0, len(mList)):
        slope = mList[i]
        intercept = nList[i]
        startPoint = (0, int(intercept)) #better round ?
        endPointY = int(slope * imgWidth + intercept)
        if endPointY < 0:
            endPointY = 0
        endPoint = (imgWidth, endPointY)
        cv2.line(imgLines, startPoint, endPoint, colorOfLine, 2)
    cv2.imwrite("img02lines.jpg", resizeImageByPercentage(imgLines, 30))
    cv2.imshow("linesOfLetters ", imgLines)

#we assume in the mean we got it right
meanIntercept = np.mean(nListRelative)
meanSlope = np.mean(mList)
print("meanIntercept :", meanIntercept)
print("meanSlope ", meanSlope)
#TODO calc angle with math.atan(slope) ...

if DEBUGMODE:
    cv2.waitKey(0)
original:
center point of letters:
lines:
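To address the de-rotation part of the question: the mean slope above can be converted to an angle and the page rotated back. A hedged sketch of the TODO left at the end of the code, reusing meanSlope, img, imgWidth and imgHeigh from above (the sign of the angle may need flipping depending on the tilt direction):

import math
import cv2

angleDeg = math.degrees(math.atan(meanSlope))        # tilt of the text lines in degrees
center = (imgWidth / 2, imgHeigh / 2)
M = cv2.getRotationMatrix2D(center, angleDeg, 1.0)
imgDeskewed = cv2.warpAffine(img, M, (imgWidth, imgHeigh))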
I had the same problem some time ago, and this tutorial is the solution to that. It explains using pdftabextract, a Python library by Markus Konrad that leverages OpenCV's Hough transform to detect the lines, and it works even if the scanned document is a bit tilted. The tutorial walks you through parsing a 1920s German newspaper.
I am trying to detect the count of pipes in this picture. For this, I'm using OpenCV and Python-based detection. Based on existing answers to similar questions, I was able to come up with the following steps:
Open the image
Filter it
Apply Edge Detection
Use Contours
Check for the count
The total count of pipes is ~909 when counted manually, give or take 4.
After applying the filter
import cv2
import matplotlib.pyplot as plt
import numpy as np
img = cv2.imread('images/input-rectpipe-1.jpg')
blur_hor = cv2.filter2D(img[:, :, 0], cv2.CV_32F, kernel=np.ones((11,1,1), np.float32)/11.0, borderType=cv2.BORDER_CONSTANT)
blur_vert = cv2.filter2D(img[:, :, 0], cv2.CV_32F, kernel=np.ones((1,11,1), np.float32)/11.0, borderType=cv2.BORDER_CONSTANT)
mask = ((img[:,:,0]>blur_hor*1.2) | (img[:,:,0]>blur_vert*1.2)).astype(np.uint8)*255
I get this masked image
This looks fairly accurate in terms of the number of visible rectangles it shows. However, when I try to take the count and plot the bounding box on top of the picture, it picks a lot of unwanted regions as well. For circles, HoughCircles has a way of defining the max and min radius. Is there something similar for rectangles that can improve accuracy. Also, I'm open to suggestions for alternative approaches to this problem.
ret, thresh = cv2.threshold(mask, 127, 255, 0)
contours, hierarchy = cv2.findContours(thresh, 1, 2)

count = 0
for i in range(len(contours)):
    count = count + 1
    x, y, w, h = cv2.boundingRect(contours[i])
    rect = cv2.minAreaRect(contours[i])
    area = cv2.contourArea(contours[i])
    box = cv2.boxPoints(rect)
    ratio = w / h
    M = cv2.moments(contours[i])
    if M["m00"] == 0.0:
        cX = int(M["m10"] / 1)
        cY = int(M["m01"] / 1)
    if M["m00"] != 0.0:
        cX = int(M["m10"] / M["m00"])
        cY = int(M["m01"] / M["m00"])
    if (area > 50 and area < 220 and hierarchy[0][i][2] < 0 and (ratio > .5 and ratio < 2)):
        #cv2.rectangle(img, (x,y), (x+w,y+h), (0,255,0), 2)
        cv2.circle(img, (cX, cY), 1, (255, 255, 255), -1)
        count = count + 1

print(count)
cv2.imshow("m", mask)
cv2.imshow("f", img)
cv2.waitKey(0)
UPDATE
Based on the second answer I have converted the c++ code to python code and got closer results but still missing out on a few obvious rectangles.
Of course you could filter them by their area. I took your binary image and continued the work as below:
1- Do a loop on all the contours you found from findContours
2- In the loop check if each contour, is an internal contour or not
3- From those which are internal contours, check their area; if the area is in the acceptable range, check the width/height ratio of each contour, and finally, if that is good too, count that contour as a pipe.
I did the above method on your binary image, and found 794 pipes:
(Some boxes are lost though; you should change the parameters of the edge detector to get more separable boxes in the image.)
and here is the code (It's c++ but easily convertible to python):
Mat img__1, img__2, img__ = imread("E:/R.jpg", 0);
threshold(img__, img__1, 128, 255, THRESH_BINARY);
vector<vector<Point>> contours;
vector< Vec4i > hierarchy;
findContours(img__1, contours, hierarchy, RETR_CCOMP, CHAIN_APPROX_NONE);
Mat tmp = Mat::zeros(img__1.size(), CV_8U);
int k = 0;
for (size_t i = 0; i < contours.size(); i++)
{
    double area = contourArea(contours[i]);
    Rect rec = boundingRect(contours[i]);
    float ratio = rec.width / float(rec.height);
    if (area > 50 && area < 220 && hierarchy[i][2] < 0 && (ratio > .5 && ratio < 2)) // hierarchy[i][2] < 0 stands for internal contours
    {
        k++;
        drawContours(tmp, contours, i, Scalar(255, 255, 255), -1);
    }
}
cout << "k= " << k << "\n";
imshow("1", img__1);
imshow("2", tmp);
waitKey(0);
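Since the question uses Python, here is a hedged translation of the loop above; it assumes `mask` is the binary image from the question's code, and note that the hierarchy indexing differs slightly between the C++ and Python APIs:

import cv2

_, thresh = cv2.threshold(mask, 128, 255, cv2.THRESH_BINARY)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)

k = 0
for i, cnt in enumerate(contours):
    area = cv2.contourArea(cnt)
    x, y, w, h = cv2.boundingRect(cnt)
    ratio = w / float(h)
    # hierarchy[0][i][2] < 0 stands for internal contours (no child contour)
    if 50 < area < 220 and hierarchy[0][i][2] < 0 and 0.5 < ratio < 2:
        k += 1
print("k =", k)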
There are many methods to solve this problem, but I doubt there will be a single method without some kind of ad hoc measures. Here is another attempt at this problem.
Instead of using the edge information, I suggest an LBP (local binary pattern)-like filter that compares the surrounding pixels with the center value. If a certain percentage of the surrounding pixels is larger than the center pixel, the center pixel will be labeled 255. If the condition is not met, the center pixel will be labeled 0.
This intensity-based method runs on the assumption that the pipe center is always darker than the pipe edges. Since it is comparing intensity, it should work well as long as some contrast remains.
Through this process, you will obtain an image with a binary blob for every pipe, plus some noise. You will have to remove the noise with some pre-known conditions such as size, shape, fill ratio, color, etc. The conditions can be found in the given code.
import cv2
import matplotlib.pyplot as plt
import numpy as np

# Morphological function sets
def morph_operation(matinput):
    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))

    morph = cv2.erode(matinput, kernel, iterations=1)
    morph = cv2.dilate(morph, kernel, iterations=2)
    morph = cv2.erode(matinput, kernel, iterations=1)
    morph = cv2.dilate(morph, kernel, iterations=1)

    return morph

# Analyze blobs
def analyze_blob(matblobs, display_frame):
    _, blobs, _ = cv2.findContours(matblobs, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    valid_blobs = []

    for i, blob in enumerate(blobs):
        rot_rect = cv2.minAreaRect(blob)
        b_rect = cv2.boundingRect(blob)

        (cx, cy), (sw, sh), angle = rot_rect
        rx, ry, rw, rh = b_rect
        box = cv2.boxPoints(rot_rect)
        box = np.int0(box)

        # Draw the segmented Box region
        frame = cv2.drawContours(display_frame, [box], 0, (0, 0, 255), 1)

        on_count = cv2.contourArea(blob)
        total_count = sw * sh
        if total_count <= 0:
            continue

        if sh > sw:
            temp = sw
            sw = sh
            sh = temp

        # minimum area
        if sw * sh < 20:
            continue
        # maximum area
        if sw * sh > 100:
            continue

        # ratio of box
        rect_ratio = sw / sh
        if rect_ratio <= 1 or rect_ratio >= 3.5:
            continue

        # ratio of fill
        fill_ratio = on_count / total_count
        if fill_ratio < 0.4:
            continue

        # remove blob that is too bright
        if display_frame[int(cy), int(cx), 0] > 75:
            continue

        valid_blobs.append(blob)

    if valid_blobs:
        print("Number of Blobs : ", len(valid_blobs))
    cv2.imshow("display_frame_in", display_frame)

    return valid_blobs

def lbp_like_method(matinput, radius, stren, off):
    height, width = np.shape(matinput)

    roi_radius = radius
    peri = roi_radius * 8
    matdst = np.zeros_like(matinput)
    for y in range(height):
        y_ = y - roi_radius
        _y = y + roi_radius
        if y_ < 0 or _y >= height:
            continue

        for x in range(width):
            x_ = x - roi_radius
            _x = x + roi_radius
            if x_ < 0 or _x >= width:
                continue

            r1 = matinput[y_:_y, x_]
            r2 = matinput[y_:_y, _x]
            r3 = matinput[y_, x_:_x]
            r4 = matinput[_y, x_:_x]

            center = matinput[y, x]
            valid_cell_1 = len(r1[r1 > center + off])
            valid_cell_2 = len(r2[r2 > center + off])
            valid_cell_3 = len(r3[r3 > center + off])
            valid_cell_4 = len(r4[r4 > center + off])

            total = valid_cell_1 + valid_cell_2 + valid_cell_3 + valid_cell_4

            if total > stren * peri:
                matdst[y, x] = 255

    return matdst

def main_process():
    img = cv2.imread('image.jpg')
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Blured to remove noise
    blurred = cv2.GaussianBlur(gray, (3, 3), -1)

    # Parameter tuning
    winsize = 5
    peri = 0.6
    off = 4

    matlbp = lbp_like_method(gray, winsize, peri, off)
    cv2.imshow("matlbp", matlbp)
    cv2.waitKey(1)

    matmorph = morph_operation(matlbp)
    cv2.imshow("matmorph", matmorph)
    cv2.waitKey(1)

    display_color = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
    valid_blobs = analyze_blob(matmorph, display_color)

    for b in range(len(valid_blobs)):
        cv2.drawContours(display_color, valid_blobs, b, (0, 255, 255), -1)

    cv2.imshow("display_color", display_color)
    cv2.waitKey(0)

if __name__ == '__main__':
    main_process()
Result from the LBP-like processing
After cleaning with morphological process
Final result, with the red boxes showing all the blob candidates and the yellow segments showing the blobs that pass all the conditions we set. There are some false alarms below and on top of the pipe bundle, but they can be omitted with some boundary conditions.
Total pipe found : 943
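The boundary conditions aren't shown; one hedged way to drop the false alarms above and below the bundle is to reject blobs whose centroid falls outside an assumed y-range of the bundle, continuing from analyze_blob's output:

# assumed: y_top and y_bottom bracket the pipe bundle in the image
y_top, y_bottom = 40, 560

bounded_blobs = []
for blob in valid_blobs:
    (cx, cy), _, _ = cv2.minAreaRect(blob)
    if y_top < cy < y_bottom:
        bounded_blobs.append(blob)
print("Number of Blobs after boundary filter :", len(bounded_blobs))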
Sample Images
The image can be noisier at times, when more objects intervene from the background. Right now I am using various techniques in the RGB colour space to detect the lines, but it fails when there is a change in colour due to intervening obstacles from the background. I am using OpenCV and Python.
I have read that HSV is better for colour detection and have used it, but haven't been successful yet.
I am not able to find a generic solution to this problem. Any hints or clues in this direction would be of great help.
STILL IN PROGRESS
First of all, an RGB image consists of 3 grayscale images. Since you need the green colour, you will deal with only one channel, the green one. To do so, you can split the image with b, g, r = cv2.split(your_image). You will get an output like this if you show the green channel:
After that you should threshold the image using your desired way. I prefer Otsu's thresholding in this case. The output after thresholding is:
It's obvious that the thresholded image is extremely noisy, so performing erosion will reduce the noise a little bit. The noise-reduced image will be similar to the following:
I tried using closing instead of dilation, but closing preserves some unwanted noise. So I separately performed erosion followed by dilation. After dilation the output is:
Note that you can do the morphological operations your own way. You can use opening instead of what I did. The results are subjective from one person to another.
Now you can try one of these two methods:
1. Blob Detection.
2. HoughLine Transform.
TODO
Try out these two methods and choose the best.
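No code is given above, so here is a minimal sketch of the described pipeline (split off the green channel, Otsu-threshold it, then erode and dilate); the file name and kernel size are assumptions:

import cv2
import numpy as np

img = cv2.imread("lines.png")            # assumed file name
b, g, r = cv2.split(img)                 # keep only the green channel

_, binary = cv2.threshold(g, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

kernel = np.ones((3, 3), np.uint8)       # assumed kernel size
eroded = cv2.erode(binary, kernel, iterations=1)
dilated = cv2.dilate(eroded, kernel, iterations=1)

cv2.imshow("green channel", g)
cv2.imshow("cleaned", dilated)
cv2.waitKey(0)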
You should use the fact that you know you are trying to detect a line, by using the line Hough transform.
http://docs.opencv.org/2.4/doc/tutorials/imgproc/imgtrans/hough_lines/hough_lines.html
When the obstacle also looks like a line, use the fact that you know approximately the orientation of the green lines.
If you don't know the orientation of the line, use the fact that there are several green lines with the same orientation and only one line that is the obstacle.
Here is code for what I meant:
import cv2
import numpy as np

# Params
minLineCount = 300 # min number of points along a line with a specific orientation
minArea = 100

# Read img
img = cv2.imread('i.png')
greenChannel = img[:,:,1]

# Do noise reduction
iFilter = cv2.bilateralFilter(greenChannel, 5, 5, 5)

# Threshold data
#ret,iThresh = cv2.threshold(iFilter,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
iThresh = (greenChannel > 4).astype(np.uint8) * 255

# Remove small areas
se1 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
iThreshRemove = cv2.morphologyEx(iThresh, cv2.MORPH_OPEN, se1)

# Find edges
iEdge = cv2.Canny(iThreshRemove, 50, 100)

# Hough line transform
lines = cv2.HoughLines(iEdge, 1, 3.14/180, 75)

# Find the theta with the most lines
thetaCounter = dict()
for line in lines:
    theta = line[0, 1]
    if theta in thetaCounter:
        thetaCounter[theta] += 1
    else:
        thetaCounter[theta] = 1

maxThetaCount = 0
maxTheta = 0
for theta in thetaCounter:
    if thetaCounter[theta] > maxThetaCount:
        maxThetaCount = thetaCounter[theta]
        maxTheta = theta

# Find the rhos that correspond to max theta
rhoValues = []
for line in lines:
    rho = line[0, 0]
    theta = line[0, 1]
    if theta == maxTheta:
        rhoValues.append(rho)

# Go over all the lines with the specific orientation and count the number of pixels on that line
# if the number is bigger than minLineCount draw the pixels in finalImage
lineImage = np.zeros_like(iThresh, np.uint8)
for rho in range(int(min(rhoValues)), int(max(rhoValues)), 1):
    a = np.cos(maxTheta)
    b = np.sin(maxTheta)
    x0 = round(a * rho)
    y0 = round(b * rho)
    lineCount = 0
    pixelList = []
    for jump in range(-1000, 1000, 1):
        x1 = int(x0 + jump * (-b))
        y1 = int(y0 + jump * (a))
        if x1 < 0 or y1 < 0 or x1 >= lineImage.shape[1] or y1 >= lineImage.shape[0]:
            continue
        if iThreshRemove[y1, x1] == int(255):
            pixelList.append((y1, x1))
            lineCount += 1
    if lineCount > minLineCount:
        for y, x in pixelList:
            lineImage[y, x] = int(255)

# Remove small areas
## Opencv 2.4
im2, contours, hierarchy = cv2.findContours(lineImage, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)
finalImage = np.zeros_like(lineImage)
finalShapes = []
for contour in contours:
    if contour.size > minArea:
        finalShapes.append(contour)
cv2.fillPoly(finalImage, finalShapes, 255)

## Opencv 3.0
# output = cv2.connectedComponentsWithStats(lineImage, 8, cv2.CV_32S)
#
# finalImage = np.zeros_like(output[1])
# finalImage = output[1]
# stat = output[2]
# for label in range(output[0]):
#     if label == 0:
#         continue
#     cc = stat[label,:]
#     if cc[cv2.CC_STAT_AREA] < minArea:
#         finalImage[finalImage == label] = 0
#     else:
#         finalImage[finalImage == label] = 255

# Show image
#cv2.imwrite('finalImage2.jpg',finalImage)
cv2.imshow('a', finalImage.astype(np.uint8))
cv2.waitKey(0)
and the result for the images: